Index: head/sys/conf/NOTES
===================================================================
--- head/sys/conf/NOTES	(revision 297747)
+++ head/sys/conf/NOTES	(revision 297748)
@@ -1,3036 +1,3044 @@
 # $FreeBSD$
 #
 # NOTES -- Lines that can be cut/pasted into kernel and hints configs.
 #
 # Lines that begin with 'device', 'options', 'machine', 'ident', 'maxusers',
 # 'makeoptions', 'hints', etc. go into the kernel configuration that you
 # run config(8) with.
 #
 # Lines that begin with 'hint.' are NOT for config(8), they go into your
 # hints file.  See /boot/device.hints and/or the 'hints' config(8) directive.
 #
 # Please use ``make LINT'' to create an old-style LINT file if you want to
 # do kernel test-builds.
 #
 # This file contains machine independent kernel configuration notes.  For
 # machine dependent notes, look in /sys/<arch>/conf/NOTES.
 #
 
 #
 # NOTES conventions and style guide:
 #
 # Large block comments should begin and end with a line containing only a
 # comment character.
 #
 # To describe a particular object, a block comment (if it exists) should
 # come first.  Next should come device, options, and hints lines in that
 # order.  All device and option lines must be described by a comment that
 # doesn't just expand the device or option name.  Use only a concise
 # comment on the same line if possible.  Very detailed descriptions of
 # devices and subsystems belong in man pages.
 #
 # A space followed by a tab separates 'options' from an option name.  Two
 # spaces followed by a tab separate 'device' from a device name.  Comments
 # after an option or device should use one space after the comment character.
 # To comment out a negative option that disables code and thus should not be
 # enabled for LINT builds, precede 'options' with "#!".
 #
 
 #
 # This is the ``identification'' of the kernel.  Usually this should
 # be the same as the name of your kernel.
 #
 ident		LINT
 
 #
 # The `maxusers' parameter controls the static sizing of a number of
 # internal system tables by a formula defined in subr_param.c.
 # Omitting this parameter or setting it to 0 will cause the system to
 # auto-size based on physical memory.
 #
 maxusers	10
 
 # To statically compile in device wiring instead of /boot/device.hints
 #hints		"LINT.hints"		# Default places to look for devices.
 
 # Use the following to compile in values accessible to the kernel
 # through getenv() (or kenv(1) in userland). The format of the file
 # is 'variable=value', see kenv(1)
 #
 #env		"LINT.env"
 
 #
 # The `makeoptions' parameter allows variables to be passed to the
 # generated Makefile in the build area.
 #
 # CONF_CFLAGS gives some extra compiler flags that are added to ${CFLAGS}
 # after most other flags.  Here we use it to inhibit use of non-optimal
 # gcc built-in functions (e.g., memcmp).
 #
 # DEBUG happens to be magic.
 # The following is equivalent to 'config -g KERNELNAME' and creates
 # 'kernel.debug' compiled with -g debugging as well as a normal
 # 'kernel'.  Use 'make install.debug' to install the debug kernel
 # but that isn't normally necessary as the debug symbols are not loaded
 # by the kernel and are not useful there anyway.
 #
 # KERNEL can be overridden so that you can change the default name of your
 # kernel.
 #
 # MODULES_OVERRIDE can be used to limit modules built to a specific list.
 #
 makeoptions	CONF_CFLAGS=-fno-builtin  #Don't allow use of memcmp, etc.
 #makeoptions	DEBUG=-g		#Build kernel with gdb(1) debug symbols
 #makeoptions	KERNEL=foo		#Build kernel "foo" and install "/foo"
 # Only build ext2fs module plus those parts of the sound system I need.
 #makeoptions	MODULES_OVERRIDE="ext2fs sound/sound sound/driver/maestro3"
 makeoptions	DESTDIR=/tmp
 
 #
 # FreeBSD processes are subject to certain limits to their consumption
 # of system resources.  See getrlimit(2) for more details.  Each
 # resource limit has two values, a "soft" limit and a "hard" limit.
 # The soft limits can be modified during normal system operation, but
 # the hard limits are set at boot time.  Their default values are
 # in sys/<arch>/include/vmparam.h.  There are two ways to change them:
 # 
 # 1.  Set the values at kernel build time.  The options below are one
 #     way to allow that limit to grow to 1GB.  They can be increased
 #     further by changing the parameters:
 #	
 # 2.  In /boot/loader.conf, set the tunables kern.maxswzone,
 #     kern.maxbcache, kern.maxtsiz, kern.dfldsiz, kern.maxdsiz,
 #     kern.dflssiz, kern.maxssiz and kern.sgrowsiz.
 #
 # The options in /boot/loader.conf override anything in the kernel
 # configuration file.  See the function init_param1 in
 # sys/kern/subr_param.c for more details.
 #
 
 options 	MAXDSIZ=(1024UL*1024*1024)
 options 	MAXSSIZ=(128UL*1024*1024)
 options 	DFLDSIZ=(1024UL*1024*1024)
 
 #
 # BLKDEV_IOSIZE sets the default block size used in user block
 # device I/O.  Note that this value will be overridden by the label
 # when specifying a block device from a label with a non-0
 # partition blocksize.  The default is PAGE_SIZE.
 #
 options 	BLKDEV_IOSIZE=8192
 
 #
 # MAXPHYS and DFLTPHYS
 #
 # These are the maximal and safe 'raw' I/O block device access sizes.
 # Reads and writes will be split into MAXPHYS chunks for known good
 # devices and DFLTPHYS for the rest. Some applications have better
 # performance with larger raw I/O access sizes. Note that certain VM
 # parameters are derived from these values and making them too large
 # can make an unbootable kernel.
 #
 # The defaults are 64K and 128K respectively.
 options 	DFLTPHYS=(64*1024)
 options 	MAXPHYS=(128*1024)
 
 
 # This allows you to actually store this configuration file into
 # the kernel binary itself. See config(8) for more details.
 #
 options 	INCLUDE_CONFIG_FILE     # Include this file in kernel
 
 #
 # Compile-time defaults for various boot parameters
 #
 options 	BOOTVERBOSE=1
 options 	BOOTHOWTO=RB_MULTIPLE
 
 options 	GEOM_AES		# Don't use, use GEOM_BDE
 options 	GEOM_BDE		# Disk encryption.
 options 	GEOM_BSD		# BSD disklabels
 options 	GEOM_CACHE		# Disk cache.
 options 	GEOM_CONCAT		# Disk concatenation.
 options 	GEOM_ELI		# Disk encryption.
 options 	GEOM_FOX		# Redundant path mitigation
 options 	GEOM_GATE		# Userland services.
 options 	GEOM_JOURNAL		# Journaling.
 options 	GEOM_LABEL		# Providers labelization.
 options 	GEOM_LINUX_LVM		# Linux LVM2 volumes
 options 	GEOM_MAP		# Map based partitioning
 options 	GEOM_MBR		# DOS/MBR partitioning
 options 	GEOM_MIRROR		# Disk mirroring.
 options 	GEOM_MULTIPATH		# Disk multipath
 options 	GEOM_NOP		# Test class.
 options 	GEOM_PART_APM		# Apple partitioning
 options 	GEOM_PART_BSD		# BSD disklabel
 options 	GEOM_PART_BSD64		# BSD disklabel64
 options 	GEOM_PART_EBR		# Extended Boot Records
 options 	GEOM_PART_EBR_COMPAT	# Backward compatible partition names
 options 	GEOM_PART_GPT		# GPT partitioning
 options 	GEOM_PART_LDM		# Logical Disk Manager
 options 	GEOM_PART_MBR		# MBR partitioning
 options 	GEOM_PART_PC98		# PC-9800 disk partitioning
 options 	GEOM_PART_VTOC8		# SMI VTOC8 disk label
 options 	GEOM_PC98		# NEC PC9800 partitioning
 options 	GEOM_RAID		# Soft RAID functionality.
 options 	GEOM_RAID3		# RAID3 functionality.
 options 	GEOM_SHSEC		# Shared secret.
 options 	GEOM_STRIPE		# Disk striping.
 options 	GEOM_SUNLABEL		# Sun/Solaris partitioning
 options 	GEOM_UZIP		# Read-only compressed disks
 options 	GEOM_VINUM		# Vinum logical volume manager
 options 	GEOM_VIRSTOR		# Virtual storage.
 options 	GEOM_VOL		# Volume names from UFS superblock
 options 	GEOM_ZERO		# Performance testing helper.
 
 #
 # The root device and filesystem type can be compiled in;
 # this provides a fallback option if the root device cannot
 # be correctly guessed by the bootstrap code, or an override if
 # the RB_DFLTROOT flag (-r) is specified when booting the kernel.
 #
 options 	ROOTDEVNAME=\"ufs:da0s2e\"
 
 
 #####################################################################
 # Scheduler options:
 #
 # Specifying one of SCHED_4BSD or SCHED_ULE is mandatory.  These options
 # select which scheduler is compiled in.
 #
 # SCHED_4BSD is the historical, proven, BSD scheduler.  It has a global run
 # queue and no CPU affinity which makes it suboptimal for SMP.  It has very
 # good interactivity and priority selection.
 #
 # SCHED_ULE provides significant performance advantages over 4BSD on many
 # workloads on SMP machines.  It supports cpu-affinity, per-cpu runqueues
 # and scheduler locks.  It also has a stronger notion of interactivity 
 # which leads to better responsiveness even on uniprocessor machines.  This
 # is the default scheduler.
 #
 # SCHED_STATS is a debugging option which keeps some stats in the sysctl
 # tree at 'kern.sched.stats' and is useful for debugging scheduling decisions.
 #
 options 	SCHED_4BSD
 options 	SCHED_STATS
 #options 	SCHED_ULE
 
 #####################################################################
 # SMP OPTIONS:
 #
 # SMP enables building of a Symmetric MultiProcessor Kernel.
 
 # Mandatory:
 options 	SMP			# Symmetric MultiProcessor Kernel
 
 # MAXCPU defines the maximum number of CPUs that can boot in the system.
 # A default value should be already present, for every architecture.
 options 	MAXCPU=32
 
 # MAXMEMDOM defines the maximum number of memory domains that can boot in the
 # system.  A default value should already be defined by every architecture.
-options 	MAXMEMDOM=1
+options 	MAXMEMDOM=2
+
+# VM_NUMA_ALLOC enables use of memory domain-aware allocation in the VM
+# system.
+options 	VM_NUMA_ALLOC
+
+# DEVICE_NUMA enables reporting of domain affinity of I/O devices via
+# bus_get_domain(), etc.
+options 	DEVICE_NUMA
 
 # ADAPTIVE_MUTEXES changes the behavior of blocking mutexes to spin
 # if the thread that currently owns the mutex is executing on another
 # CPU.  This behavior is enabled by default, so this option can be used
 # to disable it.
 options 	NO_ADAPTIVE_MUTEXES
 
 # ADAPTIVE_RWLOCKS changes the behavior of reader/writer locks to spin
 # if the thread that currently owns the rwlock is executing on another
 # CPU.  This behavior is enabled by default, so this option can be used
 # to disable it.
 options 	NO_ADAPTIVE_RWLOCKS
 
 # ADAPTIVE_SX changes the behavior of sx locks to spin if the thread that
 # currently owns the sx lock is executing on another CPU.
 # This behavior is enabled by default, so this option can be used to
 # disable it.
 options 	NO_ADAPTIVE_SX
 
 # MUTEX_NOINLINE forces mutex operations to call functions to perform each
 # operation rather than inlining the simple cases.  This can be used to
 # shrink the size of the kernel text segment.  Note that this behavior is
 # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
 # and WITNESS options.
 options 	MUTEX_NOINLINE
 
 # RWLOCK_NOINLINE forces rwlock operations to call functions to perform each
 # operation rather than inlining the simple cases.  This can be used to
 # shrink the size of the kernel text segment.  Note that this behavior is
 # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
 # and WITNESS options.
 options 	RWLOCK_NOINLINE
 
 # SX_NOINLINE forces sx lock operations to call functions to perform each
 # operation rather than inlining the simple cases.  This can be used to
 # shrink the size of the kernel text segment.  Note that this behavior is
 # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
 # and WITNESS options.
 options 	SX_NOINLINE
 
 # SMP Debugging Options:
 #
 # CALLOUT_PROFILING enables rudimentary profiling of the callwheel data
 #	  structure used as backend in callout(9).
 # PREEMPTION allows the threads that are in the kernel to be preempted by
 #	  higher priority [interrupt] threads.  It helps with interactivity
 #	  and allows interrupt threads to run sooner rather than waiting.
 #	  WARNING! Only tested on amd64 and i386.
 # FULL_PREEMPTION instructs the kernel to preempt non-realtime kernel
 #	  threads.  Its sole use is to expose race conditions and other
 #	  bugs during development.  Enabling this option will reduce
 #	  performance and increase the frequency of kernel panics by
 #	  design.  If you aren't sure that you need it then you don't.
 #	  Relies on the PREEMPTION option.  DON'T TURN THIS ON.
 # MUTEX_DEBUG enables various extra assertions in the mutex code.
 # SLEEPQUEUE_PROFILING enables rudimentary profiling of the hash table
 #	  used to hold active sleep queues as well as sleep wait message
 #	  frequency.
 # TURNSTILE_PROFILING enables rudimentary profiling of the hash table
 #	  used to hold active lock queues.
 # UMTX_PROFILING enables rudimentary profiling of the hash table used 
 	  to hold active lock queues.
 # WITNESS enables the witness code which detects deadlocks and cycles
 #         during locking operations.
 # WITNESS_KDB causes the witness code to drop into the kernel debugger if
 #	  a lock hierarchy violation occurs or if locks are held when going to
 #	  sleep.
 # WITNESS_SKIPSPIN disables the witness checks on spin mutexes.
 options 	PREEMPTION
 options 	FULL_PREEMPTION
 options 	MUTEX_DEBUG
 options 	WITNESS
 options 	WITNESS_KDB
 options 	WITNESS_SKIPSPIN
 
 # LOCK_PROFILING - Profiling locks.  See LOCK_PROFILING(9) for details.
 options 	LOCK_PROFILING
 # Set the number of buffers and the hash size.  The hash size MUST be larger
 # than the number of buffers.  Hash size should be prime.
 options 	MPROF_BUFFERS="1536"
 options 	MPROF_HASH_SIZE="1543"
 
 # Profiling for the callout(9) backend.
 options 	CALLOUT_PROFILING
 
 # Profiling for internal hash tables.
 options 	SLEEPQUEUE_PROFILING
 options 	TURNSTILE_PROFILING
 options 	UMTX_PROFILING
 
 
 #####################################################################
 # COMPATIBILITY OPTIONS
 
 #
 # Implement system calls compatible with 4.3BSD and older versions of
 # FreeBSD.  You probably do NOT want to remove this as much current code
 # still relies on the 4.3 emulation.  Note that some architectures that
 # are supported by FreeBSD do not include support for certain important
 # aspects of this compatibility option, namely those related to the
 # signal delivery mechanism.
 #
 options 	COMPAT_43
 
 # Old tty interface.
 options 	COMPAT_43TTY
 
 # Note that as a general rule, COMPAT_FREEBSD<n> depends on
 # COMPAT_FREEBSD<n+1>, COMPAT_FREEBSD<n+2>, etc.
 
 # Enable FreeBSD4 compatibility syscalls
 options 	COMPAT_FREEBSD4
 
 # Enable FreeBSD5 compatibility syscalls
 options 	COMPAT_FREEBSD5
 
 # Enable FreeBSD6 compatibility syscalls
 options 	COMPAT_FREEBSD6
 
 # Enable FreeBSD7 compatibility syscalls
 options 	COMPAT_FREEBSD7
 
 # Enable FreeBSD9 compatibility syscalls
 options 	COMPAT_FREEBSD9
 
 # Enable FreeBSD10 compatibility syscalls
 options 	COMPAT_FREEBSD10
 
 # Enable Linux Kernel Programming Interface
 options 	COMPAT_LINUXKPI
 
 #
 # These three options provide support for System V Interface
 # Definition-style interprocess communication, in the form of shared
 # memory, semaphores, and message queues, respectively.
 #
 options 	SYSVSHM
 options 	SYSVSEM
 options 	SYSVMSG
 
 
 #####################################################################
 # DEBUGGING OPTIONS
 
 #
 # Compile with kernel debugger related code.
 #
 options 	KDB
 
 #
 # Print a stack trace of the current thread on the console for a panic.
 #
 options 	KDB_TRACE
 
 #
 # Don't enter the debugger for a panic. Intended for unattended operation
 # where you may want to enter the debugger from the console, but still want
 # the machine to recover from a panic.
 #
 options 	KDB_UNATTENDED
 
 #
 # Enable the ddb debugger backend.
 #
 options 	DDB
 
 #
 # Print the numerical value of symbols in addition to the symbolic
 # representation.
 #
 options 	DDB_NUMSYM
 
 #
 # Enable the remote gdb debugger backend.
 #
 options 	GDB
 
 #
 # SYSCTL_DEBUG enables a 'sysctl' debug tree that can be used to dump the
 # contents of the registered sysctl nodes on the console.  It is disabled by
 # default because it generates excessively verbose console output that can
 # interfere with serial console operation.
 #
 options 	SYSCTL_DEBUG
 
 #
 # Enable textdump by default, this disables kernel core dumps.
 #
 options		TEXTDUMP_PREFERRED
 
 #
 # Enable extra debug messages while performing textdumps.
 #
 options		TEXTDUMP_VERBOSE
 
 #
 # NO_SYSCTL_DESCR omits the sysctl node descriptions to save space in the
 # resulting kernel.
 options		NO_SYSCTL_DESCR
 
 #
 # MALLOC_DEBUG_MAXZONES enables multiple uma zones for malloc(9)
 # allocations that are smaller than a page.  The purpose is to isolate
 # different malloc types into hash classes, so that any buffer
 # overruns or use-after-free will usually only affect memory from
 # malloc types in that hash class.  This is purely a debugging tool;
 # by varying the hash function and tracking which hash class was
 # corrupted, the intersection of the hash classes from each instance
 # will point to a single malloc type that is being misused.  At this
 # point inspection or memguard(9) can be used to catch the offending
 # code.
 #
 options 	MALLOC_DEBUG_MAXZONES=8
 
 #
 # DEBUG_MEMGUARD builds and enables memguard(9), a replacement allocator
 # for the kernel used to detect modify-after-free scenarios.  See the
 # memguard(9) man page for more information on usage.
 #
 options 	DEBUG_MEMGUARD
 
 #
 # DEBUG_REDZONE enables buffer underflows and buffer overflows detection for
 # malloc(9).
 #
 options 	DEBUG_REDZONE
 
 #
 # EARLY_PRINTF enables support for calling a special printf (eprintf)
 # very early in the kernel (before cn_init() has been called).  This
 # should only be used for debugging purposes early in boot.  Normally,
 # it is not defined.  It is commented out here because this feature
 # isn't generally available. And the required eputc() isn't defined.
 #
 #options	EARLY_PRINTF
 
 #
 # KTRACE enables the system-call tracing facility ktrace(2).  To be more
 # SMP-friendly, KTRACE uses a worker thread to process most trace events
 # asynchronously to the thread generating the event.  This requires a
 # pre-allocated store of objects representing trace events.  The
 # KTRACE_REQUEST_POOL option specifies the initial size of this store.
 # The size of the pool can be adjusted both at boottime and runtime via
 # the kern.ktrace_request_pool tunable and sysctl.
 #
 options 	KTRACE			#kernel tracing
 options 	KTRACE_REQUEST_POOL=101
 
 #
 # KTR is a kernel tracing facility imported from BSD/OS.  It is
 # enabled with the KTR option.  KTR_ENTRIES defines the number of
 # entries in the circular trace buffer; it may be an arbitrary number.
 # KTR_BOOT_ENTRIES defines the number of entries during the early boot,
 # before malloc(9) is functional.
 # KTR_COMPILE defines the mask of events to compile into the kernel as
 # defined by the KTR_* constants in <sys/ktr.h>.  KTR_MASK defines the
 # initial value of the ktr_mask variable which determines at runtime
 # what events to trace.  KTR_CPUMASK determines which CPU's log
 # events, with bit X corresponding to CPU X.  The layout of the string
 # passed as KTR_CPUMASK must match a series of bitmasks each of them
 # separated by the "," character (ie:
 # KTR_CPUMASK=0xAF,0xFFFFFFFFFFFFFFFF).  KTR_VERBOSE enables
 # dumping of KTR events to the console by default.  This functionality
 # can be toggled via the debug.ktr_verbose sysctl and defaults to off
 # if KTR_VERBOSE is not defined.  See ktr(4) and ktrdump(8) for details.
 #
 options 	KTR
 options 	KTR_BOOT_ENTRIES=1024
 options 	KTR_ENTRIES=(128*1024)
 options 	KTR_COMPILE=(KTR_ALL)
 options 	KTR_MASK=KTR_INTR
 options 	KTR_CPUMASK=0x3
 options 	KTR_VERBOSE
 
 #
 # ALQ(9) is a facility for the asynchronous queuing of records from the kernel
 # to a vnode, and is employed by services such as ktr(4) to produce trace
 # files based on a kernel event stream.  Records are written asynchronously
 # in a worker thread.
 #
 options 	ALQ
 options 	KTR_ALQ
 
 #
 # The INVARIANTS option is used in a number of source files to enable
 # extra sanity checking of internal structures.  This support is not
 # enabled by default because of the extra time it would take to check
 # for these conditions, which can only occur as a result of
 # programming errors.
 #
 options 	INVARIANTS
 
 #
 # The INVARIANT_SUPPORT option makes us compile in support for
 # verifying some of the internal structures.  It is a prerequisite for
 # 'INVARIANTS', as enabling 'INVARIANTS' will make these functions be
 # called.  The intent is that you can set 'INVARIANTS' for single
 # source files (by changing the source file or specifying it on the
 # command line) if you have 'INVARIANT_SUPPORT' enabled.  Also, if you
 # wish to build a kernel module with 'INVARIANTS', then adding
 # 'INVARIANT_SUPPORT' to your kernel will provide all the necessary
 # infrastructure without the added overhead.
 #
 options 	INVARIANT_SUPPORT
 
 #
 # The DIAGNOSTIC option is used to enable extra debugging information
 # from some parts of the kernel.  As this makes everything more noisy,
 # it is disabled by default.
 #
 options 	DIAGNOSTIC
 
 #
 # REGRESSION causes optional kernel interfaces necessary only for regression
 # testing to be enabled.  These interfaces may constitute security risks
 # when enabled, as they permit processes to easily modify aspects of the
 # run-time environment to reproduce unlikely or unusual (possibly normally
 # impossible) scenarios.
 #
 options 	REGRESSION
 
 #
 # This option lets some drivers co-exist that can't co-exist in a running
 # system.  This is used to be able to compile all kernel code in one go for
 # quality assurance purposes (like this file, which the option takes it name
 # from.)
 #
 options 	COMPILING_LINT
 
 #
 # STACK enables the stack(9) facility, allowing the capture of kernel stack
 # for the purpose of procinfo(1), etc.  stack(9) will also be compiled in
 # automatically if DDB(4) is compiled into the kernel.
 #
 options 	STACK
 
 
 #####################################################################
 # PERFORMANCE MONITORING OPTIONS
 
 #
 # The hwpmc driver that allows the use of in-CPU performance monitoring
 # counters for performance monitoring.  The base kernel needs to be configured
 # with the 'options' line, while the hwpmc device can be either compiled
 # in or loaded as a loadable kernel module.
 #
 # Additional configuration options may be required on specific architectures,
 # please see hwpmc(4).
 
 device		hwpmc			# Driver (also a loadable module)
 options 	HWPMC_DEBUG
 options 	HWPMC_HOOKS		# Other necessary kernel hooks
 
 
 #####################################################################
 # NETWORKING OPTIONS
 
 #
 # Protocol families
 #
 options 	INET			#Internet communications protocols
 options 	INET6			#IPv6 communications protocols
 
 options 	ROUTETABLES=2		# allocated fibs up to 65536. default is 1.
 					# but that would be a bad idea as they are large.
 
 options 	TCP_OFFLOAD		# TCP offload support.
 
 # In order to enable IPSEC you MUST also add device crypto to 
 # your kernel configuration
 options 	IPSEC			#IP security (requires device crypto)
 #options 	IPSEC_DEBUG		#debug for IP security
 #
 # #DEPRECATED#
 # Set IPSEC_FILTERTUNNEL to change the default of the sysctl to force packets
 # coming through a tunnel to be processed by any configured packet filtering
 # twice. The default is that packets coming out of a tunnel are _not_ processed;
 # they are assumed trusted.
 #
 # IPSEC history is preserved for such packets, and can be filtered
 # using ipfw(8)'s 'ipsec' keyword, when this option is enabled.
 #
 #options 	IPSEC_FILTERTUNNEL	#filter ipsec packets from a tunnel
 #
 # Set IPSEC_NAT_T to enable NAT-Traversal support.  This enables
 # optional UDP encapsulation of ESP packets.
 #
 options		IPSEC_NAT_T		#NAT-T support, UDP encap of ESP
 
 #
 # SMB/CIFS requester
 # NETSMB enables support for SMB protocol, it requires LIBMCHAIN and LIBICONV
 # options.
 options 	NETSMB			#SMB/CIFS requester
 
 # mchain library. It can be either loaded as KLD or compiled into kernel
 options 	LIBMCHAIN
 
 # libalias library, performing NAT
 options 	LIBALIAS
 
 # flowtable cache
 options 	FLOWTABLE
 
 #
 # SCTP is a NEW transport protocol defined by
 # RFC2960 updated by RFC3309 and RFC3758.. and
 # soon to have a new base RFC and many many more
 # extensions. This release supports all the extensions
 # including many drafts (most about to become RFC's).
 # It is the reference implementation of SCTP
 # and is quite well tested.
 #
 # Note YOU MUST have both INET and INET6 defined.
 # You don't have to enable V6, but SCTP is 
 # dual stacked and so far we have not torn apart
 # the V6 and V4.. since an association can span
 # both a V6 and V4 address at the SAME time :-)
 #
 options 	SCTP
 # There are bunches of options:
 # this one turns on all sorts of
 # nastily printing that you can
 # do. It's all controlled by a
 # bit mask (settable by socket opt and
 # by sysctl). Including will not cause
 # logging until you set the bits.. but it
 # can be quite verbose.. so without this
 # option we don't do any of the tests for
 # bits and prints.. which makes the code run
 # faster.. if you are not debugging don't use.
 options 	SCTP_DEBUG
 #
 # This option turns off the CRC32c checksum. Basically,
 # you will not be able to talk to anyone else who
 # has not done this. Its more for experimentation to
 # see how much CPU the CRC32c really takes. Most new
 # cards for TCP support checksum offload.. so this 
 # option gives you a "view" into what SCTP would be
 # like with such an offload (which only exists in
 # high in iSCSI boards so far). With the new
 # splitting 8's algorithm its not as bad as it used
 # to be.. but it does speed things up try only
 # for in a captured lab environment :-)
 options 	SCTP_WITH_NO_CSUM
 #
 
 #
 # All that options after that turn on specific types of
 # logging. You can monitor CWND growth, flight size
 # and all sorts of things. Go look at the code and
 # see. I have used this to produce interesting 
 # charts and graphs as well :->
 # 
 # I have not yet committed the tools to get and print
 # the logs, I will do that eventually .. before then
 # if you want them send me an email rrs@freebsd.org
 # You basically must have ktr(4) enabled for these
 # and you then set the sysctl to turn on/off various
 # logging bits. Use ktrdump(8) to pull the log and run
 # it through a display program.. and graphs and other
 # things too.
 #
 options 	SCTP_LOCK_LOGGING
 options 	SCTP_MBUF_LOGGING
 options 	SCTP_MBCNT_LOGGING
 options 	SCTP_PACKET_LOGGING
 options 	SCTP_LTRACE_CHUNKS
 options 	SCTP_LTRACE_ERRORS
 
 
 # altq(9). Enable the base part of the hooks with the ALTQ option.
 # Individual disciplines must be built into the base system and can not be
 # loaded as modules at this point. ALTQ requires a stable TSC so if yours is
 # broken or changes with CPU throttling then you must also have the ALTQ_NOPCC
 # option.
 options 	ALTQ
 options 	ALTQ_CBQ	# Class Based Queueing
 options 	ALTQ_RED	# Random Early Detection
 options 	ALTQ_RIO	# RED In/Out
 options 	ALTQ_HFSC	# Hierarchical Packet Scheduler
 options		ALTQ_FAIRQ	# Fair Packet Scheduler
 options 	ALTQ_CDNR	# Traffic conditioner
 options 	ALTQ_PRIQ	# Priority Queueing
 options 	ALTQ_NOPCC	# Required if the TSC is unusable
 options 	ALTQ_DEBUG
 
 # netgraph(4). Enable the base netgraph code with the NETGRAPH option.
 # Individual node types can be enabled with the corresponding option
 # listed below; however, this is not strictly necessary as netgraph
 # will automatically load the corresponding KLD module if the node type
 # is not already compiled into the kernel. Each type below has a
 # corresponding man page, e.g., ng_async(8).
 options 	NETGRAPH		# netgraph(4) system
 options 	NETGRAPH_DEBUG		# enable extra debugging, this
 					# affects netgraph(4) and nodes
 # Node types
 options 	NETGRAPH_ASYNC
 options 	NETGRAPH_ATMLLC
 options 	NETGRAPH_ATM_ATMPIF
 options 	NETGRAPH_BLUETOOTH		# ng_bluetooth(4)
 options 	NETGRAPH_BLUETOOTH_BT3C		# ng_bt3c(4)
 options 	NETGRAPH_BLUETOOTH_HCI		# ng_hci(4)
 options 	NETGRAPH_BLUETOOTH_L2CAP	# ng_l2cap(4)
 options 	NETGRAPH_BLUETOOTH_SOCKET	# ng_btsocket(4)
 options 	NETGRAPH_BLUETOOTH_UBT		# ng_ubt(4)
 options 	NETGRAPH_BLUETOOTH_UBTBCMFW	# ubtbcmfw(4)
 options 	NETGRAPH_BPF
 options 	NETGRAPH_BRIDGE
 options 	NETGRAPH_CAR
 options 	NETGRAPH_CISCO
 options 	NETGRAPH_DEFLATE
 options 	NETGRAPH_DEVICE
 options 	NETGRAPH_ECHO
 options 	NETGRAPH_EIFACE
 options 	NETGRAPH_ETHER
 options 	NETGRAPH_FRAME_RELAY
 options 	NETGRAPH_GIF
 options 	NETGRAPH_GIF_DEMUX
 options 	NETGRAPH_HOLE
 options 	NETGRAPH_IFACE
 options 	NETGRAPH_IP_INPUT
 options 	NETGRAPH_IPFW
 options 	NETGRAPH_KSOCKET
 options 	NETGRAPH_L2TP
 options 	NETGRAPH_LMI
 # MPPC compression requires proprietary files (not included)
 #options 	NETGRAPH_MPPC_COMPRESSION
 options 	NETGRAPH_MPPC_ENCRYPTION
 options 	NETGRAPH_NETFLOW
 options 	NETGRAPH_NAT
 options 	NETGRAPH_ONE2MANY
 options 	NETGRAPH_PATCH
 options 	NETGRAPH_PIPE
 options 	NETGRAPH_PPP
 options 	NETGRAPH_PPPOE
 options 	NETGRAPH_PPTPGRE
 options 	NETGRAPH_PRED1
 options 	NETGRAPH_RFC1490
 options 	NETGRAPH_SOCKET
 options 	NETGRAPH_SPLIT
 options 	NETGRAPH_SPPP
 options 	NETGRAPH_TAG
 options 	NETGRAPH_TCPMSS
 options 	NETGRAPH_TEE
 options 	NETGRAPH_UI
 options 	NETGRAPH_VJC
 options 	NETGRAPH_VLAN
 
 # NgATM - Netgraph ATM
 options 	NGATM_ATM
 options 	NGATM_ATMBASE
 options 	NGATM_SSCOP
 options 	NGATM_SSCFU
 options 	NGATM_UNI
 options 	NGATM_CCATM
 
 device		mn	# Munich32x/Falc54 Nx64kbit/sec cards.
 
 # Network stack virtualization.
 #options	VIMAGE
 #options	VNET_DEBUG	# debug for VIMAGE
 
 #
 # Network interfaces:
 #  The `loop' device is MANDATORY when networking is enabled.
 device		loop
 
 #  The `ether' device provides generic code to handle
 #  Ethernets; it is MANDATORY when an Ethernet device driver is
 #  configured or token-ring is enabled.
 device		ether
 
 #  The `vlan' device implements the VLAN tagging of Ethernet frames
 #  according to IEEE 802.1Q.
 device		vlan
 
 # The `vxlan' device implements the VXLAN encapsulation of Ethernet
 # frames in UDP packets according to RFC7348.
 device		vxlan
 
 #  The `wlan' device provides generic code to support 802.11
 #  drivers, including host AP mode; it is MANDATORY for the wi,
 #  and ath drivers and will eventually be required by all 802.11 drivers.
 device		wlan
 options 	IEEE80211_DEBUG		#enable debugging msgs
 options 	IEEE80211_AMPDU_AGE	#age frames in AMPDU reorder q's
 options 	IEEE80211_SUPPORT_MESH	#enable 802.11s D3.0 support
 options 	IEEE80211_SUPPORT_TDMA	#enable TDMA support
 
 #  The `wlan_wep', `wlan_tkip', and `wlan_ccmp' devices provide
 #  support for WEP, TKIP, and AES-CCMP crypto protocols optionally
 #  used with 802.11 devices that depend on the `wlan' module.
 device		wlan_wep
 device		wlan_ccmp
 device		wlan_tkip
 
 #  The `wlan_xauth' device provides support for external (i.e. user-mode)
 #  authenticators for use with 802.11 drivers that use the `wlan'
 #  module and support 802.1x and/or WPA security protocols.
 device		wlan_xauth
 
 #  The `wlan_acl' device provides a MAC-based access control mechanism
 #  for use with 802.11 drivers operating in ap mode and using the
 #  `wlan' module.
 #  The 'wlan_amrr' device provides AMRR transmit rate control algorithm
 device		wlan_acl
 device		wlan_amrr
 
 # Generic TokenRing
 device		token
 
 #  The `fddi' device provides generic code to support FDDI.
 device		fddi
 
 #  The `arcnet' device provides generic code to support Arcnet.
 device		arcnet
 
 #  The `sppp' device serves a similar role for certain types
 #  of synchronous PPP links (like `cx', `ar').
 device		sppp
 
 #  The `bpf' device enables the Berkeley Packet Filter.  Be
 #  aware of the legal and administrative consequences of enabling this
 #  option.  DHCP requires bpf.
 device		bpf
 
 #  The `netmap' device implements memory-mapped access to network
 #  devices from userspace, enabling wire-speed packet capture and
 #  generation even at 10Gbit/s. Requires support in the device
 #  driver. Supported drivers are ixgbe, e1000, re.
 device		netmap
 
 #  The `disc' device implements a minimal network interface,
 #  which throws away all packets sent and never receives any.  It is
 #  included for testing and benchmarking purposes.
 device		disc
 
 # The `epair' device implements a virtual back-to-back connected Ethernet
 # like interface pair.
 device		epair
 
 #  The `edsc' device implements a minimal Ethernet interface,
 #  which discards all packets sent and receives none.
 device		edsc
 
 #  The `tap' device is a pty-like virtual Ethernet interface
 device		tap
 
 #  The `tun' device implements (user-)ppp and nos-tun(8)
 device		tun
 
 #  The `gif' device implements IPv6 over IP4 tunneling,
 #  IPv4 over IPv6 tunneling, IPv4 over IPv4 tunneling and
 #  IPv6 over IPv6 tunneling.
 #  The `gre' device implements GRE (Generic Routing Encapsulation) tunneling,
 #  as specified in the RFC 2784 and RFC 2890.
 #  The `me' device implements Minimal Encapsulation within IPv4 as
 #  specified in the RFC 2004.
 #  The XBONEHACK option allows the same pair of addresses to be configured on
 #  multiple gif interfaces.
 device		gif
 device		gre
 device		me
 options 	XBONEHACK
 
 #  The `stf' device implements 6to4 encapsulation.
 device		stf
 
 # The pf packet filter consists of three devices:
 #  The `pf' device provides /dev/pf and the firewall code itself.
 #  The `pflog' device provides the pflog0 interface which logs packets.
 #  The `pfsync' device provides the pfsync0 interface used for
 #   synchronization of firewall state tables (over the net).
 device		pf
 device		pflog
 device		pfsync
 
 # Bridge interface.
 device		if_bridge
 
 # Common Address Redundancy Protocol. See carp(4) for more details.
 device		carp
 
 # IPsec interface.
 device		enc
 
 # Link aggregation interface.
 device		lagg
 
 #
 # Internet family options:
 #
 # MROUTING enables the kernel multicast packet forwarder, which works
 # with mrouted and XORP.
 #
 # IPFIREWALL enables support for IP firewall construction, in
 # conjunction with the `ipfw' program.  IPFIREWALL_VERBOSE sends
 # logged packets to the system logger.  IPFIREWALL_VERBOSE_LIMIT
 # limits the number of times a matching entry can be logged.
 #
 # WARNING:  IPFIREWALL defaults to a policy of "deny ip from any to any"
 # and if you do not add other rules during startup to allow access,
 # YOU WILL LOCK YOURSELF OUT.  It is suggested that you set firewall_type=open
 # in /etc/rc.conf when first enabling this feature, then refining the
 # firewall rules in /etc/rc.firewall after you've tested that the new kernel
 # feature works properly.
 #
 # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to
 # allow everything.  Use with care, if a cracker can crash your
 # firewall machine, they can get to your protected machines.  However,
 # if you are using it as an as-needed filter for specific problems as
 # they arise, then this may be for you.  Changing the default to 'allow'
 # means that you won't get stuck if the kernel and /sbin/ipfw binary get
 # out of sync.
 #
 # IPDIVERT enables the divert IP sockets, used by ``ipfw divert''.  It
 # depends on IPFIREWALL if compiled into the kernel.
 #
 # IPFIREWALL_NAT adds support for in kernel nat in ipfw, and it requires
 # LIBALIAS.
 #
 # IPSTEALTH enables code to support stealth forwarding (i.e., forwarding
 # packets without touching the TTL).  This can be useful to hide firewalls
 # from traceroute and similar tools.
 #
 # PF_DEFAULT_TO_DROP causes the default pf(4) rule to deny everything.
 #
 # TCPDEBUG enables code which keeps traces of the TCP state machine
 # for sockets with the SO_DEBUG option set, which can then be examined
 # using the trpt(8) utility.
 #
 # TCPPCAP enables code which keeps the last n packets sent and received
 # on a TCP socket.
 #
 # RADIX_MPATH provides support for equal-cost multi-path routing.
 #
 options 	MROUTING		# Multicast routing
 options 	IPFIREWALL		#firewall
 options 	IPFIREWALL_VERBOSE	#enable logging to syslogd(8)
 options 	IPFIREWALL_VERBOSE_LIMIT=100	#limit verbosity
 options 	IPFIREWALL_DEFAULT_TO_ACCEPT	#allow everything by default
 options 	IPFIREWALL_NAT		#ipfw kernel nat support
 options 	IPDIVERT		#divert sockets
 options 	IPFILTER		#ipfilter support
 options 	IPFILTER_LOG		#ipfilter logging
 options 	IPFILTER_LOOKUP		#ipfilter pools
 options 	IPFILTER_DEFAULT_BLOCK	#block all packets by default
 options 	IPSTEALTH		#support for stealth forwarding
 options 	PF_DEFAULT_TO_DROP	#drop everything by default
 options 	TCPDEBUG
 options 	TCPPCAP
 options 	RADIX_MPATH
 
 # The MBUF_STRESS_TEST option enables options which create
 # various random failures / extreme cases related to mbuf
 # functions.  See mbuf(9) for a list of available test cases.
 # MBUF_PROFILING enables code to profile the mbuf chains
 # exiting the system (via participating interfaces) and
 # return a logarithmic histogram of monitored parameters
 # (e.g. packet size, wasted space, number of mbufs in chain).
 options 	MBUF_STRESS_TEST
 options 	MBUF_PROFILING
 
 # Statically link in accept filters
 options 	ACCEPT_FILTER_DATA
 options 	ACCEPT_FILTER_DNS
 options 	ACCEPT_FILTER_HTTP
 
 # TCP_SIGNATURE adds support for RFC 2385 (TCP-MD5) digests. These are
 # carried in TCP option 19. This option is commonly used to protect
 # TCP sessions (e.g. BGP) where IPSEC is not available nor desirable.
 # This is enabled on a per-socket basis using the TCP_MD5SIG socket option.
 # This requires the use of 'device crypto' and 'options IPSEC'.
 options 	TCP_SIGNATURE		#include support for RFC 2385
 
 # DUMMYNET enables the "dummynet" bandwidth limiter.  You need IPFIREWALL
 # as well.  See dummynet(4) and ipfw(8) for more info.  When you run
 # DUMMYNET it is advisable to also have at least "options HZ=1000" to achieve
 # a smooth scheduling of the traffic.
 options 	DUMMYNET
 
 #####################################################################
 # FILESYSTEM OPTIONS
 
 #
 # Only the root filesystem needs to be statically compiled or preloaded
 # as module; everything else will be automatically loaded at mount
 # time.  Some people still prefer to statically compile other
 # filesystems as well.
 #
 # NB: The UNION filesystem was known to be buggy in the past.  It is now
 # being actively maintained, although there are still some issues being
 # resolved.
 #
 
 # One of these is mandatory:
 options 	FFS			#Fast filesystem
 options 	NFSCL			#Network File System client
 
 # The rest are optional:
 options 	AUTOFS			#Automounter filesystem
 options 	CD9660			#ISO 9660 filesystem
 options 	FDESCFS			#File descriptor filesystem
 options 	FUSE			#FUSE support module
 options 	MSDOSFS			#MS DOS File System (FAT, FAT32)
 options 	NFSLOCKD		#Network Lock Manager
 options 	NFSD			#Network Filesystem Server
 options 	KGSSAPI			#Kernel GSSAPI implementation
 
 options 	NULLFS			#NULL filesystem
 options 	PROCFS			#Process filesystem (requires PSEUDOFS)
 options 	PSEUDOFS		#Pseudo-filesystem framework
 options 	PSEUDOFS_TRACE		#Debugging support for PSEUDOFS
 options 	SMBFS			#SMB/CIFS filesystem
 options 	TMPFS			#Efficient memory filesystem
 options 	UDF			#Universal Disk Format
 options 	UNIONFS			#Union filesystem
 # The xFS_ROOT options REQUIRE the associated ``options xFS''
 options 	NFS_ROOT		#NFS usable as root device
 
 # Soft updates is a technique for improving filesystem speed and
 # making abrupt shutdown less risky.
 #
 options 	SOFTUPDATES
 
 # Extended attributes allow additional data to be associated with files,
 # and is used for ACLs, Capabilities, and MAC labels.
 # See src/sys/ufs/ufs/README.extattr for more information.
 options 	UFS_EXTATTR
 options 	UFS_EXTATTR_AUTOSTART
 
 # Access Control List support for UFS filesystems.  The current ACL
 # implementation requires extended attribute support, UFS_EXTATTR,
 # for the underlying filesystem.
 # See src/sys/ufs/ufs/README.acls for more information.
 options 	UFS_ACL
 
 # Directory hashing improves the speed of operations on very large
 # directories at the expense of some memory.
 options 	UFS_DIRHASH
 
 # Gjournal-based UFS journaling support.
 options 	UFS_GJOURNAL
 
 # Make space in the kernel for a root filesystem on a md device.
 # Define to the number of kilobytes to reserve for the filesystem.
 # This is now optional.
 # If not defined, the root filesystem passed in as the MFS_IMAGE makeoption
 # will be automatically embedded in the kernel during linking. Its exact size
 # will be consumed within the kernel.
 # If defined, the old way of embedding the filesystem in the kernel will be
 # used. That is to say MD_ROOT_SIZE KB will be allocated in the kernel and
 # later, the filesystem image passed in as the MFS_IMAGE makeoption will be
 # dd'd into the reserved space if it fits.
 options 	MD_ROOT_SIZE=10
 
 # Make the md device a potential root device, either with preloaded
 # images of type mfs_root or md_root.
 options 	MD_ROOT
 
 # Disk quotas are supported when this option is enabled.
 options 	QUOTA			#enable disk quotas
 
 # If you are running a machine just as a fileserver for PC and MAC
 # users, using SAMBA, you may consider setting this option
 # and keeping all those users' directories on a filesystem that is
 # mounted with the suiddir option. This gives new files the same
 # ownership as the directory (similar to group). It's a security hole
 # if you let these users run programs, so confine it to file-servers
 # (but it'll save you lots of headaches in those cases). Root owned
 # directories are exempt and X bits are cleared. The suid bit must be
 # set on the directory as well; see chmod(1). PC owners can't see/set
 # ownerships so they keep getting their toes trodden on. This saves
 # you all the support calls as the filesystem it's used on will act as
 # they expect: "It's my dir so it must be my file".
 #
 options 	SUIDDIR
 
 # NFS options:
 options 	NFS_MINATTRTIMO=3	# VREG attrib cache timeout in sec
 options 	NFS_MAXATTRTIMO=60
 options 	NFS_MINDIRATTRTIMO=30	# VDIR attrib cache timeout in sec
 options 	NFS_MAXDIRATTRTIMO=60
 options 	NFS_DEBUG		# Enable NFS Debugging
 
 #
 # Add support for the EXT2FS filesystem of Linux fame.  Be a bit
 # careful with this - the ext2fs code has a tendency to lag behind
 # changes and not be exercised very much, so mounting read/write could
 # be dangerous (and even mounting read only could result in panics.)
 #
 options 	EXT2FS
 
 #
 # Add support for the ReiserFS filesystem (used in Linux). Currently,
 # this is limited to read-only access.
 #
 options 	REISERFS
 
 # Cryptographically secure random number generator; /dev/random
 device		random
 
 # The system memory devices; /dev/mem, /dev/kmem
 device		mem
 
 # The kernel symbol table device; /dev/ksyms
 device		ksyms
 
 # Optional character code conversion support with LIBICONV.
 # Each option requires their base file system and LIBICONV.
 options 	CD9660_ICONV
 options 	MSDOSFS_ICONV
 options 	UDF_ICONV
 
 
 #####################################################################
 # POSIX P1003.1B
 
 # Real time extensions added in the 1993 POSIX
 # _KPOSIX_PRIORITY_SCHEDULING: Build in _POSIX_PRIORITY_SCHEDULING
 
 options 	_KPOSIX_PRIORITY_SCHEDULING
 # p1003_1b_semaphores are very experimental,
 # user should be ready to assist in debugging if problems arise.
 options 	P1003_1B_SEMAPHORES
 
 # POSIX message queue
 options 	P1003_1B_MQUEUE
 
 #####################################################################
 # SECURITY POLICY PARAMETERS
 
 # Support for BSM audit
 options 	AUDIT
 
 # Support for Mandatory Access Control (MAC):
 options 	MAC
 options 	MAC_BIBA
 options 	MAC_BSDEXTENDED
 options 	MAC_IFOFF
 options 	MAC_LOMAC
 options 	MAC_MLS
 options 	MAC_NONE
 options 	MAC_PARTITION
 options 	MAC_PORTACL
 options 	MAC_SEEOTHERUIDS
 options 	MAC_STUB
 options 	MAC_TEST
 
 # Support for Capsicum
 options 	CAPABILITIES	# fine-grained rights on file descriptors
 options 	CAPABILITY_MODE	# sandboxes with no global namespace access
 
 
 #####################################################################
 # CLOCK OPTIONS
 
 # The granularity of operation is controlled by the kernel option HZ whose
 # default value (1000 on most architectures) means a granularity of 1ms
 # (1s/HZ).  Historically, the default was 100, but finer granularity is
 # required for DUMMYNET and other systems on modern hardware.  There are
 # reasonable arguments that HZ should, in fact, be 100 still; consider,
 # that reducing the granularity too much might cause excessive overhead in
 # clock interrupt processing, potentially causing ticks to be missed and thus
 # actually reducing the accuracy of operation.
 
 options 	HZ=100
 
 # Enable support for the kernel PLL to use an external PPS signal,
 # under supervision of [x]ntpd(8)
 # More info in ntpd documentation: http://www.eecis.udel.edu/~ntp
 
 options 	PPS_SYNC
 
 # Enable support for generic feed-forward clocks in the kernel.
 # The feed-forward clock support is an alternative to the feedback oriented
 # ntpd/system clock approach, and is to be used with a feed-forward
 # synchronization algorithm such as the RADclock:
 # More info here: http://www.synclab.org/radclock
 
 options 	FFCLOCK
 
 
 #####################################################################
 # SCSI DEVICES
 
 # SCSI DEVICE CONFIGURATION
 
 # The SCSI subsystem consists of the `base' SCSI code, a number of
 # high-level SCSI device `type' drivers, and the low-level host-adapter
 # device drivers.  The host adapters are listed in the ISA and PCI
 # device configuration sections below.
 #
 # It is possible to wire down your SCSI devices so that a given bus,
 # target, and LUN always come on line as the same device unit.  In
 # earlier versions the unit numbers were assigned in the order that
 # the devices were probed on the SCSI bus.  This means that if you
 # removed a disk drive, you may have had to rewrite your /etc/fstab
 # file, and also that you had to be careful when adding a new disk
 # as it may have been probed earlier and moved your device configuration
 # around.  (See also option GEOM_VOL for a different solution to this
 # problem.)
 
 # This old behavior is maintained as the default behavior.  The unit
 # assignment begins with the first non-wired down unit for a device
 # type.  For example, if you wire a disk as "da3" then the first
 # non-wired disk will be assigned da4.
 
 # The syntax for wiring down devices is:
 
 hint.scbus.0.at="ahc0"
 hint.scbus.1.at="ahc1"
 hint.scbus.1.bus="0"
 hint.scbus.3.at="ahc2"
 hint.scbus.3.bus="0"
 hint.scbus.2.at="ahc2"
 hint.scbus.2.bus="1"
 hint.da.0.at="scbus0"
 hint.da.0.target="0"
 hint.da.0.unit="0"
 hint.da.1.at="scbus3"
 hint.da.1.target="1"
 hint.da.2.at="scbus2"
 hint.da.2.target="3"
 hint.sa.1.at="scbus1"
 hint.sa.1.target="6"
 
 # "units" (SCSI logical unit number) that are not specified are
 # treated as if specified as LUN 0.
 
 # All SCSI devices allocate as many units as are required.
 
 # The ch driver drives SCSI Media Changer ("jukebox") devices.
 #
 # The da driver drives SCSI Direct Access ("disk") and Optical Media
 # ("WORM") devices.
 #
 # The sa driver drives SCSI Sequential Access ("tape") devices.
 #
 # The cd driver drives SCSI Read Only Direct Access ("cd") devices.
 #
 # The ses driver drives SCSI Environment Services ("ses") and
 # SAF-TE ("SCSI Accessible Fault-Tolerant Enclosure") devices.
 #
 # The pt driver drives SCSI Processor devices.
 #
 # The sg driver provides a passthrough API that is compatible with the
 # Linux SG driver.  It will work in conjunction with the COMPAT_LINUX
 # option to run linux SG apps.  It can also stand on its own and provide
 # source level API compatibility for porting apps to FreeBSD.
 #
 # Target Mode support is provided here but also requires that a SIM
 # (SCSI Host Adapter Driver) provide support as well.
 #
 # The targ driver provides target mode support as a Processor type device.
 # It exists to give the minimal context necessary to respond to Inquiry
 # commands. There is a sample user application that shows how the rest
 # of the command support might be done in /usr/share/examples/scsi_target.
 #
 # The targbh driver provides target mode support and exists to respond
 # to incoming commands that do not otherwise have a logical unit assigned
 # to them.
 #
 # The pass driver provides a passthrough API to access the CAM subsystem.
 
 device		scbus		#base SCSI code
 device		ch		#SCSI media changers
 device		da		#SCSI direct access devices (aka disks)
 device		sa		#SCSI tapes
 device		cd		#SCSI CD-ROMs
 device		ses		#Enclosure Services (SES and SAF-TE)
 device		pt		#SCSI processor
 device		targ		#SCSI Target Mode Code
 device		targbh		#SCSI Target Mode Blackhole Device
 device		pass		#CAM passthrough driver
 device		sg		#Linux SCSI passthrough
 device		ctl		#CAM Target Layer
 
 # CAM OPTIONS:
 # debugging options:
 # CAMDEBUG		Compile in all possible debugging.
 # CAM_DEBUG_COMPILE	Debug levels to compile in.
 # CAM_DEBUG_FLAGS	Debug levels to enable on boot.
 # CAM_DEBUG_BUS		Limit debugging to the given bus.
 # CAM_DEBUG_TARGET	Limit debugging to the given target.
 # CAM_DEBUG_LUN		Limit debugging to the given lun.
 # CAM_DEBUG_DELAY	Delay in us after printing each debug line.
 #
 # CAM_MAX_HIGHPOWER: Maximum number of concurrent high power (start unit) cmds
 # SCSI_NO_SENSE_STRINGS: When defined disables sense descriptions
 # SCSI_NO_OP_STRINGS: When defined disables opcode descriptions
 # SCSI_DELAY: The number of MILLISECONDS to freeze the SIM (scsi adapter)
 #             queue after a bus reset, and the number of milliseconds to
 #             freeze the device queue after a bus device reset.  This
 #             can be changed at boot and runtime with the
 #             kern.cam.scsi_delay tunable/sysctl.
 options 	CAMDEBUG
 options 	CAM_DEBUG_COMPILE=-1
 options 	CAM_DEBUG_FLAGS=(CAM_DEBUG_INFO|CAM_DEBUG_PROBE|CAM_DEBUG_PERIPH)
 options 	CAM_DEBUG_BUS=-1
 options 	CAM_DEBUG_TARGET=-1
 options 	CAM_DEBUG_LUN=-1
 options 	CAM_DEBUG_DELAY=1
 options 	CAM_MAX_HIGHPOWER=4
 options 	SCSI_NO_SENSE_STRINGS
 options 	SCSI_NO_OP_STRINGS
 options 	SCSI_DELAY=5000	# Be pessimistic about Joe SCSI device
 
 # Options for the CAM CDROM driver:
 # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN
 # CHANGER_MAX_BUSY_SECONDS: Maximum time quantum per changer LUN, only
 #                           enforced if there is I/O waiting for another LUN
 # The compiled in defaults for these variables are 2 and 10 seconds,
 # respectively.
 #
 # These can also be changed on the fly with the following sysctl variables:
 # kern.cam.cd.changer.min_busy_seconds
 # kern.cam.cd.changer.max_busy_seconds
 #
 options 	CHANGER_MIN_BUSY_SECONDS=2
 options 	CHANGER_MAX_BUSY_SECONDS=10
 
 # Options for the CAM sequential access driver:
 # SA_IO_TIMEOUT: Timeout for read/write/wfm  operations, in minutes
 # SA_SPACE_TIMEOUT: Timeout for space operations, in minutes
 # SA_REWIND_TIMEOUT: Timeout for rewind operations, in minutes
 # SA_ERASE_TIMEOUT: Timeout for erase operations, in minutes
 # SA_1FM_AT_EOD: Default to model which only has a default one filemark at EOT.
 options 	SA_IO_TIMEOUT=4
 options 	SA_SPACE_TIMEOUT=60
 options 	SA_REWIND_TIMEOUT=(2*60)
 options 	SA_ERASE_TIMEOUT=(4*60)
 options 	SA_1FM_AT_EOD
 
 # Optional timeout for the CAM processor target (pt) device
 # This is specified in seconds.  The default is 60 seconds.
 options 	SCSI_PT_DEFAULT_TIMEOUT=60
 
 # Optional enable of doing SES passthrough on other devices (e.g., disks)
 #
 # Normally disabled because a lot of newer SCSI disks report themselves
 # as having SES capabilities, but this can then clot up attempts to build
 # a topology with the SES device that's on the box these drives are in....
 options 	SES_ENABLE_PASSTHROUGH
 
 
 #####################################################################
 # MISCELLANEOUS DEVICES AND OPTIONS
 
 device		pty		#BSD-style compatibility pseudo ttys
 device		nmdm		#back-to-back tty devices
 device		md		#Memory/malloc disk
 device		snp		#Snoop device - to look at pty/vty/etc..
 device		ccd		#Concatenated disk driver
 device		firmware	#firmware(9) support
 
 # Kernel side iconv library
 options 	LIBICONV
 
 # Size of the kernel message buffer.  Should be N * pagesize.
 options 	MSGBUF_SIZE=40960
 
 
 #####################################################################
 # HARDWARE DEVICE CONFIGURATION
 
 # For ISA the required hints are listed.
 # EISA, MCA, PCI, CardBus, SD/MMC and pccard are self identifying buses, so
 # no hints are needed.
 
 #
 # Mandatory devices:
 #
 
 # These options are valid for other keyboard drivers as well.
 options 	KBD_DISABLE_KEYMAP_LOAD	# refuse to load a keymap
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
 
 device		kbdmux			# keyboard multiplexer
 options		KBDMUX_DFLT_KEYMAP	# specify the built-in keymap
 makeoptions	KBDMUX_DFLT_KEYMAP=it.iso
 
 options 	FB_DEBUG		# Frame buffer debugging
 
 device		splash			# Splash screen and screen saver support
 
 # Various screen savers.
 device		blank_saver
 device		daemon_saver
 device		dragon_saver
 device		fade_saver
 device		fire_saver
 device		green_saver
 device		logo_saver
 device		rain_saver
 device		snake_saver
 device		star_saver
 device		warp_saver
 
 # The syscons console driver (SCO color console compatible).
 device		sc
 hint.sc.0.at="isa"
 options 	MAXCONS=16		# number of virtual consoles
 options 	SC_ALT_MOUSE_IMAGE	# simplified mouse cursor in text mode
 options 	SC_DFLT_FONT		# compile font in
 makeoptions	SC_DFLT_FONT=cp850
 options 	SC_DISABLE_KDBKEY	# disable `debug' key
 options 	SC_DISABLE_REBOOT	# disable reboot key sequence
 options 	SC_HISTORY_SIZE=200	# number of history buffer lines
 options 	SC_MOUSE_CHAR=0x3	# char code for text mode mouse cursor
 options 	SC_PIXEL_MODE		# add support for the raster text mode
 
 # The following options will let you change the default colors of syscons.
 options 	SC_NORM_ATTR=(FG_GREEN|BG_BLACK)
 options 	SC_NORM_REV_ATTR=(FG_YELLOW|BG_GREEN)
 options 	SC_KERNEL_CONS_ATTR=(FG_RED|BG_BLACK)
 options 	SC_KERNEL_CONS_REV_ATTR=(FG_BLACK|BG_RED)
 
 # The following options will let you change the default behavior of
 # cut-n-paste feature
 options 	SC_CUT_SPACES2TABS	# convert leading spaces into tabs
 options 	SC_CUT_SEPCHARS=\"x09\"	# set of characters that delimit words
 					# (default is single space - \"x20\")
 
 # If you have a two button mouse, you may want to add the following option
 # to use the right button of the mouse to paste text.
 options 	SC_TWOBUTTON_MOUSE
 
 # You can selectively disable features in syscons.
 options 	SC_NO_CUTPASTE
 options 	SC_NO_FONT_LOADING
 options 	SC_NO_HISTORY
 options 	SC_NO_MODE_CHANGE
 options 	SC_NO_SYSMOUSE
 options 	SC_NO_SUSPEND_VTYSWITCH
 
 # `flags' for sc
 #	0x80	Put the video card in the VESA 800x600 dots, 16 color mode
 #	0x100	Probe for a keyboard device periodically if one is not present
 
 # Enable experimental features of the syscons terminal emulator (teken).
 options 	TEKEN_CONS25		# cons25-style terminal emulation
 options 	TEKEN_UTF8		# UTF-8 output handling
 
 # The vt video console driver.
 device		vt
 options		VT_ALT_TO_ESC_HACK=1	# Prepend ESC sequence to ALT keys
 options		VT_MAXWINDOWS=16	# Number of virtual consoles
 options		VT_TWOBUTTON_MOUSE	# Use right mouse button to paste
 
 # The following options set the default framebuffer size.
 options		VT_FB_DEFAULT_HEIGHT=480
 options		VT_FB_DEFAULT_WIDTH=640
 
 # The following options will let you change the default vt terminal colors.
 options		TERMINAL_NORM_ATTR=(FG_GREEN|BG_BLACK)
 options		TERMINAL_KERN_ATTR=(FG_LIGHTRED|BG_BLACK)
 
 #
 # Optional devices:
 #
 
 #
 # SCSI host adapters:
 #
 # adv: All Narrow SCSI bus AdvanSys controllers.
 # adw: Second Generation AdvanSys controllers including the ADV940UW.
 # aha: Adaptec 154x/1535/1640
 # ahb: Adaptec 174x EISA controllers
 # ahc: Adaptec 274x/284x/2910/293x/294x/394x/3950x/3960x/398X/4944/
 #      19160x/29160x, aic7770/aic78xx
 # ahd: Adaptec 29320/39320 Controllers.
 # aic: Adaptec 6260/6360, APA-1460 (PC Card), NEC PC9801-100 (C-BUS)
 # bt:  Most Buslogic controllers: including BT-445, BT-54x, BT-64x, BT-74x,
 #      BT-75x, BT-946, BT-948, BT-956, BT-958, SDC3211B, SDC3211F, SDC3222F
 # esp: Emulex ESP, NCR 53C9x and QLogic FAS families based controllers
 #      including the AMD Am53C974 (found on devices such as the Tekram
 #      DC-390(T)) and the Sun ESP and FAS families of controllers
 # isp: Qlogic ISP 1020, 1040 and 1040B PCI SCSI host adapters,
 #      ISP 1240 Dual Ultra SCSI, ISP 1080 and 1280 (Dual) Ultra2,
 #      ISP 12160 Ultra3 SCSI,
 #      Qlogic ISP 2100 and ISP 2200 1Gb Fibre Channel host adapters.
 #      Qlogic ISP 2300 and ISP 2312 2Gb Fibre Channel host adapters.
 #      Qlogic ISP 2322 and ISP 6322 2Gb Fibre Channel host adapters.
 # ispfw: Firmware module for Qlogic host adapters
 # mpt: LSI-Logic MPT/Fusion 53c1020 or 53c1030 Ultra4
 #      or FC9x9 Fibre Channel host adapters.
 # ncr: NCR 53C810, 53C825 self-contained SCSI host adapters.
 # sym: Symbios/Logic 53C8XX family of PCI-SCSI I/O processors:
 #      53C810, 53C810A, 53C815, 53C825,  53C825A, 53C860, 53C875,
 #      53C876, 53C885,  53C895, 53C895A, 53C896,  53C897, 53C1510D,
 #      53C1010-33, 53C1010-66.
 # trm: Tekram DC395U/UW/F DC315U adapters.
 # wds: WD7000
 
 #
 # Note that the order is important in order for Buslogic ISA/EISA cards to be
 # probed correctly.
 #
 device		bt
 hint.bt.0.at="isa"
 hint.bt.0.port="0x330"
 device		adv
 hint.adv.0.at="isa"
 device		adw
 device		aha
 hint.aha.0.at="isa"
 device		aic
 hint.aic.0.at="isa"
 device		ahb
 device		ahc
 device		ahd
 device		esp
 device		iscsi_initiator
 device		isp
 hint.isp.0.disable="1"
 hint.isp.0.role="3"
 hint.isp.0.prefer_iomap="1"
 hint.isp.0.prefer_memmap="1"
 hint.isp.0.fwload_disable="1"
 hint.isp.0.ignore_nvram="1"
 hint.isp.0.fullduplex="1"
 hint.isp.0.topology="lport"
 hint.isp.0.topology="nport"
 hint.isp.0.topology="lport-only"
 hint.isp.0.topology="nport-only"
 # we can't get u_int64_t types, nor can we get strings if it's got
 # a leading 0x, hence this silly dodge.
 hint.isp.0.portwnn="w50000000aaaa0000"
 hint.isp.0.nodewnn="w50000000aaaa0001"
 device		ispfw
 device		mpt
 device		ncr
 device		sym
 device		trm
 device		wds
 hint.wds.0.at="isa"
 hint.wds.0.port="0x350"
 hint.wds.0.irq="11"
 hint.wds.0.drq="6"
 
 # The aic7xxx driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set. Unfortunately,
 # this doesn't work on some motherboards, which prevents it from being the
 # default.
 options 	AHC_ALLOW_MEMIO
 
 # Dump the contents of the ahc controller configuration PROM.
 options 	AHC_DUMP_EEPROM
 
 # Bitmap of units to enable targetmode operations.
 options 	AHC_TMODE_ENABLE
 
 # Compile in Aic7xxx Debugging code.
 options 	AHC_DEBUG
 
 # Aic7xxx driver debugging options. See sys/dev/aic7xxx/aic7xxx.h
 options 	AHC_DEBUG_OPTS
 
 # Print register bitfields in debug output.  Adds ~128k to driver
 # See ahc(4).
 options 	AHC_REG_PRETTY_PRINT
 
 # Compile in aic79xx debugging code.
 options 	AHD_DEBUG
 
 # Aic79xx driver debugging options.  Adds ~215k to driver.  See ahd(4).
 options 	AHD_DEBUG_OPTS=0xFFFFFFFF
 
 # Print human-readable register definitions when debugging
 options 	AHD_REG_PRETTY_PRINT
 
 # Bitmap of units to enable targetmode operations.
 options 	AHD_TMODE_ENABLE
 
 # The adw driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set.
 options 	ADW_ALLOW_MEMIO
 
 # Options used in dev/iscsi (Software iSCSI stack)
 #
 options 	ISCSI_INITIATOR_DEBUG=9
 
 # Options used in dev/isp/ (Qlogic SCSI/FC driver).
 #
 #	ISP_TARGET_MODE		-	enable target mode operation
 #
 options 	ISP_TARGET_MODE=1
 #
 #	ISP_DEFAULT_ROLES	-	default role
 #		none=0
 #		target=1
 #		initiator=2
 #		both=3			(not supported currently)
 #
 #	ISP_INTERNAL_TARGET		(trivial internal disk target, for testing)
 #
 options 	ISP_DEFAULT_ROLES=0
 
 # Options used in dev/sym/ (Symbios SCSI driver).
 #options 	SYM_SETUP_LP_PROBE_MAP	#-Low Priority Probe Map (bits)
 					# Allows the ncr to take precedence
 					# 1 (1<<0) -> 810a, 860
 					# 2 (1<<1) -> 825a, 875, 885, 895
 					# 4 (1<<2) -> 895a, 896, 1510d
 #options 	SYM_SETUP_SCSI_DIFF	#-HVD support for 825a, 875, 885
 					# disabled:0 (default), enabled:1
 #options 	SYM_SETUP_PCI_PARITY	#-PCI parity checking
 					# disabled:0, enabled:1 (default)
 #options 	SYM_SETUP_MAX_LUN	#-Number of LUNs supported
 					# default:8, range:[1..64]
 
 # The 'dpt' driver provides support for old DPT controllers (http://www.dpt.com/).
 # These have hardware RAID-{0,1,5} support, and do multi-initiator I/O.
 # The DPT controllers are commonly re-licensed under other brand-names -
 # some controllers by Olivetti, Dec, HP, AT&T, SNI, AST, Alphatronic, NEC and
 # Compaq are actually DPT controllers.
 #
 # See src/sys/dev/dpt for debugging and other subtle options.
 #  DPT_MEASURE_PERFORMANCE  Enables a set of (semi)invasive metrics. Various
 #                           instruments are enabled.  The tools in
 #                           /usr/sbin/dpt_* assume these to be enabled.
 #  DPT_DEBUG_xxxx           These are controllable from sys/dev/dpt/dpt.h
 #  DPT_RESET_HBA            Make "reset" actually reset the controller
 #                           instead of fudging it.  Only enable this if you
 #			    are 100% certain you need it.
 
 device		dpt
 
 # DPT options
 #!CAM# options 	DPT_MEASURE_PERFORMANCE
 options 	DPT_RESET_HBA
 
 #
 # Compaq "CISS" RAID controllers (SmartRAID 5* series)
 # These controllers have a SCSI-like interface, and require the
 # CAM infrastructure.
 #
 device		ciss
 
 #
 # Intel Integrated RAID controllers.
 # This driver was developed and is maintained by Intel.  Contacts
 # at Intel for this driver are
 # "Kannanthanam, Boji T" <boji.t.kannanthanam@intel.com> and
 # "Leubner, Achim" <achim.leubner@intel.com>.
 #
 device		iir
 
 #
 # Mylex AcceleRAID and eXtremeRAID controllers with v6 and later
 # firmware.  These controllers have a SCSI-like interface, and require
 # the CAM infrastructure.
 #
 device		mly
 
 #
 # Compaq Smart RAID, Mylex DAC960 and AMI MegaRAID controllers.  Only
 # one entry is needed; the code will find and configure all supported
 # controllers.
 #
 device		ida		# Compaq Smart RAID
 device		mlx		# Mylex DAC960
 device		amr		# AMI MegaRAID
 device		amrp		# SCSI Passthrough interface (optional, CAM req.)
 device		mfi		# LSI MegaRAID SAS
 device		mfip		# LSI MegaRAID SAS passthrough, requires CAM
 options 	MFI_DEBUG
 device		mrsas		# LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s
 
 #
 # 3ware ATA RAID
 #
 device		twe		# 3ware ATA RAID
 
 #
 # Serial ATA host controllers:
 #
 # ahci: Advanced Host Controller Interface (AHCI) compatible
 # mvs:  Marvell 88SX50XX/88SX60XX/88SX70XX/SoC controllers
 # siis: SiliconImage SiI3124/SiI3132/SiI3531 controllers
 #
 # These drivers are part of cam(4) subsystem. They supersede less featured
 # ata(4) subsystem drivers, supporting same hardware.
 
 device		ahci
 device		mvs
 device		siis
 
 #
 # The 'ATA' driver supports all legacy ATA/ATAPI controllers, including
 # PC Card devices. You only need one "device ata" for it to find all
 # PCI and PC Card ATA/ATAPI devices on modern machines.
 # Alternatively, individual bus and chipset drivers may be chosen by using
 # the 'atacore' driver then selecting the drivers on a per vendor basis.
 # For example to build a system which only supports a VIA chipset,
 # omit 'ata' and include the 'atacore', 'atapci' and 'atavia' drivers.
 device		ata
 
 # Modular ATA
 #device		atacore		# Core ATA functionality
 #device		atacard		# CARDBUS support
 #device		atabus		# PC98 cbus support
 #device		ataisa		# ISA bus support
 #device		atapci		# PCI bus support; only generic chipset support
 
 # PCI ATA chipsets
 #device		ataacard	# ACARD
 #device		ataacerlabs	# Acer Labs Inc. (ALI)
 #device		ataamd		# American Micro Devices (AMD)
 #device		ataati		# ATI
 #device		atacenatek	# Cenatek
 #device		atacypress	# Cypress
 #device		atacyrix	# Cyrix
 #device		atahighpoint	# HighPoint
 #device		ataintel	# Intel
 #device		ataite		# Integrated Technology Inc. (ITE)
 #device		atajmicron	# JMicron
 #device		atamarvell	# Marvell
 #device		atamicron	# Micron
 #device		atanational	# National
 #device		atanetcell	# NetCell
 #device		atanvidia	# nVidia
 #device		atapromise	# Promise
 #device		ataserverworks	# ServerWorks
 #device		atasiliconimage	# Silicon Image Inc. (SiI) (formerly CMD)
 #device		atasis		# Silicon Integrated Systems Corp.(SiS)
 #device		atavia		# VIA Technologies Inc.
 
 #
 # For older non-PCI, non-PnPBIOS systems, these are the hints lines to add:
 hint.ata.0.at="isa"
 hint.ata.0.port="0x1f0"
 hint.ata.0.irq="14"
 hint.ata.1.at="isa"
 hint.ata.1.port="0x170"
 hint.ata.1.irq="15"
 
 #
 # The following options are valid on the ATA driver:
 #
 # ATA_REQUEST_TIMEOUT:	the number of seconds to wait for an ATA request
 #			before timing out.
 
 #options 	ATA_REQUEST_TIMEOUT=10
 
 #
 # Standard floppy disk controllers and floppy tapes, supports
 # the Y-E DATA External FDD (PC Card)
 #
 device		fdc
 hint.fdc.0.at="isa"
 hint.fdc.0.port="0x3F0"
 hint.fdc.0.irq="6"
 hint.fdc.0.drq="2"
 #
 # FDC_DEBUG enables floppy debugging.  Since the debug output is huge, you
 # gotta turn it actually on by setting the variable fd_debug with DDB,
 # however.
 options 	FDC_DEBUG
 #
 # Activate this line if you happen to have an Insight floppy tape.
 # Probing them proved to be dangerous for people with floppy disks only,
 # so it's "hidden" behind a flag:
 #hint.fdc.0.flags="1"
 
 # Specify floppy devices
 hint.fd.0.at="fdc0"
 hint.fd.0.drive="0"
 hint.fd.1.at="fdc0"
 hint.fd.1.drive="1"
 
 #
 # uart: newbusified driver for serial interfaces.  It consolidates the sio(4),
 #	sab(4) and zs(4) drivers.
 #
 device		uart
 
 # Options for uart(4)
 options 	UART_PPS_ON_CTS		# Do time pulse capturing using CTS
 					# instead of DCD.
 options 	UART_POLL_FREQ		# Set polling rate, used when hw has
 					# no interrupt support (50 Hz default).
 
 # The following hint should only be used for pure ISA devices.  It is not
 # needed otherwise.  Use of hints is strongly discouraged.
 hint.uart.0.at="isa"
 
 # The following 3 hints are used when the UART is a system device (i.e., a
 # console or debug port), but only on platforms that don't have any other
 # means to pass the information to the kernel.  The unit number of the hint
 # is only used to bundle the hints together.  There is no relation to the
 # unit number of the probed UART.
 hint.uart.0.port="0x3f8"
 hint.uart.0.flags="0x10"
 hint.uart.0.baud="115200"
 
 # `flags' for serial drivers that support consoles like sio(4) and uart(4):
 #	0x10	enable console support for this unit.  Other console flags
 #		(if applicable) are ignored unless this is set.  Enabling
 #		console support does not make the unit the preferred console.
 #		Boot with -h or set boot_serial=YES in the loader.  For sio(4)
 #		specifically, the 0x20 flag can also be set (see above).
 #		Currently, at most one unit can have console support; the
 #		first one (in config file order) with this flag set is
 #		preferred.  Setting this flag for sio0 gives the old behavior.
 #	0x80	use this port for serial line gdb support in ddb.  Also known
 #		as debug port.
 #
 
 # Options for serial drivers that support consoles:
 options 	BREAK_TO_DEBUGGER	# A BREAK/DBG on the console goes to
 					# ddb, if available.
 
 # Solaris implements a new BREAK which is initiated by a character
 # sequence CR ~ ^b which is similar to a familiar pattern used on
 # Sun servers by the Remote Console.  There are FreeBSD extensions:
 # CR ~ ^p requests force panic and CR ~ ^r requests a clean reboot.
 options 	ALT_BREAK_TO_DEBUGGER
 
 # Serial Communications Controller
 # Supports the Siemens SAB 82532 and Zilog Z8530 multi-channel
 # communications controllers.
 device		scc
 
 # PCI Universal Communications driver
 # Supports various multi port PCI I/O cards.
 device		puc
 
 #
 # Network interfaces:
 #
 # MII bus support is required for many PCI Ethernet NICs,
 # namely those which use MII-compliant transceivers or implement
 # transceiver control interfaces that operate like an MII.  Adding
 # "device miibus" to the kernel config pulls in support for the generic
 # miibus API, the common support for for bit-bang'ing the MII and all
 # of the PHY drivers, including a generic one for PHYs that aren't
 # specifically handled by an individual driver.  Support for specific
 # PHYs may be built by adding "device mii", "device mii_bitbang" if
 # needed by the NIC driver and then adding the appropriate PHY driver.
 device  	mii		# Minimal MII support
 device  	mii_bitbang	# Common module for bit-bang'ing the MII
 device  	miibus		# MII support w/ bit-bang'ing and all PHYs
 
 device  	acphy		# Altima Communications AC101
 device  	amphy		# AMD AM79c873 / Davicom DM910{1,2}
 device  	atphy		# Attansic/Atheros F1
 device  	axphy		# Asix Semiconductor AX88x9x
 device  	bmtphy		# Broadcom BCM5201/BCM5202 and 3Com 3c905C
 device  	brgphy		# Broadcom BCM54xx/57xx 1000baseTX
 device  	ciphy		# Cicada/Vitesse CS/VSC8xxx
 device  	e1000phy	# Marvell 88E1000 1000/100/10-BT
 device  	gentbi		# Generic 10-bit 1000BASE-{LX,SX} fiber ifaces
 device  	icsphy		# ICS ICS1889-1893
 device  	ip1000phy	# IC Plus IP1000A/IP1001
 device  	jmphy		# JMicron JMP211/JMP202
 device  	lxtphy		# Level One LXT-970
 device  	mlphy		# Micro Linear 6692
 device  	nsgphy		# NatSemi DP8361/DP83865/DP83891
 device  	nsphy		# NatSemi DP83840A
 device  	nsphyter	# NatSemi DP83843/DP83815
 device  	pnaphy		# HomePNA
 device  	qsphy		# Quality Semiconductor QS6612
 device  	rdcphy		# RDC Semiconductor R6040
 device  	rgephy		# RealTek 8169S/8110S/8211B/8211C
 device  	rlphy		# RealTek 8139
 device  	rlswitch	# RealTek 8305
 device  	smcphy		# SMSC LAN91C111
 device  	tdkphy		# TDK 89Q2120
 device  	tlphy		# Texas Instruments ThunderLAN
 device  	truephy		# LSI TruePHY
 device		xmphy		# XaQti XMAC II
 
 # an:   Aironet 4500/4800 802.11 wireless adapters. Supports the PCMCIA,
 #       PCI and ISA varieties.
 # ae:   Support for gigabit ethernet adapters based on the Attansic/Atheros
 #       L2 PCI-Express FastEthernet controllers.
 # age:  Support for gigabit ethernet adapters based on the Attansic/Atheros
 #       L1 PCI express gigabit ethernet controllers.
 # alc:  Support for Atheros AR8131/AR8132 PCIe ethernet controllers.
 # ale:  Support for Atheros AR8121/AR8113/AR8114 PCIe ethernet controllers.
 # ath:  Atheros a/b/g WiFi adapters (requires ath_hal and wlan)
 # bce:	Broadcom NetXtreme II (BCM5706/BCM5708) PCI/PCIe Gigabit Ethernet
 #       adapters.
 # bfe:	Broadcom BCM4401 Ethernet adapter.
 # bge:	Support for gigabit ethernet adapters based on the Broadcom
 #	BCM570x family of controllers, including the 3Com 3c996-T,
 #	the Netgear GA302T, the SysKonnect SK-9D21 and SK-9D41, and
 #	the embedded gigE NICs on Dell PowerEdge 2550 servers.
 # bxe:	Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet
 #       adapters.
 # bwi:	Broadcom BCM430* and BCM431* family of wireless adapters.
 # bwn:	Broadcom BCM43xx family of wireless adapters.
 # cas:	Sun Cassini/Cassini+ and National Semiconductor DP83065 Saturn
 # cm:	Arcnet SMC COM90c26 / SMC COM90c56
 #	(and SMC COM90c66 in '56 compatibility mode) adapters.
 # cxgb: Chelsio T3 based 1GbE/10GbE PCIe Ethernet adapters.
 # cxgbe:Chelsio T4 and T5 based 1GbE/10GbE/40GbE PCIe Ethernet adapters.
 # dc:   Support for PCI fast ethernet adapters based on the DEC/Intel 21143
 #       and various workalikes including:
 #       the ADMtek AL981 Comet and AN985 Centaur, the ASIX Electronics
 #       AX88140A and AX88141, the Davicom DM9100 and DM9102, the Lite-On
 #       82c168 and 82c169 PNIC, the Lite-On/Macronix LC82C115 PNIC II
 #       and the Macronix 98713/98713A/98715/98715A/98725 PMAC. This driver
 #       replaces the old al, ax, dm, pn and mx drivers.  List of brands:
 #       Digital DE500-BA, Kingston KNE100TX, D-Link DFE-570TX, SOHOware SFA110,
 #       SVEC PN102-TX, CNet Pro110B, 120A, and 120B, Compex RL100-TX,
 #       LinkSys LNE100TX, LNE100TX V2.0, Jaton XpressNet, Alfa Inc GFC2204,
 #       KNE110TX.
 # de:   Digital Equipment DC21040
 # em:   Intel Pro/1000 Gigabit Ethernet 82542, 82543, 82544 based adapters.
 # igb:  Intel Pro/1000 PCI Express Gigabit Ethernet: 82575 and later adapters.
 # ep:   3Com 3C509, 3C529, 3C556, 3C562D, 3C563D, 3C572, 3C574X, 3C579, 3C589
 #       and PC Card devices using these chipsets.
 # ex:   Intel EtherExpress Pro/10 and other i82595-based adapters,
 #       Olicom Ethernet PC Card devices.
 # fe:   Fujitsu MB86960A/MB86965A Ethernet
 # fea:  DEC DEFEA EISA FDDI adapter
 # fpa:  Support for the Digital DEFPA PCI FDDI. `device fddi' is also needed.
 # fxp:  Intel EtherExpress Pro/100B
 #	(hint of prefer_iomap can be done to prefer I/O instead of Mem mapping)
 # gem:  Apple GMAC/Sun ERI/Sun GEM
 # hme:  Sun HME (Happy Meal Ethernet)
 # jme:  JMicron JMC260 Fast Ethernet/JMC250 Gigabit Ethernet based adapters.
 # le:   AMD Am7900 LANCE and Am79C9xx PCnet
 # lge:	Support for PCI gigabit ethernet adapters based on the Level 1
 #	LXT1001 NetCellerator chipset. This includes the D-Link DGE-500SX,
 #	SMC TigerCard 1000 (SMC9462SX), and some Addtron cards.
 # malo: Marvell Libertas wireless NICs.
 # mwl:  Marvell 88W8363 802.11n wireless NICs.
 #	Requires the mwl firmware module
 # mwlfw: Marvell 88W8363 firmware
 # msk:	Support for gigabit ethernet adapters based on the Marvell/SysKonnect
 #	Yukon II Gigabit controllers, including 88E8021, 88E8022, 88E8061,
 #	88E8062, 88E8035, 88E8036, 88E8038, 88E8050, 88E8052, 88E8053,
 #	88E8055, 88E8056 and D-Link 560T/550SX.
 # lmc:	Support for the LMC/SBE wide-area network interface cards.
 # mlx5:	Mellanox ConnectX-4 and ConnectX-4 LX IB and Eth shared code module.
 # mlx5en:Mellanox ConnectX-4 and ConnectX-4 LX PCIe Ethernet adapters.
 # my:	Myson Fast Ethernet (MTD80X, MTD89X)
 # nge:	Support for PCI gigabit ethernet adapters based on the National
 #	Semiconductor DP83820 and DP83821 chipset. This includes the
 #	SMC EZ Card 1000 (SMC9462TX), D-Link DGE-500T, Asante FriendlyNet
 #	GigaNIX 1000TA and 1000TPC, the Addtron AEG320T, the Surecom
 #	EP-320G-TX and the Netgear GA622T.
 # oce:	Emulex 10 Gbit adapters (OneConnect Ethernet)
 # pcn:	Support for PCI fast ethernet adapters based on the AMD Am79c97x
 #	PCnet-FAST, PCnet-FAST+, PCnet-FAST III, PCnet-PRO and PCnet-Home
 #	chipsets. These can also be handled by the le(4) driver if the
 #	pcn(4) driver is left out of the kernel. The le(4) driver does not
 #	support the additional features like the MII bus and burst mode of
 #	the PCnet-FAST and greater chipsets though.
 # ral:	Ralink Technology IEEE 802.11 wireless adapter
 # re:   RealTek 8139C+/8169/816xS/811xS/8101E PCI/PCIe Ethernet adapter
 # rl:   Support for PCI fast ethernet adapters based on the RealTek 8129/8139
 #       chipset.  Note that the RealTek driver defaults to using programmed
 #       I/O to do register accesses because memory mapped mode seems to cause
 #       severe lockups on SMP hardware.  This driver also supports the
 #       Accton EN1207D `Cheetah' adapter, which uses a chip called
 #       the MPX 5030/5038, which is either a RealTek in disguise or a
 #       RealTek workalike.  Note that the D-Link DFE-530TX+ uses the RealTek
 #       chipset and is supported by this driver, not the 'vr' driver.
 # sf:   Support for Adaptec Duralink PCI fast ethernet adapters based on the
 #       Adaptec AIC-6915 "starfire" controller.
 #       This includes dual and quad port cards, as well as one 100baseFX card.
 #       Most of these are 64-bit PCI devices, except for one single port
 #       card which is 32-bit.
 # sge:  Silicon Integrated Systems SiS190/191 Fast/Gigabit Ethernet adapter
 # sis:  Support for NICs based on the Silicon Integrated Systems SiS 900,
 #       SiS 7016 and NS DP83815 PCI fast ethernet controller chips.
 # sk:   Support for the SysKonnect SK-984x series PCI gigabit ethernet NICs.
 #       This includes the SK-9841 and SK-9842 single port cards (single mode
 #       and multimode fiber) and the SK-9843 and SK-9844 dual port cards
 #       (also single mode and multimode).
 #       The driver will autodetect the number of ports on the card and
 #       attach each one as a separate network interface.
 # sn:   Support for ISA and PC Card Ethernet devices using the
 #       SMC91C90/92/94/95 chips.
 # ste:  Sundance Technologies ST201 PCI fast ethernet controller, includes
 #       the D-Link DFE-550TX.
 # stge: Support for gigabit ethernet adapters based on the Sundance/Tamarack
 #       TC9021 family of controllers, including the Sundance ST2021/ST2023,
 #       the Sundance/Tamarack TC9021, the D-Link DL-4000 and ASUS NX1101.
 # ti:   Support for PCI gigabit ethernet NICs based on the Alteon Networks
 #       Tigon 1 and Tigon 2 chipsets.  This includes the Alteon AceNIC, the
 #       3Com 3c985, the Netgear GA620 and various others.  Note that you will
 #       probably want to bump up kern.ipc.nmbclusters a lot to use this driver.
 # tl:   Support for the Texas Instruments TNETE100 series 'ThunderLAN'
 #       cards and integrated ethernet controllers.  This includes several
 #       Compaq Netelligent 10/100 cards and the built-in ethernet controllers
 #       in several Compaq Prosignia, Proliant and Deskpro systems.  It also
 #       supports several Olicom 10Mbps and 10/100 boards.
 # tx:   SMC 9432 TX, BTX and FTX cards. (SMC EtherPower II series)
 # txp:	Support for 3Com 3cR990 cards with the "Typhoon" chipset
 # vr:   Support for various fast ethernet adapters based on the VIA
 #       Technologies VT3043 `Rhine I' and VT86C100A `Rhine II' chips,
 #       including the D-Link DFE520TX and D-Link DFE530TX (see 'rl' for
 #       DFE530TX+), the Hawking Technologies PN102TX, and the AOpen/Acer ALN-320.
 # vte:  DM&P Vortex86 RDC R6040 Fast Ethernet
 # vx:   3Com 3C590 and 3C595
 # wb:   Support for fast ethernet adapters based on the Winbond W89C840F chip.
 #       Note: this is not the same as the Winbond W89C940F, which is a
 #       NE2000 clone.
 # wi:   Lucent WaveLAN/IEEE 802.11 PCMCIA adapters. Note: this supports both
 #       the PCMCIA and ISA cards: the ISA card is really a PCMCIA to ISA
 #       bridge with a PCMCIA adapter plugged into it.
 # xe:   Xircom/Intel EtherExpress Pro100/16 PC Card ethernet controller,
 #       Accton Fast EtherCard-16, Compaq Netelligent 10/100 PC Card,
 #       Toshiba 10/100 Ethernet PC Card, Xircom 16-bit Ethernet + Modem 56
 # xl:   Support for the 3Com 3c900, 3c905, 3c905B and 3c905C (Fast)
 #       Etherlink XL cards and integrated controllers.  This includes the
 #       integrated 3c905B-TX chips in certain Dell Optiplex and Dell
 #       Precision desktop machines and the integrated 3c905-TX chips
 #       in Dell Latitude laptop docking stations.
 #       Also supported: 3Com 3c980(C)-TX, 3Com 3cSOHO100-TX, 3Com 3c450-TX
 
 # Order for ISA/EISA devices is important here
 
 device		cm
 hint.cm.0.at="isa"
 hint.cm.0.port="0x2e0"
 hint.cm.0.irq="9"
 hint.cm.0.maddr="0xdc000"
 device		ep
 device		ex
 device		fe
 hint.fe.0.at="isa"
 hint.fe.0.port="0x300"
 device		fea
 device		sn
 hint.sn.0.at="isa"
 hint.sn.0.port="0x300"
 hint.sn.0.irq="10"
 device		an
 device		wi
 device		xe
 
 # PCI Ethernet NICs that use the common MII bus controller code.
 device		ae		# Attansic/Atheros L2 FastEthernet
 device		age		# Attansic/Atheros L1 Gigabit Ethernet
 device		alc		# Atheros AR8131/AR8132 Ethernet
 device		ale		# Atheros AR8121/AR8113/AR8114 Ethernet
 device		bce		# Broadcom BCM5706/BCM5708 Gigabit Ethernet
 device		bfe		# Broadcom BCM440x 10/100 Ethernet
 device		bge		# Broadcom BCM570xx Gigabit Ethernet
 device		cas		# Sun Cassini/Cassini+ and NS DP83065 Saturn
 device		cxgb		# Chelsio T3 10 Gigabit Ethernet
 device		cxgb_t3fw	# Chelsio T3 10 Gigabit Ethernet firmware
 device		cxgbe		# Chelsio T4 and T5 1GbE/10GbE/40GbE
 device		dc		# DEC/Intel 21143 and various workalikes
 device		et		# Agere ET1310 10/100/Gigabit Ethernet
 device		fxp		# Intel EtherExpress PRO/100B (82557, 82558)
 hint.fxp.0.prefer_iomap="0"
 device		gem		# Apple GMAC/Sun ERI/Sun GEM
 device		hme		# Sun HME (Happy Meal Ethernet)
 device		jme		# JMicron JMC250 Gigabit/JMC260 Fast Ethernet
 device		lge		# Level 1 LXT1001 gigabit Ethernet
 device		mlx5		# Shared code module between IB and Ethernet
 device		mlx5en		# Mellanox ConnectX-4 and ConnectX-4 LX
 device		msk		# Marvell/SysKonnect Yukon II Gigabit Ethernet
 device		my		# Myson Fast Ethernet (MTD80X, MTD89X)
 device		nge		# NatSemi DP83820 gigabit Ethernet
 device		re		# RealTek 8139C+/8169/8169S/8110S
 device		rl		# RealTek 8129/8139
 device		pcn		# AMD Am79C97x PCI 10/100 NICs
 device		sf		# Adaptec AIC-6915 (``Starfire'')
 device		sge		# Silicon Integrated Systems SiS190/191
 device		sis		# Silicon Integrated Systems SiS 900/SiS 7016
 device		sk		# SysKonnect SK-984x & SK-982x gigabit Ethernet
 device		ste		# Sundance ST201 (D-Link DFE-550TX)
 device		stge		# Sundance/Tamarack TC9021 gigabit Ethernet
 device		tl		# Texas Instruments ThunderLAN
 device		tx		# SMC EtherPower II (83c170 ``EPIC'')
 device		vr		# VIA Rhine, Rhine II
 device		vte		# DM&P Vortex86 RDC R6040 Fast Ethernet
 device		wb		# Winbond W89C840F
 device		xl		# 3Com 3c90x (``Boomerang'', ``Cyclone'')
 
 # PCI Ethernet NICs.
 device		de		# DEC/Intel DC21x4x (``Tulip'')
 device		em		# Intel Pro/1000 Gigabit Ethernet
 device		igb		# Intel Pro/1000 PCIE Gigabit Ethernet
 device		ixgb		# Intel Pro/10Gbe PCI-X Ethernet
 device		ix		# Intel Pro/10Gbe PCIE Ethernet
 device		ixv		# Intel Pro/10Gbe PCIE Ethernet VF
 device		le		# AMD Am7900 LANCE and Am79C9xx PCnet
 device		mxge		# Myricom Myri-10G 10GbE NIC
 device		nxge		# Neterion Xframe 10GbE Server/Storage Adapter
 device		oce		# Emulex 10 GbE (OneConnect Ethernet)
 device		ti		# Alteon Networks Tigon I/II gigabit Ethernet
 device		txp		# 3Com 3cR990 (``Typhoon'')
 device		vx		# 3Com 3c590, 3c595 (``Vortex'')
 device		vxge		# Exar/Neterion XFrame 3100 10GbE
 
 # PCI FDDI NICs.
 device		fpa
 
 # PCI WAN adapters.
 device		lmc
 
 # PCI IEEE 802.11 Wireless NICs
 device		ath		# Atheros pci/cardbus NIC's
 device		ath_hal		# pci/cardbus chip support
 #device		ath_ar5210	# AR5210 chips
 #device		ath_ar5211	# AR5211 chips
 #device		ath_ar5212	# AR5212 chips
 #device		ath_rf2413
 #device		ath_rf2417
 #device		ath_rf2425
 #device		ath_rf5111
 #device		ath_rf5112
 #device		ath_rf5413
 #device		ath_ar5416	# AR5416 chips
 options 	AH_SUPPORT_AR5416	# enable AR5416 tx/rx descriptors
 # All of the AR5212 parts have a problem when paired with the AR71xx
 # CPUS.  These parts have a bug that triggers a fatal bus error on the AR71xx
 # only.  Details of the exact nature of the bug are sketchy, but some can be
 # found at https://forum.openwrt.org/viewtopic.php?pid=70060 on pages 4, 5 and
 # 6.  This option enables this workaround.  There is a performance penalty
 # for this work around, but without it things don't work at all.  The DMA
 # from the card usually bursts 128 bytes, but on the affected CPUs, only
 # 4 are safe.
 options	   	AH_RXCFG_SDMAMW_4BYTES
 #device		ath_ar9160	# AR9160 chips
 #device		ath_ar9280	# AR9280 chips
 #device		ath_ar9285	# AR9285 chips
 device		ath_rate_sample	# SampleRate tx rate control for ath
 device		bwi		# Broadcom BCM430* BCM431*
 device		bwn		# Broadcom BCM43xx
 device		malo		# Marvell Libertas wireless NICs.
 device		mwl		# Marvell 88W8363 802.11n wireless NICs.
 device		mwlfw
 device		ral		# Ralink Technology RT2500 wireless NICs.
 
 # Use sf_buf(9) interface for jumbo buffers on ti(4) controllers.
 #options 	TI_SF_BUF_JUMBO
 # Turn on the header splitting option for the ti(4) driver firmware.  This
 # only works for Tigon II chips, and has no effect for Tigon I chips.
 # This option requires the TI_SF_BUF_JUMBO option above.
 #options 	TI_JUMBO_HDRSPLIT
 
 # These two options allow manipulating the mbuf cluster size and mbuf size,
 # respectively.  Be very careful with NIC driver modules when changing
 # these from their default values, because that can potentially cause a
 # mismatch between the mbuf size assumed by the kernel and the mbuf size
 # assumed by a module.  The only driver that currently has the ability to
 # detect a mismatch is ti(4).
 options 	MCLSHIFT=12	# mbuf cluster shift in bits, 12 == 4KB
 options 	MSIZE=512	# mbuf size in bytes
 
 #
 # ATM related options (Cranor version)
 # (note: this driver cannot be used with the HARP ATM stack)
 #
 # The `en' device provides support for Efficient Networks (ENI)
 # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0).
 #
 # The `hatm' device provides support for Fore/Marconi HE155 and HE622
 # ATM PCI cards.
 #
 # The `fatm' device provides support for Fore PCA200E ATM PCI cards.
 #
 # The `patm' device provides support for IDT77252 based cards like
 # ProSum's ProATM-155 and ProATM-25 and IDT's evaluation boards.
 #
 # atm device provides generic atm functions and is required for
 # atm devices.
 # NATM enables the netnatm protocol family that can be used to
 # bypass TCP/IP.
 #
 # utopia provides the access to the ATM PHY chips and is required for en,
 # hatm and fatm.
 #
 # the current driver supports only PVC operations (no atm-arp, no multicast).
 # for more details, please read the original documents at
 # http://www.ccrc.wustl.edu/pub/chuck/tech/bsdatm/bsdatm.html
 #
 device		atm
 device		en
 device		fatm			#Fore PCA200E
 device		hatm			#Fore/Marconi HE155/622
 device		patm			#IDT77252 cards (ProATM and IDT)
 device		utopia			#ATM PHY driver
 options 	NATM			#native ATM
 
 options 	LIBMBPOOL		#needed by patm, iatm
 
 #
 # Sound drivers
 #
 # sound: The generic sound driver.
 #
 
 device		sound
 
 #
 # snd_*: Device-specific drivers.
 #
 # The flags of the device tell the device a bit more info about the
 # device that normally is obtained through the PnP interface.
 #	bit  2..0   secondary DMA channel;
 #	bit  4      set if the board uses two dma channels;
 #	bit 15..8   board type, overrides autodetection; leave it
 #		    zero if don't know what to put in (and you don't,
 #		    since this is unsupported at the moment...).
 #
 # snd_ad1816:		Analog Devices AD1816 ISA PnP/non-PnP.
 # snd_als4000:		Avance Logic ALS4000 PCI.
 # snd_atiixp:		ATI IXP 200/300/400 PCI.
 # snd_audiocs:		Crystal Semiconductor CS4231 SBus/EBus. Only
 #			for sparc64.
 # snd_cmi:		CMedia CMI8338/CMI8738 PCI.
 # snd_cs4281:		Crystal Semiconductor CS4281 PCI.
 # snd_csa:		Crystal Semiconductor CS461x/428x PCI. (except
 #			4281)
 # snd_ds1:		Yamaha DS-1 PCI.
 # snd_emu10k1:		Creative EMU10K1 PCI and EMU10K2 (Audigy) PCI.
 # snd_emu10kx:		Creative SoundBlaster Live! and Audigy
 # snd_envy24:		VIA Envy24 and compatible, needs snd_spicds.
 # snd_envy24ht:		VIA Envy24HT and compatible, needs snd_spicds.
 # snd_es137x:		Ensoniq AudioPCI ES137x PCI.
 # snd_ess:		Ensoniq ESS ISA PnP/non-PnP, to be used in
 #			conjunction with snd_sbc.
 # snd_fm801:		Forte Media FM801 PCI.
 # snd_gusc:		Gravis UltraSound ISA PnP/non-PnP.
 # snd_hda:		Intel High Definition Audio (Controller) and
 #			compatible.
 # snd_hdspe:		RME HDSPe AIO and RayDAT.
 # snd_ich:		Intel ICH AC'97 and some more audio controllers
 #			embedded in a chipset, for example nVidia
 #			nForce controllers.
 # snd_maestro:		ESS Technology Maestro-1/2x PCI.
 # snd_maestro3:		ESS Technology Maestro-3/Allegro PCI.
 # snd_mss:		Microsoft Sound System ISA PnP/non-PnP.
 # snd_neomagic:		Neomagic 256 AV/ZX PCI.
 # snd_sb16:		Creative SoundBlaster16, to be used in
 #			conjunction with snd_sbc.
 # snd_sb8:		Creative SoundBlaster (pre-16), to be used in
 #			conjunction with snd_sbc.
 # snd_sbc:		Creative SoundBlaster ISA PnP/non-PnP.
 #			Supports ESS and Avance ISA chips as well.
 # snd_solo:		ESS Solo-1x PCI.
 # snd_spicds:		SPI codec driver, needed by Envy24/Envy24HT drivers.
 # snd_t4dwave:		Trident 4DWave DX/NX PCI, Sis 7018 PCI and Acer Labs
 #			M5451 PCI.
 # snd_uaudio:		USB audio.
 # snd_via8233:		VIA VT8233x PCI.
 # snd_via82c686:	VIA VT82C686A PCI.
 # snd_vibes:		S3 Sonicvibes PCI.
 
 device		snd_ad1816
 device		snd_als4000
 device		snd_atiixp
 #device		snd_audiocs
 device		snd_cmi
 device		snd_cs4281
 device		snd_csa
 device		snd_ds1
 device		snd_emu10k1
 device		snd_emu10kx
 device		snd_envy24
 device		snd_envy24ht
 device		snd_es137x
 device		snd_ess
 device		snd_fm801
 device		snd_gusc
 device		snd_hda
 device		snd_hdspe
 device		snd_ich
 device		snd_maestro
 device		snd_maestro3
 device		snd_mss
 device		snd_neomagic
 device		snd_sb16
 device		snd_sb8
 device		snd_sbc
 device		snd_solo
 device		snd_spicds
 device		snd_t4dwave
 device		snd_uaudio
 device		snd_via8233
 device		snd_via82c686
 device		snd_vibes
 
 # For non-PnP sound cards:
 hint.pcm.0.at="isa"
 hint.pcm.0.irq="10"
 hint.pcm.0.drq="1"
 hint.pcm.0.flags="0x0"
 hint.sbc.0.at="isa"
 hint.sbc.0.port="0x220"
 hint.sbc.0.irq="5"
 hint.sbc.0.drq="1"
 hint.sbc.0.flags="0x15"
 hint.gusc.0.at="isa"
 hint.gusc.0.port="0x220"
 hint.gusc.0.irq="5"
 hint.gusc.0.drq="1"
 hint.gusc.0.flags="0x13"
 
 #
 # Following options are intended for debugging/testing purposes:
 #
 # SND_DEBUG                    Enable extra debugging code that includes
 #                              sanity checking and possible increase of
 #                              verbosity.
 #
 # SND_DIAGNOSTIC               Similar in a spirit of INVARIANTS/DIAGNOSTIC,
 #                              zero tolerance against inconsistencies.
 #
 # SND_FEEDER_MULTIFORMAT       By default, only 16/32 bit feeders are compiled
 #                              in. This options enable most feeder converters
 #                              except for 8bit. WARNING: May bloat the kernel.
 #
 # SND_FEEDER_FULL_MULTIFORMAT  Ditto, but includes 8bit feeders as well.
 #
 # SND_FEEDER_RATE_HP           (feeder_rate) High precision 64bit arithmetic
 #                              as much as possible (the default trying to
 #                              avoid it). Possible slowdown.
 #
 # SND_PCM_64                   (Only applicable for i386/32bit arch)
 #                              Process 32bit samples through 64bit
 #                              integer/arithmetic. Slight increase of dynamic
 #                              range at a cost of possible slowdown.
 #
 # SND_OLDSTEREO                Only 2 channels are allowed, effectively
 #                              disabling multichannel processing.
 #
 options		SND_DEBUG
 options		SND_DIAGNOSTIC
 options		SND_FEEDER_MULTIFORMAT
 options		SND_FEEDER_FULL_MULTIFORMAT
 options		SND_FEEDER_RATE_HP
 options		SND_PCM_64
 options		SND_OLDSTEREO
 
 #
 # Miscellaneous hardware:
 #
 # scd: Sony CD-ROM using proprietary (non-ATAPI) interface
 # mcd: Mitsumi CD-ROM using proprietary (non-ATAPI) interface
 # bktr: Brooktree bt848/848a/849a/878/879 video capture and TV Tuner board
 # joy: joystick (including IO DATA PCJOY PC Card joystick)
 # cmx: OmniKey CardMan 4040 pccard smartcard reader
 
 # Mitsumi CD-ROM
 device		mcd
 hint.mcd.0.at="isa"
 hint.mcd.0.port="0x300"
 # for the Sony CDU31/33A CDROM
 device		scd
 hint.scd.0.at="isa"
 hint.scd.0.port="0x230"
 device		joy			# PnP aware, hints for non-PnP only
 hint.joy.0.at="isa"
 hint.joy.0.port="0x201"
 device		cmx
 
 #
 # The 'bktr' device is a PCI video capture device using the Brooktree
 # bt848/bt848a/bt849a/bt878/bt879 chipset. When used with a TV Tuner it forms a
 # TV card, e.g. Miro PC/TV, Hauppauge WinCast/TV WinTV, VideoLogic Captivator,
 # Intel Smart Video III, AverMedia, IMS Turbo, FlyVideo.
 #
 # options 	OVERRIDE_CARD=xxx
 # options 	OVERRIDE_TUNER=xxx
 # options 	OVERRIDE_MSP=1
 # options 	OVERRIDE_DBX=1
 # These options can be used to override the auto detection
 # The current values for xxx are found in src/sys/dev/bktr/bktr_card.h
 # Using sysctl(8) run-time overrides on a per-card basis can be made
 #
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_PAL
 # or
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_NTSC
 # Specifies the default video capture mode.
 # This is required for Dual Crystal (28&35MHz) boards where PAL is used
 # to prevent hangs during initialization, e.g. VideoLogic Captivator PCI.
 #
 # options 	BKTR_USE_PLL
 # This is required for PAL or SECAM boards with a 28MHz crystal and no 35MHz
 # crystal, e.g. some new Bt878 cards.
 #
 # options 	BKTR_GPIO_ACCESS
 # This enables IOCTLs which give user level access to the GPIO port.
 #
 # options 	BKTR_NO_MSP_RESET
 # Prevents the MSP34xx reset. Good if you initialize the MSP in another OS first
 #
 # options 	BKTR_430_FX_MODE
 # Switch Bt878/879 cards into Intel 430FX chipset compatibility mode.
 #
 # options 	BKTR_SIS_VIA_MODE
 # Switch Bt878/879 cards into SIS/VIA chipset compatibility mode which is
 # needed for some old SiS and VIA chipset motherboards.
 # This also allows Bt878/879 chips to work on old OPTi (<1997) chipset
 # motherboards and motherboards with bad or incomplete PCI 2.1 support.
 # As a rough guess, old = before 1998
 #
 # options 	BKTR_NEW_MSP34XX_DRIVER
 # Use new, more complete initialization scheme for the msp34* soundchip.
 # Should fix stereo autodetection if the old driver does only output
 # mono sound.
 
 #
 # options 	BKTR_USE_FREEBSD_SMBUS
 # Compile with FreeBSD SMBus implementation
 #
 # Brooktree driver has been ported to the new I2C framework. Thus,
 # you'll need to have the following 3 lines in the kernel config.
 #     device smbus
 #     device iicbus
 #     device iicbb
 #     device iicsmb
 # The iic and smb devices are only needed if you want to control other
 # I2C slaves connected to the external connector of some cards.
 #
 device		bktr
  
 #
 # PC Card/PCMCIA and Cardbus
 #
 # cbb: pci/cardbus bridge implementing YENTA interface
 # pccard: pccard slots
 # cardbus: cardbus slots
 device		cbb
 device		pccard
 device		cardbus
 
 #
 # MMC/SD
 #
 # mmc 		MMC/SD bus
 # mmcsd		MMC/SD memory card
 # sdhci		Generic PCI SD Host Controller
 #
 device		mmc
 device		mmcsd
 device		sdhci
 
 #
 # SMB bus
 #
 # System Management Bus support is provided by the 'smbus' device.
 # Access to the SMBus device is via the 'smb' device (/dev/smb*),
 # which is a child of the 'smbus' device.
 #
 # Supported devices:
 # smb		standard I/O through /dev/smb*
 #
 # Supported SMB interfaces:
 # iicsmb	I2C to SMB bridge with any iicbus interface
 # bktr		brooktree848 I2C hardware interface
 # intpm		Intel PIIX4 (82371AB, 82443MX) Power Management Unit
 # alpm		Acer Aladdin-IV/V/Pro2 Power Management Unit
 # ichsmb	Intel ICH SMBus controller chips (82801AA, 82801AB, 82801BA)
 # viapm		VIA VT82C586B/596B/686A and VT8233 Power Management Unit
 # amdpm		AMD 756 Power Management Unit
 # amdsmb	AMD 8111 SMBus 2.0 Controller
 # nfpm		NVIDIA nForce Power Management Unit
 # nfsmb		NVIDIA nForce2/3/4 MCP SMBus 2.0 Controller
 # ismt		Intel SMBus 2.0 controller chips (on Atom S1200, C2000)
 #
 device		smbus		# Bus support, required for smb below.
 
 device		intpm
 device		alpm
 device		ichsmb
 device		viapm
 device		amdpm
 device		amdsmb
 device		nfpm
 device		nfsmb
 device		ismt
 
 device		smb
 
 #
 # I2C Bus
 #
 # Philips i2c bus support is provided by the `iicbus' device.
 #
 # Supported devices:
 # ic	i2c network interface
 # iic	i2c standard io
 # iicsmb i2c to smb bridge. Allow i2c i/o with smb commands.
 # iicoc simple polling driver for OpenCores I2C controller
 #
 # Supported interfaces:
 # bktr	brooktree848 I2C software interface
 #
 # Other:
 # iicbb	generic I2C bit-banging code (needed by lpbb, bktr)
 #
 device		iicbus		# Bus support, required for ic/iic/iicsmb below.
 device		iicbb
 
 device		ic
 device		iic
 device		iicsmb		# smb over i2c bridge
 device		iicoc		# OpenCores I2C controller support
 
 # I2C peripheral devices
 #
 # ds133x	Dallas Semiconductor DS1337, DS1338 and DS1339 RTC
 # ds1374	Dallas Semiconductor DS1374 RTC
 # ds1672	Dallas Semiconductor DS1672 RTC
 # s35390a	Seiko Instruments S-35390A RTC
 #
 device		ds133x
 device		ds1374
 device		ds1672
 device		s35390a
 
 # Parallel-Port Bus
 #
 # Parallel port bus support is provided by the `ppbus' device.
 # Multiple devices may be attached to the parallel port, devices
 # are automatically probed and attached when found.
 #
 # Supported devices:
 # vpo	Iomega Zip Drive
 #	Requires SCSI disk support ('scbus' and 'da'), best
 #	performance is achieved with ports in EPP 1.9 mode.
 # lpt	Parallel Printer
 # plip	Parallel network interface
 # ppi	General-purpose I/O ("Geek Port") + IEEE1284 I/O
 # pps	Pulse per second Timing Interface
 # lpbb	Philips official parallel port I2C bit-banging interface
 # pcfclock Parallel port clock driver.
 #
 # Supported interfaces:
 # ppc	ISA-bus parallel port interfaces.
 #
 
 options 	PPC_PROBE_CHIPSET # Enable chipset specific detection
 				  # (see flags in ppc(4))
 options 	DEBUG_1284	# IEEE1284 signaling protocol debug
 options 	PERIPH_1284	# Makes your computer act as an IEEE1284
 				# compliant peripheral
 options 	DONTPROBE_1284	# Avoid boot detection of PnP parallel devices
 options 	VP0_DEBUG	# ZIP/ZIP+ debug
 options 	LPT_DEBUG	# Printer driver debug
 options 	PPC_DEBUG	# Parallel chipset level debug
 options 	PLIP_DEBUG	# Parallel network IP interface debug
 options 	PCFCLOCK_VERBOSE         # Verbose pcfclock driver
 options 	PCFCLOCK_MAX_RETRIES=5   # Maximum read tries (default 10)
 
 device		ppc
 hint.ppc.0.at="isa"
 hint.ppc.0.irq="7"
 device		ppbus
 device		vpo
 device		lpt
 device		plip
 device		ppi
 device		pps
 device		lpbb
 device		pcfclock
 
 #
 # Etherswitch framework and drivers
 #
 # etherswitch	The etherswitch(4) framework
 # miiproxy	Proxy device for miibus(4) functionality
 # 
 # Switch hardware support:
 # arswitch	Atheros switches
 # ip17x 	IC+ 17x family switches
 # rtl8366r	Realtek RTL8366 switches
 # ukswitch	Multi-PHY switches
 #
 device		etherswitch
 device		miiproxy
 device		arswitch
 device		ip17x
 device		rtl8366rb
 device		ukswitch
 
 # Kernel BOOTP support
 
 options 	BOOTP		# Use BOOTP to obtain IP address/hostname
 				# Requires NFSCL and NFS_ROOT
 options 	BOOTP_NFSROOT	# NFS mount root filesystem using BOOTP info
 options 	BOOTP_NFSV3	# Use NFS v3 to NFS mount root
 options 	BOOTP_COMPAT	# Workaround for broken bootp daemons.
 options 	BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP
 options 	BOOTP_BLOCKSIZE=8192 # Override NFS block size
 
 #
 # Add software watchdog routines.
 #
 options 	SW_WATCHDOG
 
 #
 # Add the software deadlock resolver thread.
 #
 options 	DEADLKRES
 
 #
 # Disable swapping of stack pages.  This option removes all
 # code which actually performs swapping, so it's not possible to turn
 # it back on at run-time.
 #
 # This is sometimes usable for systems which don't have any swap space
 # (see also sysctls "vm.defer_swapspace_pageouts" and
 # "vm.disable_swapspace_pageouts")
 #
 #options 	NO_SWAPPING
 
 # Set the number of sf_bufs to allocate. sf_bufs are virtual buffers
 # for sendfile(2) that are used to map file VM pages, and normally
 # default to a quantity that is roughly 16*MAXUSERS+512. You would
 # typically want about 4 of these for each simultaneous file send.
 #
 options 	NSFBUFS=1024
 
 #
 # Enable extra debugging code for locks.  This stores the filename and
 # line of whatever acquired the lock in the lock itself, and changes a
 # number of function calls to pass around the relevant data.  This is
 # not at all useful unless you are debugging lock code.  Note that
 # modules should be recompiled as this option modifies KBI.
 #
 options 	DEBUG_LOCKS
 
 
 #####################################################################
 # USB support
 # UHCI controller
 device		uhci
 # OHCI controller
 device		ohci
 # EHCI controller
 device		ehci
 # XHCI controller
 device		xhci
 # SL811 Controller
 #device		slhci
 # General USB code (mandatory for USB)
 device		usb
 #
 # USB Double Bulk Pipe devices
 device		udbp
 # USB Fm Radio
 device		ufm
 # USB temperature meter
 device		ugold
 # USB LED
 device		uled
 # Human Interface Device (anything with buttons and dials)
 device		uhid
 # USB keyboard
 device		ukbd
 # USB printer
 device		ulpt
 # USB mass storage driver (Requires scbus and da)
 device		umass
 # USB mass storage driver for device-side mode
 device		usfs
 # USB support for Belkin F5U109 and Magic Control Technology serial adapters
 device		umct
 # USB modem support
 device		umodem
 # USB mouse
 device		ums
 # USB touchpad(s)
 device		atp
 device		wsp
 # eGalax USB touch screen
 device		uep
 # Diamond Rio 500 MP3 player
 device		urio
 #
 # USB serial support
 device		ucom
 # USB support for 3G modem cards by Option, Novatel, Huawei and Sierra
 device		u3g
 # USB support for Technologies ARK3116 based serial adapters
 device		uark
 # USB support for Belkin F5U103 and compatible serial adapters
 device		ubsa
 # USB support for serial adapters based on the FT8U100AX and FT8U232AM
 device		uftdi
 # USB support for some Windows CE based serial communication.
 device		uipaq
 # USB support for Prolific PL-2303 serial adapters
 device		uplcom
 # USB support for Silicon Laboratories CP2101/CP2102 based USB serial adapters
 device		uslcom
 # USB Visor and Palm devices
 device		uvisor
 # USB serial support for DDI pocket's PHS
 device		uvscom
 #
 # USB ethernet support
 device		uether
 # ADMtek USB ethernet. Supports the LinkSys USB100TX,
 # the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX
 # and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus
 # eval board.
 device		aue
 
 # ASIX Electronics AX88172 USB 2.0 ethernet driver. Used in the
 # LinkSys USB200M and various other adapters.
 device		axe
 # ASIX Electronics AX88178A/AX88179 USB 2.0/3.0 gigabit ethernet driver.
 device		axge
 
 #
 # Devices which communicate using Ethernet over USB, particularly
 # Communication Device Class (CDC) Ethernet specification. Supports
 # Sharp Zaurus PDAs, some DOCSIS cable modems and so on.
 device		cdce
 #
 # CATC USB-EL1201A USB ethernet. Supports the CATC Netmate
 # and Netmate II, and the Belkin F5U111.
 device		cue
 #
 # Kawasaki LSI ethernet. Supports the LinkSys USB10T,
 # Entrega USB-NET-E45, Peracom Ethernet Adapter, the
 # 3Com 3c19250, the ADS Technologies USB-10BT, the ATen UC10T,
 # the Netgear EA101, the D-Link DSB-650, the SMC 2102USB
 # and 2104USB, and the Corega USB-T.
 device		kue
 #
 # RealTek RTL8150 USB to fast ethernet. Supports the Melco LUA-KTX
 # and the GREEN HOUSE GH-USB100B.
 device		rue
 #
 # Davicom DM9601E USB to fast ethernet. Supports the Corega FEther USB-TXC.
 device		udav
 #
 # RealTek RTL8152 USB to fast ethernet.
 device		ure
 #
 # Moschip MCS7730/MCS7840 USB to fast ethernet. Supports the Sitecom LN030.
 device		mos
 #
 # HSxPA devices from Option N.V
 device		uhso
 
 # Realtek RTL8188SU/RTL8191SU/RTL8192SU wireless driver
 device		rsu
 #
 # Ralink Technology RT2501USB/RT2601USB wireless driver
 device		rum
 # Ralink Technology RT2700U/RT2800U/RT3000U wireless driver
 device		run
 #
 # Atheros AR5523 wireless driver
 device		uath
 #
 # Conexant/Intersil PrismGT wireless driver
 device		upgt
 #
 # Ralink Technology RT2500USB wireless driver
 device		ural
 #
 # RNDIS USB ethernet driver
 device		urndis
 # Realtek RTL8187B/L wireless driver
 device		urtw
 #
 # ZyDas ZD1211/ZD1211B wireless driver
 device		zyd
 #
 # Sierra USB wireless driver
 device		usie
 
 # 
 # debugging options for the USB subsystem
 #
 options 	USB_DEBUG
 options 	U3G_DEBUG
 
 # options for ukbd:
 options 	UKBD_DFLT_KEYMAP	# specify the built-in keymap
 makeoptions	UKBD_DFLT_KEYMAP=jp.pc98
 
 # options for uplcom:
 options 	UPLCOM_INTR_INTERVAL=100	# interrupt pipe interval
 						# in milliseconds
 
 # options for uvscom:
 options 	UVSCOM_DEFAULT_OPKTSIZE=8	# default output packet size
 options 	UVSCOM_INTR_INTERVAL=100	# interrupt pipe interval
 						# in milliseconds
 
 #####################################################################
 # FireWire support
 
 device		firewire	# FireWire bus code
 device		sbp		# SCSI over Firewire (Requires scbus and da)
 device		sbp_targ	# SBP-2 Target mode  (Requires scbus and targ)
 device		fwe		# Ethernet over FireWire (non-standard!)
 device		fwip		# IP over FireWire (RFC2734 and RFC3146)
 
 #####################################################################
 # dcons support (Dumb Console Device)
 
 device		dcons			# dumb console driver
 device		dcons_crom		# FireWire attachment
 options 	DCONS_BUF_SIZE=16384	# buffer size
 options 	DCONS_POLL_HZ=100	# polling rate
 options 	DCONS_FORCE_CONSOLE=0	# force to be the primary console
 options 	DCONS_FORCE_GDB=1	# force to be the gdb device
 
 #####################################################################
 # crypto subsystem
 #
 # This is a port of the OpenBSD crypto framework.  Include this when
 # configuring IPSEC and when you have a h/w crypto device to accelerate
 # user applications that link to OpenSSL.
 #
 # Drivers are ports from OpenBSD with some simple enhancements that have
 # been fed back to OpenBSD.
 
 device		crypto		# core crypto support
 
 # Only install the cryptodev device if you are running tests, or know
 # specifically why you need it.  In most cases, it is not needed and
 # will make things slower.
 device		cryptodev	# /dev/crypto for access to h/w
 
 device		rndtest		# FIPS 140-2 entropy tester
 
 device		hifn		# Hifn 7951, 7781, etc.
 options 	HIFN_DEBUG	# enable debugging support: hw.hifn.debug
 options 	HIFN_RNDTEST	# enable rndtest support
 
 device		ubsec		# Broadcom 5501, 5601, 58xx
 options 	UBSEC_DEBUG	# enable debugging support: hw.ubsec.debug
 options 	UBSEC_RNDTEST	# enable rndtest support
 
 #####################################################################
 
 
 #
 # Embedded system options:
 #
 # An embedded system might want to run something other than init.
 options 	INIT_PATH=/sbin/init:/rescue/init
 
 # Debug options
 options 	BUS_DEBUG	# enable newbus debugging
 options 	DEBUG_VFS_LOCKS	# enable VFS lock debugging
 options 	SOCKBUF_DEBUG	# enable sockbuf last record/mb tail checking
 options 	IFMEDIA_DEBUG	# enable debugging in net/if_media.c
 
 #
 # Verbose SYSINIT
 #
 # Make the SYSINIT process performed by mi_startup() verbose.  This is very
 # useful when porting to a new architecture.  If DDB is also enabled, this
 # will print function names instead of addresses.
 options 	VERBOSE_SYSINIT
 
 #####################################################################
 # SYSV IPC KERNEL PARAMETERS
 #
 # Maximum number of System V semaphores that can be used on the system at
 # one time.
 options 	SEMMNI=11
 
 # Total number of semaphores system wide
 options 	SEMMNS=61
 
 # Total number of undo structures in system
 options 	SEMMNU=31
 
 # Maximum number of System V semaphores that can be used by a single process
 # at one time.
 options 	SEMMSL=61
 
 # Maximum number of operations that can be outstanding on a single System V
 # semaphore at one time.
 options 	SEMOPM=101
 
 # Maximum number of undo operations that can be outstanding on a single
 # System V semaphore at one time.
 options 	SEMUME=11
 
 # Maximum number of shared memory pages system wide.
 options 	SHMALL=1025
 
 # Maximum size, in bytes, of a single System V shared memory region.
 options 	SHMMAX=(SHMMAXPGS*PAGE_SIZE+1)
 options 	SHMMAXPGS=1025
 
 # Minimum size, in bytes, of a single System V shared memory region.
 options 	SHMMIN=2
 
 # Maximum number of shared memory regions that can be used on the system
 # at one time.
 options 	SHMMNI=33
 
 # Maximum number of System V shared memory regions that can be attached to
 # a single process at one time.
 options 	SHMSEG=9
 
 # Set the amount of time (in seconds) the system will wait before
 # rebooting automatically when a kernel panic occurs.  If set to (-1),
 # the system will wait indefinitely until a key is pressed on the
 # console.
 options 	PANIC_REBOOT_WAIT_TIME=16
 
 # Attempt to bypass the buffer cache and put data directly into the
 # userland buffer for read operation when O_DIRECT flag is set on the
 # file.  Both offset and length of the read operation must be
 # multiples of the physical media sector size.
 #
 options 	DIRECTIO
 
 # Specify a lower limit for the number of swap I/O buffers.  They are
 # (among other things) used when bypassing the buffer cache due to
 # DIRECTIO kernel option enabled and O_DIRECT flag set on file.
 #
 options 	NSWBUF_MIN=120
 
 #####################################################################
 
 # More undocumented options for linting.
 # Note that documenting these is not considered an affront.
 
 options 	CAM_DEBUG_DELAY
 
 # VFS cluster debugging.
 options 	CLUSTERDEBUG
 
 options 	DEBUG
 
 # Kernel filelock debugging.
 options 	LOCKF_DEBUG
 
 # System V compatible message queues
 # Please note that the values provided here are used to test kernel
 # building.  The defaults in the sources provide almost the same numbers.
 # MSGSSZ must be a power of 2 between 8 and 1024.
 options 	MSGMNB=2049	# Max number of chars in queue
 options 	MSGMNI=41	# Max number of message queue identifiers
 options 	MSGSEG=2049	# Max number of message segments
 options 	MSGSSZ=16	# Size of a message segment
 options 	MSGTQL=41	# Max number of messages in system
 
 options 	NBUF=512	# Number of buffer headers
 
 options 	SCSI_NCR_DEBUG
 options 	SCSI_NCR_MAX_SYNC=10000
 options 	SCSI_NCR_MAX_WIDE=1
 options 	SCSI_NCR_MYADDR=7
 
 options 	SC_DEBUG_LEVEL=5	# Syscons debug level
 options 	SC_RENDER_DEBUG	# syscons rendering debugging
 
 options 	VFS_BIO_DEBUG	# VFS buffer I/O debugging
 
 options 	KSTACK_MAX_PAGES=32 # Maximum pages to give the kernel stack
 options 	KSTACK_USAGE_PROF
 
 # Adaptec Array Controller driver options
 options 	AAC_DEBUG	# Debugging levels:
 				# 0 - quiet, only emit warnings
 				# 1 - noisy, emit major function
 				#     points and things done
 				# 2 - extremely noisy, emit trace
 				#     items in loops, etc.
 
 # Resource Accounting
 options 	RACCT
 
 # Resource Limits
 options 	RCTL
 
 # Yet more undocumented options for linting.
 # BKTR_ALLOC_PAGES has no effect except to cause warnings, and
 # BROOKTREE_ALLOC_PAGES hasn't actually been superseded by it, since the
 # driver still mostly spells this option BROOKTREE_ALLOC_PAGES.
 ##options 	BKTR_ALLOC_PAGES=(217*4+1)
 options 	BROOKTREE_ALLOC_PAGES=(217*4+1)
 options 	MAXFILES=999
 
 # Random number generator
 # Only ONE of the below two may be used; they are mutually exclusive.
 # If neither is present, then the Fortuna algorithm is selected.
 #options 	RANDOM_YARROW	# Yarrow CSPRNG (old default)
 #options 	RANDOM_LOADABLE	# Allow the algorithm to be loaded as
 				# a module.
 # Select this to allow high-rate but potentially expensive
 # harvesting of Slab-Allocator entropy. In very high-rate
 # situations the value of doing this is dubious at best.
 options 	RANDOM_ENABLE_UMA	# slab allocator
 
 # Module to enable execution of application via emulators like QEMU
 options         IMAGACT_BINMISC
 
 # Intel em(4) driver
 options		EM_MULTIQUEUE # Activate multiqueue features/disable MSI-X
 
 # zlib I/O stream support
 # This enables support for compressed core dumps.
 options 	GZIO
Index: head/sys/conf/options
===================================================================
--- head/sys/conf/options	(revision 297747)
+++ head/sys/conf/options	(revision 297748)
@@ -1,970 +1,972 @@
 # $FreeBSD$
 #
 #        On the handling of kernel options
 #
 # All kernel options should be listed in NOTES, with suitable
 # descriptions.  Negative options (options that make some code not
 # compile) should be commented out; LINT (generated from NOTES) should
 # compile as much code as possible.  Try to structure option-using
 # code so that a single option only switch code on, or only switch
 # code off, to make it possible to have a full compile-test.  If
 # necessary, you can check for COMPILING_LINT to get maximum code
 # coverage.
 #
 # All new options shall also be listed in either "conf/options" or
 # "conf/options.<machine>".  Options that affect a single source-file
 # <xxx>.[c|s] should be directed into "opt_<xxx>.h", while options
 # that affect multiple files should either go in "opt_global.h" if
 # this is a kernel-wide option (used just about everywhere), or in
 # "opt_<option-name-in-lower-case>.h" if it affects only some files.
 # Note that the effect of listing only an option without a
 # header-file-name in conf/options (and cousins) is that the last
 # convention is followed.
 #
 # This handling scheme is not yet fully implemented.
 #
 #
 # Format of this file:
 # Option name	filename
 #
 # If filename is missing, the default is
 # opt_<name-of-option-in-lower-case>.h
 
 AAC_DEBUG		opt_aac.h
 AACRAID_DEBUG		opt_aacraid.h
 AHC_ALLOW_MEMIO		opt_aic7xxx.h
 AHC_TMODE_ENABLE	opt_aic7xxx.h
 AHC_DUMP_EEPROM		opt_aic7xxx.h
 AHC_DEBUG		opt_aic7xxx.h
 AHC_DEBUG_OPTS		opt_aic7xxx.h
 AHC_REG_PRETTY_PRINT	opt_aic7xxx.h
 AHD_DEBUG		opt_aic79xx.h
 AHD_DEBUG_OPTS		opt_aic79xx.h
 AHD_TMODE_ENABLE	opt_aic79xx.h	
 AHD_REG_PRETTY_PRINT	opt_aic79xx.h
 ADW_ALLOW_MEMIO		opt_adw.h
 
 TWA_DEBUG		opt_twa.h
 TWA_FLASH_FIRMWARE	opt_twa.h
 
 # Debugging options.
 ALT_BREAK_TO_DEBUGGER	opt_kdb.h
 BREAK_TO_DEBUGGER	opt_kdb.h
 DDB
 DDB_BUFR_SIZE	opt_ddb.h
 DDB_CAPTURE_DEFAULTBUFSIZE	opt_ddb.h
 DDB_CAPTURE_MAXBUFSIZE	opt_ddb.h
 DDB_CTF		opt_ddb.h
 DDB_NUMSYM	opt_ddb.h
 GDB
 KDB		opt_global.h
 KDB_TRACE	opt_kdb.h
 KDB_UNATTENDED	opt_kdb.h
 KLD_DEBUG	opt_kld.h
 SYSCTL_DEBUG	opt_sysctl.h
 EARLY_PRINTF	opt_global.h
 TEXTDUMP_PREFERRED	opt_ddb.h
 TEXTDUMP_VERBOSE	opt_ddb.h
 
 # Miscellaneous options.
 ADAPTIVE_LOCKMGRS
 ALQ
 ALTERA_SDCARD_FAST_SIM	opt_altera_sdcard.h
 ATSE_CFI_HACK	opt_cfi.h
 AUDIT		opt_global.h
 BOOTHOWTO	opt_global.h
 BOOTVERBOSE	opt_global.h
 CALLOUT_PROFILING
 CAPABILITIES	opt_capsicum.h
 CAPABILITY_MODE	opt_capsicum.h
 COMPAT_43	opt_compat.h
 COMPAT_43TTY	opt_compat.h
 COMPAT_FREEBSD4	opt_compat.h
 COMPAT_FREEBSD5	opt_compat.h
 COMPAT_FREEBSD6	opt_compat.h
 COMPAT_FREEBSD7	opt_compat.h
 COMPAT_FREEBSD9	opt_compat.h
 COMPAT_FREEBSD10	opt_compat.h
 COMPAT_CLOUDABI64	opt_dontuse.h
 COMPAT_LINUXKPI	opt_compat.h
 COMPILING_LINT	opt_global.h
 CY_PCI_FASTINTR
 DEADLKRES	opt_watchdog.h
+DEVICE_NUMA
 EXT_RESOURCES	opt_global.h
 DIRECTIO
 FILEMON		opt_dontuse.h
 FFCLOCK
 FULL_PREEMPTION	opt_sched.h
 GZIO		opt_gzio.h
 IMAGACT_BINMISC		opt_dontuse.h
 IPI_PREEMPTION	opt_sched.h
 GEOM_AES	opt_geom.h
 GEOM_BDE	opt_geom.h
 GEOM_BSD	opt_geom.h
 GEOM_CACHE	opt_geom.h
 GEOM_CONCAT	opt_geom.h
 GEOM_ELI	opt_geom.h
 GEOM_FOX	opt_geom.h
 GEOM_GATE	opt_geom.h
 GEOM_JOURNAL	opt_geom.h
 GEOM_LABEL	opt_geom.h
 GEOM_LABEL_GPT	opt_geom.h
 GEOM_LINUX_LVM	opt_geom.h
 GEOM_MAP	opt_geom.h
 GEOM_MBR	opt_geom.h
 GEOM_MIRROR	opt_geom.h
 GEOM_MOUNTVER	opt_geom.h
 GEOM_MULTIPATH	opt_geom.h
 GEOM_NOP	opt_geom.h
 GEOM_PART_APM	opt_geom.h
 GEOM_PART_BSD	opt_geom.h
 GEOM_PART_BSD64	opt_geom.h
 GEOM_PART_EBR	opt_geom.h
 GEOM_PART_EBR_COMPAT	opt_geom.h
 GEOM_PART_GPT	opt_geom.h
 GEOM_PART_LDM	opt_geom.h
 GEOM_PART_MBR	opt_geom.h
 GEOM_PART_PC98	opt_geom.h
 GEOM_PART_VTOC8	opt_geom.h
 GEOM_PC98	opt_geom.h
 GEOM_RAID	opt_geom.h
 GEOM_RAID3	opt_geom.h
 GEOM_SHSEC	opt_geom.h
 GEOM_STRIPE	opt_geom.h
 GEOM_SUNLABEL	opt_geom.h
 GEOM_UZIP	opt_geom.h
 GEOM_UZIP_DEBUG	opt_geom.h
 GEOM_VINUM	opt_geom.h
 GEOM_VIRSTOR	opt_geom.h
 GEOM_VOL	opt_geom.h
 GEOM_ZERO	opt_geom.h
 KDTRACE_HOOKS	opt_global.h
 KDTRACE_FRAME	opt_kdtrace.h
 KN_HASHSIZE	opt_kqueue.h
 KSTACK_MAX_PAGES
 KSTACK_PAGES
 KSTACK_USAGE_PROF
 KTRACE
 KTRACE_REQUEST_POOL	opt_ktrace.h
 LIBICONV
 MAC		opt_global.h
 MAC_BIBA	opt_dontuse.h
 MAC_BSDEXTENDED	opt_dontuse.h
 MAC_IFOFF	opt_dontuse.h
 MAC_LOMAC	opt_dontuse.h
 MAC_MLS		opt_dontuse.h
 MAC_NONE	opt_dontuse.h
 MAC_PARTITION	opt_dontuse.h
 MAC_PORTACL	opt_dontuse.h
 MAC_SEEOTHERUIDS	opt_dontuse.h
 MAC_STATIC	opt_mac.h
 MAC_STUB	opt_dontuse.h
 MAC_TEST	opt_dontuse.h
 MD_ROOT		opt_md.h
 MD_ROOT_FSTYPE	opt_md.h
 MD_ROOT_SIZE	opt_md.h
 MFI_DEBUG	opt_mfi.h
 MFI_DECODE_LOG	opt_mfi.h
 MPROF_BUFFERS	opt_mprof.h
 MPROF_HASH_SIZE	opt_mprof.h
 NEW_PCIB	opt_global.h
 NO_ADAPTIVE_MUTEXES	opt_adaptive_mutexes.h
 NO_ADAPTIVE_RWLOCKS
 NO_ADAPTIVE_SX
 NO_EVENTTIMERS		opt_timer.h
 NO_SYSCTL_DESCR	opt_global.h
 NSWBUF_MIN	opt_swap.h
 MBUF_PACKET_ZONE_DISABLE	opt_global.h
 PANIC_REBOOT_WAIT_TIME	opt_panic.h
 PCI_IOV		opt_global.h
 PPC_DEBUG	opt_ppc.h
 PPC_PROBE_CHIPSET	opt_ppc.h
 PPS_SYNC	opt_ntp.h
 PREEMPTION	opt_sched.h
 QUOTA
 SCHED_4BSD	opt_sched.h
 SCHED_STATS	opt_sched.h
 SCHED_ULE	opt_sched.h
 SLEEPQUEUE_PROFILING
 SLHCI_DEBUG	opt_slhci.h
 SPX_HACK
 STACK		opt_stack.h
 SUIDDIR
 MSGMNB		opt_sysvipc.h
 MSGMNI		opt_sysvipc.h
 MSGSEG		opt_sysvipc.h
 MSGSSZ		opt_sysvipc.h
 MSGTQL		opt_sysvipc.h
 SEMMNI		opt_sysvipc.h
 SEMMNS		opt_sysvipc.h
 SEMMNU		opt_sysvipc.h
 SEMMSL		opt_sysvipc.h
 SEMOPM		opt_sysvipc.h
 SEMUME		opt_sysvipc.h
 SHMALL		opt_sysvipc.h
 SHMMAX		opt_sysvipc.h
 SHMMAXPGS	opt_sysvipc.h
 SHMMIN		opt_sysvipc.h
 SHMMNI		opt_sysvipc.h
 SHMSEG		opt_sysvipc.h
 SYSVMSG		opt_sysvipc.h
 SYSVSEM		opt_sysvipc.h
 SYSVSHM		opt_sysvipc.h
 SW_WATCHDOG	opt_watchdog.h
 TURNSTILE_PROFILING
 UMTX_PROFILING
 VERBOSE_SYSINIT
 WLCACHE		opt_wavelan.h
 WLDEBUG		opt_wavelan.h
 
 # POSIX kernel options
 P1003_1B_MQUEUE			opt_posix.h
 P1003_1B_SEMAPHORES		opt_posix.h
 _KPOSIX_PRIORITY_SCHEDULING	opt_posix.h
 
 # Do we want the config file compiled into the kernel?
 INCLUDE_CONFIG_FILE	opt_config.h
 
 # Options for static filesystems.  These should only be used at config
 # time, since the corresponding lkms cannot work if there are any static
 # dependencies.  Unusability is enforced by hiding the defines for the
 # options in a never-included header.
 AUTOFS		opt_dontuse.h
 CD9660		opt_dontuse.h
 EXT2FS		opt_dontuse.h
 FDESCFS		opt_dontuse.h
 FFS		opt_dontuse.h
 FUSE		opt_dontuse.h
 MSDOSFS		opt_dontuse.h
 NANDFS		opt_dontuse.h
 NULLFS		opt_dontuse.h
 PROCFS		opt_dontuse.h
 PSEUDOFS	opt_dontuse.h
 REISERFS	opt_dontuse.h
 SMBFS		opt_dontuse.h
 TMPFS		opt_dontuse.h
 UDF		opt_dontuse.h
 UNIONFS		opt_dontuse.h
 ZFS		opt_dontuse.h
 
 # Pseudofs debugging
 PSEUDOFS_TRACE	opt_pseudofs.h
 
 # In-kernel GSS-API
 KGSSAPI		opt_kgssapi.h
 KGSSAPI_DEBUG	opt_kgssapi.h
 
 # These static filesystems have one slightly bogus static dependency in
 # sys/i386/i386/autoconf.c.  If any of these filesystems are
 # statically compiled into the kernel, code for mounting them as root
 # filesystems will be enabled - but look below.
 # NFSCL - client
 # NFSD - server
 NFSCL		opt_nfs.h
 NFSD		opt_nfs.h
 
 # filesystems and libiconv bridge
 CD9660_ICONV	opt_dontuse.h
 MSDOSFS_ICONV	opt_dontuse.h
 UDF_ICONV	opt_dontuse.h
 
 # If you are following the conditions in the copyright,
 # you can enable soft-updates which will speed up a lot of thigs
 # and make the system safer from crashes at the same time.
 # otherwise a STUB module will be compiled in.
 SOFTUPDATES	opt_ffs.h
 
 # On small, embedded systems, it can be useful to turn off support for
 # snapshots.  It saves about 30-40k for a feature that would be lightly
 # used, if it is used at all.
 NO_FFS_SNAPSHOT	opt_ffs.h
 
 # Enabling this option turns on support for Access Control Lists in UFS,
 # which can be used to support high security configurations.  Depends on
 # UFS_EXTATTR.
 UFS_ACL		opt_ufs.h
 
 # Enabling this option turns on support for extended attributes in UFS-based
 # filesystems, which can be used to support high security configurations
 # as well as new filesystem features.
 UFS_EXTATTR	opt_ufs.h
 UFS_EXTATTR_AUTOSTART	opt_ufs.h
 
 # Enable fast hash lookups for large directories on UFS-based filesystems.
 UFS_DIRHASH	opt_ufs.h
 
 # Enable gjournal-based UFS journal.
 UFS_GJOURNAL	opt_ufs.h
 
 # The below sentence is not in English, and neither is this one.
 # We plan to remove the static dependences above, with a
 # <filesystem>_ROOT option to control if it usable as root.  This list
 # allows these options to be present in config files already (though
 # they won't make any difference yet).
 NFS_ROOT	opt_nfsroot.h
 
 # SMB/CIFS requester
 NETSMB		opt_netsmb.h
 
 # Options used only in subr_param.c.
 HZ		opt_param.h
 MAXFILES	opt_param.h
 NBUF		opt_param.h
 NSFBUFS		opt_param.h
 VM_BCACHE_SIZE_MAX	opt_param.h
 VM_SWZONE_SIZE_MAX	opt_param.h
 MAXUSERS
 DFLDSIZ		opt_param.h
 MAXDSIZ		opt_param.h
 MAXSSIZ		opt_param.h
 
 # Generic SCSI options.
 CAM_MAX_HIGHPOWER	opt_cam.h
 CAMDEBUG		opt_cam.h
 CAM_DEBUG_COMPILE	opt_cam.h
 CAM_DEBUG_DELAY		opt_cam.h
 CAM_DEBUG_BUS		opt_cam.h
 CAM_DEBUG_TARGET	opt_cam.h
 CAM_DEBUG_LUN		opt_cam.h
 CAM_DEBUG_FLAGS		opt_cam.h
 CAM_BOOT_DELAY		opt_cam.h
 SCSI_DELAY		opt_scsi.h
 SCSI_NO_SENSE_STRINGS	opt_scsi.h
 SCSI_NO_OP_STRINGS	opt_scsi.h
 
 # Options used only in cam/ata/ata_da.c
 ADA_TEST_FAILURE	opt_ada.h
 ATA_STATIC_ID		opt_ada.h
 
 # Options used only in cam/scsi/scsi_cd.c
 CHANGER_MIN_BUSY_SECONDS	opt_cd.h
 CHANGER_MAX_BUSY_SECONDS	opt_cd.h
 
 # Options used only in cam/scsi/scsi_sa.c.
 SA_IO_TIMEOUT		opt_sa.h
 SA_SPACE_TIMEOUT	opt_sa.h
 SA_REWIND_TIMEOUT	opt_sa.h
 SA_ERASE_TIMEOUT	opt_sa.h
 SA_1FM_AT_EOD		opt_sa.h
 
 # Options used only in cam/scsi/scsi_pt.c
 SCSI_PT_DEFAULT_TIMEOUT	opt_pt.h
 
 # Options used only in cam/scsi/scsi_ses.c
 SES_ENABLE_PASSTHROUGH	opt_ses.h
 
 # Options used in dev/sym/ (Symbios SCSI driver).
 SYM_SETUP_LP_PROBE_MAP	opt_sym.h	#-Low Priority Probe Map (bits)
 					# Allows the ncr to take precedence
 					# 1 (1<<0) -> 810a, 860
 					# 2 (1<<1) -> 825a, 875, 885, 895
 					# 4 (1<<2) -> 895a, 896, 1510d 
 SYM_SETUP_SCSI_DIFF	opt_sym.h	#-HVD support for 825a, 875, 885
 					# disabled:0 (default), enabled:1
 SYM_SETUP_PCI_PARITY	opt_sym.h	#-PCI parity checking
 					# disabled:0, enabled:1 (default)
 SYM_SETUP_MAX_LUN	opt_sym.h	#-Number of LUNs supported
 					# default:8, range:[1..64]
 
 # Options used only in dev/ncr/*
 SCSI_NCR_DEBUG		opt_ncr.h
 SCSI_NCR_MAX_SYNC	opt_ncr.h
 SCSI_NCR_MAX_WIDE	opt_ncr.h
 SCSI_NCR_MYADDR		opt_ncr.h
 
 # Options used only in dev/isp/*
 ISP_TARGET_MODE		opt_isp.h
 ISP_FW_CRASH_DUMP	opt_isp.h
 ISP_DEFAULT_ROLES	opt_isp.h
 ISP_INTERNAL_TARGET	opt_isp.h
 
 # Options used only in dev/iscsi
 ISCSI_INITIATOR_DEBUG	opt_iscsi_initiator.h
 
 # Net stuff.
 ACCEPT_FILTER_DATA
 ACCEPT_FILTER_DNS
 ACCEPT_FILTER_HTTP
 ALTQ			opt_global.h
 ALTQ_CBQ		opt_altq.h
 ALTQ_CDNR		opt_altq.h
 ALTQ_CODEL		opt_altq.h
 ALTQ_DEBUG		opt_altq.h
 ALTQ_HFSC		opt_altq.h
 ALTQ_FAIRQ		opt_altq.h
 ALTQ_NOPCC		opt_altq.h
 ALTQ_PRIQ		opt_altq.h
 ALTQ_RED		opt_altq.h
 ALTQ_RIO		opt_altq.h
 BOOTP			opt_bootp.h
 BOOTP_BLOCKSIZE		opt_bootp.h
 BOOTP_COMPAT		opt_bootp.h
 BOOTP_NFSROOT		opt_bootp.h
 BOOTP_NFSV3		opt_bootp.h
 BOOTP_WIRED_TO		opt_bootp.h
 DEVICE_POLLING
 DUMMYNET		opt_ipdn.h
 INET			opt_inet.h
 INET6			opt_inet6.h
 IPDIVERT
 IPFILTER		opt_ipfilter.h
 IPFILTER_DEFAULT_BLOCK	opt_ipfilter.h
 IPFILTER_LOG		opt_ipfilter.h
 IPFILTER_LOOKUP		opt_ipfilter.h
 IPFIREWALL		opt_ipfw.h
 IPFIREWALL_DEFAULT_TO_ACCEPT	opt_ipfw.h
 IPFIREWALL_NAT		opt_ipfw.h
 IPFIREWALL_VERBOSE	opt_ipfw.h
 IPFIREWALL_VERBOSE_LIMIT	opt_ipfw.h
 IPSEC			opt_ipsec.h
 IPSEC_DEBUG		opt_ipsec.h
 IPSEC_FILTERTUNNEL	opt_ipsec.h
 IPSEC_NAT_T		opt_ipsec.h
 IPSTEALTH
 KRPC
 LIBALIAS
 LIBMBPOOL
 LIBMCHAIN
 MBUF_PROFILING
 MBUF_STRESS_TEST
 MROUTING		opt_mrouting.h
 NFSLOCKD
 PCBGROUP		opt_pcbgroup.h
 PF_DEFAULT_TO_DROP	opt_pf.h
 RADIX_MPATH		opt_mpath.h
 ROUTETABLES		opt_route.h
 RSS			opt_rss.h
 SLIP_IFF_OPTS		opt_slip.h
 TCPDEBUG
 TCPPCAP		opt_global.h
 SIFTR
 TCP_OFFLOAD		opt_inet.h # Enable code to dispatch TCP offloading
 TCP_RFC7413		opt_inet.h
 TCP_RFC7413_MAX_KEYS	opt_inet.h
 TCP_SIGNATURE		opt_inet.h
 VLAN_ARRAY		opt_vlan.h
 XBONEHACK
 FLOWTABLE		opt_route.h
 FLOWTABLE_HASH_ALL	opt_route.h
 
 #
 # SCTP
 #
 SCTP			opt_sctp.h
 SCTP_DEBUG		opt_sctp.h # Enable debug printfs
 SCTP_WITH_NO_CSUM	opt_sctp.h # Use this at your peril
 SCTP_LOCK_LOGGING	opt_sctp.h # Log to KTR lock activity
 SCTP_MBUF_LOGGING	opt_sctp.h # Log to KTR general mbuf aloc/free
 SCTP_MBCNT_LOGGING	opt_sctp.h # Log to KTR mbcnt activity
 SCTP_PACKET_LOGGING	opt_sctp.h # Log to a packet buffer last N packets
 SCTP_LTRACE_CHUNKS	opt_sctp.h # Log to KTR chunks processed
 SCTP_LTRACE_ERRORS	opt_sctp.h # Log to KTR error returns.
 SCTP_USE_PERCPU_STAT	opt_sctp.h # Use per cpu stats.
 SCTP_MCORE_INPUT	opt_sctp.h # Have multiple input threads for input mbufs
 SCTP_LOCAL_TRACE_BUF	opt_sctp.h # Use tracebuffer exported via sysctl
 SCTP_DETAILED_STR_STATS	opt_sctp.h # Use per PR-SCTP policy stream stats
 #
 #
 #
 
 # Netgraph(4). Use option NETGRAPH to enable the base netgraph code.
 # Each netgraph node type can be either be compiled into the kernel
 # or loaded dynamically. To get the former, include the corresponding
 # option below. Each type has its own man page, e.g. ng_async(4).
 NETGRAPH
 NETGRAPH_DEBUG		opt_netgraph.h
 NETGRAPH_ASYNC		opt_netgraph.h
 NETGRAPH_ATMLLC		opt_netgraph.h
 NETGRAPH_ATM_ATMPIF	opt_netgraph.h
 NETGRAPH_BLUETOOTH	opt_netgraph.h
 NETGRAPH_BLUETOOTH_BT3C	opt_netgraph.h
 NETGRAPH_BLUETOOTH_H4	opt_netgraph.h
 NETGRAPH_BLUETOOTH_HCI	opt_netgraph.h
 NETGRAPH_BLUETOOTH_L2CAP	opt_netgraph.h
 NETGRAPH_BLUETOOTH_SOCKET	opt_netgraph.h
 NETGRAPH_BLUETOOTH_UBT	opt_netgraph.h
 NETGRAPH_BLUETOOTH_UBTBCMFW	opt_netgraph.h
 NETGRAPH_BPF		opt_netgraph.h
 NETGRAPH_BRIDGE		opt_netgraph.h
 NETGRAPH_CAR		opt_netgraph.h
 NETGRAPH_CISCO		opt_netgraph.h
 NETGRAPH_DEFLATE	opt_netgraph.h
 NETGRAPH_DEVICE		opt_netgraph.h
 NETGRAPH_ECHO		opt_netgraph.h
 NETGRAPH_EIFACE		opt_netgraph.h
 NETGRAPH_ETHER		opt_netgraph.h
 NETGRAPH_ETHER_ECHO	opt_netgraph.h
 NETGRAPH_FEC		opt_netgraph.h
 NETGRAPH_FRAME_RELAY	opt_netgraph.h
 NETGRAPH_GIF		opt_netgraph.h
 NETGRAPH_GIF_DEMUX	opt_netgraph.h
 NETGRAPH_HOLE		opt_netgraph.h
 NETGRAPH_IFACE		opt_netgraph.h
 NETGRAPH_IP_INPUT	opt_netgraph.h
 NETGRAPH_IPFW		opt_netgraph.h
 NETGRAPH_KSOCKET	opt_netgraph.h
 NETGRAPH_L2TP		opt_netgraph.h
 NETGRAPH_LMI		opt_netgraph.h
 # MPPC compression requires proprietary files (not included)
 NETGRAPH_MPPC_COMPRESSION	opt_netgraph.h
 NETGRAPH_MPPC_ENCRYPTION	opt_netgraph.h
 NETGRAPH_NAT		opt_netgraph.h
 NETGRAPH_NETFLOW	opt_netgraph.h
 NETGRAPH_ONE2MANY	opt_netgraph.h
 NETGRAPH_PATCH		opt_netgraph.h
 NETGRAPH_PIPE		opt_netgraph.h
 NETGRAPH_PPP		opt_netgraph.h
 NETGRAPH_PPPOE		opt_netgraph.h
 NETGRAPH_PPTPGRE	opt_netgraph.h
 NETGRAPH_PRED1		opt_netgraph.h
 NETGRAPH_RFC1490	opt_netgraph.h
 NETGRAPH_SOCKET		opt_netgraph.h
 NETGRAPH_SPLIT		opt_netgraph.h
 NETGRAPH_SPPP		opt_netgraph.h
 NETGRAPH_TAG		opt_netgraph.h
 NETGRAPH_TCPMSS		opt_netgraph.h
 NETGRAPH_TEE		opt_netgraph.h
 NETGRAPH_TTY		opt_netgraph.h
 NETGRAPH_UI		opt_netgraph.h
 NETGRAPH_VJC		opt_netgraph.h
 NETGRAPH_VLAN		opt_netgraph.h
 
 # NgATM options
 NGATM_ATM		opt_netgraph.h
 NGATM_ATMBASE		opt_netgraph.h
 NGATM_SSCOP		opt_netgraph.h
 NGATM_SSCFU		opt_netgraph.h
 NGATM_UNI		opt_netgraph.h
 NGATM_CCATM		opt_netgraph.h
 
 # DRM options
 DRM_DEBUG		opt_drm.h
 
 TI_SF_BUF_JUMBO		opt_ti.h
 TI_JUMBO_HDRSPLIT	opt_ti.h
 
 # XXX Conflict: # of devices vs network protocol (Native ATM).
 # This makes "atm.h" unusable.
 NATM
 
 # DPT driver debug flags
 DPT_MEASURE_PERFORMANCE	opt_dpt.h
 DPT_RESET_HBA		opt_dpt.h
 
 # Misc debug flags.  Most of these should probably be replaced with
 # 'DEBUG', and then let people recompile just the interesting modules
 # with 'make CC="cc -DDEBUG"'.
 CLUSTERDEBUG		opt_debug_cluster.h
 DEBUG_1284		opt_ppb_1284.h
 VP0_DEBUG		opt_vpo.h
 LPT_DEBUG		opt_lpt.h
 PLIP_DEBUG		opt_plip.h
 LOCKF_DEBUG		opt_debug_lockf.h
 SI_DEBUG		opt_debug_si.h
 IFMEDIA_DEBUG		opt_ifmedia.h
 
 # Fb options
 FB_DEBUG		opt_fb.h
 FB_INSTALL_CDEV		opt_fb.h
 
 # ppbus related options
 PERIPH_1284		opt_ppb_1284.h
 DONTPROBE_1284		opt_ppb_1284.h
 
 # smbus related options
 ENABLE_ALART		opt_intpm.h
 
 # These cause changes all over the kernel
 BLKDEV_IOSIZE		opt_global.h
 BURN_BRIDGES		opt_global.h
 DEBUG			opt_global.h
 DEBUG_LOCKS		opt_global.h
 DEBUG_VFS_LOCKS		opt_global.h
 DFLTPHYS		opt_global.h
 DIAGNOSTIC		opt_global.h
 INVARIANT_SUPPORT	opt_global.h
 INVARIANTS		opt_global.h
 MAXCPU			opt_global.h
 MAXMEMDOM		opt_global.h
 MAXPHYS			opt_global.h
 MCLSHIFT		opt_global.h
 MUTEX_DEBUG		opt_global.h
 MUTEX_NOINLINE		opt_global.h
 LOCK_PROFILING		opt_global.h
 LOCK_PROFILING_FAST	opt_global.h
 MSIZE			opt_global.h
 REGRESSION		opt_global.h
 RWLOCK_NOINLINE		opt_global.h
 SX_NOINLINE		opt_global.h
 VFS_BIO_DEBUG		opt_global.h
 
 # These are VM related options
 VM_KMEM_SIZE		opt_vm.h
 VM_KMEM_SIZE_SCALE	opt_vm.h
 VM_KMEM_SIZE_MAX	opt_vm.h
 VM_NRESERVLEVEL		opt_vm.h
+VM_NUMA_ALLOC		opt_vm.h
 VM_LEVEL_0_ORDER	opt_vm.h
 NO_SWAPPING		opt_vm.h
 MALLOC_MAKE_FAILURES	opt_vm.h
 MALLOC_PROFILE		opt_vm.h
 MALLOC_DEBUG_MAXZONES	opt_vm.h
 
 # The MemGuard replacement allocator used for tamper-after-free detection
 DEBUG_MEMGUARD		opt_vm.h
 
 # The RedZone malloc(9) protection
 DEBUG_REDZONE		opt_vm.h
 
 # Standard SMP options
 SMP			opt_global.h
 
 # Size of the kernel message buffer
 MSGBUF_SIZE		opt_msgbuf.h
 
 # NFS options
 NFS_MINATTRTIMO		opt_nfs.h
 NFS_MAXATTRTIMO		opt_nfs.h
 NFS_MINDIRATTRTIMO	opt_nfs.h
 NFS_MAXDIRATTRTIMO	opt_nfs.h
 NFS_DEBUG		opt_nfs.h
 
 # For the Bt848/Bt848A/Bt849/Bt878/Bt879 driver
 OVERRIDE_CARD			opt_bktr.h
 OVERRIDE_TUNER			opt_bktr.h
 OVERRIDE_DBX			opt_bktr.h
 OVERRIDE_MSP			opt_bktr.h
 BROOKTREE_SYSTEM_DEFAULT	opt_bktr.h
 BROOKTREE_ALLOC_PAGES		opt_bktr.h
 BKTR_OVERRIDE_CARD		opt_bktr.h
 BKTR_OVERRIDE_TUNER		opt_bktr.h
 BKTR_OVERRIDE_DBX		opt_bktr.h
 BKTR_OVERRIDE_MSP		opt_bktr.h
 BKTR_SYSTEM_DEFAULT		opt_bktr.h
 BKTR_ALLOC_PAGES		opt_bktr.h
 BKTR_USE_PLL			opt_bktr.h	
 BKTR_GPIO_ACCESS		opt_bktr.h
 BKTR_NO_MSP_RESET		opt_bktr.h
 BKTR_430_FX_MODE		opt_bktr.h
 BKTR_SIS_VIA_MODE		opt_bktr.h
 BKTR_USE_FREEBSD_SMBUS		opt_bktr.h
 BKTR_NEW_MSP34XX_DRIVER		opt_bktr.h
 
 # Options for uart(4)
 UART_PPS_ON_CTS		opt_uart.h
 UART_POLL_FREQ		opt_uart.h
 UART_DEV_TOLERANCE_PCT	opt_uart.h
 
 # options for bus/device framework
 BUS_DEBUG		opt_bus.h
 
 # options for USB support
 USB_DEBUG		opt_usb.h
 USB_HOST_ALIGN		opt_usb.h
 USB_REQ_DEBUG		opt_usb.h
 USB_TEMPLATE		opt_usb.h
 USB_VERBOSE		opt_usb.h
 USB_DMA_SINGLE_ALLOC	opt_usb.h
 USB_EHCI_BIG_ENDIAN_DESC	opt_usb.h
 U3G_DEBUG		opt_u3g.h
 UKBD_DFLT_KEYMAP	opt_ukbd.h
 UPLCOM_INTR_INTERVAL	opt_uplcom.h
 UVSCOM_DEFAULT_OPKTSIZE	opt_uvscom.h
 UVSCOM_INTR_INTERVAL	opt_uvscom.h
 
 # options for the Realtek RTL8188*U/RTL8192CU driver (urtwn)
 URTWN_WITHOUT_UCODE	opt_urtwn.h
 
 # Embedded system options
 INIT_PATH
 
 ROOTDEVNAME
 
 FDC_DEBUG		opt_fdc.h
 PCFCLOCK_VERBOSE	opt_pcfclock.h
 PCFCLOCK_MAX_RETRIES	opt_pcfclock.h
 
 KTR			opt_global.h
 KTR_ALQ			opt_ktr.h
 KTR_MASK		opt_ktr.h
 KTR_CPUMASK		opt_ktr.h
 KTR_COMPILE		opt_global.h
 KTR_BOOT_ENTRIES	opt_global.h
 KTR_ENTRIES		opt_global.h
 KTR_VERBOSE		opt_ktr.h
 WITNESS			opt_global.h
 WITNESS_KDB		opt_witness.h
 WITNESS_NO_VNODE	opt_witness.h
 WITNESS_SKIPSPIN	opt_witness.h
 WITNESS_COUNT		opt_witness.h
 OPENSOLARIS_WITNESS	opt_global.h
 
 # options for ACPI support
 ACPI_DEBUG		opt_acpi.h
 ACPI_MAX_TASKS		opt_acpi.h
 ACPI_MAX_THREADS	opt_acpi.h
 ACPI_DMAR		opt_acpi.h
 DEV_ACPI		opt_acpi.h
 
 # ISA support
 DEV_ISA			opt_isa.h
 ISAPNP			opt_isa.h
 
 # various 'device presence' options.
 DEV_BPF			opt_bpf.h
 DEV_CARP		opt_carp.h
 DEV_MCA			opt_mca.h
 DEV_NETMAP		opt_global.h
 DEV_PCI			opt_pci.h
 DEV_PF			opt_pf.h
 DEV_PFLOG		opt_pf.h
 DEV_PFSYNC		opt_pf.h
 DEV_RANDOM		opt_global.h
 DEV_SPLASH		opt_splash.h
 DEV_VLAN		opt_vlan.h
 
 # EISA support
 DEV_EISA		opt_eisa.h
 EISA_SLOTS		opt_eisa.h
 
 # ed driver
 ED_HPP			opt_ed.h
 ED_3C503		opt_ed.h
 ED_SIC			opt_ed.h
 
 # bce driver
 BCE_DEBUG		opt_bce.h
 BCE_NVRAM_WRITE_SUPPORT	opt_bce.h
 
 SOCKBUF_DEBUG		opt_global.h
 
 
 # options for ubsec driver
 UBSEC_DEBUG		opt_ubsec.h
 UBSEC_RNDTEST		opt_ubsec.h
 UBSEC_NO_RNG		opt_ubsec.h
 
 # options for hifn driver
 HIFN_DEBUG		opt_hifn.h
 HIFN_RNDTEST		opt_hifn.h
 
 # options for safenet driver
 SAFE_DEBUG		opt_safe.h
 SAFE_NO_RNG		opt_safe.h
 SAFE_RNDTEST		opt_safe.h
 
 # syscons/vt options
 MAXCONS			opt_syscons.h
 SC_ALT_MOUSE_IMAGE	opt_syscons.h
 SC_CUT_SPACES2TABS	opt_syscons.h
 SC_CUT_SEPCHARS		opt_syscons.h
 SC_DEBUG_LEVEL		opt_syscons.h
 SC_DFLT_FONT		opt_syscons.h
 SC_DISABLE_KDBKEY	opt_syscons.h
 SC_DISABLE_REBOOT	opt_syscons.h
 SC_HISTORY_SIZE		opt_syscons.h
 SC_KERNEL_CONS_ATTR	opt_syscons.h
 SC_KERNEL_CONS_REV_ATTR	opt_syscons.h
 SC_MOUSE_CHAR		opt_syscons.h
 SC_NO_CUTPASTE		opt_syscons.h
 SC_NO_FONT_LOADING	opt_syscons.h
 SC_NO_HISTORY		opt_syscons.h
 SC_NO_MODE_CHANGE	opt_syscons.h
 SC_NO_SUSPEND_VTYSWITCH	opt_syscons.h
 SC_NO_SYSMOUSE		opt_syscons.h
 SC_NORM_ATTR		opt_syscons.h
 SC_NORM_REV_ATTR	opt_syscons.h
 SC_PIXEL_MODE		opt_syscons.h
 SC_RENDER_DEBUG		opt_syscons.h
 SC_TWOBUTTON_MOUSE	opt_syscons.h
 VT_ALT_TO_ESC_HACK	opt_syscons.h
 VT_FB_DEFAULT_WIDTH	opt_syscons.h
 VT_FB_DEFAULT_HEIGHT	opt_syscons.h
 VT_MAXWINDOWS		opt_syscons.h
 VT_TWOBUTTON_MOUSE	opt_syscons.h
 DEV_SC			opt_syscons.h
 DEV_VT			opt_syscons.h
 
 # teken terminal emulator options
 TEKEN_CONS25		opt_teken.h
 TEKEN_UTF8		opt_teken.h
 TERMINAL_KERN_ATTR	opt_teken.h
 TERMINAL_NORM_ATTR	opt_teken.h
 
 # options for printf
 PRINTF_BUFR_SIZE	opt_printf.h
 
 # kbd options
 KBD_DISABLE_KEYMAP_LOAD	opt_kbd.h
 KBD_INSTALL_CDEV	opt_kbd.h
 KBD_MAXRETRY		opt_kbd.h
 KBD_MAXWAIT		opt_kbd.h
 KBD_RESETDELAY		opt_kbd.h
 KBDIO_DEBUG		opt_kbd.h
 
 KBDMUX_DFLT_KEYMAP	opt_kbdmux.h
 
 # options for the Atheros driver
 ATH_DEBUG		opt_ath.h
 ATH_TXBUF		opt_ath.h
 ATH_RXBUF		opt_ath.h
 ATH_DIAGAPI		opt_ath.h
 ATH_TX99_DIAG		opt_ath.h
 ATH_ENABLE_11N		opt_ath.h
 ATH_ENABLE_DFS		opt_ath.h
 ATH_EEPROM_FIRMWARE	opt_ath.h
 ATH_ENABLE_RADIOTAP_VENDOR_EXT	opt_ath.h
 ATH_DEBUG_ALQ		opt_ath.h
 ATH_KTR_INTR_DEBUG	opt_ath.h
 
 # options for the Atheros hal
 AH_SUPPORT_AR5416	opt_ah.h
 # XXX For now, this breaks non-AR9130 chipsets, so only use it
 # XXX when actually targetting AR9130.
 AH_SUPPORT_AR9130	opt_ah.h
 
 # This is required for AR933x SoC support
 AH_SUPPORT_AR9330	opt_ah.h
 AH_SUPPORT_AR9340	opt_ah.h
 AH_SUPPORT_QCA9530	opt_ah.h
 AH_SUPPORT_QCA9550	opt_ah.h
 
 AH_DEBUG		opt_ah.h
 AH_ASSERT		opt_ah.h
 AH_DEBUG_ALQ		opt_ah.h
 AH_REGOPS_FUNC		opt_ah.h
 AH_WRITE_REGDOMAIN	opt_ah.h
 AH_DEBUG_COUNTRY	opt_ah.h
 AH_WRITE_EEPROM		opt_ah.h
 AH_PRIVATE_DIAG		opt_ah.h
 AH_NEED_DESC_SWAP	opt_ah.h
 AH_USE_INIPDGAIN	opt_ah.h
 AH_MAXCHAN		opt_ah.h
 AH_RXCFG_SDMAMW_4BYTES	opt_ah.h
 AH_INTERRUPT_DEBUGGING	opt_ah.h
 # AR5416 and later interrupt mitigation
 # XXX do not use this for AR9130
 AH_AR5416_INTERRUPT_MITIGATION	opt_ah.h
 
 # options for the Broadcom BCM43xx driver (bwi)
 BWI_DEBUG		opt_bwi.h
 BWI_DEBUG_VERBOSE	opt_bwi.h
 
 # options for the Marvell 8335 wireless driver
 MALO_DEBUG		opt_malo.h
 MALO_TXBUF		opt_malo.h
 MALO_RXBUF		opt_malo.h
 
 # options for the Marvell wireless driver
 MWL_DEBUG		opt_mwl.h
 MWL_TXBUF		opt_mwl.h
 MWL_RXBUF		opt_mwl.h
 MWL_DIAGAPI		opt_mwl.h
 MWL_AGGR_SIZE		opt_mwl.h
 MWL_TX_NODROP		opt_mwl.h
 
 # Options for the Intel 802.11ac wireless driver
 IWM_DEBUG		opt_iwm.h
 
 # Options for the Intel 802.11n wireless driver
 IWN_DEBUG		opt_iwn.h
 
 # Options for the Intel 3945ABG wireless driver
 WPI_DEBUG		opt_wpi.h
 
 # dcons options 
 DCONS_BUF_SIZE		opt_dcons.h
 DCONS_POLL_HZ		opt_dcons.h
 DCONS_FORCE_CONSOLE	opt_dcons.h
 DCONS_FORCE_GDB		opt_dcons.h
 
 # HWPMC options
 HWPMC_DEBUG		opt_global.h
 HWPMC_HOOKS
 HWPMC_MIPS_BACKTRACE 	opt_hwpmc_hooks.h
 
 # XBOX options for FreeBSD/i386, but some files are MI
 XBOX			opt_xbox.h
 
 # Interrupt filtering
 INTR_FILTER
 
 # 802.11 support layer
 IEEE80211_DEBUG		opt_wlan.h
 IEEE80211_DEBUG_REFCNT	opt_wlan.h
 IEEE80211_AMPDU_AGE	opt_wlan.h
 IEEE80211_SUPPORT_MESH	opt_wlan.h
 IEEE80211_SUPPORT_SUPERG	opt_wlan.h
 IEEE80211_SUPPORT_TDMA	opt_wlan.h
 IEEE80211_ALQ		opt_wlan.h
 IEEE80211_DFS_DEBUG	opt_wlan.h
 
 # 802.11 TDMA support
 TDMA_SLOTLEN_DEFAULT	opt_tdma.h
 TDMA_SLOTCNT_DEFAULT	opt_tdma.h
 TDMA_BINTVAL_DEFAULT	opt_tdma.h
 TDMA_TXRATE_11B_DEFAULT	opt_tdma.h
 TDMA_TXRATE_11G_DEFAULT	opt_tdma.h
 TDMA_TXRATE_11A_DEFAULT	opt_tdma.h
 TDMA_TXRATE_TURBO_DEFAULT	opt_tdma.h
 TDMA_TXRATE_HALF_DEFAULT	opt_tdma.h
 TDMA_TXRATE_QUARTER_DEFAULT	opt_tdma.h
 TDMA_TXRATE_11NA_DEFAULT	opt_tdma.h
 TDMA_TXRATE_11NG_DEFAULT	opt_tdma.h
 
 # VideoMode
 PICKMODE_DEBUG			opt_videomode.h
 
 # Network stack virtualization options
 VIMAGE			opt_global.h
 VNET_DEBUG		opt_global.h
 
 # Common Flash Interface (CFI) options
 CFI_SUPPORT_STRATAFLASH	opt_cfi.h
 CFI_ARMEDANDDANGEROUS	opt_cfi.h
 CFI_HARDWAREBYTESWAP	opt_cfi.h
 
 # Sound options
 SND_DEBUG		opt_snd.h
 SND_DIAGNOSTIC		opt_snd.h
 SND_FEEDER_MULTIFORMAT	opt_snd.h
 SND_FEEDER_FULL_MULTIFORMAT	opt_snd.h
 SND_FEEDER_RATE_HP	opt_snd.h
 SND_PCM_64		opt_snd.h
 SND_OLDSTEREO		opt_snd.h
 
 X86BIOS
 
 # Flattened device tree options
 FDT		opt_platform.h
 FDT_DTB_STATIC	opt_platform.h
 
 # OFED Infiniband stack
 OFED		opt_ofed.h
 OFED_DEBUG_INIT	opt_ofed.h
 SDP		opt_ofed.h
 SDP_DEBUG	opt_ofed.h
 IPOIB		opt_ofed.h
 IPOIB_DEBUG	opt_ofed.h
 IPOIB_CM	opt_ofed.h
 
 # Resource Accounting
 RACCT		opt_global.h
 RACCT_DEFAULT_TO_DISABLED	opt_global.h
 
 # Resource Limits
 RCTL		opt_global.h
 
 # Random number generator(s)
 # Which CSPRNG hash we get.
 # If Yarrow is not chosen, Fortuna is selected.
 RANDOM_YARROW	opt_global.h
 # With this, no entropy processor is loaded, but the entropy
 # harvesting infrastructure is present. This means an entropy
 # processor may be loaded as a module.
 RANDOM_LOADABLE	opt_global.h
 # This turns on high-rate and potentially expensive harvesting in
 # the uma slab allocator.
 RANDOM_ENABLE_UMA	opt_global.h
 
 # Intel em(4) driver
 EM_MULTIQUEUE	opt_em.h
Index: head/sys/dev/acpica/acpi.c
===================================================================
--- head/sys/dev/acpica/acpi.c	(revision 297747)
+++ head/sys/dev/acpica/acpi.c	(revision 297748)
@@ -1,4057 +1,4059 @@
 /*-
  * Copyright (c) 2000 Takanori Watanabe <takawata@jp.freebsd.org>
  * Copyright (c) 2000 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
  * Copyright (c) 2000, 2001 Michael Smith
  * Copyright (c) 2000 BSDi
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_acpi.h"
+#include "opt_device_numa.h"
+
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/ioccom.h>
 #include <sys/reboot.h>
 #include <sys/sysctl.h>
 #include <sys/ctype.h>
 #include <sys/linker.h>
 #include <sys/power.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/timetc.h>
 
 #if defined(__i386__) || defined(__amd64__)
 #include <machine/pci_cfgreg.h>
 #endif
 #include <machine/resource.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
 #include <isa/isavar.h>
 #include <isa/pnpvar.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 #include <contrib/dev/acpica/include/accommon.h>
 #include <contrib/dev/acpica/include/acnamesp.h>
 
 #include <dev/acpica/acpivar.h>
 #include <dev/acpica/acpiio.h>
 
 #include <vm/vm_param.h>
 
 static MALLOC_DEFINE(M_ACPIDEV, "acpidev", "ACPI devices");
 
 /* Hooks for the ACPI CA debugging infrastructure */
 #define _COMPONENT	ACPI_BUS
 ACPI_MODULE_NAME("ACPI")
 
 static d_open_t		acpiopen;
 static d_close_t	acpiclose;
 static d_ioctl_t	acpiioctl;
 
 static struct cdevsw acpi_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_open =	acpiopen,
 	.d_close =	acpiclose,
 	.d_ioctl =	acpiioctl,
 	.d_name =	"acpi",
 };
 
 struct acpi_interface {
 	ACPI_STRING	*data;
 	int		num;
 };
 
 /* Global mutex for locking access to the ACPI subsystem. */
 struct mtx	acpi_mutex;
 struct callout	acpi_sleep_timer;
 
 /* Bitmap of device quirks. */
 int		acpi_quirks;
 
 /* Supported sleep states. */
 static BOOLEAN	acpi_sleep_states[ACPI_S_STATE_COUNT];
 
 static void	acpi_lookup(void *arg, const char *name, device_t *dev);
 static int	acpi_modevent(struct module *mod, int event, void *junk);
 static int	acpi_probe(device_t dev);
 static int	acpi_attach(device_t dev);
 static int	acpi_suspend(device_t dev);
 static int	acpi_resume(device_t dev);
 static int	acpi_shutdown(device_t dev);
 static device_t	acpi_add_child(device_t bus, u_int order, const char *name,
 			int unit);
 static int	acpi_print_child(device_t bus, device_t child);
 static void	acpi_probe_nomatch(device_t bus, device_t child);
 static void	acpi_driver_added(device_t dev, driver_t *driver);
 static int	acpi_read_ivar(device_t dev, device_t child, int index,
 			uintptr_t *result);
 static int	acpi_write_ivar(device_t dev, device_t child, int index,
 			uintptr_t value);
 static struct resource_list *acpi_get_rlist(device_t dev, device_t child);
 static void	acpi_reserve_resources(device_t dev);
 static int	acpi_sysres_alloc(device_t dev);
 static int	acpi_set_resource(device_t dev, device_t child, int type,
 			int rid, rman_res_t start, rman_res_t count);
 static struct resource *acpi_alloc_resource(device_t bus, device_t child,
 			int type, int *rid, rman_res_t start, rman_res_t end,
 			rman_res_t count, u_int flags);
 static int	acpi_adjust_resource(device_t bus, device_t child, int type,
 			struct resource *r, rman_res_t start, rman_res_t end);
 static int	acpi_release_resource(device_t bus, device_t child, int type,
 			int rid, struct resource *r);
 static void	acpi_delete_resource(device_t bus, device_t child, int type,
 		    int rid);
 static uint32_t	acpi_isa_get_logicalid(device_t dev);
 static int	acpi_isa_get_compatid(device_t dev, uint32_t *cids, int count);
 static char	*acpi_device_id_probe(device_t bus, device_t dev, char **ids);
 static ACPI_STATUS acpi_device_eval_obj(device_t bus, device_t dev,
 		    ACPI_STRING pathname, ACPI_OBJECT_LIST *parameters,
 		    ACPI_BUFFER *ret);
 static ACPI_STATUS acpi_device_scan_cb(ACPI_HANDLE h, UINT32 level,
 		    void *context, void **retval);
 static ACPI_STATUS acpi_device_scan_children(device_t bus, device_t dev,
 		    int max_depth, acpi_scan_cb_t user_fn, void *arg);
 static int	acpi_set_powerstate(device_t child, int state);
 static int	acpi_isa_pnp_probe(device_t bus, device_t child,
 		    struct isa_pnp_id *ids);
 static void	acpi_probe_children(device_t bus);
 static void	acpi_probe_order(ACPI_HANDLE handle, int *order);
 static ACPI_STATUS acpi_probe_child(ACPI_HANDLE handle, UINT32 level,
 		    void *context, void **status);
 static void	acpi_sleep_enable(void *arg);
 static ACPI_STATUS acpi_sleep_disable(struct acpi_softc *sc);
 static ACPI_STATUS acpi_EnterSleepState(struct acpi_softc *sc, int state);
 static void	acpi_shutdown_final(void *arg, int howto);
 static void	acpi_enable_fixed_events(struct acpi_softc *sc);
 static BOOLEAN	acpi_has_hid(ACPI_HANDLE handle);
 static void	acpi_resync_clock(struct acpi_softc *sc);
 static int	acpi_wake_sleep_prep(ACPI_HANDLE handle, int sstate);
 static int	acpi_wake_run_prep(ACPI_HANDLE handle, int sstate);
 static int	acpi_wake_prep_walk(int sstate);
 static int	acpi_wake_sysctl_walk(device_t dev);
 static int	acpi_wake_set_sysctl(SYSCTL_HANDLER_ARGS);
 static void	acpi_system_eventhandler_sleep(void *arg, int state);
 static void	acpi_system_eventhandler_wakeup(void *arg, int state);
 static int	acpi_sname2sstate(const char *sname);
 static const char *acpi_sstate2sname(int sstate);
 static int	acpi_supported_sleep_state_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_sleep_state_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_debug_objects_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_pm_func(u_long cmd, void *arg, ...);
 static int	acpi_child_location_str_method(device_t acdev, device_t child,
 					       char *buf, size_t buflen);
 static int	acpi_child_pnpinfo_str_method(device_t acdev, device_t child,
 					      char *buf, size_t buflen);
 #if defined(__i386__) || defined(__amd64__)
 static void	acpi_enable_pcie(void);
 #endif
 static void	acpi_hint_device_unit(device_t acdev, device_t child,
 		    const char *name, int *unitp);
 static void	acpi_reset_interfaces(device_t dev);
 
 static device_method_t acpi_methods[] = {
     /* Device interface */
     DEVMETHOD(device_probe,		acpi_probe),
     DEVMETHOD(device_attach,		acpi_attach),
     DEVMETHOD(device_shutdown,		acpi_shutdown),
     DEVMETHOD(device_detach,		bus_generic_detach),
     DEVMETHOD(device_suspend,		acpi_suspend),
     DEVMETHOD(device_resume,		acpi_resume),
 
     /* Bus interface */
     DEVMETHOD(bus_add_child,		acpi_add_child),
     DEVMETHOD(bus_print_child,		acpi_print_child),
     DEVMETHOD(bus_probe_nomatch,	acpi_probe_nomatch),
     DEVMETHOD(bus_driver_added,		acpi_driver_added),
     DEVMETHOD(bus_read_ivar,		acpi_read_ivar),
     DEVMETHOD(bus_write_ivar,		acpi_write_ivar),
     DEVMETHOD(bus_get_resource_list,	acpi_get_rlist),
     DEVMETHOD(bus_set_resource,		acpi_set_resource),
     DEVMETHOD(bus_get_resource,		bus_generic_rl_get_resource),
     DEVMETHOD(bus_alloc_resource,	acpi_alloc_resource),
     DEVMETHOD(bus_adjust_resource,	acpi_adjust_resource),
     DEVMETHOD(bus_release_resource,	acpi_release_resource),
     DEVMETHOD(bus_delete_resource,	acpi_delete_resource),
     DEVMETHOD(bus_child_pnpinfo_str,	acpi_child_pnpinfo_str_method),
     DEVMETHOD(bus_child_location_str,	acpi_child_location_str_method),
     DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
     DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
     DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
     DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
     DEVMETHOD(bus_hint_device_unit,	acpi_hint_device_unit),
     DEVMETHOD(bus_get_domain,		acpi_get_domain),
 
     /* ACPI bus */
     DEVMETHOD(acpi_id_probe,		acpi_device_id_probe),
     DEVMETHOD(acpi_evaluate_object,	acpi_device_eval_obj),
     DEVMETHOD(acpi_pwr_for_sleep,	acpi_device_pwr_for_sleep),
     DEVMETHOD(acpi_scan_children,	acpi_device_scan_children),
 
     /* ISA emulation */
     DEVMETHOD(isa_pnp_probe,		acpi_isa_pnp_probe),
 
     DEVMETHOD_END
 };
 
 static driver_t acpi_driver = {
     "acpi",
     acpi_methods,
     sizeof(struct acpi_softc),
 };
 
 static devclass_t acpi_devclass;
 DRIVER_MODULE(acpi, nexus, acpi_driver, acpi_devclass, acpi_modevent, 0);
 MODULE_VERSION(acpi, 1);
 
 ACPI_SERIAL_DECL(acpi, "ACPI root bus");
 
 /* Local pools for managing system resources for ACPI child devices. */
 static struct rman acpi_rman_io, acpi_rman_mem;
 
 #define ACPI_MINIMUM_AWAKETIME	5
 
 /* Holds the description of the acpi0 device. */
 static char acpi_desc[ACPI_OEM_ID_SIZE + ACPI_OEM_TABLE_ID_SIZE + 2];
 
 SYSCTL_NODE(_debug, OID_AUTO, acpi, CTLFLAG_RD, NULL, "ACPI debugging");
 static char acpi_ca_version[12];
 SYSCTL_STRING(_debug_acpi, OID_AUTO, acpi_ca_version, CTLFLAG_RD,
 	      acpi_ca_version, 0, "Version of Intel ACPI-CA");
 
 /*
  * Allow overriding _OSI methods.
  */
 static char acpi_install_interface[256];
 TUNABLE_STR("hw.acpi.install_interface", acpi_install_interface,
     sizeof(acpi_install_interface));
 static char acpi_remove_interface[256];
 TUNABLE_STR("hw.acpi.remove_interface", acpi_remove_interface,
     sizeof(acpi_remove_interface));
 
 /* Allow users to dump Debug objects without ACPI debugger. */
 static int acpi_debug_objects;
 TUNABLE_INT("debug.acpi.enable_debug_objects", &acpi_debug_objects);
 SYSCTL_PROC(_debug_acpi, OID_AUTO, enable_debug_objects,
     CTLFLAG_RW | CTLTYPE_INT, NULL, 0, acpi_debug_objects_sysctl, "I",
     "Enable Debug objects");
 
 /* Allow the interpreter to ignore common mistakes in BIOS. */
 static int acpi_interpreter_slack = 1;
 TUNABLE_INT("debug.acpi.interpreter_slack", &acpi_interpreter_slack);
 SYSCTL_INT(_debug_acpi, OID_AUTO, interpreter_slack, CTLFLAG_RDTUN,
     &acpi_interpreter_slack, 1, "Turn on interpreter slack mode.");
 
 /* Ignore register widths set by FADT and use default widths instead. */
 static int acpi_ignore_reg_width = 1;
 TUNABLE_INT("debug.acpi.default_register_width", &acpi_ignore_reg_width);
 SYSCTL_INT(_debug_acpi, OID_AUTO, default_register_width, CTLFLAG_RDTUN,
     &acpi_ignore_reg_width, 1, "Ignore register widths set by FADT");
 
 #ifdef __amd64__
 /* Reset system clock while resuming.  XXX Remove once tested. */
 static int acpi_reset_clock = 1;
 TUNABLE_INT("debug.acpi.reset_clock", &acpi_reset_clock);
 SYSCTL_INT(_debug_acpi, OID_AUTO, reset_clock, CTLFLAG_RW,
     &acpi_reset_clock, 1, "Reset system clock while resuming.");
 #endif
 
 /* Allow users to override quirks. */
 TUNABLE_INT("debug.acpi.quirks", &acpi_quirks);
 
 static int acpi_susp_bounce;
 SYSCTL_INT(_debug_acpi, OID_AUTO, suspend_bounce, CTLFLAG_RW,
     &acpi_susp_bounce, 0, "Don't actually suspend, just test devices.");
 
 /*
  * ACPI can only be loaded as a module by the loader; activating it after
  * system bootstrap time is not useful, and can be fatal to the system.
  * It also cannot be unloaded, since the entire system bus hierarchy hangs
  * off it.
  */
 static int
 acpi_modevent(struct module *mod, int event, void *junk)
 {
     switch (event) {
     case MOD_LOAD:
 	if (!cold) {
 	    printf("The ACPI driver cannot be loaded after boot.\n");
 	    return (EPERM);
 	}
 	break;
     case MOD_UNLOAD:
 	if (!cold && power_pm_get_type() == POWER_PM_TYPE_ACPI)
 	    return (EBUSY);
 	break;
     default:
 	break;
     }
     return (0);
 }
 
 /*
  * Perform early initialization.
  */
 ACPI_STATUS
 acpi_Startup(void)
 {
     static int started = 0;
     ACPI_STATUS status;
     int val;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /* Only run the startup code once.  The MADT driver also calls this. */
     if (started)
 	return_VALUE (AE_OK);
     started = 1;
 
     /*
      * Pre-allocate space for RSDT/XSDT and DSDT tables and allow resizing
      * if more tables exist.
      */
     if (ACPI_FAILURE(status = AcpiInitializeTables(NULL, 2, TRUE))) {
 	printf("ACPI: Table initialisation failed: %s\n",
 	    AcpiFormatException(status));
 	return_VALUE (status);
     }
 
     /* Set up any quirks we have for this system. */
     if (acpi_quirks == ACPI_Q_OK)
 	acpi_table_quirks(&acpi_quirks);
 
     /* If the user manually set the disabled hint to 0, force-enable ACPI. */
     if (resource_int_value("acpi", 0, "disabled", &val) == 0 && val == 0)
 	acpi_quirks &= ~ACPI_Q_BROKEN;
     if (acpi_quirks & ACPI_Q_BROKEN) {
 	printf("ACPI disabled by blacklist.  Contact your BIOS vendor.\n");
 	status = AE_SUPPORT;
     }
 
     return_VALUE (status);
 }
 
 /*
  * Detect ACPI and perform early initialisation.
  */
 int
 acpi_identify(void)
 {
     ACPI_TABLE_RSDP	*rsdp;
     ACPI_TABLE_HEADER	*rsdt;
     ACPI_PHYSICAL_ADDRESS paddr;
     struct sbuf		sb;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (!cold)
 	return (ENXIO);
 
     /* Check that we haven't been disabled with a hint. */
     if (resource_disabled("acpi", 0))
 	return (ENXIO);
 
     /* Check for other PM systems. */
     if (power_pm_get_type() != POWER_PM_TYPE_NONE &&
 	power_pm_get_type() != POWER_PM_TYPE_ACPI) {
 	printf("ACPI identify failed, other PM system enabled.\n");
 	return (ENXIO);
     }
 
     /* Initialize root tables. */
     if (ACPI_FAILURE(acpi_Startup())) {
 	printf("ACPI: Try disabling either ACPI or apic support.\n");
 	return (ENXIO);
     }
 
     if ((paddr = AcpiOsGetRootPointer()) == 0 ||
 	(rsdp = AcpiOsMapMemory(paddr, sizeof(ACPI_TABLE_RSDP))) == NULL)
 	return (ENXIO);
     if (rsdp->Revision > 1 && rsdp->XsdtPhysicalAddress != 0)
 	paddr = (ACPI_PHYSICAL_ADDRESS)rsdp->XsdtPhysicalAddress;
     else
 	paddr = (ACPI_PHYSICAL_ADDRESS)rsdp->RsdtPhysicalAddress;
     AcpiOsUnmapMemory(rsdp, sizeof(ACPI_TABLE_RSDP));
 
     if ((rsdt = AcpiOsMapMemory(paddr, sizeof(ACPI_TABLE_HEADER))) == NULL)
 	return (ENXIO);
     sbuf_new(&sb, acpi_desc, sizeof(acpi_desc), SBUF_FIXEDLEN);
     sbuf_bcat(&sb, rsdt->OemId, ACPI_OEM_ID_SIZE);
     sbuf_trim(&sb);
     sbuf_putc(&sb, ' ');
     sbuf_bcat(&sb, rsdt->OemTableId, ACPI_OEM_TABLE_ID_SIZE);
     sbuf_trim(&sb);
     sbuf_finish(&sb);
     sbuf_delete(&sb);
     AcpiOsUnmapMemory(rsdt, sizeof(ACPI_TABLE_HEADER));
 
     snprintf(acpi_ca_version, sizeof(acpi_ca_version), "%x", ACPI_CA_VERSION);
 
     return (0);
 }
 
 /*
  * Fetch some descriptive data from ACPI to put in our attach message.
  */
 static int
 acpi_probe(device_t dev)
 {
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     device_set_desc(dev, acpi_desc);
 
     return_VALUE (BUS_PROBE_NOWILDCARD);
 }
 
 static int
 acpi_attach(device_t dev)
 {
     struct acpi_softc	*sc;
     ACPI_STATUS		status;
     int			error, state;
     UINT32		flags;
     UINT8		TypeA, TypeB;
     char		*env;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     sc = device_get_softc(dev);
     sc->acpi_dev = dev;
     callout_init(&sc->susp_force_to, 1);
 
     error = ENXIO;
 
     /* Initialize resource manager. */
     acpi_rman_io.rm_type = RMAN_ARRAY;
     acpi_rman_io.rm_start = 0;
     acpi_rman_io.rm_end = 0xffff;
     acpi_rman_io.rm_descr = "ACPI I/O ports";
     if (rman_init(&acpi_rman_io) != 0)
 	panic("acpi rman_init IO ports failed");
     acpi_rman_mem.rm_type = RMAN_ARRAY;
     acpi_rman_mem.rm_descr = "ACPI I/O memory addresses";
     if (rman_init(&acpi_rman_mem) != 0)
 	panic("acpi rman_init memory failed");
 
     /* Initialise the ACPI mutex */
     mtx_init(&acpi_mutex, "ACPI global lock", NULL, MTX_DEF);
 
     /*
      * Set the globals from our tunables.  This is needed because ACPI-CA
      * uses UINT8 for some values and we have no tunable_byte.
      */
     AcpiGbl_EnableInterpreterSlack = acpi_interpreter_slack ? TRUE : FALSE;
     AcpiGbl_EnableAmlDebugObject = acpi_debug_objects ? TRUE : FALSE;
     AcpiGbl_UseDefaultRegisterWidths = acpi_ignore_reg_width ? TRUE : FALSE;
 
 #ifndef ACPI_DEBUG
     /*
      * Disable all debugging layers and levels.
      */
     AcpiDbgLayer = 0;
     AcpiDbgLevel = 0;
 #endif
 
     /* Start up the ACPI CA subsystem. */
     status = AcpiInitializeSubsystem();
     if (ACPI_FAILURE(status)) {
 	device_printf(dev, "Could not initialize Subsystem: %s\n",
 		      AcpiFormatException(status));
 	goto out;
     }
 
     /* Override OS interfaces if the user requested. */
     acpi_reset_interfaces(dev);
 
     /* Load ACPI name space. */
     status = AcpiLoadTables();
     if (ACPI_FAILURE(status)) {
 	device_printf(dev, "Could not load Namespace: %s\n",
 		      AcpiFormatException(status));
 	goto out;
     }
 
 #if defined(__i386__) || defined(__amd64__)
     /* Handle MCFG table if present. */
     acpi_enable_pcie();
 #endif
 
     /*
      * Note that some systems (specifically, those with namespace evaluation
      * issues that require the avoidance of parts of the namespace) must
      * avoid running _INI and _STA on everything, as well as dodging the final
      * object init pass.
      *
      * For these devices, we set ACPI_NO_DEVICE_INIT and ACPI_NO_OBJECT_INIT).
      *
      * XXX We should arrange for the object init pass after we have attached
      *     all our child devices, but on many systems it works here.
      */
     flags = 0;
     if (testenv("debug.acpi.avoid"))
 	flags = ACPI_NO_DEVICE_INIT | ACPI_NO_OBJECT_INIT;
 
     /* Bring the hardware and basic handlers online. */
     if (ACPI_FAILURE(status = AcpiEnableSubsystem(flags))) {
 	device_printf(dev, "Could not enable ACPI: %s\n",
 		      AcpiFormatException(status));
 	goto out;
     }
 
     /*
      * Call the ECDT probe function to provide EC functionality before
      * the namespace has been evaluated.
      *
      * XXX This happens before the sysresource devices have been probed and
      * attached so its resources come from nexus0.  In practice, this isn't
      * a problem but should be addressed eventually.
      */
     acpi_ec_ecdt_probe(dev);
 
     /* Bring device objects and regions online. */
     if (ACPI_FAILURE(status = AcpiInitializeObjects(flags))) {
 	device_printf(dev, "Could not initialize ACPI objects: %s\n",
 		      AcpiFormatException(status));
 	goto out;
     }
 
     /*
      * Setup our sysctl tree.
      *
      * XXX: This doesn't check to make sure that none of these fail.
      */
     sysctl_ctx_init(&sc->acpi_sysctl_ctx);
     sc->acpi_sysctl_tree = SYSCTL_ADD_NODE(&sc->acpi_sysctl_ctx,
 			       SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
 			       device_get_name(dev), CTLFLAG_RD, 0, "");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "supported_sleep_state", CTLTYPE_STRING | CTLFLAG_RD,
 	0, 0, acpi_supported_sleep_state_sysctl, "A",
 	"List supported ACPI sleep states.");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "power_button_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_power_button_sx, 0, acpi_sleep_state_sysctl, "A",
 	"Power button ACPI sleep state.");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "sleep_button_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_sleep_button_sx, 0, acpi_sleep_state_sysctl, "A",
 	"Sleep button ACPI sleep state.");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "lid_switch_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_lid_switch_sx, 0, acpi_sleep_state_sysctl, "A",
 	"Lid ACPI sleep state. Set to S3 if you want to suspend your laptop when close the Lid.");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "standby_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_standby_sx, 0, acpi_sleep_state_sysctl, "A", "");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "suspend_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_suspend_sx, 0, acpi_sleep_state_sysctl, "A", "");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "sleep_delay", CTLFLAG_RW, &sc->acpi_sleep_delay, 0,
 	"sleep delay in seconds");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "s4bios", CTLFLAG_RW, &sc->acpi_s4bios, 0, "S4BIOS mode");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "verbose", CTLFLAG_RW, &sc->acpi_verbose, 0, "verbose mode");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "disable_on_reboot", CTLFLAG_RW,
 	&sc->acpi_do_disable, 0, "Disable ACPI when rebooting/halting system");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "handle_reboot", CTLFLAG_RW,
 	&sc->acpi_handle_reboot, 0, "Use ACPI Reset Register to reboot");
 
     /*
      * Default to 1 second before sleeping to give some machines time to
      * stabilize.
      */
     sc->acpi_sleep_delay = 1;
     if (bootverbose)
 	sc->acpi_verbose = 1;
     if ((env = kern_getenv("hw.acpi.verbose")) != NULL) {
 	if (strcmp(env, "0") != 0)
 	    sc->acpi_verbose = 1;
 	freeenv(env);
     }
 
     /* Only enable reboot by default if the FADT says it is available. */
     if (AcpiGbl_FADT.Flags & ACPI_FADT_RESET_REGISTER)
 	sc->acpi_handle_reboot = 1;
 
 #if !ACPI_REDUCED_HARDWARE
     /* Only enable S4BIOS by default if the FACS says it is available. */
     if (AcpiGbl_FACS != NULL && AcpiGbl_FACS->Flags & ACPI_FACS_S4_BIOS_PRESENT)
 	sc->acpi_s4bios = 1;
 #endif
 
     /* Probe all supported sleep states. */
     acpi_sleep_states[ACPI_STATE_S0] = TRUE;
     for (state = ACPI_STATE_S1; state < ACPI_S_STATE_COUNT; state++)
 	if (ACPI_SUCCESS(AcpiEvaluateObject(ACPI_ROOT_OBJECT,
 	    __DECONST(char *, AcpiGbl_SleepStateNames[state]), NULL, NULL)) &&
 	    ACPI_SUCCESS(AcpiGetSleepTypeData(state, &TypeA, &TypeB)))
 	    acpi_sleep_states[state] = TRUE;
 
     /*
      * Dispatch the default sleep state to devices.  The lid switch is set
      * to UNKNOWN by default to avoid surprising users.
      */
     sc->acpi_power_button_sx = acpi_sleep_states[ACPI_STATE_S5] ?
 	ACPI_STATE_S5 : ACPI_STATE_UNKNOWN;
     sc->acpi_lid_switch_sx = ACPI_STATE_UNKNOWN;
     sc->acpi_standby_sx = acpi_sleep_states[ACPI_STATE_S1] ?
 	ACPI_STATE_S1 : ACPI_STATE_UNKNOWN;
     sc->acpi_suspend_sx = acpi_sleep_states[ACPI_STATE_S3] ?
 	ACPI_STATE_S3 : ACPI_STATE_UNKNOWN;
 
     /* Pick the first valid sleep state for the sleep button default. */
     sc->acpi_sleep_button_sx = ACPI_STATE_UNKNOWN;
     for (state = ACPI_STATE_S1; state <= ACPI_STATE_S4; state++)
 	if (acpi_sleep_states[state]) {
 	    sc->acpi_sleep_button_sx = state;
 	    break;
 	}
 
     acpi_enable_fixed_events(sc);
 
     /*
      * Scan the namespace and attach/initialise children.
      */
 
     /* Register our shutdown handler. */
     EVENTHANDLER_REGISTER(shutdown_final, acpi_shutdown_final, sc,
 	SHUTDOWN_PRI_LAST);
 
     /*
      * Register our acpi event handlers.
      * XXX should be configurable eg. via userland policy manager.
      */
     EVENTHANDLER_REGISTER(acpi_sleep_event, acpi_system_eventhandler_sleep,
 	sc, ACPI_EVENT_PRI_LAST);
     EVENTHANDLER_REGISTER(acpi_wakeup_event, acpi_system_eventhandler_wakeup,
 	sc, ACPI_EVENT_PRI_LAST);
 
     /* Flag our initial states. */
     sc->acpi_enabled = TRUE;
     sc->acpi_sstate = ACPI_STATE_S0;
     sc->acpi_sleep_disabled = TRUE;
 
     /* Create the control device */
     sc->acpi_dev_t = make_dev(&acpi_cdevsw, 0, UID_ROOT, GID_WHEEL, 0644,
 			      "acpi");
     sc->acpi_dev_t->si_drv1 = sc;
 
     if ((error = acpi_machdep_init(dev)))
 	goto out;
 
     /* Register ACPI again to pass the correct argument of pm_func. */
     power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, sc);
 
     if (!acpi_disabled("bus")) {
 	EVENTHANDLER_REGISTER(dev_lookup, acpi_lookup, NULL, 1000);
 	acpi_probe_children(dev);
     }
 
     /* Update all GPEs and enable runtime GPEs. */
     status = AcpiUpdateAllGpes();
     if (ACPI_FAILURE(status))
 	device_printf(dev, "Could not update all GPEs: %s\n",
 	    AcpiFormatException(status));
 
     /* Allow sleep request after a while. */
     callout_init_mtx(&acpi_sleep_timer, &acpi_mutex, 0);
     callout_reset(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME,
 	acpi_sleep_enable, sc);
 
     error = 0;
 
  out:
     return_VALUE (error);
 }
 
 static void
 acpi_set_power_children(device_t dev, int state)
 {
 	device_t child;
 	device_t *devlist;
 	int dstate, i, numdevs;
 
 	if (device_get_children(dev, &devlist, &numdevs) != 0)
 		return;
 
 	/*
 	 * Retrieve and set D-state for the sleep state if _SxD is present.
 	 * Skip children who aren't attached since they are handled separately.
 	 */
 	for (i = 0; i < numdevs; i++) {
 		child = devlist[i];
 		dstate = state;
 		if (device_is_attached(child) &&
 		    acpi_device_pwr_for_sleep(dev, child, &dstate) == 0)
 			acpi_set_powerstate(child, dstate);
 	}
 	free(devlist, M_TEMP);
 }
 
 static int
 acpi_suspend(device_t dev)
 {
     int error;
 
     GIANT_REQUIRED;
 
     error = bus_generic_suspend(dev);
     if (error == 0)
 	acpi_set_power_children(dev, ACPI_STATE_D3);
 
     return (error);
 }
 
 static int
 acpi_resume(device_t dev)
 {
 
     GIANT_REQUIRED;
 
     acpi_set_power_children(dev, ACPI_STATE_D0);
 
     return (bus_generic_resume(dev));
 }
 
 static int
 acpi_shutdown(device_t dev)
 {
 
     GIANT_REQUIRED;
 
     /* Allow children to shutdown first. */
     bus_generic_shutdown(dev);
 
     /*
      * Enable any GPEs that are able to power-on the system (i.e., RTC).
      * Also, disable any that are not valid for this state (most).
      */
     acpi_wake_prep_walk(ACPI_STATE_S5);
 
     return (0);
 }
 
 /*
  * Handle a new device being added
  */
 static device_t
 acpi_add_child(device_t bus, u_int order, const char *name, int unit)
 {
     struct acpi_device	*ad;
     device_t		child;
 
     if ((ad = malloc(sizeof(*ad), M_ACPIDEV, M_NOWAIT | M_ZERO)) == NULL)
 	return (NULL);
 
     resource_list_init(&ad->ad_rl);
 
     child = device_add_child_ordered(bus, order, name, unit);
     if (child != NULL)
 	device_set_ivars(child, ad);
     else
 	free(ad, M_ACPIDEV);
     return (child);
 }
 
 static int
 acpi_print_child(device_t bus, device_t child)
 {
     struct acpi_device	 *adev = device_get_ivars(child);
     struct resource_list *rl = &adev->ad_rl;
     int retval = 0;
 
     retval += bus_print_child_header(bus, child);
     retval += resource_list_print_type(rl, "port",  SYS_RES_IOPORT, "%#jx");
     retval += resource_list_print_type(rl, "iomem", SYS_RES_MEMORY, "%#jx");
     retval += resource_list_print_type(rl, "irq",   SYS_RES_IRQ,    "%jd");
     retval += resource_list_print_type(rl, "drq",   SYS_RES_DRQ,    "%jd");
     if (device_get_flags(child))
 	retval += printf(" flags %#x", device_get_flags(child));
     retval += bus_print_child_domain(bus, child);
     retval += bus_print_child_footer(bus, child);
 
     return (retval);
 }
 
 /*
  * If this device is an ACPI child but no one claimed it, attempt
  * to power it off.  We'll power it back up when a driver is added.
  *
  * XXX Disabled for now since many necessary devices (like fdc and
  * ATA) don't claim the devices we created for them but still expect
  * them to be powered up.
  */
 static void
 acpi_probe_nomatch(device_t bus, device_t child)
 {
 #ifdef ACPI_ENABLE_POWERDOWN_NODRIVER
     acpi_set_powerstate(child, ACPI_STATE_D3);
 #endif
 }
 
 /*
  * If a new driver has a chance to probe a child, first power it up.
  *
  * XXX Disabled for now (see acpi_probe_nomatch for details).
  */
 static void
 acpi_driver_added(device_t dev, driver_t *driver)
 {
     device_t child, *devlist;
     int i, numdevs;
 
     DEVICE_IDENTIFY(driver, dev);
     if (device_get_children(dev, &devlist, &numdevs))
 	    return;
     for (i = 0; i < numdevs; i++) {
 	child = devlist[i];
 	if (device_get_state(child) == DS_NOTPRESENT) {
 #ifdef ACPI_ENABLE_POWERDOWN_NODRIVER
 	    acpi_set_powerstate(child, ACPI_STATE_D0);
 	    if (device_probe_and_attach(child) != 0)
 		acpi_set_powerstate(child, ACPI_STATE_D3);
 #else
 	    device_probe_and_attach(child);
 #endif
 	}
     }
     free(devlist, M_TEMP);
 }
 
 /* Location hint for devctl(8) */
 static int
 acpi_child_location_str_method(device_t cbdev, device_t child, char *buf,
     size_t buflen)
 {
     struct acpi_device *dinfo = device_get_ivars(child);
     char buf2[32];
     int pxm;
 
     if (dinfo->ad_handle) {
         snprintf(buf, buflen, "handle=%s", acpi_name(dinfo->ad_handle));
         if (ACPI_SUCCESS(acpi_GetInteger(dinfo->ad_handle, "_PXM", &pxm))) {
                 snprintf(buf2, 32, " _PXM=%d", pxm);
                 strlcat(buf, buf2, buflen);
         }
     } else {
         snprintf(buf, buflen, "unknown");
     }
     return (0);
 }
 
 /* PnP information for devctl(8) */
 static int
 acpi_child_pnpinfo_str_method(device_t cbdev, device_t child, char *buf,
     size_t buflen)
 {
     struct acpi_device *dinfo = device_get_ivars(child);
     ACPI_DEVICE_INFO *adinfo;
 
     if (ACPI_FAILURE(AcpiGetObjectInfo(dinfo->ad_handle, &adinfo))) {
 	snprintf(buf, buflen, "unknown");
 	return (0);
     }
 
     snprintf(buf, buflen, "_HID=%s _UID=%lu",
 	(adinfo->Valid & ACPI_VALID_HID) ?
 	adinfo->HardwareId.String : "none",
 	(adinfo->Valid & ACPI_VALID_UID) ?
 	strtoul(adinfo->UniqueId.String, NULL, 10) : 0UL);
     AcpiOsFree(adinfo);
 
     return (0);
 }
 
 /*
  * Handle per-device ivars
  */
 static int
 acpi_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
 {
     struct acpi_device	*ad;
 
     if ((ad = device_get_ivars(child)) == NULL) {
 	device_printf(child, "device has no ivars\n");
 	return (ENOENT);
     }
 
     /* ACPI and ISA compatibility ivars */
     switch(index) {
     case ACPI_IVAR_HANDLE:
 	*(ACPI_HANDLE *)result = ad->ad_handle;
 	break;
     case ACPI_IVAR_PRIVATE:
 	*(void **)result = ad->ad_private;
 	break;
     case ACPI_IVAR_FLAGS:
 	*(int *)result = ad->ad_flags;
 	break;
     case ISA_IVAR_VENDORID:
     case ISA_IVAR_SERIAL:
     case ISA_IVAR_COMPATID:
 	*(int *)result = -1;
 	break;
     case ISA_IVAR_LOGICALID:
 	*(int *)result = acpi_isa_get_logicalid(child);
 	break;
     default:
 	return (ENOENT);
     }
 
     return (0);
 }
 
 static int
 acpi_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
 {
     struct acpi_device	*ad;
 
     if ((ad = device_get_ivars(child)) == NULL) {
 	device_printf(child, "device has no ivars\n");
 	return (ENOENT);
     }
 
     switch(index) {
     case ACPI_IVAR_HANDLE:
 	ad->ad_handle = (ACPI_HANDLE)value;
 	break;
     case ACPI_IVAR_PRIVATE:
 	ad->ad_private = (void *)value;
 	break;
     case ACPI_IVAR_FLAGS:
 	ad->ad_flags = (int)value;
 	break;
     default:
 	panic("bad ivar write request (%d)", index);
 	return (ENOENT);
     }
 
     return (0);
 }
 
 /*
  * Handle child resource allocation/removal
  */
 static struct resource_list *
 acpi_get_rlist(device_t dev, device_t child)
 {
     struct acpi_device		*ad;
 
     ad = device_get_ivars(child);
     return (&ad->ad_rl);
 }
 
 static int
 acpi_match_resource_hint(device_t dev, int type, long value)
 {
     struct acpi_device *ad = device_get_ivars(dev);
     struct resource_list *rl = &ad->ad_rl;
     struct resource_list_entry *rle;
 
     STAILQ_FOREACH(rle, rl, link) {
 	if (rle->type != type)
 	    continue;
 	if (rle->start <= value && rle->end >= value)
 	    return (1);
     }
     return (0);
 }
 
 /*
  * Wire device unit numbers based on resource matches in hints.
  */
 static void
 acpi_hint_device_unit(device_t acdev, device_t child, const char *name,
     int *unitp)
 {
     const char *s;
     long value;
     int line, matches, unit;
 
     /*
      * Iterate over all the hints for the devices with the specified
      * name to see if one's resources are a subset of this device.
      */
     line = 0;
     for (;;) {
 	if (resource_find_dev(&line, name, &unit, "at", NULL) != 0)
 	    break;
 
 	/* Must have an "at" for acpi or isa. */
 	resource_string_value(name, unit, "at", &s);
 	if (!(strcmp(s, "acpi0") == 0 || strcmp(s, "acpi") == 0 ||
 	    strcmp(s, "isa0") == 0 || strcmp(s, "isa") == 0))
 	    continue;
 
 	/*
 	 * Check for matching resources.  We must have at least one match.
 	 * Since I/O and memory resources cannot be shared, if we get a
 	 * match on either of those, ignore any mismatches in IRQs or DRQs.
 	 *
 	 * XXX: We may want to revisit this to be more lenient and wire
 	 * as long as it gets one match.
 	 */
 	matches = 0;
 	if (resource_long_value(name, unit, "port", &value) == 0) {
 	    /*
 	     * Floppy drive controllers are notorious for having a
 	     * wide variety of resources not all of which include the
 	     * first port that is specified by the hint (typically
 	     * 0x3f0) (see the comment above fdc_isa_alloc_resources()
 	     * in fdc_isa.c).  However, they do all seem to include
 	     * port + 2 (e.g. 0x3f2) so for a floppy device, look for
 	     * 'value + 2' in the port resources instead of the hint
 	     * value.
 	     */
 	    if (strcmp(name, "fdc") == 0)
 		value += 2;
 	    if (acpi_match_resource_hint(child, SYS_RES_IOPORT, value))
 		matches++;
 	    else
 		continue;
 	}
 	if (resource_long_value(name, unit, "maddr", &value) == 0) {
 	    if (acpi_match_resource_hint(child, SYS_RES_MEMORY, value))
 		matches++;
 	    else
 		continue;
 	}
 	if (matches > 0)
 	    goto matched;
 	if (resource_long_value(name, unit, "irq", &value) == 0) {
 	    if (acpi_match_resource_hint(child, SYS_RES_IRQ, value))
 		matches++;
 	    else
 		continue;
 	}
 	if (resource_long_value(name, unit, "drq", &value) == 0) {
 	    if (acpi_match_resource_hint(child, SYS_RES_DRQ, value))
 		matches++;
 	    else
 		continue;
 	}
 
     matched:
 	if (matches > 0) {
 	    /* We have a winner! */
 	    *unitp = unit;
 	    break;
 	}
     }
 }
 
 /*
  * Fetch the VM domain for the given device 'dev'.
  *
  * Return 1 + domain if there's a domain, 0 if not found;
  * -1 upon an error.
  */
 int
 acpi_parse_pxm(device_t dev, int *domain)
 {
-#if MAXMEMDOM > 1
+#ifdef DEVICE_NUMA
 	ACPI_HANDLE h;
 	int d, pxm;
 
 	h = acpi_get_handle(dev);
 	if ((h != NULL) &&
 	    ACPI_SUCCESS(acpi_GetInteger(h, "_PXM", &pxm))) {
 		d = acpi_map_pxm_to_vm_domainid(pxm);
 		if (d < 0)
 			return (-1);
 		*domain = d;
 		return (1);
 	}
 #endif
 
 	return (0);
 }
 
 /*
  * Fetch the NUMA domain for the given device.
  *
  * If a device has a _PXM method, map that to a NUMA domain.
  *
  * If none is found, then it'll call the parent method.
  * If there's no domain, return ENOENT.
  */
 int
 acpi_get_domain(device_t dev, device_t child, int *domain)
 {
 	int ret;
 
 	ret = acpi_parse_pxm(child, domain);
 	/* Error */
 	if (ret == -1)
 		return (ENOENT);
 	/* Found */
 	if (ret == 1)
 		return (0);
 
 	/* No _PXM node; go up a level */
 	return (bus_generic_get_domain(dev, child, domain));
 }
 
 /*
  * Pre-allocate/manage all memory and IO resources.  Since rman can't handle
  * duplicates, we merge any in the sysresource attach routine.
  */
 static int
 acpi_sysres_alloc(device_t dev)
 {
     struct resource *res;
     struct resource_list *rl;
     struct resource_list_entry *rle;
     struct rman *rm;
     char *sysres_ids[] = { "PNP0C01", "PNP0C02", NULL };
     device_t *children;
     int child_count, i;
 
     /*
      * Probe/attach any sysresource devices.  This would be unnecessary if we
      * had multi-pass probe/attach.
      */
     if (device_get_children(dev, &children, &child_count) != 0)
 	return (ENXIO);
     for (i = 0; i < child_count; i++) {
 	if (ACPI_ID_PROBE(dev, children[i], sysres_ids) != NULL)
 	    device_probe_and_attach(children[i]);
     }
     free(children, M_TEMP);
 
     rl = BUS_GET_RESOURCE_LIST(device_get_parent(dev), dev);
     STAILQ_FOREACH(rle, rl, link) {
 	if (rle->res != NULL) {
 	    device_printf(dev, "duplicate resource for %jx\n", rle->start);
 	    continue;
 	}
 
 	/* Only memory and IO resources are valid here. */
 	switch (rle->type) {
 	case SYS_RES_IOPORT:
 	    rm = &acpi_rman_io;
 	    break;
 	case SYS_RES_MEMORY:
 	    rm = &acpi_rman_mem;
 	    break;
 	default:
 	    continue;
 	}
 
 	/* Pre-allocate resource and add to our rman pool. */
 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), dev, rle->type,
 	    &rle->rid, rle->start, rle->start + rle->count - 1, rle->count, 0);
 	if (res != NULL) {
 	    rman_manage_region(rm, rman_get_start(res), rman_get_end(res));
 	    rle->res = res;
 	} else if (bootverbose)
 	    device_printf(dev, "reservation of %jx, %jx (%d) failed\n",
 		rle->start, rle->count, rle->type);
     }
     return (0);
 }
 
 static char *pcilink_ids[] = { "PNP0C0F", NULL };
 static char *sysres_ids[] = { "PNP0C01", "PNP0C02", NULL };
 
 /*
  * Reserve declared resources for devices found during attach once system
  * resources have been allocated.
  */
 static void
 acpi_reserve_resources(device_t dev)
 {
     struct resource_list_entry *rle;
     struct resource_list *rl;
     struct acpi_device *ad;
     struct acpi_softc *sc;
     device_t *children;
     int child_count, i;
 
     sc = device_get_softc(dev);
     if (device_get_children(dev, &children, &child_count) != 0)
 	return;
     for (i = 0; i < child_count; i++) {
 	ad = device_get_ivars(children[i]);
 	rl = &ad->ad_rl;
 
 	/* Don't reserve system resources. */
 	if (ACPI_ID_PROBE(dev, children[i], sysres_ids) != NULL)
 	    continue;
 
 	STAILQ_FOREACH(rle, rl, link) {
 	    /*
 	     * Don't reserve IRQ resources.  There are many sticky things
 	     * to get right otherwise (e.g. IRQs for psm, atkbd, and HPET
 	     * when using legacy routing).
 	     */
 	    if (rle->type == SYS_RES_IRQ)
 		continue;
 
 	    /*
 	     * Don't reserve the resource if it is already allocated.
 	     * The acpi_ec(4) driver can allocate its resources early
 	     * if ECDT is present.
 	     */
 	    if (rle->res != NULL)
 		continue;
 
 	    /*
 	     * Try to reserve the resource from our parent.  If this
 	     * fails because the resource is a system resource, just
 	     * let it be.  The resource range is already reserved so
 	     * that other devices will not use it.  If the driver
 	     * needs to allocate the resource, then
 	     * acpi_alloc_resource() will sub-alloc from the system
 	     * resource.
 	     */
 	    resource_list_reserve(rl, dev, children[i], rle->type, &rle->rid,
 		rle->start, rle->end, rle->count, 0);
 	}
     }
     free(children, M_TEMP);
     sc->acpi_resources_reserved = 1;
 }
 
 static int
 acpi_set_resource(device_t dev, device_t child, int type, int rid,
     rman_res_t start, rman_res_t count)
 {
     struct acpi_softc *sc = device_get_softc(dev);
     struct acpi_device *ad = device_get_ivars(child);
     struct resource_list *rl = &ad->ad_rl;
     ACPI_DEVICE_INFO *devinfo;
     rman_res_t end;
     
     /* Ignore IRQ resources for PCI link devices. */
     if (type == SYS_RES_IRQ && ACPI_ID_PROBE(dev, child, pcilink_ids) != NULL)
 	return (0);
 
     /*
      * Ignore most resources for PCI root bridges.  Some BIOSes
      * incorrectly enumerate the memory ranges they decode as plain
      * memory resources instead of as ResourceProducer ranges.  Other
      * BIOSes incorrectly list system resource entries for I/O ranges
      * under the PCI bridge.  Do allow the one known-correct case on
      * x86 of a PCI bridge claiming the I/O ports used for PCI config
      * access.
      */
     if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) {
 	if (ACPI_SUCCESS(AcpiGetObjectInfo(ad->ad_handle, &devinfo))) {
 	    if ((devinfo->Flags & ACPI_PCI_ROOT_BRIDGE) != 0) {
 #if defined(__i386__) || defined(__amd64__)
 		if (!(type == SYS_RES_IOPORT && start == CONF1_ADDR_PORT))
 #endif
 		{
 		    AcpiOsFree(devinfo);
 		    return (0);
 		}
 	    }
 	    AcpiOsFree(devinfo);
 	}
     }
 
     /* If the resource is already allocated, fail. */
     if (resource_list_busy(rl, type, rid))
 	return (EBUSY);
 
     /* If the resource is already reserved, release it. */
     if (resource_list_reserved(rl, type, rid))
 	resource_list_unreserve(rl, dev, child, type, rid);
 
     /* Add the resource. */
     end = (start + count - 1);
     resource_list_add(rl, type, rid, start, end, count);
 
     /* Don't reserve resources until the system resources are allocated. */
     if (!sc->acpi_resources_reserved)
 	return (0);
 
     /* Don't reserve system resources. */
     if (ACPI_ID_PROBE(dev, child, sysres_ids) != NULL)
 	return (0);
 
     /*
      * Don't reserve IRQ resources.  There are many sticky things to
      * get right otherwise (e.g. IRQs for psm, atkbd, and HPET when
      * using legacy routing).
      */
     if (type == SYS_RES_IRQ)
 	return (0);
 
     /*
      * Reserve the resource.
      *
      * XXX: Ignores failure for now.  Failure here is probably a
      * BIOS/firmware bug?
      */
     resource_list_reserve(rl, dev, child, type, &rid, start, end, count, 0);
     return (0);
 }
 
 static struct resource *
 acpi_alloc_resource(device_t bus, device_t child, int type, int *rid,
     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
 {
     ACPI_RESOURCE ares;
     struct acpi_device *ad;
     struct resource_list_entry *rle;
     struct resource_list *rl;
     struct resource *res;
     int isdefault = RMAN_IS_DEFAULT_RANGE(start, end);
 
     /*
      * First attempt at allocating the resource.  For direct children,
      * use resource_list_alloc() to handle reserved resources.  For
      * other devices, pass the request up to our parent.
      */
     if (bus == device_get_parent(child)) {
 	ad = device_get_ivars(child);
 	rl = &ad->ad_rl;
 
 	/*
 	 * Simulate the behavior of the ISA bus for direct children
 	 * devices.  That is, if a non-default range is specified for
 	 * a resource that doesn't exist, use bus_set_resource() to
 	 * add the resource before allocating it.  Note that these
 	 * resources will not be reserved.
 	 */
 	if (!isdefault && resource_list_find(rl, type, *rid) == NULL)
 		resource_list_add(rl, type, *rid, start, end, count);
 	res = resource_list_alloc(rl, bus, child, type, rid, start, end, count,
 	    flags);
 	if (res != NULL && type == SYS_RES_IRQ) {
 	    /*
 	     * Since bus_config_intr() takes immediate effect, we cannot
 	     * configure the interrupt associated with a device when we
 	     * parse the resources but have to defer it until a driver
 	     * actually allocates the interrupt via bus_alloc_resource().
 	     *
 	     * XXX: Should we handle the lookup failing?
 	     */
 	    if (ACPI_SUCCESS(acpi_lookup_irq_resource(child, *rid, res, &ares)))
 		acpi_config_intr(child, &ares);
 	}
 
 	/*
 	 * If this is an allocation of the "default" range for a given
 	 * RID, fetch the exact bounds for this resource from the
 	 * resource list entry to try to allocate the range from the
 	 * system resource regions.
 	 */
 	if (res == NULL && isdefault) {
 	    rle = resource_list_find(rl, type, *rid);
 	    if (rle != NULL) {
 		start = rle->start;
 		end = rle->end;
 		count = rle->count;
 	    }
 	}
     } else
 	res = BUS_ALLOC_RESOURCE(device_get_parent(bus), child, type, rid,
 	    start, end, count, flags);
 
     /*
      * If the first attempt failed and this is an allocation of a
      * specific range, try to satisfy the request via a suballocation
      * from our system resource regions.
      */
     if (res == NULL && start + count - 1 == end)
 	res = acpi_alloc_sysres(child, type, rid, start, end, count, flags);
     return (res);
 }
 
 /*
  * Attempt to allocate a specific resource range from the system
  * resource ranges.  Note that we only handle memory and I/O port
  * system resources.
  */
 struct resource *
 acpi_alloc_sysres(device_t child, int type, int *rid, rman_res_t start,
     rman_res_t end, rman_res_t count, u_int flags)
 {
     struct rman *rm;
     struct resource *res;
 
     switch (type) {
     case SYS_RES_IOPORT:
 	rm = &acpi_rman_io;
 	break;
     case SYS_RES_MEMORY:
 	rm = &acpi_rman_mem;
 	break;
     default:
 	return (NULL);
     }
 
     KASSERT(start + count - 1 == end, ("wildcard resource range"));
     res = rman_reserve_resource(rm, start, end, count, flags & ~RF_ACTIVE,
 	child);
     if (res == NULL)
 	return (NULL);
 
     rman_set_rid(res, *rid);
 
     /* If requested, activate the resource using the parent's method. */
     if (flags & RF_ACTIVE)
 	if (bus_activate_resource(child, type, *rid, res) != 0) {
 	    rman_release_resource(res);
 	    return (NULL);
 	}
 
     return (res);
 }
 
 static int
 acpi_is_resource_managed(int type, struct resource *r)
 {
 
     /* We only handle memory and IO resources through rman. */
     switch (type) {
     case SYS_RES_IOPORT:
 	return (rman_is_region_manager(r, &acpi_rman_io));
     case SYS_RES_MEMORY:
 	return (rman_is_region_manager(r, &acpi_rman_mem));
     }
     return (0);
 }
 
 static int
 acpi_adjust_resource(device_t bus, device_t child, int type, struct resource *r,
     rman_res_t start, rman_res_t end)
 {
 
     if (acpi_is_resource_managed(type, r))
 	return (rman_adjust_resource(r, start, end));
     return (bus_generic_adjust_resource(bus, child, type, r, start, end));
 }
 
 static int
 acpi_release_resource(device_t bus, device_t child, int type, int rid,
     struct resource *r)
 {
     int ret;
 
     /*
      * If this resource belongs to one of our internal managers,
      * deactivate it and release it to the local pool.
      */
     if (acpi_is_resource_managed(type, r)) {
 	if (rman_get_flags(r) & RF_ACTIVE) {
 	    ret = bus_deactivate_resource(child, type, rid, r);
 	    if (ret != 0)
 		return (ret);
 	}
 	return (rman_release_resource(r));
     }
 
     return (bus_generic_rl_release_resource(bus, child, type, rid, r));
 }
 
 static void
 acpi_delete_resource(device_t bus, device_t child, int type, int rid)
 {
     struct resource_list *rl;
 
     rl = acpi_get_rlist(bus, child);
     if (resource_list_busy(rl, type, rid)) {
 	device_printf(bus, "delete_resource: Resource still owned by child"
 	    " (type=%d, rid=%d)\n", type, rid);
 	return;
     }
     resource_list_unreserve(rl, bus, child, type, rid);
     resource_list_delete(rl, type, rid);
 }
 
 /* Allocate an IO port or memory resource, given its GAS. */
 int
 acpi_bus_alloc_gas(device_t dev, int *type, int *rid, ACPI_GENERIC_ADDRESS *gas,
     struct resource **res, u_int flags)
 {
     int error, res_type;
 
     error = ENOMEM;
     if (type == NULL || rid == NULL || gas == NULL || res == NULL)
 	return (EINVAL);
 
     /* We only support memory and IO spaces. */
     switch (gas->SpaceId) {
     case ACPI_ADR_SPACE_SYSTEM_MEMORY:
 	res_type = SYS_RES_MEMORY;
 	break;
     case ACPI_ADR_SPACE_SYSTEM_IO:
 	res_type = SYS_RES_IOPORT;
 	break;
     default:
 	return (EOPNOTSUPP);
     }
 
     /*
      * If the register width is less than 8, assume the BIOS author means
      * it is a bit field and just allocate a byte.
      */
     if (gas->BitWidth && gas->BitWidth < 8)
 	gas->BitWidth = 8;
 
     /* Validate the address after we're sure we support the space. */
     if (gas->Address == 0 || gas->BitWidth == 0)
 	return (EINVAL);
 
     bus_set_resource(dev, res_type, *rid, gas->Address,
 	gas->BitWidth / 8);
     *res = bus_alloc_resource_any(dev, res_type, rid, RF_ACTIVE | flags);
     if (*res != NULL) {
 	*type = res_type;
 	error = 0;
     } else
 	bus_delete_resource(dev, res_type, *rid);
 
     return (error);
 }
 
 /* Probe _HID and _CID for compatible ISA PNP ids. */
 static uint32_t
 acpi_isa_get_logicalid(device_t dev)
 {
     ACPI_DEVICE_INFO	*devinfo;
     ACPI_HANDLE		h;
     uint32_t		pnpid;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /* Fetch and validate the HID. */
     if ((h = acpi_get_handle(dev)) == NULL ||
 	ACPI_FAILURE(AcpiGetObjectInfo(h, &devinfo)))
 	return_VALUE (0);
 
     pnpid = (devinfo->Valid & ACPI_VALID_HID) != 0 &&
 	devinfo->HardwareId.Length >= ACPI_EISAID_STRING_SIZE ?
 	PNP_EISAID(devinfo->HardwareId.String) : 0;
     AcpiOsFree(devinfo);
 
     return_VALUE (pnpid);
 }
 
 static int
 acpi_isa_get_compatid(device_t dev, uint32_t *cids, int count)
 {
     ACPI_DEVICE_INFO	*devinfo;
     ACPI_PNP_DEVICE_ID	*ids;
     ACPI_HANDLE		h;
     uint32_t		*pnpid;
     int			i, valid;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     pnpid = cids;
 
     /* Fetch and validate the CID */
     if ((h = acpi_get_handle(dev)) == NULL ||
 	ACPI_FAILURE(AcpiGetObjectInfo(h, &devinfo)))
 	return_VALUE (0);
 
     if ((devinfo->Valid & ACPI_VALID_CID) == 0) {
 	AcpiOsFree(devinfo);
 	return_VALUE (0);
     }
 
     if (devinfo->CompatibleIdList.Count < count)
 	count = devinfo->CompatibleIdList.Count;
     ids = devinfo->CompatibleIdList.Ids;
     for (i = 0, valid = 0; i < count; i++)
 	if (ids[i].Length >= ACPI_EISAID_STRING_SIZE &&
 	    strncmp(ids[i].String, "PNP", 3) == 0) {
 	    *pnpid++ = PNP_EISAID(ids[i].String);
 	    valid++;
 	}
     AcpiOsFree(devinfo);
 
     return_VALUE (valid);
 }
 
 static char *
 acpi_device_id_probe(device_t bus, device_t dev, char **ids) 
 {
     ACPI_HANDLE h;
     ACPI_OBJECT_TYPE t;
     int i;
 
     h = acpi_get_handle(dev);
     if (ids == NULL || h == NULL)
 	return (NULL);
     t = acpi_get_type(dev);
     if (t != ACPI_TYPE_DEVICE && t != ACPI_TYPE_PROCESSOR)
 	return (NULL);
 
     /* Try to match one of the array of IDs with a HID or CID. */
     for (i = 0; ids[i] != NULL; i++) {
 	if (acpi_MatchHid(h, ids[i]))
 	    return (ids[i]);
     }
     return (NULL);
 }
 
 static ACPI_STATUS
 acpi_device_eval_obj(device_t bus, device_t dev, ACPI_STRING pathname,
     ACPI_OBJECT_LIST *parameters, ACPI_BUFFER *ret)
 {
     ACPI_HANDLE h;
 
     if (dev == NULL)
 	h = ACPI_ROOT_OBJECT;
     else if ((h = acpi_get_handle(dev)) == NULL)
 	return (AE_BAD_PARAMETER);
     return (AcpiEvaluateObject(h, pathname, parameters, ret));
 }
 
 int
 acpi_device_pwr_for_sleep(device_t bus, device_t dev, int *dstate)
 {
     struct acpi_softc *sc;
     ACPI_HANDLE handle;
     ACPI_STATUS status;
     char sxd[8];
 
     handle = acpi_get_handle(dev);
 
     /*
      * XXX If we find these devices, don't try to power them down.
      * The serial and IRDA ports on my T23 hang the system when
      * set to D3 and it appears that such legacy devices may
      * need special handling in their drivers.
      */
     if (dstate == NULL || handle == NULL ||
 	acpi_MatchHid(handle, "PNP0500") ||
 	acpi_MatchHid(handle, "PNP0501") ||
 	acpi_MatchHid(handle, "PNP0502") ||
 	acpi_MatchHid(handle, "PNP0510") ||
 	acpi_MatchHid(handle, "PNP0511"))
 	return (ENXIO);
 
     /*
      * Override next state with the value from _SxD, if present.
      * Note illegal _S0D is evaluated because some systems expect this.
      */
     sc = device_get_softc(bus);
     snprintf(sxd, sizeof(sxd), "_S%dD", sc->acpi_sstate);
     status = acpi_GetInteger(handle, sxd, dstate);
     if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
 	    device_printf(dev, "failed to get %s on %s: %s\n", sxd,
 		acpi_name(handle), AcpiFormatException(status));
 	    return (ENXIO);
     }
 
     return (0);
 }
 
 /* Callback arg for our implementation of walking the namespace. */
 struct acpi_device_scan_ctx {
     acpi_scan_cb_t	user_fn;
     void		*arg;
     ACPI_HANDLE		parent;
 };
 
 static ACPI_STATUS
 acpi_device_scan_cb(ACPI_HANDLE h, UINT32 level, void *arg, void **retval)
 {
     struct acpi_device_scan_ctx *ctx;
     device_t dev, old_dev;
     ACPI_STATUS status;
     ACPI_OBJECT_TYPE type;
 
     /*
      * Skip this device if we think we'll have trouble with it or it is
      * the parent where the scan began.
      */
     ctx = (struct acpi_device_scan_ctx *)arg;
     if (acpi_avoid(h) || h == ctx->parent)
 	return (AE_OK);
 
     /* If this is not a valid device type (e.g., a method), skip it. */
     if (ACPI_FAILURE(AcpiGetType(h, &type)))
 	return (AE_OK);
     if (type != ACPI_TYPE_DEVICE && type != ACPI_TYPE_PROCESSOR &&
 	type != ACPI_TYPE_THERMAL && type != ACPI_TYPE_POWER)
 	return (AE_OK);
 
     /*
      * Call the user function with the current device.  If it is unchanged
      * afterwards, return.  Otherwise, we update the handle to the new dev.
      */
     old_dev = acpi_get_device(h);
     dev = old_dev;
     status = ctx->user_fn(h, &dev, level, ctx->arg);
     if (ACPI_FAILURE(status) || old_dev == dev)
 	return (status);
 
     /* Remove the old child and its connection to the handle. */
     if (old_dev != NULL) {
 	device_delete_child(device_get_parent(old_dev), old_dev);
 	AcpiDetachData(h, acpi_fake_objhandler);
     }
 
     /* Recreate the handle association if the user created a device. */
     if (dev != NULL)
 	AcpiAttachData(h, acpi_fake_objhandler, dev);
 
     return (AE_OK);
 }
 
 static ACPI_STATUS
 acpi_device_scan_children(device_t bus, device_t dev, int max_depth,
     acpi_scan_cb_t user_fn, void *arg)
 {
     ACPI_HANDLE h;
     struct acpi_device_scan_ctx ctx;
 
     if (acpi_disabled("children"))
 	return (AE_OK);
 
     if (dev == NULL)
 	h = ACPI_ROOT_OBJECT;
     else if ((h = acpi_get_handle(dev)) == NULL)
 	return (AE_BAD_PARAMETER);
     ctx.user_fn = user_fn;
     ctx.arg = arg;
     ctx.parent = h;
     return (AcpiWalkNamespace(ACPI_TYPE_ANY, h, max_depth,
 	acpi_device_scan_cb, NULL, &ctx, NULL));
 }
 
 /*
  * Even though ACPI devices are not PCI, we use the PCI approach for setting
  * device power states since it's close enough to ACPI.
  */
 static int
 acpi_set_powerstate(device_t child, int state)
 {
     ACPI_HANDLE h;
     ACPI_STATUS status;
 
     h = acpi_get_handle(child);
     if (state < ACPI_STATE_D0 || state > ACPI_D_STATES_MAX)
 	return (EINVAL);
     if (h == NULL)
 	return (0);
 
     /* Ignore errors if the power methods aren't present. */
     status = acpi_pwr_switch_consumer(h, state);
     if (ACPI_SUCCESS(status)) {
 	if (bootverbose)
 	    device_printf(child, "set ACPI power state D%d on %s\n",
 		state, acpi_name(h));
     } else if (status != AE_NOT_FOUND)
 	device_printf(child,
 	    "failed to set ACPI power state D%d on %s: %s\n", state,
 	    acpi_name(h), AcpiFormatException(status));
 
     return (0);
 }
 
 static int
 acpi_isa_pnp_probe(device_t bus, device_t child, struct isa_pnp_id *ids)
 {
     int			result, cid_count, i;
     uint32_t		lid, cids[8];
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /*
      * ISA-style drivers attached to ACPI may persist and
      * probe manually if we return ENOENT.  We never want
      * that to happen, so don't ever return it.
      */
     result = ENXIO;
 
     /* Scan the supplied IDs for a match */
     lid = acpi_isa_get_logicalid(child);
     cid_count = acpi_isa_get_compatid(child, cids, 8);
     while (ids && ids->ip_id) {
 	if (lid == ids->ip_id) {
 	    result = 0;
 	    goto out;
 	}
 	for (i = 0; i < cid_count; i++) {
 	    if (cids[i] == ids->ip_id) {
 		result = 0;
 		goto out;
 	    }
 	}
 	ids++;
     }
 
  out:
     if (result == 0 && ids->ip_desc)
 	device_set_desc(child, ids->ip_desc);
 
     return_VALUE (result);
 }
 
 #if defined(__i386__) || defined(__amd64__)
 /*
  * Look for a MCFG table.  If it is present, use the settings for
  * domain (segment) 0 to setup PCI config space access via the memory
  * map.
  */
 static void
 acpi_enable_pcie(void)
 {
 	ACPI_TABLE_HEADER *hdr;
 	ACPI_MCFG_ALLOCATION *alloc, *end;
 	ACPI_STATUS status;
 
 	status = AcpiGetTable(ACPI_SIG_MCFG, 1, &hdr);
 	if (ACPI_FAILURE(status))
 		return;
 
 	end = (ACPI_MCFG_ALLOCATION *)((char *)hdr + hdr->Length);
 	alloc = (ACPI_MCFG_ALLOCATION *)((ACPI_TABLE_MCFG *)hdr + 1);
 	while (alloc < end) {
 		if (alloc->PciSegment == 0) {
 			pcie_cfgregopen(alloc->Address, alloc->StartBusNumber,
 			    alloc->EndBusNumber);
 			return;
 		}
 		alloc++;
 	}
 }
 #endif
 
 /*
  * Scan all of the ACPI namespace and attach child devices.
  *
  * We should only expect to find devices in the \_PR, \_TZ, \_SI, and
  * \_SB scopes, and \_PR and \_TZ became obsolete in the ACPI 2.0 spec.
  * However, in violation of the spec, some systems place their PCI link
  * devices in \, so we have to walk the whole namespace.  We check the
  * type of namespace nodes, so this should be ok.
  */
 static void
 acpi_probe_children(device_t bus)
 {
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /*
      * Scan the namespace and insert placeholders for all the devices that
      * we find.  We also probe/attach any early devices.
      *
      * Note that we use AcpiWalkNamespace rather than AcpiGetDevices because
      * we want to create nodes for all devices, not just those that are
      * currently present. (This assumes that we don't want to create/remove
      * devices as they appear, which might be smarter.)
      */
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "namespace scan\n"));
     AcpiWalkNamespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, 100, acpi_probe_child,
 	NULL, bus, NULL);
 
     /* Pre-allocate resources for our rman from any sysresource devices. */
     acpi_sysres_alloc(bus);
 
     /* Reserve resources already allocated to children. */
     acpi_reserve_resources(bus);
 
     /* Create any static children by calling device identify methods. */
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "device identify routines\n"));
     bus_generic_probe(bus);
 
     /* Probe/attach all children, created statically and from the namespace. */
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "acpi bus_generic_attach\n"));
     bus_generic_attach(bus);
 
     /* Attach wake sysctls. */
     acpi_wake_sysctl_walk(bus);
 
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "done attaching children\n"));
     return_VOID;
 }
 
 /*
  * Determine the probe order for a given device.
  */
 static void
 acpi_probe_order(ACPI_HANDLE handle, int *order)
 {
 	ACPI_OBJECT_TYPE type;
 
 	/*
 	 * 0. CPUs
 	 * 1. I/O port and memory system resource holders
 	 * 2. Clocks and timers (to handle early accesses)
 	 * 3. Embedded controllers (to handle early accesses)
 	 * 4. PCI Link Devices
 	 */
 	AcpiGetType(handle, &type);
 	if (type == ACPI_TYPE_PROCESSOR)
 		*order = 0;
 	else if (acpi_MatchHid(handle, "PNP0C01") ||
 	    acpi_MatchHid(handle, "PNP0C02"))
 		*order = 1;
 	else if (acpi_MatchHid(handle, "PNP0100") ||
 	    acpi_MatchHid(handle, "PNP0103") ||
 	    acpi_MatchHid(handle, "PNP0B00"))
 		*order = 2;
 	else if (acpi_MatchHid(handle, "PNP0C09"))
 		*order = 3;
 	else if (acpi_MatchHid(handle, "PNP0C0F"))
 		*order = 4;
 }
 
 /*
  * Evaluate a child device and determine whether we might attach a device to
  * it.
  */
 static ACPI_STATUS
 acpi_probe_child(ACPI_HANDLE handle, UINT32 level, void *context, void **status)
 {
     struct acpi_prw_data prw;
     ACPI_OBJECT_TYPE type;
     ACPI_HANDLE h;
     device_t bus, child;
     char *handle_str;
     int order;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (acpi_disabled("children"))
 	return_ACPI_STATUS (AE_OK);
 
     /* Skip this device if we think we'll have trouble with it. */
     if (acpi_avoid(handle))
 	return_ACPI_STATUS (AE_OK);
 
     bus = (device_t)context;
     if (ACPI_SUCCESS(AcpiGetType(handle, &type))) {
 	handle_str = acpi_name(handle);
 	switch (type) {
 	case ACPI_TYPE_DEVICE:
 	    /*
 	     * Since we scan from \, be sure to skip system scope objects.
 	     * \_SB_ and \_TZ_ are defined in ACPICA as devices to work around
 	     * BIOS bugs.  For example, \_SB_ is to allow \_SB_._INI to be run
 	     * during the intialization and \_TZ_ is to support Notify() on it.
 	     */
 	    if (strcmp(handle_str, "\\_SB_") == 0 ||
 		strcmp(handle_str, "\\_TZ_") == 0)
 		break;
 	    if (acpi_parse_prw(handle, &prw) == 0)
 		AcpiSetupGpeForWake(handle, prw.gpe_handle, prw.gpe_bit);
 
 	    /*
 	     * Ignore devices that do not have a _HID or _CID.  They should
 	     * be discovered by other buses (e.g. the PCI bus driver).
 	     */
 	    if (!acpi_has_hid(handle))
 		break;
 	    /* FALLTHROUGH */
 	case ACPI_TYPE_PROCESSOR:
 	case ACPI_TYPE_THERMAL:
 	case ACPI_TYPE_POWER:
 	    /* 
 	     * Create a placeholder device for this node.  Sort the
 	     * placeholder so that the probe/attach passes will run
 	     * breadth-first.  Orders less than ACPI_DEV_BASE_ORDER
 	     * are reserved for special objects (i.e., system
 	     * resources).
 	     */
 	    ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "scanning '%s'\n", handle_str));
 	    order = level * 10 + ACPI_DEV_BASE_ORDER;
 	    acpi_probe_order(handle, &order);
 	    child = BUS_ADD_CHILD(bus, order, NULL, -1);
 	    if (child == NULL)
 		break;
 
 	    /* Associate the handle with the device_t and vice versa. */
 	    acpi_set_handle(child, handle);
 	    AcpiAttachData(handle, acpi_fake_objhandler, child);
 
 	    /*
 	     * Check that the device is present.  If it's not present,
 	     * leave it disabled (so that we have a device_t attached to
 	     * the handle, but we don't probe it).
 	     *
 	     * XXX PCI link devices sometimes report "present" but not
 	     * "functional" (i.e. if disabled).  Go ahead and probe them
 	     * anyway since we may enable them later.
 	     */
 	    if (type == ACPI_TYPE_DEVICE && !acpi_DeviceIsPresent(child)) {
 		/* Never disable PCI link devices. */
 		if (acpi_MatchHid(handle, "PNP0C0F"))
 		    break;
 		/*
 		 * Docking stations should remain enabled since the system
 		 * may be undocked at boot.
 		 */
 		if (ACPI_SUCCESS(AcpiGetHandle(handle, "_DCK", &h)))
 		    break;
 
 		device_disable(child);
 		break;
 	    }
 
 	    /*
 	     * Get the device's resource settings and attach them.
 	     * Note that if the device has _PRS but no _CRS, we need
 	     * to decide when it's appropriate to try to configure the
 	     * device.  Ignore the return value here; it's OK for the
 	     * device not to have any resources.
 	     */
 	    acpi_parse_resources(child, handle, &acpi_res_parse_set, NULL);
 	    break;
 	}
     }
 
     return_ACPI_STATUS (AE_OK);
 }
 
 /*
  * AcpiAttachData() requires an object handler but never uses it.  This is a
  * placeholder object handler so we can store a device_t in an ACPI_HANDLE.
  */
 void
 acpi_fake_objhandler(ACPI_HANDLE h, void *data)
 {
 }
 
 static void
 acpi_shutdown_final(void *arg, int howto)
 {
     struct acpi_softc *sc = (struct acpi_softc *)arg;
     register_t intr;
     ACPI_STATUS status;
 
     /*
      * XXX Shutdown code should only run on the BSP (cpuid 0).
      * Some chipsets do not power off the system correctly if called from
      * an AP.
      */
     if ((howto & RB_POWEROFF) != 0) {
 	status = AcpiEnterSleepStatePrep(ACPI_STATE_S5);
 	if (ACPI_FAILURE(status)) {
 	    device_printf(sc->acpi_dev, "AcpiEnterSleepStatePrep failed - %s\n",
 		AcpiFormatException(status));
 	    return;
 	}
 	device_printf(sc->acpi_dev, "Powering system off\n");
 	intr = intr_disable();
 	status = AcpiEnterSleepState(ACPI_STATE_S5);
 	if (ACPI_FAILURE(status)) {
 	    intr_restore(intr);
 	    device_printf(sc->acpi_dev, "power-off failed - %s\n",
 		AcpiFormatException(status));
 	} else {
 	    DELAY(1000000);
 	    intr_restore(intr);
 	    device_printf(sc->acpi_dev, "power-off failed - timeout\n");
 	}
     } else if ((howto & RB_HALT) == 0 && sc->acpi_handle_reboot) {
 	/* Reboot using the reset register. */
 	status = AcpiReset();
 	if (ACPI_SUCCESS(status)) {
 	    DELAY(1000000);
 	    device_printf(sc->acpi_dev, "reset failed - timeout\n");
 	} else if (status != AE_NOT_EXIST)
 	    device_printf(sc->acpi_dev, "reset failed - %s\n",
 		AcpiFormatException(status));
     } else if (sc->acpi_do_disable && panicstr == NULL) {
 	/*
 	 * Only disable ACPI if the user requested.  On some systems, writing
 	 * the disable value to SMI_CMD hangs the system.
 	 */
 	device_printf(sc->acpi_dev, "Shutting down\n");
 	AcpiTerminate();
     }
 }
 
 static void
 acpi_enable_fixed_events(struct acpi_softc *sc)
 {
     static int	first_time = 1;
 
     /* Enable and clear fixed events and install handlers. */
     if ((AcpiGbl_FADT.Flags & ACPI_FADT_POWER_BUTTON) == 0) {
 	AcpiClearEvent(ACPI_EVENT_POWER_BUTTON);
 	AcpiInstallFixedEventHandler(ACPI_EVENT_POWER_BUTTON,
 				     acpi_event_power_button_sleep, sc);
 	if (first_time)
 	    device_printf(sc->acpi_dev, "Power Button (fixed)\n");
     }
     if ((AcpiGbl_FADT.Flags & ACPI_FADT_SLEEP_BUTTON) == 0) {
 	AcpiClearEvent(ACPI_EVENT_SLEEP_BUTTON);
 	AcpiInstallFixedEventHandler(ACPI_EVENT_SLEEP_BUTTON,
 				     acpi_event_sleep_button_sleep, sc);
 	if (first_time)
 	    device_printf(sc->acpi_dev, "Sleep Button (fixed)\n");
     }
 
     first_time = 0;
 }
 
 /*
  * Returns true if the device is actually present and should
  * be attached to.  This requires the present, enabled, UI-visible 
  * and diagnostics-passed bits to be set.
  */
 BOOLEAN
 acpi_DeviceIsPresent(device_t dev)
 {
     ACPI_DEVICE_INFO	*devinfo;
     ACPI_HANDLE		h;
     BOOLEAN		present;
 
     if ((h = acpi_get_handle(dev)) == NULL ||
 	ACPI_FAILURE(AcpiGetObjectInfo(h, &devinfo)))
 	return (FALSE);
 
     /* If no _STA method, must be present */
     present = (devinfo->Valid & ACPI_VALID_STA) == 0 ||
 	ACPI_DEVICE_PRESENT(devinfo->CurrentStatus) ? TRUE : FALSE;
 
     AcpiOsFree(devinfo);
     return (present);
 }
 
 /*
  * Returns true if the battery is actually present and inserted.
  */
 BOOLEAN
 acpi_BatteryIsPresent(device_t dev)
 {
     ACPI_DEVICE_INFO	*devinfo;
     ACPI_HANDLE		h;
     BOOLEAN		present;
 
     if ((h = acpi_get_handle(dev)) == NULL ||
 	ACPI_FAILURE(AcpiGetObjectInfo(h, &devinfo)))
 	return (FALSE);
 
     /* If no _STA method, must be present */
     present = (devinfo->Valid & ACPI_VALID_STA) == 0 ||
 	ACPI_BATTERY_PRESENT(devinfo->CurrentStatus) ? TRUE : FALSE;
 
     AcpiOsFree(devinfo);
     return (present);
 }
 
 /*
  * Returns true if a device has at least one valid device ID.
  */
 static BOOLEAN
 acpi_has_hid(ACPI_HANDLE h)
 {
     ACPI_DEVICE_INFO	*devinfo;
     BOOLEAN		ret;
 
     if (h == NULL ||
 	ACPI_FAILURE(AcpiGetObjectInfo(h, &devinfo)))
 	return (FALSE);
 
     ret = FALSE;
     if ((devinfo->Valid & ACPI_VALID_HID) != 0)
 	ret = TRUE;
     else if ((devinfo->Valid & ACPI_VALID_CID) != 0)
 	if (devinfo->CompatibleIdList.Count > 0)
 	    ret = TRUE;
 
     AcpiOsFree(devinfo);
     return (ret);
 }
 
 /*
  * Match a HID string against a handle
  */
 BOOLEAN
 acpi_MatchHid(ACPI_HANDLE h, const char *hid) 
 {
     ACPI_DEVICE_INFO	*devinfo;
     BOOLEAN		ret;
     int			i;
 
     if (hid == NULL || h == NULL ||
 	ACPI_FAILURE(AcpiGetObjectInfo(h, &devinfo)))
 	return (FALSE);
 
     ret = FALSE;
     if ((devinfo->Valid & ACPI_VALID_HID) != 0 &&
 	strcmp(hid, devinfo->HardwareId.String) == 0)
 	    ret = TRUE;
     else if ((devinfo->Valid & ACPI_VALID_CID) != 0)
 	for (i = 0; i < devinfo->CompatibleIdList.Count; i++) {
 	    if (strcmp(hid, devinfo->CompatibleIdList.Ids[i].String) == 0) {
 		ret = TRUE;
 		break;
 	    }
 	}
 
     AcpiOsFree(devinfo);
     return (ret);
 }
 
 /*
  * Return the handle of a named object within our scope, ie. that of (parent)
  * or one if its parents.
  */
 ACPI_STATUS
 acpi_GetHandleInScope(ACPI_HANDLE parent, char *path, ACPI_HANDLE *result)
 {
     ACPI_HANDLE		r;
     ACPI_STATUS		status;
 
     /* Walk back up the tree to the root */
     for (;;) {
 	status = AcpiGetHandle(parent, path, &r);
 	if (ACPI_SUCCESS(status)) {
 	    *result = r;
 	    return (AE_OK);
 	}
 	/* XXX Return error here? */
 	if (status != AE_NOT_FOUND)
 	    return (AE_OK);
 	if (ACPI_FAILURE(AcpiGetParent(parent, &r)))
 	    return (AE_NOT_FOUND);
 	parent = r;
     }
 }
 
 /*
  * Allocate a buffer with a preset data size.
  */
 ACPI_BUFFER *
 acpi_AllocBuffer(int size)
 {
     ACPI_BUFFER	*buf;
 
     if ((buf = malloc(size + sizeof(*buf), M_ACPIDEV, M_NOWAIT)) == NULL)
 	return (NULL);
     buf->Length = size;
     buf->Pointer = (void *)(buf + 1);
     return (buf);
 }
 
 ACPI_STATUS
 acpi_SetInteger(ACPI_HANDLE handle, char *path, UINT32 number)
 {
     ACPI_OBJECT arg1;
     ACPI_OBJECT_LIST args;
 
     arg1.Type = ACPI_TYPE_INTEGER;
     arg1.Integer.Value = number;
     args.Count = 1;
     args.Pointer = &arg1;
 
     return (AcpiEvaluateObject(handle, path, &args, NULL));
 }
 
 /*
  * Evaluate a path that should return an integer.
  */
 ACPI_STATUS
 acpi_GetInteger(ACPI_HANDLE handle, char *path, UINT32 *number)
 {
     ACPI_STATUS	status;
     ACPI_BUFFER	buf;
     ACPI_OBJECT	param;
 
     if (handle == NULL)
 	handle = ACPI_ROOT_OBJECT;
 
     /*
      * Assume that what we've been pointed at is an Integer object, or
      * a method that will return an Integer.
      */
     buf.Pointer = &param;
     buf.Length = sizeof(param);
     status = AcpiEvaluateObject(handle, path, NULL, &buf);
     if (ACPI_SUCCESS(status)) {
 	if (param.Type == ACPI_TYPE_INTEGER)
 	    *number = param.Integer.Value;
 	else
 	    status = AE_TYPE;
     }
 
     /* 
      * In some applications, a method that's expected to return an Integer
      * may instead return a Buffer (probably to simplify some internal
      * arithmetic).  We'll try to fetch whatever it is, and if it's a Buffer,
      * convert it into an Integer as best we can.
      *
      * This is a hack.
      */
     if (status == AE_BUFFER_OVERFLOW) {
 	if ((buf.Pointer = AcpiOsAllocate(buf.Length)) == NULL) {
 	    status = AE_NO_MEMORY;
 	} else {
 	    status = AcpiEvaluateObject(handle, path, NULL, &buf);
 	    if (ACPI_SUCCESS(status))
 		status = acpi_ConvertBufferToInteger(&buf, number);
 	    AcpiOsFree(buf.Pointer);
 	}
     }
     return (status);
 }
 
 ACPI_STATUS
 acpi_ConvertBufferToInteger(ACPI_BUFFER *bufp, UINT32 *number)
 {
     ACPI_OBJECT	*p;
     UINT8	*val;
     int		i;
 
     p = (ACPI_OBJECT *)bufp->Pointer;
     if (p->Type == ACPI_TYPE_INTEGER) {
 	*number = p->Integer.Value;
 	return (AE_OK);
     }
     if (p->Type != ACPI_TYPE_BUFFER)
 	return (AE_TYPE);
     if (p->Buffer.Length > sizeof(int))
 	return (AE_BAD_DATA);
 
     *number = 0;
     val = p->Buffer.Pointer;
     for (i = 0; i < p->Buffer.Length; i++)
 	*number += val[i] << (i * 8);
     return (AE_OK);
 }
 
 /*
  * Iterate over the elements of an a package object, calling the supplied
  * function for each element.
  *
  * XXX possible enhancement might be to abort traversal on error.
  */
 ACPI_STATUS
 acpi_ForeachPackageObject(ACPI_OBJECT *pkg,
 	void (*func)(ACPI_OBJECT *comp, void *arg), void *arg)
 {
     ACPI_OBJECT	*comp;
     int		i;
 
     if (pkg == NULL || pkg->Type != ACPI_TYPE_PACKAGE)
 	return (AE_BAD_PARAMETER);
 
     /* Iterate over components */
     i = 0;
     comp = pkg->Package.Elements;
     for (; i < pkg->Package.Count; i++, comp++)
 	func(comp, arg);
 
     return (AE_OK);
 }
 
 /*
  * Find the (index)th resource object in a set.
  */
 ACPI_STATUS
 acpi_FindIndexedResource(ACPI_BUFFER *buf, int index, ACPI_RESOURCE **resp)
 {
     ACPI_RESOURCE	*rp;
     int			i;
 
     rp = (ACPI_RESOURCE *)buf->Pointer;
     i = index;
     while (i-- > 0) {
 	/* Range check */
 	if (rp > (ACPI_RESOURCE *)((u_int8_t *)buf->Pointer + buf->Length))
 	    return (AE_BAD_PARAMETER);
 
 	/* Check for terminator */
 	if (rp->Type == ACPI_RESOURCE_TYPE_END_TAG || rp->Length == 0)
 	    return (AE_NOT_FOUND);
 	rp = ACPI_NEXT_RESOURCE(rp);
     }
     if (resp != NULL)
 	*resp = rp;
 
     return (AE_OK);
 }
 
 /*
  * Append an ACPI_RESOURCE to an ACPI_BUFFER.
  *
  * Given a pointer to an ACPI_RESOURCE structure, expand the ACPI_BUFFER
  * provided to contain it.  If the ACPI_BUFFER is empty, allocate a sensible
  * backing block.  If the ACPI_RESOURCE is NULL, return an empty set of
  * resources.
  */
 #define ACPI_INITIAL_RESOURCE_BUFFER_SIZE	512
 
 ACPI_STATUS
 acpi_AppendBufferResource(ACPI_BUFFER *buf, ACPI_RESOURCE *res)
 {
     ACPI_RESOURCE	*rp;
     void		*newp;
 
     /* Initialise the buffer if necessary. */
     if (buf->Pointer == NULL) {
 	buf->Length = ACPI_INITIAL_RESOURCE_BUFFER_SIZE;
 	if ((buf->Pointer = AcpiOsAllocate(buf->Length)) == NULL)
 	    return (AE_NO_MEMORY);
 	rp = (ACPI_RESOURCE *)buf->Pointer;
 	rp->Type = ACPI_RESOURCE_TYPE_END_TAG;
 	rp->Length = ACPI_RS_SIZE_MIN;
     }
     if (res == NULL)
 	return (AE_OK);
 
     /*
      * Scan the current buffer looking for the terminator.
      * This will either find the terminator or hit the end
      * of the buffer and return an error.
      */
     rp = (ACPI_RESOURCE *)buf->Pointer;
     for (;;) {
 	/* Range check, don't go outside the buffer */
 	if (rp >= (ACPI_RESOURCE *)((u_int8_t *)buf->Pointer + buf->Length))
 	    return (AE_BAD_PARAMETER);
 	if (rp->Type == ACPI_RESOURCE_TYPE_END_TAG || rp->Length == 0)
 	    break;
 	rp = ACPI_NEXT_RESOURCE(rp);
     }
 
     /*
      * Check the size of the buffer and expand if required.
      *
      * Required size is:
      *	size of existing resources before terminator + 
      *	size of new resource and header +
      * 	size of terminator.
      *
      * Note that this loop should really only run once, unless
      * for some reason we are stuffing a *really* huge resource.
      */
     while ((((u_int8_t *)rp - (u_int8_t *)buf->Pointer) + 
 	    res->Length + ACPI_RS_SIZE_NO_DATA +
 	    ACPI_RS_SIZE_MIN) >= buf->Length) {
 	if ((newp = AcpiOsAllocate(buf->Length * 2)) == NULL)
 	    return (AE_NO_MEMORY);
 	bcopy(buf->Pointer, newp, buf->Length);
 	rp = (ACPI_RESOURCE *)((u_int8_t *)newp +
 			       ((u_int8_t *)rp - (u_int8_t *)buf->Pointer));
 	AcpiOsFree(buf->Pointer);
 	buf->Pointer = newp;
 	buf->Length += buf->Length;
     }
 
     /* Insert the new resource. */
     bcopy(res, rp, res->Length + ACPI_RS_SIZE_NO_DATA);
 
     /* And add the terminator. */
     rp = ACPI_NEXT_RESOURCE(rp);
     rp->Type = ACPI_RESOURCE_TYPE_END_TAG;
     rp->Length = ACPI_RS_SIZE_MIN;
 
     return (AE_OK);
 }
 
 /*
  * Set interrupt model.
  */
 ACPI_STATUS
 acpi_SetIntrModel(int model)
 {
 
     return (acpi_SetInteger(ACPI_ROOT_OBJECT, "_PIC", model));
 }
 
 /*
  * Walk subtables of a table and call a callback routine for each
  * subtable.  The caller should provide the first subtable and a
  * pointer to the end of the table.  This can be used to walk tables
  * such as MADT and SRAT that use subtable entries.
  */
 void
 acpi_walk_subtables(void *first, void *end, acpi_subtable_handler *handler,
     void *arg)
 {
     ACPI_SUBTABLE_HEADER *entry;
 
     for (entry = first; (void *)entry < end; ) {
 	/* Avoid an infinite loop if we hit a bogus entry. */
 	if (entry->Length < sizeof(ACPI_SUBTABLE_HEADER))
 	    return;
 
 	handler(entry, arg);
 	entry = ACPI_ADD_PTR(ACPI_SUBTABLE_HEADER, entry, entry->Length);
     }
 }
 
 /*
  * DEPRECATED.  This interface has serious deficiencies and will be
  * removed.
  *
  * Immediately enter the sleep state.  In the old model, acpiconf(8) ran
  * rc.suspend and rc.resume so we don't have to notify devd(8) to do this.
  */
 ACPI_STATUS
 acpi_SetSleepState(struct acpi_softc *sc, int state)
 {
     static int once;
 
     if (!once) {
 	device_printf(sc->acpi_dev,
 "warning: acpi_SetSleepState() deprecated, need to update your software\n");
 	once = 1;
     }
     return (acpi_EnterSleepState(sc, state));
 }
 
 #if defined(__amd64__) || defined(__i386__)
 static void
 acpi_sleep_force_task(void *context)
 {
     struct acpi_softc *sc = (struct acpi_softc *)context;
 
     if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
 	device_printf(sc->acpi_dev, "force sleep state S%d failed\n",
 	    sc->acpi_next_sstate);
 }
 
 static void
 acpi_sleep_force(void *arg)
 {
     struct acpi_softc *sc = (struct acpi_softc *)arg;
 
     device_printf(sc->acpi_dev,
 	"suspend request timed out, forcing sleep now\n");
     /*
      * XXX Suspending from callout causes freezes in DEVICE_SUSPEND().
      * Suspend from acpi_task thread instead.
      */
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_sleep_force_task, sc)))
 	device_printf(sc->acpi_dev, "AcpiOsExecute() for sleeping failed\n");
 }
 #endif
 
 /*
  * Request that the system enter the given suspend state.  All /dev/apm
  * devices and devd(8) will be notified.  Userland then has a chance to
  * save state and acknowledge the request.  The system sleeps once all
  * acks are in.
  */
 int
 acpi_ReqSleepState(struct acpi_softc *sc, int state)
 {
 #if defined(__amd64__) || defined(__i386__)
     struct apm_clone_data *clone;
     ACPI_STATUS status;
 
     if (state < ACPI_STATE_S1 || state > ACPI_S_STATES_MAX)
 	return (EINVAL);
     if (!acpi_sleep_states[state])
 	return (EOPNOTSUPP);
 
     /*
      * If a reboot/shutdown/suspend request is already in progress or
      * suspend is blocked due to an upcoming shutdown, just return.
      */
     if (rebooting || sc->acpi_next_sstate != 0 || suspend_blocked) {
 	return (0);
     }
 
     /* Wait until sleep is enabled. */
     while (sc->acpi_sleep_disabled) {
 	AcpiOsSleep(1000);
     }
 
     ACPI_LOCK(acpi);
 
     sc->acpi_next_sstate = state;
 
     /* S5 (soft-off) should be entered directly with no waiting. */
     if (state == ACPI_STATE_S5) {
     	ACPI_UNLOCK(acpi);
 	status = acpi_EnterSleepState(sc, state);
 	return (ACPI_SUCCESS(status) ? 0 : ENXIO);
     }
 
     /* Record the pending state and notify all apm devices. */
     STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) {
 	clone->notify_status = APM_EV_NONE;
 	if ((clone->flags & ACPI_EVF_DEVD) == 0) {
 	    selwakeuppri(&clone->sel_read, PZERO);
 	    KNOTE_LOCKED(&clone->sel_read.si_note, 0);
 	}
     }
 
     /* If devd(8) is not running, immediately enter the sleep state. */
     if (!devctl_process_running()) {
 	ACPI_UNLOCK(acpi);
 	status = acpi_EnterSleepState(sc, state);
 	return (ACPI_SUCCESS(status) ? 0 : ENXIO);
     }
 
     /*
      * Set a timeout to fire if userland doesn't ack the suspend request
      * in time.  This way we still eventually go to sleep if we were
      * overheating or running low on battery, even if userland is hung.
      * We cancel this timeout once all userland acks are in or the
      * suspend request is aborted.
      */
     callout_reset(&sc->susp_force_to, 10 * hz, acpi_sleep_force, sc);
     ACPI_UNLOCK(acpi);
 
     /* Now notify devd(8) also. */
     acpi_UserNotify("Suspend", ACPI_ROOT_OBJECT, state);
 
     return (0);
 #else
     /* This platform does not support acpi suspend/resume. */
     return (EOPNOTSUPP);
 #endif
 }
 
 /*
  * Acknowledge (or reject) a pending sleep state.  The caller has
  * prepared for suspend and is now ready for it to proceed.  If the
  * error argument is non-zero, it indicates suspend should be cancelled
  * and gives an errno value describing why.  Once all votes are in,
  * we suspend the system.
  */
 int
 acpi_AckSleepState(struct apm_clone_data *clone, int error)
 {
 #if defined(__amd64__) || defined(__i386__)
     struct acpi_softc *sc;
     int ret, sleeping;
 
     /* If no pending sleep state, return an error. */
     ACPI_LOCK(acpi);
     sc = clone->acpi_sc;
     if (sc->acpi_next_sstate == 0) {
     	ACPI_UNLOCK(acpi);
 	return (ENXIO);
     }
 
     /* Caller wants to abort suspend process. */
     if (error) {
 	sc->acpi_next_sstate = 0;
 	callout_stop(&sc->susp_force_to);
 	device_printf(sc->acpi_dev,
 	    "listener on %s cancelled the pending suspend\n",
 	    devtoname(clone->cdev));
     	ACPI_UNLOCK(acpi);
 	return (0);
     }
 
     /*
      * Mark this device as acking the suspend request.  Then, walk through
      * all devices, seeing if they agree yet.  We only count devices that
      * are writable since read-only devices couldn't ack the request.
      */
     sleeping = TRUE;
     clone->notify_status = APM_EV_ACKED;
     STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) {
 	if ((clone->flags & ACPI_EVF_WRITE) != 0 &&
 	    clone->notify_status != APM_EV_ACKED) {
 	    sleeping = FALSE;
 	    break;
 	}
     }
 
     /* If all devices have voted "yes", we will suspend now. */
     if (sleeping)
 	callout_stop(&sc->susp_force_to);
     ACPI_UNLOCK(acpi);
     ret = 0;
     if (sleeping) {
 	if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
 		ret = ENODEV;
     }
     return (ret);
 #else
     /* This platform does not support acpi suspend/resume. */
     return (EOPNOTSUPP);
 #endif
 }
 
 static void
 acpi_sleep_enable(void *arg)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)arg;
 
     ACPI_LOCK_ASSERT(acpi);
 
     /* Reschedule if the system is not fully up and running. */
     if (!AcpiGbl_SystemAwakeAndRunning) {
 	callout_schedule(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME);
 	return;
     }
 
     sc->acpi_sleep_disabled = FALSE;
 }
 
 static ACPI_STATUS
 acpi_sleep_disable(struct acpi_softc *sc)
 {
     ACPI_STATUS		status;
 
     /* Fail if the system is not fully up and running. */
     if (!AcpiGbl_SystemAwakeAndRunning)
 	return (AE_ERROR);
 
     ACPI_LOCK(acpi);
     status = sc->acpi_sleep_disabled ? AE_ERROR : AE_OK;
     sc->acpi_sleep_disabled = TRUE;
     ACPI_UNLOCK(acpi);
 
     return (status);
 }
 
 enum acpi_sleep_state {
     ACPI_SS_NONE,
     ACPI_SS_GPE_SET,
     ACPI_SS_DEV_SUSPEND,
     ACPI_SS_SLP_PREP,
     ACPI_SS_SLEPT,
 };
 
 /*
  * Enter the desired system sleep state.
  *
  * Currently we support S1-S5 but S4 is only S4BIOS
  */
 static ACPI_STATUS
 acpi_EnterSleepState(struct acpi_softc *sc, int state)
 {
     register_t intr;
     ACPI_STATUS status;
     ACPI_EVENT_STATUS power_button_status;
     enum acpi_sleep_state slp_state;
     int sleep_result;
 
     ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
 
     if (state < ACPI_STATE_S1 || state > ACPI_S_STATES_MAX)
 	return_ACPI_STATUS (AE_BAD_PARAMETER);
     if (!acpi_sleep_states[state]) {
 	device_printf(sc->acpi_dev, "Sleep state S%d not supported by BIOS\n",
 	    state);
 	return (AE_SUPPORT);
     }
 
     /* Re-entry once we're suspending is not allowed. */
     status = acpi_sleep_disable(sc);
     if (ACPI_FAILURE(status)) {
 	device_printf(sc->acpi_dev,
 	    "suspend request ignored (not ready yet)\n");
 	return (status);
     }
 
     if (state == ACPI_STATE_S5) {
 	/*
 	 * Shut down cleanly and power off.  This will call us back through the
 	 * shutdown handlers.
 	 */
 	shutdown_nice(RB_POWEROFF);
 	return_ACPI_STATUS (AE_OK);
     }
 
     EVENTHANDLER_INVOKE(power_suspend_early);
     stop_all_proc();
     EVENTHANDLER_INVOKE(power_suspend);
 
     if (smp_started) {
 	thread_lock(curthread);
 	sched_bind(curthread, 0);
 	thread_unlock(curthread);
     }
 
     /*
      * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
      * drivers need this.
      */
     mtx_lock(&Giant);
 
     slp_state = ACPI_SS_NONE;
 
     sc->acpi_sstate = state;
 
     /* Enable any GPEs as appropriate and requested by the user. */
     acpi_wake_prep_walk(state);
     slp_state = ACPI_SS_GPE_SET;
 
     /*
      * Inform all devices that we are going to sleep.  If at least one
      * device fails, DEVICE_SUSPEND() automatically resumes the tree.
      *
      * XXX Note that a better two-pass approach with a 'veto' pass
      * followed by a "real thing" pass would be better, but the current
      * bus interface does not provide for this.
      */
     if (DEVICE_SUSPEND(root_bus) != 0) {
 	device_printf(sc->acpi_dev, "device_suspend failed\n");
 	goto backout;
     }
     slp_state = ACPI_SS_DEV_SUSPEND;
 
     /* If testing device suspend only, back out of everything here. */
     if (acpi_susp_bounce)
 	goto backout;
 
     status = AcpiEnterSleepStatePrep(state);
     if (ACPI_FAILURE(status)) {
 	device_printf(sc->acpi_dev, "AcpiEnterSleepStatePrep failed - %s\n",
 		      AcpiFormatException(status));
 	goto backout;
     }
     slp_state = ACPI_SS_SLP_PREP;
 
     if (sc->acpi_sleep_delay > 0)
 	DELAY(sc->acpi_sleep_delay * 1000000);
 
     intr = intr_disable();
     if (state != ACPI_STATE_S1) {
 	sleep_result = acpi_sleep_machdep(sc, state);
 	acpi_wakeup_machdep(sc, state, sleep_result, 0);
 
 	/*
 	 * XXX According to ACPI specification SCI_EN bit should be restored
 	 * by ACPI platform (BIOS, firmware) to its pre-sleep state.
 	 * Unfortunately some BIOSes fail to do that and that leads to
 	 * unexpected and serious consequences during wake up like a system
 	 * getting stuck in SMI handlers.
 	 * This hack is picked up from Linux, which claims that it follows
 	 * Windows behavior.
 	 */
 	if (sleep_result == 1 && state != ACPI_STATE_S4)
 	    AcpiWriteBitRegister(ACPI_BITREG_SCI_ENABLE, ACPI_ENABLE_EVENT);
 
 	AcpiLeaveSleepStatePrep(state);
 
 	if (sleep_result == 1 && state == ACPI_STATE_S3) {
 	    /*
 	     * Prevent mis-interpretation of the wakeup by power button
 	     * as a request for power off.
 	     * Ideally we should post an appropriate wakeup event,
 	     * perhaps using acpi_event_power_button_wake or alike.
 	     *
 	     * Clearing of power button status after wakeup is mandated
 	     * by ACPI specification in section "Fixed Power Button".
 	     *
 	     * XXX As of ACPICA 20121114 AcpiGetEventStatus provides
 	     * status as 0/1 corressponding to inactive/active despite
 	     * its type being ACPI_EVENT_STATUS.  In other words,
 	     * we should not test for ACPI_EVENT_FLAG_SET for time being.
 	     */
 	    if (ACPI_SUCCESS(AcpiGetEventStatus(ACPI_EVENT_POWER_BUTTON,
 		&power_button_status)) && power_button_status != 0) {
 		AcpiClearEvent(ACPI_EVENT_POWER_BUTTON);
 		device_printf(sc->acpi_dev,
 		    "cleared fixed power button status\n");
 	    }
 	}
 
 	intr_restore(intr);
 
 	/* call acpi_wakeup_machdep() again with interrupt enabled */
 	acpi_wakeup_machdep(sc, state, sleep_result, 1);
 
 	if (sleep_result == -1)
 		goto backout;
 
 	/* Re-enable ACPI hardware on wakeup from sleep state 4. */
 	if (state == ACPI_STATE_S4)
 	    AcpiEnable();
     } else {
 	status = AcpiEnterSleepState(state);
 	AcpiLeaveSleepStatePrep(state);
 	intr_restore(intr);
 	if (ACPI_FAILURE(status)) {
 	    device_printf(sc->acpi_dev, "AcpiEnterSleepState failed - %s\n",
 			  AcpiFormatException(status));
 	    goto backout;
 	}
     }
     slp_state = ACPI_SS_SLEPT;
 
     /*
      * Back out state according to how far along we got in the suspend
      * process.  This handles both the error and success cases.
      */
 backout:
     if (slp_state >= ACPI_SS_GPE_SET) {
 	acpi_wake_prep_walk(state);
 	sc->acpi_sstate = ACPI_STATE_S0;
     }
     if (slp_state >= ACPI_SS_DEV_SUSPEND)
 	DEVICE_RESUME(root_bus);
     if (slp_state >= ACPI_SS_SLP_PREP)
 	AcpiLeaveSleepState(state);
     if (slp_state >= ACPI_SS_SLEPT) {
 	acpi_resync_clock(sc);
 	acpi_enable_fixed_events(sc);
     }
     sc->acpi_next_sstate = 0;
 
     mtx_unlock(&Giant);
 
     if (smp_started) {
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
     }
 
     resume_all_proc();
 
     EVENTHANDLER_INVOKE(power_resume);
 
     /* Allow another sleep request after a while. */
     callout_schedule(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME);
 
     /* Run /etc/rc.resume after we are back. */
     if (devctl_process_running())
 	acpi_UserNotify("Resume", ACPI_ROOT_OBJECT, state);
 
     return_ACPI_STATUS (status);
 }
 
 static void
 acpi_resync_clock(struct acpi_softc *sc)
 {
 #ifdef __amd64__
     if (!acpi_reset_clock)
 	return;
 
     /*
      * Warm up timecounter again and reset system clock.
      */
     (void)timecounter->tc_get_timecount(timecounter);
     (void)timecounter->tc_get_timecount(timecounter);
     inittodr(time_second + sc->acpi_sleep_delay);
 #endif
 }
 
 /* Enable or disable the device's wake GPE. */
 int
 acpi_wake_set_enable(device_t dev, int enable)
 {
     struct acpi_prw_data prw;
     ACPI_STATUS status;
     int flags;
 
     /* Make sure the device supports waking the system and get the GPE. */
     if (acpi_parse_prw(acpi_get_handle(dev), &prw) != 0)
 	return (ENXIO);
 
     flags = acpi_get_flags(dev);
     if (enable) {
 	status = AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit,
 	    ACPI_GPE_ENABLE);
 	if (ACPI_FAILURE(status)) {
 	    device_printf(dev, "enable wake failed\n");
 	    return (ENXIO);
 	}
 	acpi_set_flags(dev, flags | ACPI_FLAG_WAKE_ENABLED);
     } else {
 	status = AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit,
 	    ACPI_GPE_DISABLE);
 	if (ACPI_FAILURE(status)) {
 	    device_printf(dev, "disable wake failed\n");
 	    return (ENXIO);
 	}
 	acpi_set_flags(dev, flags & ~ACPI_FLAG_WAKE_ENABLED);
     }
 
     return (0);
 }
 
 static int
 acpi_wake_sleep_prep(ACPI_HANDLE handle, int sstate)
 {
     struct acpi_prw_data prw;
     device_t dev;
 
     /* Check that this is a wake-capable device and get its GPE. */
     if (acpi_parse_prw(handle, &prw) != 0)
 	return (ENXIO);
     dev = acpi_get_device(handle);
 
     /*
      * The destination sleep state must be less than (i.e., higher power)
      * or equal to the value specified by _PRW.  If this GPE cannot be
      * enabled for the next sleep state, then disable it.  If it can and
      * the user requested it be enabled, turn on any required power resources
      * and set _PSW.
      */
     if (sstate > prw.lowest_wake) {
 	AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit, ACPI_GPE_DISABLE);
 	if (bootverbose)
 	    device_printf(dev, "wake_prep disabled wake for %s (S%d)\n",
 		acpi_name(handle), sstate);
     } else if (dev && (acpi_get_flags(dev) & ACPI_FLAG_WAKE_ENABLED) != 0) {
 	acpi_pwr_wake_enable(handle, 1);
 	acpi_SetInteger(handle, "_PSW", 1);
 	if (bootverbose)
 	    device_printf(dev, "wake_prep enabled for %s (S%d)\n",
 		acpi_name(handle), sstate);
     }
 
     return (0);
 }
 
 static int
 acpi_wake_run_prep(ACPI_HANDLE handle, int sstate)
 {
     struct acpi_prw_data prw;
     device_t dev;
 
     /*
      * Check that this is a wake-capable device and get its GPE.  Return
      * now if the user didn't enable this device for wake.
      */
     if (acpi_parse_prw(handle, &prw) != 0)
 	return (ENXIO);
     dev = acpi_get_device(handle);
     if (dev == NULL || (acpi_get_flags(dev) & ACPI_FLAG_WAKE_ENABLED) == 0)
 	return (0);
 
     /*
      * If this GPE couldn't be enabled for the previous sleep state, it was
      * disabled before going to sleep so re-enable it.  If it was enabled,
      * clear _PSW and turn off any power resources it used.
      */
     if (sstate > prw.lowest_wake) {
 	AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit, ACPI_GPE_ENABLE);
 	if (bootverbose)
 	    device_printf(dev, "run_prep re-enabled %s\n", acpi_name(handle));
     } else {
 	acpi_SetInteger(handle, "_PSW", 0);
 	acpi_pwr_wake_enable(handle, 0);
 	if (bootverbose)
 	    device_printf(dev, "run_prep cleaned up for %s\n",
 		acpi_name(handle));
     }
 
     return (0);
 }
 
 static ACPI_STATUS
 acpi_wake_prep(ACPI_HANDLE handle, UINT32 level, void *context, void **status)
 {
     int sstate;
 
     /* If suspending, run the sleep prep function, otherwise wake. */
     sstate = *(int *)context;
     if (AcpiGbl_SystemAwakeAndRunning)
 	acpi_wake_sleep_prep(handle, sstate);
     else
 	acpi_wake_run_prep(handle, sstate);
     return (AE_OK);
 }
 
 /* Walk the tree rooted at acpi0 to prep devices for suspend/resume. */
 static int
 acpi_wake_prep_walk(int sstate)
 {
     ACPI_HANDLE sb_handle;
 
     if (ACPI_SUCCESS(AcpiGetHandle(ACPI_ROOT_OBJECT, "\\_SB_", &sb_handle)))
 	AcpiWalkNamespace(ACPI_TYPE_DEVICE, sb_handle, 100,
 	    acpi_wake_prep, NULL, &sstate, NULL);
     return (0);
 }
 
 /* Walk the tree rooted at acpi0 to attach per-device wake sysctls. */
 static int
 acpi_wake_sysctl_walk(device_t dev)
 {
     int error, i, numdevs;
     device_t *devlist;
     device_t child;
     ACPI_STATUS status;
 
     error = device_get_children(dev, &devlist, &numdevs);
     if (error != 0 || numdevs == 0) {
 	if (numdevs == 0)
 	    free(devlist, M_TEMP);
 	return (error);
     }
     for (i = 0; i < numdevs; i++) {
 	child = devlist[i];
 	acpi_wake_sysctl_walk(child);
 	if (!device_is_attached(child))
 	    continue;
 	status = AcpiEvaluateObject(acpi_get_handle(child), "_PRW", NULL, NULL);
 	if (ACPI_SUCCESS(status)) {
 	    SYSCTL_ADD_PROC(device_get_sysctl_ctx(child),
 		SYSCTL_CHILDREN(device_get_sysctl_tree(child)), OID_AUTO,
 		"wake", CTLTYPE_INT | CTLFLAG_RW, child, 0,
 		acpi_wake_set_sysctl, "I", "Device set to wake the system");
 	}
     }
     free(devlist, M_TEMP);
 
     return (0);
 }
 
 /* Enable or disable wake from userland. */
 static int
 acpi_wake_set_sysctl(SYSCTL_HANDLER_ARGS)
 {
     int enable, error;
     device_t dev;
 
     dev = (device_t)arg1;
     enable = (acpi_get_flags(dev) & ACPI_FLAG_WAKE_ENABLED) ? 1 : 0;
 
     error = sysctl_handle_int(oidp, &enable, 0, req);
     if (error != 0 || req->newptr == NULL)
 	return (error);
     if (enable != 0 && enable != 1)
 	return (EINVAL);
 
     return (acpi_wake_set_enable(dev, enable));
 }
 
 /* Parse a device's _PRW into a structure. */
 int
 acpi_parse_prw(ACPI_HANDLE h, struct acpi_prw_data *prw)
 {
     ACPI_STATUS			status;
     ACPI_BUFFER			prw_buffer;
     ACPI_OBJECT			*res, *res2;
     int				error, i, power_count;
 
     if (h == NULL || prw == NULL)
 	return (EINVAL);
 
     /*
      * The _PRW object (7.2.9) is only required for devices that have the
      * ability to wake the system from a sleeping state.
      */
     error = EINVAL;
     prw_buffer.Pointer = NULL;
     prw_buffer.Length = ACPI_ALLOCATE_BUFFER;
     status = AcpiEvaluateObject(h, "_PRW", NULL, &prw_buffer);
     if (ACPI_FAILURE(status))
 	return (ENOENT);
     res = (ACPI_OBJECT *)prw_buffer.Pointer;
     if (res == NULL)
 	return (ENOENT);
     if (!ACPI_PKG_VALID(res, 2))
 	goto out;
 
     /*
      * Element 1 of the _PRW object:
      * The lowest power system sleeping state that can be entered while still
      * providing wake functionality.  The sleeping state being entered must
      * be less than (i.e., higher power) or equal to this value.
      */
     if (acpi_PkgInt32(res, 1, &prw->lowest_wake) != 0)
 	goto out;
 
     /*
      * Element 0 of the _PRW object:
      */
     switch (res->Package.Elements[0].Type) {
     case ACPI_TYPE_INTEGER:
 	/*
 	 * If the data type of this package element is numeric, then this
 	 * _PRW package element is the bit index in the GPEx_EN, in the
 	 * GPE blocks described in the FADT, of the enable bit that is
 	 * enabled for the wake event.
 	 */
 	prw->gpe_handle = NULL;
 	prw->gpe_bit = res->Package.Elements[0].Integer.Value;
 	error = 0;
 	break;
     case ACPI_TYPE_PACKAGE:
 	/*
 	 * If the data type of this package element is a package, then this
 	 * _PRW package element is itself a package containing two
 	 * elements.  The first is an object reference to the GPE Block
 	 * device that contains the GPE that will be triggered by the wake
 	 * event.  The second element is numeric and it contains the bit
 	 * index in the GPEx_EN, in the GPE Block referenced by the
 	 * first element in the package, of the enable bit that is enabled for
 	 * the wake event.
 	 *
 	 * For example, if this field is a package then it is of the form:
 	 * Package() {\_SB.PCI0.ISA.GPE, 2}
 	 */
 	res2 = &res->Package.Elements[0];
 	if (!ACPI_PKG_VALID(res2, 2))
 	    goto out;
 	prw->gpe_handle = acpi_GetReference(NULL, &res2->Package.Elements[0]);
 	if (prw->gpe_handle == NULL)
 	    goto out;
 	if (acpi_PkgInt32(res2, 1, &prw->gpe_bit) != 0)
 	    goto out;
 	error = 0;
 	break;
     default:
 	goto out;
     }
 
     /* Elements 2 to N of the _PRW object are power resources. */
     power_count = res->Package.Count - 2;
     if (power_count > ACPI_PRW_MAX_POWERRES) {
 	printf("ACPI device %s has too many power resources\n", acpi_name(h));
 	power_count = 0;
     }
     prw->power_res_count = power_count;
     for (i = 0; i < power_count; i++)
 	prw->power_res[i] = res->Package.Elements[i];
 
 out:
     if (prw_buffer.Pointer != NULL)
 	AcpiOsFree(prw_buffer.Pointer);
     return (error);
 }
 
 /*
  * ACPI Event Handlers
  */
 
 /* System Event Handlers (registered by EVENTHANDLER_REGISTER) */
 
 static void
 acpi_system_eventhandler_sleep(void *arg, int state)
 {
     struct acpi_softc *sc = (struct acpi_softc *)arg;
     int ret;
 
     ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
 
     /* Check if button action is disabled or unknown. */
     if (state == ACPI_STATE_UNKNOWN)
 	return;
 
     /* Request that the system prepare to enter the given suspend state. */
     ret = acpi_ReqSleepState(sc, state);
     if (ret != 0)
 	device_printf(sc->acpi_dev,
 	    "request to enter state S%d failed (err %d)\n", state, ret);
 
     return_VOID;
 }
 
 static void
 acpi_system_eventhandler_wakeup(void *arg, int state)
 {
 
     ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
 
     /* Currently, nothing to do for wakeup. */
 
     return_VOID;
 }
 
 /* 
  * ACPICA Event Handlers (FixedEvent, also called from button notify handler)
  */
 static void
 acpi_invoke_sleep_eventhandler(void *context)
 {
 
     EVENTHANDLER_INVOKE(acpi_sleep_event, *(int *)context);
 }
 
 static void
 acpi_invoke_wake_eventhandler(void *context)
 {
 
     EVENTHANDLER_INVOKE(acpi_wakeup_event, *(int *)context);
 }
 
 UINT32
 acpi_event_power_button_sleep(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_sleep_eventhandler, &sc->acpi_power_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 UINT32
 acpi_event_power_button_wake(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_wake_eventhandler, &sc->acpi_power_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 UINT32
 acpi_event_sleep_button_sleep(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_sleep_eventhandler, &sc->acpi_sleep_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 UINT32
 acpi_event_sleep_button_wake(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_wake_eventhandler, &sc->acpi_sleep_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 /*
  * XXX This static buffer is suboptimal.  There is no locking so only
  * use this for single-threaded callers.
  */
 char *
 acpi_name(ACPI_HANDLE handle)
 {
     ACPI_BUFFER buf;
     static char data[256];
 
     buf.Length = sizeof(data);
     buf.Pointer = data;
 
     if (handle && ACPI_SUCCESS(AcpiGetName(handle, ACPI_FULL_PATHNAME, &buf)))
 	return (data);
     return ("(unknown)");
 }
 
 /*
  * Debugging/bug-avoidance.  Avoid trying to fetch info on various
  * parts of the namespace.
  */
 int
 acpi_avoid(ACPI_HANDLE handle)
 {
     char	*cp, *env, *np;
     int		len;
 
     np = acpi_name(handle);
     if (*np == '\\')
 	np++;
     if ((env = kern_getenv("debug.acpi.avoid")) == NULL)
 	return (0);
 
     /* Scan the avoid list checking for a match */
     cp = env;
     for (;;) {
 	while (*cp != 0 && isspace(*cp))
 	    cp++;
 	if (*cp == 0)
 	    break;
 	len = 0;
 	while (cp[len] != 0 && !isspace(cp[len]))
 	    len++;
 	if (!strncmp(cp, np, len)) {
 	    freeenv(env);
 	    return(1);
 	}
 	cp += len;
     }
     freeenv(env);
 
     return (0);
 }
 
 /*
  * Debugging/bug-avoidance.  Disable ACPI subsystem components.
  */
 int
 acpi_disabled(char *subsys)
 {
     char	*cp, *env;
     int		len;
 
     if ((env = kern_getenv("debug.acpi.disabled")) == NULL)
 	return (0);
     if (strcmp(env, "all") == 0) {
 	freeenv(env);
 	return (1);
     }
 
     /* Scan the disable list, checking for a match. */
     cp = env;
     for (;;) {
 	while (*cp != '\0' && isspace(*cp))
 	    cp++;
 	if (*cp == '\0')
 	    break;
 	len = 0;
 	while (cp[len] != '\0' && !isspace(cp[len]))
 	    len++;
 	if (strncmp(cp, subsys, len) == 0) {
 	    freeenv(env);
 	    return (1);
 	}
 	cp += len;
     }
     freeenv(env);
 
     return (0);
 }
 
 static void
 acpi_lookup(void *arg, const char *name, device_t *dev)
 {
     ACPI_HANDLE handle;
 
     if (*dev != NULL)
 	return;
 
     /*
      * Allow any handle name that is specified as an absolute path and
      * starts with '\'.  We could restrict this to \_SB and friends,
      * but see acpi_probe_children() for notes on why we scan the entire
      * namespace for devices.
      *
      * XXX: The pathname argument to AcpiGetHandle() should be fixed to
      * be const.
      */
     if (name[0] != '\\')
 	return;
     if (ACPI_FAILURE(AcpiGetHandle(ACPI_ROOT_OBJECT, __DECONST(char *, name),
 	&handle)))
 	return;
     *dev = acpi_get_device(handle);
 }
 
 /*
  * Control interface.
  *
  * We multiplex ioctls for all participating ACPI devices here.  Individual 
  * drivers wanting to be accessible via /dev/acpi should use the
  * register/deregister interface to make their handlers visible.
  */
 struct acpi_ioctl_hook
 {
     TAILQ_ENTRY(acpi_ioctl_hook) link;
     u_long			 cmd;
     acpi_ioctl_fn		 fn;
     void			 *arg;
 };
 
 static TAILQ_HEAD(,acpi_ioctl_hook)	acpi_ioctl_hooks;
 static int				acpi_ioctl_hooks_initted;
 
 int
 acpi_register_ioctl(u_long cmd, acpi_ioctl_fn fn, void *arg)
 {
     struct acpi_ioctl_hook	*hp;
 
     if ((hp = malloc(sizeof(*hp), M_ACPIDEV, M_NOWAIT)) == NULL)
 	return (ENOMEM);
     hp->cmd = cmd;
     hp->fn = fn;
     hp->arg = arg;
 
     ACPI_LOCK(acpi);
     if (acpi_ioctl_hooks_initted == 0) {
 	TAILQ_INIT(&acpi_ioctl_hooks);
 	acpi_ioctl_hooks_initted = 1;
     }
     TAILQ_INSERT_TAIL(&acpi_ioctl_hooks, hp, link);
     ACPI_UNLOCK(acpi);
 
     return (0);
 }
 
 void
 acpi_deregister_ioctl(u_long cmd, acpi_ioctl_fn fn)
 {
     struct acpi_ioctl_hook	*hp;
 
     ACPI_LOCK(acpi);
     TAILQ_FOREACH(hp, &acpi_ioctl_hooks, link)
 	if (hp->cmd == cmd && hp->fn == fn)
 	    break;
 
     if (hp != NULL) {
 	TAILQ_REMOVE(&acpi_ioctl_hooks, hp, link);
 	free(hp, M_ACPIDEV);
     }
     ACPI_UNLOCK(acpi);
 }
 
 static int
 acpiopen(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
     return (0);
 }
 
 static int
 acpiclose(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
     return (0);
 }
 
 static int
 acpiioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
 {
     struct acpi_softc		*sc;
     struct acpi_ioctl_hook	*hp;
     int				error, state;
 
     error = 0;
     hp = NULL;
     sc = dev->si_drv1;
 
     /*
      * Scan the list of registered ioctls, looking for handlers.
      */
     ACPI_LOCK(acpi);
     if (acpi_ioctl_hooks_initted)
 	TAILQ_FOREACH(hp, &acpi_ioctl_hooks, link) {
 	    if (hp->cmd == cmd)
 		break;
 	}
     ACPI_UNLOCK(acpi);
     if (hp)
 	return (hp->fn(cmd, addr, hp->arg));
 
     /*
      * Core ioctls are not permitted for non-writable user.
      * Currently, other ioctls just fetch information.
      * Not changing system behavior.
      */
     if ((flag & FWRITE) == 0)
 	return (EPERM);
 
     /* Core system ioctls. */
     switch (cmd) {
     case ACPIIO_REQSLPSTATE:
 	state = *(int *)addr;
 	if (state != ACPI_STATE_S5)
 	    return (acpi_ReqSleepState(sc, state));
 	device_printf(sc->acpi_dev, "power off via acpi ioctl not supported\n");
 	error = EOPNOTSUPP;
 	break;
     case ACPIIO_ACKSLPSTATE:
 	error = *(int *)addr;
 	error = acpi_AckSleepState(sc->acpi_clone, error);
 	break;
     case ACPIIO_SETSLPSTATE:	/* DEPRECATED */
 	state = *(int *)addr;
 	if (state < ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
 	    return (EINVAL);
 	if (!acpi_sleep_states[state])
 	    return (EOPNOTSUPP);
 	if (ACPI_FAILURE(acpi_SetSleepState(sc, state)))
 	    error = ENXIO;
 	break;
     default:
 	error = ENXIO;
 	break;
     }
 
     return (error);
 }
 
 static int
 acpi_sname2sstate(const char *sname)
 {
     int sstate;
 
     if (toupper(sname[0]) == 'S') {
 	sstate = sname[1] - '0';
 	if (sstate >= ACPI_STATE_S0 && sstate <= ACPI_STATE_S5 &&
 	    sname[2] == '\0')
 	    return (sstate);
     } else if (strcasecmp(sname, "NONE") == 0)
 	return (ACPI_STATE_UNKNOWN);
     return (-1);
 }
 
 static const char *
 acpi_sstate2sname(int sstate)
 {
     static const char *snames[] = { "S0", "S1", "S2", "S3", "S4", "S5" };
 
     if (sstate >= ACPI_STATE_S0 && sstate <= ACPI_STATE_S5)
 	return (snames[sstate]);
     else if (sstate == ACPI_STATE_UNKNOWN)
 	return ("NONE");
     return (NULL);
 }
 
 static int
 acpi_supported_sleep_state_sysctl(SYSCTL_HANDLER_ARGS)
 {
     int error;
     struct sbuf sb;
     UINT8 state;
 
     sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
     for (state = ACPI_STATE_S1; state < ACPI_S_STATE_COUNT; state++)
 	if (acpi_sleep_states[state])
 	    sbuf_printf(&sb, "%s ", acpi_sstate2sname(state));
     sbuf_trim(&sb);
     sbuf_finish(&sb);
     error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
     sbuf_delete(&sb);
     return (error);
 }
 
 static int
 acpi_sleep_state_sysctl(SYSCTL_HANDLER_ARGS)
 {
     char sleep_state[10];
     int error, new_state, old_state;
 
     old_state = *(int *)oidp->oid_arg1;
     strlcpy(sleep_state, acpi_sstate2sname(old_state), sizeof(sleep_state));
     error = sysctl_handle_string(oidp, sleep_state, sizeof(sleep_state), req);
     if (error == 0 && req->newptr != NULL) {
 	new_state = acpi_sname2sstate(sleep_state);
 	if (new_state < ACPI_STATE_S1)
 	    return (EINVAL);
 	if (new_state < ACPI_S_STATE_COUNT && !acpi_sleep_states[new_state])
 	    return (EOPNOTSUPP);
 	if (new_state != old_state)
 	    *(int *)oidp->oid_arg1 = new_state;
     }
     return (error);
 }
 
 /* Inform devctl(4) when we receive a Notify. */
 void
 acpi_UserNotify(const char *subsystem, ACPI_HANDLE h, uint8_t notify)
 {
     char		notify_buf[16];
     ACPI_BUFFER		handle_buf;
     ACPI_STATUS		status;
 
     if (subsystem == NULL)
 	return;
 
     handle_buf.Pointer = NULL;
     handle_buf.Length = ACPI_ALLOCATE_BUFFER;
     status = AcpiNsHandleToPathname(h, &handle_buf, FALSE);
     if (ACPI_FAILURE(status))
 	return;
     snprintf(notify_buf, sizeof(notify_buf), "notify=0x%02x", notify);
     devctl_notify("ACPI", subsystem, handle_buf.Pointer, notify_buf);
     AcpiOsFree(handle_buf.Pointer);
 }
 
 #ifdef ACPI_DEBUG
 /*
  * Support for parsing debug options from the kernel environment.
  *
  * Bits may be set in the AcpiDbgLayer and AcpiDbgLevel debug registers
  * by specifying the names of the bits in the debug.acpi.layer and
  * debug.acpi.level environment variables.  Bits may be unset by 
  * prefixing the bit name with !.
  */
 struct debugtag
 {
     char	*name;
     UINT32	value;
 };
 
 static struct debugtag	dbg_layer[] = {
     {"ACPI_UTILITIES",		ACPI_UTILITIES},
     {"ACPI_HARDWARE",		ACPI_HARDWARE},
     {"ACPI_EVENTS",		ACPI_EVENTS},
     {"ACPI_TABLES",		ACPI_TABLES},
     {"ACPI_NAMESPACE",		ACPI_NAMESPACE},
     {"ACPI_PARSER",		ACPI_PARSER},
     {"ACPI_DISPATCHER",		ACPI_DISPATCHER},
     {"ACPI_EXECUTER",		ACPI_EXECUTER},
     {"ACPI_RESOURCES",		ACPI_RESOURCES},
     {"ACPI_CA_DEBUGGER",	ACPI_CA_DEBUGGER},
     {"ACPI_OS_SERVICES",	ACPI_OS_SERVICES},
     {"ACPI_CA_DISASSEMBLER",	ACPI_CA_DISASSEMBLER},
     {"ACPI_ALL_COMPONENTS",	ACPI_ALL_COMPONENTS},
 
     {"ACPI_AC_ADAPTER",		ACPI_AC_ADAPTER},
     {"ACPI_BATTERY",		ACPI_BATTERY},
     {"ACPI_BUS",		ACPI_BUS},
     {"ACPI_BUTTON",		ACPI_BUTTON},
     {"ACPI_EC", 		ACPI_EC},
     {"ACPI_FAN",		ACPI_FAN},
     {"ACPI_POWERRES",		ACPI_POWERRES},
     {"ACPI_PROCESSOR",		ACPI_PROCESSOR},
     {"ACPI_THERMAL",		ACPI_THERMAL},
     {"ACPI_TIMER",		ACPI_TIMER},
     {"ACPI_ALL_DRIVERS",	ACPI_ALL_DRIVERS},
     {NULL, 0}
 };
 
 static struct debugtag dbg_level[] = {
     {"ACPI_LV_INIT",		ACPI_LV_INIT},
     {"ACPI_LV_DEBUG_OBJECT",	ACPI_LV_DEBUG_OBJECT},
     {"ACPI_LV_INFO",		ACPI_LV_INFO},
     {"ACPI_LV_REPAIR",		ACPI_LV_REPAIR},
     {"ACPI_LV_ALL_EXCEPTIONS",	ACPI_LV_ALL_EXCEPTIONS},
 
     /* Trace verbosity level 1 [Standard Trace Level] */
     {"ACPI_LV_INIT_NAMES",	ACPI_LV_INIT_NAMES},
     {"ACPI_LV_PARSE",		ACPI_LV_PARSE},
     {"ACPI_LV_LOAD",		ACPI_LV_LOAD},
     {"ACPI_LV_DISPATCH",	ACPI_LV_DISPATCH},
     {"ACPI_LV_EXEC",		ACPI_LV_EXEC},
     {"ACPI_LV_NAMES",		ACPI_LV_NAMES},
     {"ACPI_LV_OPREGION",	ACPI_LV_OPREGION},
     {"ACPI_LV_BFIELD",		ACPI_LV_BFIELD},
     {"ACPI_LV_TABLES",		ACPI_LV_TABLES},
     {"ACPI_LV_VALUES",		ACPI_LV_VALUES},
     {"ACPI_LV_OBJECTS",		ACPI_LV_OBJECTS},
     {"ACPI_LV_RESOURCES",	ACPI_LV_RESOURCES},
     {"ACPI_LV_USER_REQUESTS",	ACPI_LV_USER_REQUESTS},
     {"ACPI_LV_PACKAGE",		ACPI_LV_PACKAGE},
     {"ACPI_LV_VERBOSITY1",	ACPI_LV_VERBOSITY1},
 
     /* Trace verbosity level 2 [Function tracing and memory allocation] */
     {"ACPI_LV_ALLOCATIONS",	ACPI_LV_ALLOCATIONS},
     {"ACPI_LV_FUNCTIONS",	ACPI_LV_FUNCTIONS},
     {"ACPI_LV_OPTIMIZATIONS",	ACPI_LV_OPTIMIZATIONS},
     {"ACPI_LV_VERBOSITY2",	ACPI_LV_VERBOSITY2},
     {"ACPI_LV_ALL",		ACPI_LV_ALL},
 
     /* Trace verbosity level 3 [Threading, I/O, and Interrupts] */
     {"ACPI_LV_MUTEX",		ACPI_LV_MUTEX},
     {"ACPI_LV_THREADS",		ACPI_LV_THREADS},
     {"ACPI_LV_IO",		ACPI_LV_IO},
     {"ACPI_LV_INTERRUPTS",	ACPI_LV_INTERRUPTS},
     {"ACPI_LV_VERBOSITY3",	ACPI_LV_VERBOSITY3},
 
     /* Exceptionally verbose output -- also used in the global "DebugLevel"  */
     {"ACPI_LV_AML_DISASSEMBLE",	ACPI_LV_AML_DISASSEMBLE},
     {"ACPI_LV_VERBOSE_INFO",	ACPI_LV_VERBOSE_INFO},
     {"ACPI_LV_FULL_TABLES",	ACPI_LV_FULL_TABLES},
     {"ACPI_LV_EVENTS",		ACPI_LV_EVENTS},
     {"ACPI_LV_VERBOSE",		ACPI_LV_VERBOSE},
     {NULL, 0}
 };    
 
 static void
 acpi_parse_debug(char *cp, struct debugtag *tag, UINT32 *flag)
 {
     char	*ep;
     int		i, l;
     int		set;
 
     while (*cp) {
 	if (isspace(*cp)) {
 	    cp++;
 	    continue;
 	}
 	ep = cp;
 	while (*ep && !isspace(*ep))
 	    ep++;
 	if (*cp == '!') {
 	    set = 0;
 	    cp++;
 	    if (cp == ep)
 		continue;
 	} else {
 	    set = 1;
 	}
 	l = ep - cp;
 	for (i = 0; tag[i].name != NULL; i++) {
 	    if (!strncmp(cp, tag[i].name, l)) {
 		if (set)
 		    *flag |= tag[i].value;
 		else
 		    *flag &= ~tag[i].value;
 	    }
 	}
 	cp = ep;
     }
 }
 
 static void
 acpi_set_debugging(void *junk)
 {
     char	*layer, *level;
 
     if (cold) {
 	AcpiDbgLayer = 0;
 	AcpiDbgLevel = 0;
     }
 
     layer = kern_getenv("debug.acpi.layer");
     level = kern_getenv("debug.acpi.level");
     if (layer == NULL && level == NULL)
 	return;
 
     printf("ACPI set debug");
     if (layer != NULL) {
 	if (strcmp("NONE", layer) != 0)
 	    printf(" layer '%s'", layer);
 	acpi_parse_debug(layer, &dbg_layer[0], &AcpiDbgLayer);
 	freeenv(layer);
     }
     if (level != NULL) {
 	if (strcmp("NONE", level) != 0)
 	    printf(" level '%s'", level);
 	acpi_parse_debug(level, &dbg_level[0], &AcpiDbgLevel);
 	freeenv(level);
     }
     printf("\n");
 }
 
 SYSINIT(acpi_debugging, SI_SUB_TUNABLES, SI_ORDER_ANY, acpi_set_debugging,
 	NULL);
 
 static int
 acpi_debug_sysctl(SYSCTL_HANDLER_ARGS)
 {
     int		 error, *dbg;
     struct	 debugtag *tag;
     struct	 sbuf sb;
     char	 temp[128];
 
     if (sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND) == NULL)
 	return (ENOMEM);
     if (strcmp(oidp->oid_arg1, "debug.acpi.layer") == 0) {
 	tag = &dbg_layer[0];
 	dbg = &AcpiDbgLayer;
     } else {
 	tag = &dbg_level[0];
 	dbg = &AcpiDbgLevel;
     }
 
     /* Get old values if this is a get request. */
     ACPI_SERIAL_BEGIN(acpi);
     if (*dbg == 0) {
 	sbuf_cpy(&sb, "NONE");
     } else if (req->newptr == NULL) {
 	for (; tag->name != NULL; tag++) {
 	    if ((*dbg & tag->value) == tag->value)
 		sbuf_printf(&sb, "%s ", tag->name);
 	}
     }
     sbuf_trim(&sb);
     sbuf_finish(&sb);
     strlcpy(temp, sbuf_data(&sb), sizeof(temp));
     sbuf_delete(&sb);
 
     error = sysctl_handle_string(oidp, temp, sizeof(temp), req);
 
     /* Check for error or no change */
     if (error == 0 && req->newptr != NULL) {
 	*dbg = 0;
 	kern_setenv((char *)oidp->oid_arg1, temp);
 	acpi_set_debugging(NULL);
     }
     ACPI_SERIAL_END(acpi);
 
     return (error);
 }
 
 SYSCTL_PROC(_debug_acpi, OID_AUTO, layer, CTLFLAG_RW | CTLTYPE_STRING,
 	    "debug.acpi.layer", 0, acpi_debug_sysctl, "A", "");
 SYSCTL_PROC(_debug_acpi, OID_AUTO, level, CTLFLAG_RW | CTLTYPE_STRING,
 	    "debug.acpi.level", 0, acpi_debug_sysctl, "A", "");
 #endif /* ACPI_DEBUG */
 
 static int
 acpi_debug_objects_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	int	error;
 	int	old;
 
 	old = acpi_debug_objects;
 	error = sysctl_handle_int(oidp, &acpi_debug_objects, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (old == acpi_debug_objects || (old && acpi_debug_objects))
 		return (0);
 
 	ACPI_SERIAL_BEGIN(acpi);
 	AcpiGbl_EnableAmlDebugObject = acpi_debug_objects ? TRUE : FALSE;
 	ACPI_SERIAL_END(acpi);
 
 	return (0);
 }
 
 static int
 acpi_parse_interfaces(char *str, struct acpi_interface *iface)
 {
 	char *p;
 	size_t len;
 	int i, j;
 
 	p = str;
 	while (isspace(*p) || *p == ',')
 		p++;
 	len = strlen(p);
 	if (len == 0)
 		return (0);
 	p = strdup(p, M_TEMP);
 	for (i = 0; i < len; i++)
 		if (p[i] == ',')
 			p[i] = '\0';
 	i = j = 0;
 	while (i < len)
 		if (isspace(p[i]) || p[i] == '\0')
 			i++;
 		else {
 			i += strlen(p + i) + 1;
 			j++;
 		}
 	if (j == 0) {
 		free(p, M_TEMP);
 		return (0);
 	}
 	iface->data = malloc(sizeof(*iface->data) * j, M_TEMP, M_WAITOK);
 	iface->num = j;
 	i = j = 0;
 	while (i < len)
 		if (isspace(p[i]) || p[i] == '\0')
 			i++;
 		else {
 			iface->data[j] = p + i;
 			i += strlen(p + i) + 1;
 			j++;
 		}
 
 	return (j);
 }
 
 static void
 acpi_free_interfaces(struct acpi_interface *iface)
 {
 
 	free(iface->data[0], M_TEMP);
 	free(iface->data, M_TEMP);
 }
 
 static void
 acpi_reset_interfaces(device_t dev)
 {
 	struct acpi_interface list;
 	ACPI_STATUS status;
 	int i;
 
 	if (acpi_parse_interfaces(acpi_install_interface, &list) > 0) {
 		for (i = 0; i < list.num; i++) {
 			status = AcpiInstallInterface(list.data[i]);
 			if (ACPI_FAILURE(status))
 				device_printf(dev,
 				    "failed to install _OSI(\"%s\"): %s\n",
 				    list.data[i], AcpiFormatException(status));
 			else if (bootverbose)
 				device_printf(dev, "installed _OSI(\"%s\")\n",
 				    list.data[i]);
 		}
 		acpi_free_interfaces(&list);
 	}
 	if (acpi_parse_interfaces(acpi_remove_interface, &list) > 0) {
 		for (i = 0; i < list.num; i++) {
 			status = AcpiRemoveInterface(list.data[i]);
 			if (ACPI_FAILURE(status))
 				device_printf(dev,
 				    "failed to remove _OSI(\"%s\"): %s\n",
 				    list.data[i], AcpiFormatException(status));
 			else if (bootverbose)
 				device_printf(dev, "removed _OSI(\"%s\")\n",
 				    list.data[i]);
 		}
 		acpi_free_interfaces(&list);
 	}
 }
 
 static int
 acpi_pm_func(u_long cmd, void *arg, ...)
 {
 	int	state, acpi_state;
 	int	error;
 	struct	acpi_softc *sc;
 	va_list	ap;
 
 	error = 0;
 	switch (cmd) {
 	case POWER_CMD_SUSPEND:
 		sc = (struct acpi_softc *)arg;
 		if (sc == NULL) {
 			error = EINVAL;
 			goto out;
 		}
 
 		va_start(ap, arg);
 		state = va_arg(ap, int);
 		va_end(ap);
 
 		switch (state) {
 		case POWER_SLEEP_STATE_STANDBY:
 			acpi_state = sc->acpi_standby_sx;
 			break;
 		case POWER_SLEEP_STATE_SUSPEND:
 			acpi_state = sc->acpi_suspend_sx;
 			break;
 		case POWER_SLEEP_STATE_HIBERNATE:
 			acpi_state = ACPI_STATE_S4;
 			break;
 		default:
 			error = EINVAL;
 			goto out;
 		}
 
 		if (ACPI_FAILURE(acpi_EnterSleepState(sc, acpi_state)))
 			error = ENXIO;
 		break;
 	default:
 		error = EINVAL;
 		goto out;
 	}
 
 out:
 	return (error);
 }
 
 static void
 acpi_pm_register(void *arg)
 {
     if (!cold || resource_disabled("acpi", 0))
 	return;
 
     power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, NULL);
 }
 
 SYSINIT(power, SI_SUB_KLD, SI_ORDER_ANY, acpi_pm_register, 0);
Index: head/sys/dev/acpica/acpivar.h
===================================================================
--- head/sys/dev/acpica/acpivar.h	(revision 297747)
+++ head/sys/dev/acpica/acpivar.h	(revision 297748)
@@ -1,512 +1,510 @@
 /*-
  * Copyright (c) 2000 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
  * Copyright (c) 2000 Michael Smith <msmith@freebsd.org>
  * Copyright (c) 2000 BSDi
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _ACPIVAR_H_
 #define _ACPIVAR_H_
 
 #ifdef _KERNEL
 
 #include "acpi_if.h"
 #include "bus_if.h"
 #include <sys/eventhandler.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/selinfo.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
 
 struct apm_clone_data;
 struct acpi_softc {
     device_t		acpi_dev;
     struct cdev		*acpi_dev_t;
 
     int			acpi_enabled;
     int			acpi_sstate;
     int			acpi_sleep_disabled;
     int			acpi_resources_reserved;
 
     struct sysctl_ctx_list acpi_sysctl_ctx;
     struct sysctl_oid	*acpi_sysctl_tree;
     int			acpi_power_button_sx;
     int			acpi_sleep_button_sx;
     int			acpi_lid_switch_sx;
 
     int			acpi_standby_sx;
     int			acpi_suspend_sx;
 
     int			acpi_sleep_delay;
     int			acpi_s4bios;
     int			acpi_do_disable;
     int			acpi_verbose;
     int			acpi_handle_reboot;
 
     vm_offset_t		acpi_wakeaddr;
     vm_paddr_t		acpi_wakephys;
 
     int			acpi_next_sstate;	/* Next suspend Sx state. */
     struct apm_clone_data *acpi_clone;		/* Pseudo-dev for devd(8). */
     STAILQ_HEAD(,apm_clone_data) apm_cdevs;	/* All apm/apmctl/acpi cdevs. */
     struct callout	susp_force_to;		/* Force suspend if no acks. */
 };
 
 struct acpi_device {
     /* ACPI ivars */
     ACPI_HANDLE			ad_handle;
     void			*ad_private;
     int				ad_flags;
 
     /* Resources */
     struct resource_list	ad_rl;
 };
 
 /* Track device (/dev/{apm,apmctl} and /dev/acpi) notification status. */
 struct apm_clone_data {
     STAILQ_ENTRY(apm_clone_data) entries;
     struct cdev 	*cdev;
     int			flags;
 #define	ACPI_EVF_NONE	0	/* /dev/apm semantics */
 #define	ACPI_EVF_DEVD	1	/* /dev/acpi is handled via devd(8) */
 #define	ACPI_EVF_WRITE	2	/* Device instance is opened writable. */
     int			notify_status;
 #define	APM_EV_NONE	0	/* Device not yet aware of pending sleep. */
 #define	APM_EV_NOTIFIED	1	/* Device saw next sleep state. */
 #define	APM_EV_ACKED	2	/* Device agreed sleep can occur. */
     struct acpi_softc	*acpi_sc;
     struct selinfo	sel_read;
 };
 
 #define ACPI_PRW_MAX_POWERRES	8
 
 struct acpi_prw_data {
     ACPI_HANDLE		gpe_handle;
     int			gpe_bit;
     int			lowest_wake;
     ACPI_OBJECT		power_res[ACPI_PRW_MAX_POWERRES];
     int			power_res_count;
 };
 
 /* Flags for each device defined in the AML namespace. */
 #define ACPI_FLAG_WAKE_ENABLED	0x1
 
 /* Macros for extracting parts of a PCI address from an _ADR value. */
 #define	ACPI_ADR_PCI_SLOT(adr)	(((adr) & 0xffff0000) >> 16)
 #define	ACPI_ADR_PCI_FUNC(adr)	((adr) & 0xffff)
 
 /*
  * Entry points to ACPI from above are global functions defined in this
  * file, sysctls, and I/O on the control device.  Entry points from below
  * are interrupts (the SCI), notifies, task queue threads, and the thermal
  * zone polling thread.
  *
  * ACPI tables and global shared data are protected by a global lock
  * (acpi_mutex).  
  *
  * Each ACPI device can have its own driver-specific mutex for protecting
  * shared access to local data.  The ACPI_LOCK macros handle mutexes.
  *
  * Drivers that need to serialize access to functions (e.g., to route
  * interrupts, get/set control paths, etc.) should use the sx lock macros
  * (ACPI_SERIAL).
  *
  * ACPI-CA handles its own locking and should not be called with locks held.
  *
  * The most complicated path is:
  *     GPE -> EC runs _Qxx -> _Qxx reads EC space -> GPE
  */
 extern struct mtx			acpi_mutex;
 #define ACPI_LOCK(sys)			mtx_lock(&sys##_mutex)
 #define ACPI_UNLOCK(sys)		mtx_unlock(&sys##_mutex)
 #define ACPI_LOCK_ASSERT(sys)		mtx_assert(&sys##_mutex, MA_OWNED);
 #define ACPI_LOCK_DECL(sys, name)				\
 	static struct mtx sys##_mutex;				\
 	MTX_SYSINIT(sys##_mutex, &sys##_mutex, name, MTX_DEF)
 #define ACPI_SERIAL_BEGIN(sys)		sx_xlock(&sys##_sxlock)
 #define ACPI_SERIAL_END(sys)		sx_xunlock(&sys##_sxlock)
 #define ACPI_SERIAL_ASSERT(sys)		sx_assert(&sys##_sxlock, SX_XLOCKED);
 #define ACPI_SERIAL_DECL(sys, name)				\
 	static struct sx sys##_sxlock;				\
 	SX_SYSINIT(sys##_sxlock, &sys##_sxlock, name)
 
 /*
  * ACPI CA does not define layers for non-ACPI CA drivers.
  * We define some here within the range provided.
  */
 #define	ACPI_AC_ADAPTER		0x00010000
 #define	ACPI_BATTERY		0x00020000
 #define	ACPI_BUS		0x00040000
 #define	ACPI_BUTTON		0x00080000
 #define	ACPI_EC			0x00100000
 #define	ACPI_FAN		0x00200000
 #define	ACPI_POWERRES		0x00400000
 #define	ACPI_PROCESSOR		0x00800000
 #define	ACPI_THERMAL		0x01000000
 #define	ACPI_TIMER		0x02000000
 #define	ACPI_OEM		0x04000000
 
 /*
  * Constants for different interrupt models used with acpi_SetIntrModel().
  */
 #define	ACPI_INTR_PIC		0
 #define	ACPI_INTR_APIC		1
 #define	ACPI_INTR_SAPIC		2
 
 /*
  * Various features and capabilities for the acpi_get_features() method.
  * In particular, these are used for the ACPI 3.0 _PDC and _OSC methods.
  * See the Intel document titled "Intel Processor Vendor-Specific ACPI",
  * number 302223-007.
  */
 #define	ACPI_CAP_PERF_MSRS	(1 << 0)  /* Intel SpeedStep PERF_CTL MSRs */
 #define	ACPI_CAP_C1_IO_HALT	(1 << 1)  /* Intel C1 "IO then halt" sequence */
 #define	ACPI_CAP_THR_MSRS	(1 << 2)  /* Intel OnDemand throttling MSRs */
 #define	ACPI_CAP_SMP_SAME	(1 << 3)  /* MP C1, Px, and Tx (all the same) */
 #define	ACPI_CAP_SMP_SAME_C3	(1 << 4)  /* MP C2 and C3 (all the same) */
 #define	ACPI_CAP_SMP_DIFF_PX	(1 << 5)  /* MP Px (different, using _PSD) */
 #define	ACPI_CAP_SMP_DIFF_CX	(1 << 6)  /* MP Cx (different, using _CSD) */
 #define	ACPI_CAP_SMP_DIFF_TX	(1 << 7)  /* MP Tx (different, using _TSD) */
 #define	ACPI_CAP_SMP_C1_NATIVE	(1 << 8)  /* MP C1 support other than halt */
 #define	ACPI_CAP_SMP_C3_NATIVE	(1 << 9)  /* MP C2 and C3 support */
 #define	ACPI_CAP_PX_HW_COORD	(1 << 11) /* Intel P-state HW coordination */
 #define	ACPI_CAP_INTR_CPPC	(1 << 12) /* Native Interrupt Handling for
 	     Collaborative Processor Performance Control notifications */
 #define	ACPI_CAP_HW_DUTY_C	(1 << 13) /* Hardware Duty Cycling */
 
 /*
  * Quirk flags.
  *
  * ACPI_Q_BROKEN: Disables all ACPI support.
  * ACPI_Q_TIMER: Disables support for the ACPI timer.
  * ACPI_Q_MADT_IRQ0: Specifies that ISA IRQ 0 is wired up to pin 0 of the
  *	first APIC and that the MADT should force that by ignoring the PC-AT
  *	compatible flag and ignoring overrides that redirect IRQ 0 to pin 2.
  */
 extern int	acpi_quirks;
 #define ACPI_Q_OK		0
 #define ACPI_Q_BROKEN		(1 << 0)
 #define ACPI_Q_TIMER		(1 << 1)
 #define ACPI_Q_MADT_IRQ0	(1 << 2)
 
 /*
  * Note that the low ivar values are reserved to provide
  * interface compatibility with ISA drivers which can also
  * attach to ACPI.
  */
 #define ACPI_IVAR_HANDLE	0x100
 #define ACPI_IVAR_UNUSED	0x101	/* Unused/reserved. */
 #define ACPI_IVAR_PRIVATE	0x102
 #define ACPI_IVAR_FLAGS		0x103
 
 /*
  * Accessor functions for our ivars.  Default value for BUS_READ_IVAR is
  * (type) 0.  The <sys/bus.h> accessor functions don't check return values.
  */
 #define __ACPI_BUS_ACCESSOR(varp, var, ivarp, ivar, type)	\
 								\
 static __inline type varp ## _get_ ## var(device_t dev)		\
 {								\
     uintptr_t v = 0;						\
     BUS_READ_IVAR(device_get_parent(dev), dev,			\
 	ivarp ## _IVAR_ ## ivar, &v);				\
     return ((type) v);						\
 }								\
 								\
 static __inline void varp ## _set_ ## var(device_t dev, type t)	\
 {								\
     uintptr_t v = (uintptr_t) t;				\
     BUS_WRITE_IVAR(device_get_parent(dev), dev,			\
 	ivarp ## _IVAR_ ## ivar, v);				\
 }
 
 __ACPI_BUS_ACCESSOR(acpi, handle, ACPI, HANDLE, ACPI_HANDLE)
 __ACPI_BUS_ACCESSOR(acpi, private, ACPI, PRIVATE, void *)
 __ACPI_BUS_ACCESSOR(acpi, flags, ACPI, FLAGS, int)
 
 void acpi_fake_objhandler(ACPI_HANDLE h, void *data);
 static __inline device_t
 acpi_get_device(ACPI_HANDLE handle)
 {
     void *dev = NULL;
     AcpiGetData(handle, acpi_fake_objhandler, &dev);
     return ((device_t)dev);
 }
 
 static __inline ACPI_OBJECT_TYPE
 acpi_get_type(device_t dev)
 {
     ACPI_HANDLE		h;
     ACPI_OBJECT_TYPE	t;
 
     if ((h = acpi_get_handle(dev)) == NULL)
 	return (ACPI_TYPE_NOT_FOUND);
     if (ACPI_FAILURE(AcpiGetType(h, &t)))
 	return (ACPI_TYPE_NOT_FOUND);
     return (t);
 }
 
 /* Find the difference between two PM tick counts. */
 static __inline uint32_t
 acpi_TimerDelta(uint32_t end, uint32_t start)
 {
 
 	if (end < start && (AcpiGbl_FADT.Flags & ACPI_FADT_32BIT_TIMER) == 0)
 		end |= 0x01000000;
 	return (end - start);
 }
 
 #ifdef ACPI_DEBUGGER
 void		acpi_EnterDebugger(void);
 #endif
 
 #ifdef ACPI_DEBUG
 #include <sys/cons.h>
 #define STEP(x)		do {printf x, printf("\n"); cngetc();} while (0)
 #else
 #define STEP(x)
 #endif
 
 #define ACPI_VPRINT(dev, acpi_sc, x...) do {			\
     if (acpi_get_verbose(acpi_sc))				\
 	device_printf(dev, x);					\
 } while (0)
 
 /* Values for the device _STA (status) method. */
 #define ACPI_STA_PRESENT	(1 << 0)
 #define ACPI_STA_ENABLED	(1 << 1)
 #define ACPI_STA_SHOW_IN_UI	(1 << 2)
 #define ACPI_STA_FUNCTIONAL	(1 << 3)
 #define ACPI_STA_BATT_PRESENT	(1 << 4)
 
 #define ACPI_DEVINFO_PRESENT(x, flags)					\
 	(((x) & (flags)) == (flags))
 #define ACPI_DEVICE_PRESENT(x)						\
 	ACPI_DEVINFO_PRESENT(x, ACPI_STA_PRESENT | ACPI_STA_FUNCTIONAL)
 #define ACPI_BATTERY_PRESENT(x)						\
 	ACPI_DEVINFO_PRESENT(x, ACPI_STA_PRESENT | ACPI_STA_FUNCTIONAL | \
 	    ACPI_STA_BATT_PRESENT)
 
 /* Callback function type for walking subtables within a table. */
 typedef void acpi_subtable_handler(ACPI_SUBTABLE_HEADER *, void *);
 
 BOOLEAN		acpi_DeviceIsPresent(device_t dev);
 BOOLEAN		acpi_BatteryIsPresent(device_t dev);
 ACPI_STATUS	acpi_GetHandleInScope(ACPI_HANDLE parent, char *path,
 		    ACPI_HANDLE *result);
 ACPI_BUFFER	*acpi_AllocBuffer(int size);
 ACPI_STATUS	acpi_ConvertBufferToInteger(ACPI_BUFFER *bufp,
 		    UINT32 *number);
 ACPI_STATUS	acpi_GetInteger(ACPI_HANDLE handle, char *path,
 		    UINT32 *number);
 ACPI_STATUS	acpi_SetInteger(ACPI_HANDLE handle, char *path,
 		    UINT32 number);
 ACPI_STATUS	acpi_ForeachPackageObject(ACPI_OBJECT *obj, 
 		    void (*func)(ACPI_OBJECT *comp, void *arg), void *arg);
 ACPI_STATUS	acpi_FindIndexedResource(ACPI_BUFFER *buf, int index,
 		    ACPI_RESOURCE **resp);
 ACPI_STATUS	acpi_AppendBufferResource(ACPI_BUFFER *buf,
 		    ACPI_RESOURCE *res);
 ACPI_STATUS	acpi_OverrideInterruptLevel(UINT32 InterruptNumber);
 ACPI_STATUS	acpi_SetIntrModel(int model);
 int		acpi_ReqSleepState(struct acpi_softc *sc, int state);
 int		acpi_AckSleepState(struct apm_clone_data *clone, int error);
 ACPI_STATUS	acpi_SetSleepState(struct acpi_softc *sc, int state);
 int		acpi_wake_set_enable(device_t dev, int enable);
 int		acpi_parse_prw(ACPI_HANDLE h, struct acpi_prw_data *prw);
 ACPI_STATUS	acpi_Startup(void);
 void		acpi_UserNotify(const char *subsystem, ACPI_HANDLE h,
 		    uint8_t notify);
 int		acpi_bus_alloc_gas(device_t dev, int *type, int *rid,
 		    ACPI_GENERIC_ADDRESS *gas, struct resource **res,
 		    u_int flags);
 void		acpi_walk_subtables(void *first, void *end,
 		    acpi_subtable_handler *handler, void *arg);
 BOOLEAN		acpi_MatchHid(ACPI_HANDLE h, const char *hid);
 
 struct acpi_parse_resource_set {
     void	(*set_init)(device_t dev, void *arg, void **context);
     void	(*set_done)(device_t dev, void *context);
     void	(*set_ioport)(device_t dev, void *context, uint64_t base,
 		    uint64_t length);
     void	(*set_iorange)(device_t dev, void *context, uint64_t low,
 		    uint64_t high, uint64_t length, uint64_t align);
     void	(*set_memory)(device_t dev, void *context, uint64_t base,
 		    uint64_t length);
     void	(*set_memoryrange)(device_t dev, void *context, uint64_t low,
 		    uint64_t high, uint64_t length, uint64_t align);
     void	(*set_irq)(device_t dev, void *context, uint8_t *irq,
 		    int count, int trig, int pol);
     void	(*set_ext_irq)(device_t dev, void *context, uint32_t *irq,
 		    int count, int trig, int pol);
     void	(*set_drq)(device_t dev, void *context, uint8_t *drq,
 		    int count);
     void	(*set_start_dependent)(device_t dev, void *context,
 		    int preference);
     void	(*set_end_dependent)(device_t dev, void *context);
 };
 
 extern struct	acpi_parse_resource_set acpi_res_parse_set;
 
 int		acpi_identify(void);
 void		acpi_config_intr(device_t dev, ACPI_RESOURCE *res);
 ACPI_STATUS	acpi_lookup_irq_resource(device_t dev, int rid,
 		    struct resource *res, ACPI_RESOURCE *acpi_res);
 ACPI_STATUS	acpi_parse_resources(device_t dev, ACPI_HANDLE handle,
 		    struct acpi_parse_resource_set *set, void *arg);
 struct resource *acpi_alloc_sysres(device_t child, int type, int *rid,
 		    rman_res_t start, rman_res_t end, rman_res_t count,
 		    u_int flags);
 
 /* ACPI event handling */
 UINT32		acpi_event_power_button_sleep(void *context);
 UINT32		acpi_event_power_button_wake(void *context);
 UINT32		acpi_event_sleep_button_sleep(void *context);
 UINT32		acpi_event_sleep_button_wake(void *context);
 
 #define ACPI_EVENT_PRI_FIRST      0
 #define ACPI_EVENT_PRI_DEFAULT    10000
 #define ACPI_EVENT_PRI_LAST       20000
 
 typedef void (*acpi_event_handler_t)(void *, int);
 
 EVENTHANDLER_DECLARE(acpi_sleep_event, acpi_event_handler_t);
 EVENTHANDLER_DECLARE(acpi_wakeup_event, acpi_event_handler_t);
 
 /* Device power control. */
 ACPI_STATUS	acpi_pwr_wake_enable(ACPI_HANDLE consumer, int enable);
 ACPI_STATUS	acpi_pwr_switch_consumer(ACPI_HANDLE consumer, int state);
 int		acpi_device_pwr_for_sleep(device_t bus, device_t dev,
 		    int *dstate);
 
 /* APM emulation */
 void		acpi_apm_init(struct acpi_softc *);
 
 /* Misc. */
 static __inline struct acpi_softc *
 acpi_device_get_parent_softc(device_t child)
 {
     device_t	parent;
 
     parent = device_get_parent(child);
     if (parent == NULL)
 	return (NULL);
     return (device_get_softc(parent));
 }
 
 static __inline int
 acpi_get_verbose(struct acpi_softc *sc)
 {
     if (sc)
 	return (sc->acpi_verbose);
     return (0);
 }
 
 char		*acpi_name(ACPI_HANDLE handle);
 int		acpi_avoid(ACPI_HANDLE handle);
 int		acpi_disabled(char *subsys);
 int		acpi_machdep_init(device_t dev);
 void		acpi_install_wakeup_handler(struct acpi_softc *sc);
 int		acpi_sleep_machdep(struct acpi_softc *sc, int state);
 int		acpi_wakeup_machdep(struct acpi_softc *sc, int state,
 		    int sleep_result, int intr_enabled);
 int		acpi_table_quirks(int *quirks);
 int		acpi_machdep_quirks(int *quirks);
 
 uint32_t	hpet_get_uid(device_t dev);
 
 /* Battery Abstraction. */
 struct acpi_battinfo;
 
 int		acpi_battery_register(device_t dev);
 int		acpi_battery_remove(device_t dev);
 int		acpi_battery_get_units(void);
 int		acpi_battery_get_info_expire(void);
 int		acpi_battery_bst_valid(struct acpi_bst *bst);
 int		acpi_battery_bif_valid(struct acpi_bif *bif);
 int		acpi_battery_get_battinfo(device_t dev,
 		    struct acpi_battinfo *info);
 
 /* Embedded controller. */
 void		acpi_ec_ecdt_probe(device_t);
 
 /* AC adapter interface. */
 int		acpi_acad_get_acline(int *);
 
 /* Package manipulation convenience functions. */
 #define ACPI_PKG_VALID(pkg, size)				\
     ((pkg) != NULL && (pkg)->Type == ACPI_TYPE_PACKAGE &&	\
      (pkg)->Package.Count >= (size))
 int		acpi_PkgInt(ACPI_OBJECT *res, int idx, UINT64 *dst);
 int		acpi_PkgInt32(ACPI_OBJECT *res, int idx, uint32_t *dst);
 int		acpi_PkgStr(ACPI_OBJECT *res, int idx, void *dst, size_t size);
 int		acpi_PkgGas(device_t dev, ACPI_OBJECT *res, int idx, int *type,
 		    int *rid, struct resource **dst, u_int flags);
 int		acpi_PkgFFH_IntelCpu(ACPI_OBJECT *res, int idx, int *vendor,
 		    int *class, uint64_t *address, int *accsize);
 ACPI_HANDLE	acpi_GetReference(ACPI_HANDLE scope, ACPI_OBJECT *obj);
 
 /*
  * Base level for BUS_ADD_CHILD.  Special devices are added at orders less
  * than this, and normal devices at or above this level.  This keeps the
  * probe order sorted so that things like sysresource are available before
  * their children need them.
  */
 #define	ACPI_DEV_BASE_ORDER	100
 
 /* Default maximum number of tasks to enqueue. */
 #ifndef ACPI_MAX_TASKS
 #define	ACPI_MAX_TASKS		MAX(32, MAXCPU * 4)
 #endif
 
 /* Default number of task queue threads to start. */
 #ifndef ACPI_MAX_THREADS
 #define ACPI_MAX_THREADS	3
 #endif
 
 /* Use the device logging level for ktr(4). */
 #define	KTR_ACPI		KTR_DEV
 
 SYSCTL_DECL(_debug_acpi);
 
 /*
  * Map a PXM to a VM domain.
  *
  * Returns the VM domain ID if found, or -1 if not found / invalid.
  */
-#if MAXMEMDOM > 1
 extern	int acpi_map_pxm_to_vm_domainid(int pxm);
-#endif
 extern	int acpi_get_domain(device_t dev, device_t child, int *domain);
 extern	int acpi_parse_pxm(device_t dev, int *domain);
 
 #endif /* _KERNEL */
 #endif /* !_ACPIVAR_H_ */
Index: head/sys/kern/kern_cpuset.c
===================================================================
--- head/sys/kern/kern_cpuset.c	(revision 297747)
+++ head/sys/kern/kern_cpuset.c	(revision 297748)
@@ -1,1284 +1,1290 @@
 /*-
  * Copyright (c) 2008,  Jeffrey Roberson <jeff@freebsd.org>
  * All rights reserved.
  * 
  * Copyright (c) 2008 Nokia Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/refcount.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/syscallsubr.h>
 #include <sys/cpuset.h>
 #include <sys/sx.h>
 #include <sys/queue.h>
 #include <sys/libkern.h>
 #include <sys/limits.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
 
 #include <vm/uma.h>
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 #include <vm/vm_phys.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif /* DDB */
 
 /*
  * cpusets provide a mechanism for creating and manipulating sets of
  * processors for the purpose of constraining the scheduling of threads to
  * specific processors.
  *
  * Each process belongs to an identified set, by default this is set 1.  Each
  * thread may further restrict the cpus it may run on to a subset of this
  * named set.  This creates an anonymous set which other threads and processes
  * may not join by number.
  *
  * The named set is referred to herein as the 'base' set to avoid ambiguity.
  * This set is usually a child of a 'root' set while the anonymous set may
  * simply be referred to as a mask.  In the syscall api these are referred to
  * as the ROOT, CPUSET, and MASK levels where CPUSET is called 'base' here.
  *
  * Threads inherit their set from their creator whether it be anonymous or
  * not.  This means that anonymous sets are immutable because they may be
  * shared.  To modify an anonymous set a new set is created with the desired
  * mask and the same parent as the existing anonymous set.  This gives the
  * illusion of each thread having a private mask.
  *
  * Via the syscall apis a user may ask to retrieve or modify the root, base,
  * or mask that is discovered via a pid, tid, or setid.  Modifying a set
  * modifies all numbered and anonymous child sets to comply with the new mask.
  * Modifying a pid or tid's mask applies only to that tid but must still
  * exist within the assigned parent set.
  *
  * A thread may not be assigned to a group separate from other threads in
  * the process.  This is to remove ambiguity when the setid is queried with
  * a pid argument.  There is no other technical limitation.
  *
  * This somewhat complex arrangement is intended to make it easy for
  * applications to query available processors and bind their threads to
  * specific processors while also allowing administrators to dynamically
  * reprovision by changing sets which apply to groups of processes.
  *
  * A simple application should not concern itself with sets at all and
  * rather apply masks to its own threads via CPU_WHICH_TID and a -1 id
  * meaning 'curthread'.  It may query available cpus for that tid with a
  * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...).
  */
 static uma_zone_t cpuset_zone;
 static struct mtx cpuset_lock;
 static struct setlist cpuset_ids;
 static struct unrhdr *cpuset_unr;
 static struct cpuset *cpuset_zero, *cpuset_default;
 
 /* Return the size of cpuset_t at the kernel level */
 SYSCTL_INT(_kern_sched, OID_AUTO, cpusetsize, CTLFLAG_RD | CTLFLAG_CAPRD,
     SYSCTL_NULL_INT_PTR, sizeof(cpuset_t), "sizeof(cpuset_t)");
 
 cpuset_t *cpuset_root;
 cpuset_t cpuset_domain[MAXMEMDOM];
 
 /*
  * Acquire a reference to a cpuset, all pointers must be tracked with refs.
  */
 struct cpuset *
 cpuset_ref(struct cpuset *set)
 {
 
 	refcount_acquire(&set->cs_ref);
 	return (set);
 }
 
 /*
  * Walks up the tree from 'set' to find the root.  Returns the root
  * referenced.
  */
 static struct cpuset *
 cpuset_refroot(struct cpuset *set)
 {
 
 	for (; set->cs_parent != NULL; set = set->cs_parent)
 		if (set->cs_flags & CPU_SET_ROOT)
 			break;
 	cpuset_ref(set);
 
 	return (set);
 }
 
 /*
  * Find the first non-anonymous set starting from 'set'.  Returns this set
  * referenced.  May return the passed in set with an extra ref if it is
  * not anonymous. 
  */
 static struct cpuset *
 cpuset_refbase(struct cpuset *set)
 {
 
 	if (set->cs_id == CPUSET_INVALID)
 		set = set->cs_parent;
 	cpuset_ref(set);
 
 	return (set);
 }
 
 /*
  * Release a reference in a context where it is safe to allocate.
  */
 void
 cpuset_rel(struct cpuset *set)
 {
 	cpusetid_t id;
 
 	if (refcount_release(&set->cs_ref) == 0)
 		return;
 	mtx_lock_spin(&cpuset_lock);
 	LIST_REMOVE(set, cs_siblings);
 	id = set->cs_id;
 	if (id != CPUSET_INVALID)
 		LIST_REMOVE(set, cs_link);
 	mtx_unlock_spin(&cpuset_lock);
 	cpuset_rel(set->cs_parent);
 	uma_zfree(cpuset_zone, set);
 	if (id != CPUSET_INVALID)
 		free_unr(cpuset_unr, id);
 }
 
 /*
  * Deferred release must be used when in a context that is not safe to
  * allocate/free.  This places any unreferenced sets on the list 'head'.
  */
 static void
 cpuset_rel_defer(struct setlist *head, struct cpuset *set)
 {
 
 	if (refcount_release(&set->cs_ref) == 0)
 		return;
 	mtx_lock_spin(&cpuset_lock);
 	LIST_REMOVE(set, cs_siblings);
 	if (set->cs_id != CPUSET_INVALID)
 		LIST_REMOVE(set, cs_link);
 	LIST_INSERT_HEAD(head, set, cs_link);
 	mtx_unlock_spin(&cpuset_lock);
 }
 
 /*
  * Complete a deferred release.  Removes the set from the list provided to
  * cpuset_rel_defer.
  */
 static void
 cpuset_rel_complete(struct cpuset *set)
 {
 	LIST_REMOVE(set, cs_link);
 	cpuset_rel(set->cs_parent);
 	uma_zfree(cpuset_zone, set);
 }
 
 /*
  * Find a set based on an id.  Returns it with a ref.
  */
 static struct cpuset *
 cpuset_lookup(cpusetid_t setid, struct thread *td)
 {
 	struct cpuset *set;
 
 	if (setid == CPUSET_INVALID)
 		return (NULL);
 	mtx_lock_spin(&cpuset_lock);
 	LIST_FOREACH(set, &cpuset_ids, cs_link)
 		if (set->cs_id == setid)
 			break;
 	if (set)
 		cpuset_ref(set);
 	mtx_unlock_spin(&cpuset_lock);
 
 	KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__));
 	if (set != NULL && jailed(td->td_ucred)) {
 		struct cpuset *jset, *tset;
 
 		jset = td->td_ucred->cr_prison->pr_cpuset;
 		for (tset = set; tset != NULL; tset = tset->cs_parent)
 			if (tset == jset)
 				break;
 		if (tset == NULL) {
 			cpuset_rel(set);
 			set = NULL;
 		}
 	}
 
 	return (set);
 }
 
 /*
  * Create a set in the space provided in 'set' with the provided parameters.
  * The set is returned with a single ref.  May return EDEADLK if the set
  * will have no valid cpu based on restrictions from the parent.
  */
 static int
 _cpuset_create(struct cpuset *set, struct cpuset *parent, const cpuset_t *mask,
     cpusetid_t id)
 {
 
 	if (!CPU_OVERLAP(&parent->cs_mask, mask))
 		return (EDEADLK);
 	CPU_COPY(mask, &set->cs_mask);
 	LIST_INIT(&set->cs_children);
 	refcount_init(&set->cs_ref, 1);
 	set->cs_flags = 0;
 	mtx_lock_spin(&cpuset_lock);
 	CPU_AND(&set->cs_mask, &parent->cs_mask);
 	set->cs_id = id;
 	set->cs_parent = cpuset_ref(parent);
 	LIST_INSERT_HEAD(&parent->cs_children, set, cs_siblings);
 	if (set->cs_id != CPUSET_INVALID)
 		LIST_INSERT_HEAD(&cpuset_ids, set, cs_link);
 	mtx_unlock_spin(&cpuset_lock);
 
 	return (0);
 }
 
 /*
  * Create a new non-anonymous set with the requested parent and mask.  May
  * return failures if the mask is invalid or a new number can not be
  * allocated.
  */
 static int
 cpuset_create(struct cpuset **setp, struct cpuset *parent, const cpuset_t *mask)
 {
 	struct cpuset *set;
 	cpusetid_t id;
 	int error;
 
 	id = alloc_unr(cpuset_unr);
 	if (id == -1)
 		return (ENFILE);
 	*setp = set = uma_zalloc(cpuset_zone, M_WAITOK);
 	error = _cpuset_create(set, parent, mask, id);
 	if (error == 0)
 		return (0);
 	free_unr(cpuset_unr, id);
 	uma_zfree(cpuset_zone, set);
 
 	return (error);
 }
 
 /*
  * Recursively check for errors that would occur from applying mask to
  * the tree of sets starting at 'set'.  Checks for sets that would become
  * empty as well as RDONLY flags.
  */
 static int
 cpuset_testupdate(struct cpuset *set, cpuset_t *mask, int check_mask)
 {
 	struct cpuset *nset;
 	cpuset_t newmask;
 	int error;
 
 	mtx_assert(&cpuset_lock, MA_OWNED);
 	if (set->cs_flags & CPU_SET_RDONLY)
 		return (EPERM);
 	if (check_mask) {
 		if (!CPU_OVERLAP(&set->cs_mask, mask))
 			return (EDEADLK);
 		CPU_COPY(&set->cs_mask, &newmask);
 		CPU_AND(&newmask, mask);
 	} else
 		CPU_COPY(mask, &newmask);
 	error = 0;
 	LIST_FOREACH(nset, &set->cs_children, cs_siblings) 
 		if ((error = cpuset_testupdate(nset, &newmask, 1)) != 0)
 			break;
 	return (error);
 }
 
 /*
  * Applies the mask 'mask' without checking for empty sets or permissions.
  */
 static void
 cpuset_update(struct cpuset *set, cpuset_t *mask)
 {
 	struct cpuset *nset;
 
 	mtx_assert(&cpuset_lock, MA_OWNED);
 	CPU_AND(&set->cs_mask, mask);
 	LIST_FOREACH(nset, &set->cs_children, cs_siblings) 
 		cpuset_update(nset, &set->cs_mask);
 
 	return;
 }
 
 /*
  * Modify the set 'set' to use a copy of the mask provided.  Apply this new
  * mask to restrict all children in the tree.  Checks for validity before
  * applying the changes.
  */
 static int
 cpuset_modify(struct cpuset *set, cpuset_t *mask)
 {
 	struct cpuset *root;
 	int error;
 
 	error = priv_check(curthread, PRIV_SCHED_CPUSET);
 	if (error)
 		return (error);
 	/*
 	 * In case we are called from within the jail
 	 * we do not allow modifying the dedicated root
 	 * cpuset of the jail but may still allow to
 	 * change child sets.
 	 */
 	if (jailed(curthread->td_ucred) &&
 	    set->cs_flags & CPU_SET_ROOT)
 		return (EPERM);
 	/*
 	 * Verify that we have access to this set of
 	 * cpus.
 	 */
 	root = set->cs_parent;
 	if (root && !CPU_SUBSET(&root->cs_mask, mask))
 		return (EINVAL);
 	mtx_lock_spin(&cpuset_lock);
 	error = cpuset_testupdate(set, mask, 0);
 	if (error)
 		goto out;
 	CPU_COPY(mask, &set->cs_mask);
 	cpuset_update(set, mask);
 out:
 	mtx_unlock_spin(&cpuset_lock);
 
 	return (error);
 }
 
 /*
  * Resolve the 'which' parameter of several cpuset apis.
  *
  * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid.  Also
  * checks for permission via p_cansched().
  *
  * For WHICH_SET returns a valid set with a new reference.
  *
  * -1 may be supplied for any argument to mean the current proc/thread or
  * the base set of the current thread.  May fail with ESRCH/EPERM.
  */
 int
 cpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp,
     struct cpuset **setp)
 {
 	struct cpuset *set;
 	struct thread *td;
 	struct proc *p;
 	int error;
 
 	*pp = p = NULL;
 	*tdp = td = NULL;
 	*setp = set = NULL;
 	switch (which) {
 	case CPU_WHICH_PID:
 		if (id == -1) {
 			PROC_LOCK(curproc);
 			p = curproc;
 			break;
 		}
 		if ((p = pfind(id)) == NULL)
 			return (ESRCH);
 		break;
 	case CPU_WHICH_TID:
 		if (id == -1) {
 			PROC_LOCK(curproc);
 			p = curproc;
 			td = curthread;
 			break;
 		}
 		td = tdfind(id, -1);
 		if (td == NULL)
 			return (ESRCH);
 		p = td->td_proc;
 		break;
 	case CPU_WHICH_CPUSET:
 		if (id == -1) {
 			thread_lock(curthread);
 			set = cpuset_refbase(curthread->td_cpuset);
 			thread_unlock(curthread);
 		} else
 			set = cpuset_lookup(id, curthread);
 		if (set) {
 			*setp = set;
 			return (0);
 		}
 		return (ESRCH);
 	case CPU_WHICH_JAIL:
 	{
 		/* Find `set' for prison with given id. */
 		struct prison *pr;
 
 		sx_slock(&allprison_lock);
 		pr = prison_find_child(curthread->td_ucred->cr_prison, id);
 		sx_sunlock(&allprison_lock);
 		if (pr == NULL)
 			return (ESRCH);
 		cpuset_ref(pr->pr_cpuset);
 		*setp = pr->pr_cpuset;
 		mtx_unlock(&pr->pr_mtx);
 		return (0);
 	}
 	case CPU_WHICH_IRQ:
 	case CPU_WHICH_DOMAIN:
 		return (0);
 	default:
 		return (EINVAL);
 	}
 	error = p_cansched(curthread, p);
 	if (error) {
 		PROC_UNLOCK(p);
 		return (error);
 	}
 	if (td == NULL)
 		td = FIRST_THREAD_IN_PROC(p);
 	*pp = p;
 	*tdp = td;
 	return (0);
 }
 
 /*
  * Create an anonymous set with the provided mask in the space provided by
  * 'fset'.  If the passed in set is anonymous we use its parent otherwise
  * the new set is a child of 'set'.
  */
 static int
 cpuset_shadow(struct cpuset *set, struct cpuset *fset, const cpuset_t *mask)
 {
 	struct cpuset *parent;
 
 	if (set->cs_id == CPUSET_INVALID)
 		parent = set->cs_parent;
 	else
 		parent = set;
 	if (!CPU_SUBSET(&parent->cs_mask, mask))
 		return (EDEADLK);
 	return (_cpuset_create(fset, parent, mask, CPUSET_INVALID));
 }
 
 /*
  * Handle two cases for replacing the base set or mask of an entire process.
  *
  * 1) Set is non-null and mask is null.  This reparents all anonymous sets
  *    to the provided set and replaces all non-anonymous td_cpusets with the
  *    provided set.
  * 2) Mask is non-null and set is null.  This replaces or creates anonymous
  *    sets for every thread with the existing base as a parent.
  *
  * This is overly complicated because we can't allocate while holding a 
  * spinlock and spinlocks must be held while changing and examining thread
  * state.
  */
 static int
 cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask)
 {
 	struct setlist freelist;
 	struct setlist droplist;
 	struct cpuset *tdset;
 	struct cpuset *nset;
 	struct thread *td;
 	struct proc *p;
 	int threads;
 	int nfree;
 	int error;
 	/*
 	 * The algorithm requires two passes due to locking considerations.
 	 * 
 	 * 1) Lookup the process and acquire the locks in the required order.
 	 * 2) If enough cpusets have not been allocated release the locks and
 	 *    allocate them.  Loop.
 	 */
 	LIST_INIT(&freelist);
 	LIST_INIT(&droplist);
 	nfree = 0;
 	for (;;) {
 		error = cpuset_which(CPU_WHICH_PID, pid, &p, &td, &nset);
 		if (error)
 			goto out;
 		if (nfree >= p->p_numthreads)
 			break;
 		threads = p->p_numthreads;
 		PROC_UNLOCK(p);
 		for (; nfree < threads; nfree++) {
 			nset = uma_zalloc(cpuset_zone, M_WAITOK);
 			LIST_INSERT_HEAD(&freelist, nset, cs_link);
 		}
 	}
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	/*
 	 * Now that the appropriate locks are held and we have enough cpusets,
 	 * make sure the operation will succeed before applying changes.  The
 	 * proc lock prevents td_cpuset from changing between calls.
 	 */
 	error = 0;
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
 		tdset = td->td_cpuset;
 		/*
 		 * Verify that a new mask doesn't specify cpus outside of
 		 * the set the thread is a member of.
 		 */
 		if (mask) {
 			if (tdset->cs_id == CPUSET_INVALID)
 				tdset = tdset->cs_parent;
 			if (!CPU_SUBSET(&tdset->cs_mask, mask))
 				error = EDEADLK;
 		/*
 		 * Verify that a new set won't leave an existing thread
 		 * mask without a cpu to run on.  It can, however, restrict
 		 * the set.
 		 */
 		} else if (tdset->cs_id == CPUSET_INVALID) {
 			if (!CPU_OVERLAP(&set->cs_mask, &tdset->cs_mask))
 				error = EDEADLK;
 		}
 		thread_unlock(td);
 		if (error)
 			goto unlock_out;
 	}
 	/*
 	 * Replace each thread's cpuset while using deferred release.  We
 	 * must do this because the thread lock must be held while operating
 	 * on the thread and this limits the type of operations allowed.
 	 */
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
 		/*
 		 * If we presently have an anonymous set or are applying a
 		 * mask we must create an anonymous shadow set.  That is
 		 * either parented to our existing base or the supplied set.
 		 *
 		 * If we have a base set with no anonymous shadow we simply
 		 * replace it outright.
 		 */
 		tdset = td->td_cpuset;
 		if (tdset->cs_id == CPUSET_INVALID || mask) {
 			nset = LIST_FIRST(&freelist);
 			LIST_REMOVE(nset, cs_link);
 			if (mask)
 				error = cpuset_shadow(tdset, nset, mask);
 			else
 				error = _cpuset_create(nset, set,
 				    &tdset->cs_mask, CPUSET_INVALID);
 			if (error) {
 				LIST_INSERT_HEAD(&freelist, nset, cs_link);
 				thread_unlock(td);
 				break;
 			}
 		} else
 			nset = cpuset_ref(set);
 		cpuset_rel_defer(&droplist, tdset);
 		td->td_cpuset = nset;
 		sched_affinity(td);
 		thread_unlock(td);
 	}
 unlock_out:
 	PROC_UNLOCK(p);
 out:
 	while ((nset = LIST_FIRST(&droplist)) != NULL)
 		cpuset_rel_complete(nset);
 	while ((nset = LIST_FIRST(&freelist)) != NULL) {
 		LIST_REMOVE(nset, cs_link);
 		uma_zfree(cpuset_zone, nset);
 	}
 	return (error);
 }
 
 /*
  * Return a string representing a valid layout for a cpuset_t object.
  * It expects an incoming buffer at least sized as CPUSETBUFSIZ.
  */
 char *
 cpusetobj_strprint(char *buf, const cpuset_t *set)
 {
 	char *tbuf;
 	size_t i, bytesp, bufsiz;
 
 	tbuf = buf;
 	bytesp = 0;
 	bufsiz = CPUSETBUFSIZ;
 
 	for (i = 0; i < (_NCPUWORDS - 1); i++) {
 		bytesp = snprintf(tbuf, bufsiz, "%lx,", set->__bits[i]);
 		bufsiz -= bytesp;
 		tbuf += bytesp;
 	}
 	snprintf(tbuf, bufsiz, "%lx", set->__bits[_NCPUWORDS - 1]);
 	return (buf);
 }
 
 /*
  * Build a valid cpuset_t object from a string representation.
  * It expects an incoming buffer at least sized as CPUSETBUFSIZ.
  */
 int
 cpusetobj_strscan(cpuset_t *set, const char *buf)
 {
 	u_int nwords;
 	int i, ret;
 
 	if (strlen(buf) > CPUSETBUFSIZ - 1)
 		return (-1);
 
 	/* Allow to pass a shorter version of the mask when necessary. */
 	nwords = 1;
 	for (i = 0; buf[i] != '\0'; i++)
 		if (buf[i] == ',')
 			nwords++;
 	if (nwords > _NCPUWORDS)
 		return (-1);
 
 	CPU_ZERO(set);
 	for (i = 0; i < (nwords - 1); i++) {
 		ret = sscanf(buf, "%lx,", &set->__bits[i]);
 		if (ret == 0 || ret == -1)
 			return (-1);
 		buf = strstr(buf, ",");
 		if (buf == NULL)
 			return (-1);
 		buf++;
 	}
 	ret = sscanf(buf, "%lx", &set->__bits[nwords - 1]);
 	if (ret == 0 || ret == -1)
 		return (-1);
 	return (0);
 }
 
 /*
  * Apply an anonymous mask to a single thread.
  */
 int
 cpuset_setthread(lwpid_t id, cpuset_t *mask)
 {
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct thread *td;
 	struct proc *p;
 	int error;
 
 	nset = uma_zalloc(cpuset_zone, M_WAITOK);
 	error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &set);
 	if (error)
 		goto out;
 	set = NULL;
 	thread_lock(td);
 	error = cpuset_shadow(td->td_cpuset, nset, mask);
 	if (error == 0) {
 		set = td->td_cpuset;
 		td->td_cpuset = nset;
 		sched_affinity(td);
 		nset = NULL;
 	}
 	thread_unlock(td);
 	PROC_UNLOCK(p);
 	if (set)
 		cpuset_rel(set);
 out:
 	if (nset)
 		uma_zfree(cpuset_zone, nset);
 	return (error);
 }
 
 /*
  * Apply new cpumask to the ithread.
  */
 int
 cpuset_setithread(lwpid_t id, int cpu)
 {
 	struct cpuset *nset, *rset;
 	struct cpuset *parent, *old_set;
 	struct thread *td;
 	struct proc *p;
 	cpusetid_t cs_id;
 	cpuset_t mask;
 	int error;
 
 	nset = uma_zalloc(cpuset_zone, M_WAITOK);
 	rset = uma_zalloc(cpuset_zone, M_WAITOK);
 	cs_id = CPUSET_INVALID;
 
 	CPU_ZERO(&mask);
 	if (cpu == NOCPU)
 		CPU_COPY(cpuset_root, &mask);
 	else
 		CPU_SET(cpu, &mask);
 
 	error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &old_set);
 	if (error != 0 || ((cs_id = alloc_unr(cpuset_unr)) == CPUSET_INVALID))
 		goto out;
 
 	/* cpuset_which() returns with PROC_LOCK held. */
 	old_set = td->td_cpuset;
 
 	if (cpu == NOCPU) {
 
 		/*
 		 * roll back to default set. We're not using cpuset_shadow()
 		 * here because we can fail CPU_SUBSET() check. This can happen
 		 * if default set does not contain all CPUs.
 		 */
 		error = _cpuset_create(nset, cpuset_default, &mask,
 		    CPUSET_INVALID);
 
 		goto applyset;
 	}
 
 	if (old_set->cs_id == 1 || (old_set->cs_id == CPUSET_INVALID &&
 	    old_set->cs_parent->cs_id == 1)) {
 
 		/*
 		 * Current set is either default (1) or
 		 * shadowed version of default set.
 		 *
 		 * Allocate new root set to be able to shadow it
 		 * with any mask.
 		 */
 		error = _cpuset_create(rset, cpuset_zero,
 		    &cpuset_zero->cs_mask, cs_id);
 		if (error != 0) {
 			PROC_UNLOCK(p);
 			goto out;
 		}
 		rset->cs_flags |= CPU_SET_ROOT;
 		parent = rset;
 		rset = NULL;
 		cs_id = CPUSET_INVALID;
 	} else {
 		/* Assume existing set was already allocated by previous call */
 		parent = old_set;
 		old_set = NULL;
 	}
 
 	error = cpuset_shadow(parent, nset, &mask);
 applyset:
 	if (error == 0) {
 		thread_lock(td);
 		td->td_cpuset = nset;
 		sched_affinity(td);
 		thread_unlock(td);
 		nset = NULL;
 	} else
 		old_set = NULL;
 	PROC_UNLOCK(p);
 	if (old_set != NULL)
 		cpuset_rel(old_set);
 out:
 	if (nset != NULL)
 		uma_zfree(cpuset_zone, nset);
 	if (rset != NULL)
 		uma_zfree(cpuset_zone, rset);
 	if (cs_id != CPUSET_INVALID)
 		free_unr(cpuset_unr, cs_id);
 	return (error);
 }
 
 
 /*
  * Creates system-wide cpusets and the cpuset for thread0 including two
  * sets:
  * 
  * 0 - The root set which should represent all valid processors in the
  *     system.  It is initially created with a mask of all processors
  *     because we don't know what processors are valid until cpuset_init()
  *     runs.  This set is immutable.
  * 1 - The default set which all processes are a member of until changed.
  *     This allows an administrator to move all threads off of given cpus to
  *     dedicate them to high priority tasks or save power etc.
  */
 struct cpuset *
 cpuset_thread0(void)
 {
 	struct cpuset *set;
-	int error;
+	int error, i;
 
 	cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
 	mtx_init(&cpuset_lock, "cpuset", NULL, MTX_SPIN | MTX_RECURSE);
 
 	/*
 	 * Create the root system set for the whole machine.  Doesn't use
 	 * cpuset_create() due to NULL parent.
 	 */
 	set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
 	CPU_FILL(&set->cs_mask);
 	LIST_INIT(&set->cs_children);
 	LIST_INSERT_HEAD(&cpuset_ids, set, cs_link);
 	set->cs_ref = 1;
 	set->cs_flags = CPU_SET_ROOT;
 	cpuset_zero = set;
 	cpuset_root = &set->cs_mask;
 
 	/*
 	 * Now derive a default, modifiable set from that to give out.
 	 */
 	set = uma_zalloc(cpuset_zone, M_WAITOK);
 	error = _cpuset_create(set, cpuset_zero, &cpuset_zero->cs_mask, 1);
 	KASSERT(error == 0, ("Error creating default set: %d\n", error));
 	cpuset_default = set;
 
 	/*
 	 * Initialize the unit allocator. 0 and 1 are allocated above.
 	 */
 	cpuset_unr = new_unrhdr(2, INT_MAX, NULL);
 
-	/* MD Code is responsible for initializing sets if vm_ndomains > 1. */
-	if (vm_ndomains == 1)
-		CPU_COPY(&all_cpus, &cpuset_domain[0]);
+	/*
+	 * If MD code has not initialized per-domain cpusets, place all
+	 * CPUs in domain 0.
+	 */
+	for (i = 0; i < MAXMEMDOM; i++)
+		if (!CPU_EMPTY(&cpuset_domain[i]))
+			goto domains_set;
+	CPU_COPY(&all_cpus, &cpuset_domain[0]);
+domains_set:
 
 	return (set);
 }
 
 /*
  * Create a cpuset, which would be cpuset_create() but
  * mark the new 'set' as root.
  *
  * We are not going to reparent the td to it.  Use cpuset_setproc_update_set()
  * for that.
  *
  * In case of no error, returns the set in *setp locked with a reference.
  */
 int
 cpuset_create_root(struct prison *pr, struct cpuset **setp)
 {
 	struct cpuset *set;
 	int error;
 
 	KASSERT(pr != NULL, ("[%s:%d] invalid pr", __func__, __LINE__));
 	KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__));
 
 	error = cpuset_create(setp, pr->pr_cpuset, &pr->pr_cpuset->cs_mask);
 	if (error)
 		return (error);
 
 	KASSERT(*setp != NULL, ("[%s:%d] cpuset_create returned invalid data",
 	    __func__, __LINE__));
 
 	/* Mark the set as root. */
 	set = *setp;
 	set->cs_flags |= CPU_SET_ROOT;
 
 	return (0);
 }
 
 int
 cpuset_setproc_update_set(struct proc *p, struct cpuset *set)
 {
 	int error;
 
 	KASSERT(p != NULL, ("[%s:%d] invalid proc", __func__, __LINE__));
 	KASSERT(set != NULL, ("[%s:%d] invalid set", __func__, __LINE__));
 
 	cpuset_ref(set);
 	error = cpuset_setproc(p->p_pid, set, NULL);
 	if (error)
 		return (error);
 	cpuset_rel(set);
 	return (0);
 }
 
 /*
  * This is called once the final set of system cpus is known.  Modifies
  * the root set and all children and mark the root read-only.  
  */
 static void
 cpuset_init(void *arg)
 {
 	cpuset_t mask;
 
 	mask = all_cpus;
 	if (cpuset_modify(cpuset_zero, &mask))
 		panic("Can't set initial cpuset mask.\n");
 	cpuset_zero->cs_flags |= CPU_SET_RDONLY;
 }
 SYSINIT(cpuset, SI_SUB_SMP, SI_ORDER_ANY, cpuset_init, NULL);
 
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_args {
 	cpusetid_t	*setid;
 };
 #endif
 int
 sys_cpuset(struct thread *td, struct cpuset_args *uap)
 {
 	struct cpuset *root;
 	struct cpuset *set;
 	int error;
 
 	thread_lock(td);
 	root = cpuset_refroot(td->td_cpuset);
 	thread_unlock(td);
 	error = cpuset_create(&set, root, &root->cs_mask);
 	cpuset_rel(root);
 	if (error)
 		return (error);
 	error = copyout(&set->cs_id, uap->setid, sizeof(set->cs_id));
 	if (error == 0)
 		error = cpuset_setproc(-1, set, NULL);
 	cpuset_rel(set);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_setid_args {
 	cpuwhich_t	which;
 	id_t		id;
 	cpusetid_t	setid;
 };
 #endif
 int
 sys_cpuset_setid(struct thread *td, struct cpuset_setid_args *uap)
 {
 	struct cpuset *set;
 	int error;
 
 	/*
 	 * Presently we only support per-process sets.
 	 */
 	if (uap->which != CPU_WHICH_PID)
 		return (EINVAL);
 	set = cpuset_lookup(uap->setid, td);
 	if (set == NULL)
 		return (ESRCH);
 	error = cpuset_setproc(uap->id, set, NULL);
 	cpuset_rel(set);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_getid_args {
 	cpulevel_t	level;
 	cpuwhich_t	which;
 	id_t		id;
 	cpusetid_t	*setid;
 };
 #endif
 int
 sys_cpuset_getid(struct thread *td, struct cpuset_getid_args *uap)
 {
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct thread *ttd;
 	struct proc *p;
 	cpusetid_t id;
 	int error;
 
 	if (uap->level == CPU_LEVEL_WHICH && uap->which != CPU_WHICH_CPUSET)
 		return (EINVAL);
 	error = cpuset_which(uap->which, uap->id, &p, &ttd, &set);
 	if (error)
 		return (error);
 	switch (uap->which) {
 	case CPU_WHICH_TID:
 	case CPU_WHICH_PID:
 		thread_lock(ttd);
 		set = cpuset_refbase(ttd->td_cpuset);
 		thread_unlock(ttd);
 		PROC_UNLOCK(p);
 		break;
 	case CPU_WHICH_CPUSET:
 	case CPU_WHICH_JAIL:
 		break;
 	case CPU_WHICH_IRQ:
 	case CPU_WHICH_DOMAIN:
 		return (EINVAL);
 	}
 	switch (uap->level) {
 	case CPU_LEVEL_ROOT:
 		nset = cpuset_refroot(set);
 		cpuset_rel(set);
 		set = nset;
 		break;
 	case CPU_LEVEL_CPUSET:
 		break;
 	case CPU_LEVEL_WHICH:
 		break;
 	}
 	id = set->cs_id;
 	cpuset_rel(set);
 	if (error == 0)
 		error = copyout(&id, uap->setid, sizeof(id));
 
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_getaffinity_args {
 	cpulevel_t	level;
 	cpuwhich_t	which;
 	id_t		id;
 	size_t		cpusetsize;
 	cpuset_t	*mask;
 };
 #endif
 int
 sys_cpuset_getaffinity(struct thread *td, struct cpuset_getaffinity_args *uap)
 {
 	struct thread *ttd;
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct proc *p;
 	cpuset_t *mask;
 	int error;
 	size_t size;
 
 	if (uap->cpusetsize < sizeof(cpuset_t) ||
 	    uap->cpusetsize > CPU_MAXSIZE / NBBY)
 		return (ERANGE);
 	size = uap->cpusetsize;
 	mask = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
 	error = cpuset_which(uap->which, uap->id, &p, &ttd, &set);
 	if (error)
 		goto out;
 	switch (uap->level) {
 	case CPU_LEVEL_ROOT:
 	case CPU_LEVEL_CPUSET:
 		switch (uap->which) {
 		case CPU_WHICH_TID:
 		case CPU_WHICH_PID:
 			thread_lock(ttd);
 			set = cpuset_ref(ttd->td_cpuset);
 			thread_unlock(ttd);
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			break;
 		case CPU_WHICH_IRQ:
 		case CPU_WHICH_DOMAIN:
 			error = EINVAL;
 			goto out;
 		}
 		if (uap->level == CPU_LEVEL_ROOT)
 			nset = cpuset_refroot(set);
 		else
 			nset = cpuset_refbase(set);
 		CPU_COPY(&nset->cs_mask, mask);
 		cpuset_rel(nset);
 		break;
 	case CPU_LEVEL_WHICH:
 		switch (uap->which) {
 		case CPU_WHICH_TID:
 			thread_lock(ttd);
 			CPU_COPY(&ttd->td_cpuset->cs_mask, mask);
 			thread_unlock(ttd);
 			break;
 		case CPU_WHICH_PID:
 			FOREACH_THREAD_IN_PROC(p, ttd) {
 				thread_lock(ttd);
 				CPU_OR(mask, &ttd->td_cpuset->cs_mask);
 				thread_unlock(ttd);
 			}
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			CPU_COPY(&set->cs_mask, mask);
 			break;
 		case CPU_WHICH_IRQ:
 			error = intr_getaffinity(uap->id, mask);
 			break;
 		case CPU_WHICH_DOMAIN:
-			if (uap->id < 0 || uap->id >= vm_ndomains)
+			if (uap->id < 0 || uap->id >= MAXMEMDOM)
 				error = ESRCH;
 			else
 				CPU_COPY(&cpuset_domain[uap->id], mask);
 			break;
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	if (set)
 		cpuset_rel(set);
 	if (p)
 		PROC_UNLOCK(p);
 	if (error == 0)
 		error = copyout(mask, uap->mask, size);
 out:
 	free(mask, M_TEMP);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_setaffinity_args {
 	cpulevel_t	level;
 	cpuwhich_t	which;
 	id_t		id;
 	size_t		cpusetsize;
 	const cpuset_t	*mask;
 };
 #endif
 int
 sys_cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap)
 {
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct thread *ttd;
 	struct proc *p;
 	cpuset_t *mask;
 	int error;
 
 	if (uap->cpusetsize < sizeof(cpuset_t) ||
 	    uap->cpusetsize > CPU_MAXSIZE / NBBY)
 		return (ERANGE);
 	mask = malloc(uap->cpusetsize, M_TEMP, M_WAITOK | M_ZERO);
 	error = copyin(uap->mask, mask, uap->cpusetsize);
 	if (error)
 		goto out;
 	/*
 	 * Verify that no high bits are set.
 	 */
 	if (uap->cpusetsize > sizeof(cpuset_t)) {
 		char *end;
 		char *cp;
 
 		end = cp = (char *)&mask->__bits;
 		end += uap->cpusetsize;
 		cp += sizeof(cpuset_t);
 		while (cp != end)
 			if (*cp++ != 0) {
 				error = EINVAL;
 				goto out;
 			}
 
 	}
 	switch (uap->level) {
 	case CPU_LEVEL_ROOT:
 	case CPU_LEVEL_CPUSET:
 		error = cpuset_which(uap->which, uap->id, &p, &ttd, &set);
 		if (error)
 			break;
 		switch (uap->which) {
 		case CPU_WHICH_TID:
 		case CPU_WHICH_PID:
 			thread_lock(ttd);
 			set = cpuset_ref(ttd->td_cpuset);
 			thread_unlock(ttd);
 			PROC_UNLOCK(p);
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			break;
 		case CPU_WHICH_IRQ:
 		case CPU_WHICH_DOMAIN:
 			error = EINVAL;
 			goto out;
 		}
 		if (uap->level == CPU_LEVEL_ROOT)
 			nset = cpuset_refroot(set);
 		else
 			nset = cpuset_refbase(set);
 		error = cpuset_modify(nset, mask);
 		cpuset_rel(nset);
 		cpuset_rel(set);
 		break;
 	case CPU_LEVEL_WHICH:
 		switch (uap->which) {
 		case CPU_WHICH_TID:
 			error = cpuset_setthread(uap->id, mask);
 			break;
 		case CPU_WHICH_PID:
 			error = cpuset_setproc(uap->id, NULL, mask);
 			break;
 		case CPU_WHICH_CPUSET:
 		case CPU_WHICH_JAIL:
 			error = cpuset_which(uap->which, uap->id, &p,
 			    &ttd, &set);
 			if (error == 0) {
 				error = cpuset_modify(set, mask);
 				cpuset_rel(set);
 			}
 			break;
 		case CPU_WHICH_IRQ:
 			error = intr_setaffinity(uap->id, mask);
 			break;
 		default:
 			error = EINVAL;
 			break;
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 out:
 	free(mask, M_TEMP);
 	return (error);
 }
 
 #ifdef DDB
 void
 ddb_display_cpuset(const cpuset_t *set)
 {
 	int cpu, once;
 
 	for (once = 0, cpu = 0; cpu < CPU_SETSIZE; cpu++) {
 		if (CPU_ISSET(cpu, set)) {
 			if (once == 0) {
 				db_printf("%d", cpu);
 				once = 1;
 			} else  
 				db_printf(",%d", cpu);
 		}
 	}
 	if (once == 0)
 		db_printf("<none>");
 }
 
 DB_SHOW_COMMAND(cpusets, db_show_cpusets)
 {
 	struct cpuset *set;
 
 	LIST_FOREACH(set, &cpuset_ids, cs_link) {
 		db_printf("set=%p id=%-6u ref=%-6d flags=0x%04x parent id=%d\n",
 		    set, set->cs_id, set->cs_ref, set->cs_flags,
 		    (set->cs_parent != NULL) ? set->cs_parent->cs_id : 0);
 		db_printf("  mask=");
 		ddb_display_cpuset(&set->cs_mask);
 		db_printf("\n");
 		if (db_pager_quit)
 			break;
 	}
 }
 #endif /* DDB */
Index: head/sys/vm/vm_domain.c
===================================================================
--- head/sys/vm/vm_domain.c	(revision 297747)
+++ head/sys/vm/vm_domain.c	(revision 297748)
@@ -1,384 +1,405 @@
 /*-
  * Copyright (c) 2015 Adrian Chadd <adrian@FreeBSD.org>.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification.
  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
  *    redistribution must be conditioned upon including a substantially
  *    similar Disclaimer requirement for further binary redistribution.
  *
  * NO WARRANTY
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGES.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 #include <sys/proc.h>
 #endif
 #include <sys/queue.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/tree.h>
 #include <sys/vmmeter.h>
 #include <sys/seq.h>
 
 #include <ddb/ddb.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_phys.h>
 
 #include <vm/vm_domain.h>
 
 static __inline int
 vm_domain_rr_selectdomain(int skip_domain)
 {
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	struct thread *td;
 
 	td = curthread;
 
 	td->td_dom_rr_idx++;
 	td->td_dom_rr_idx %= vm_ndomains;
 
 	/*
 	 * If skip_domain is provided then skip over that
 	 * domain.  This is intended for round robin variants
 	 * which first try a fixed domain.
 	 */
 	if ((skip_domain > -1) && (td->td_dom_rr_idx == skip_domain)) {
 		td->td_dom_rr_idx++;
 		td->td_dom_rr_idx %= vm_ndomains;
 	}
 	return (td->td_dom_rr_idx);
 #else
 	return (0);
 #endif
 }
 
 /*
  * This implements a very simple set of VM domain memory allocation
  * policies and iterators.
  */
 
 /*
  * A VM domain policy represents a desired VM domain policy.
  * Iterators implement searching through VM domains in a specific
  * order.
  */
 
 /*
  * When setting a policy, the caller must establish their own
  * exclusive write protection for the contents of the domain
  * policy.
  */
 int
 vm_domain_policy_init(struct vm_domain_policy *vp)
 {
 
 	bzero(vp, sizeof(*vp));
 	vp->p.policy = VM_POLICY_NONE;
 	vp->p.domain = -1;
 	return (0);
 }
 
 int
 vm_domain_policy_set(struct vm_domain_policy *vp,
     vm_domain_policy_type_t vt, int domain)
 {
 
 	seq_write_begin(&vp->seq);
 	vp->p.policy = vt;
 	vp->p.domain = domain;
 	seq_write_end(&vp->seq);
 	return (0);
 }
 
 /*
  * Take a local copy of a policy.
  *
  * The destination policy isn't write-barriered; this is used
  * for doing local copies into something that isn't shared.
  */
 void
 vm_domain_policy_localcopy(struct vm_domain_policy *dst,
     const struct vm_domain_policy *src)
 {
 	seq_t seq;
 
 	for (;;) {
 		seq = seq_read(&src->seq);
 		*dst = *src;
 		if (seq_consistent(&src->seq, seq))
 			return;
 		cpu_spinwait();
 	}
 }
 
 /*
  * Take a write-barrier copy of a policy.
  *
  * The destination policy is write -barriered; this is used
  * for doing copies into policies that may be read by other
  * threads.
  */
 void
 vm_domain_policy_copy(struct vm_domain_policy *dst,
     const struct vm_domain_policy *src)
 {
 	seq_t seq;
 	struct vm_domain_policy d;
 
 	for (;;) {
 		seq = seq_read(&src->seq);
 		d = *src;
 		if (seq_consistent(&src->seq, seq)) {
 			seq_write_begin(&dst->seq);
 			dst->p.domain = d.p.domain;
 			dst->p.policy = d.p.policy;
 			seq_write_end(&dst->seq);
 			return;
 		}
 		cpu_spinwait();
 	}
 }
 
 int
 vm_domain_policy_validate(const struct vm_domain_policy *vp)
 {
 
 	switch (vp->p.policy) {
 	case VM_POLICY_NONE:
 	case VM_POLICY_ROUND_ROBIN:
 	case VM_POLICY_FIRST_TOUCH:
 	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
 		if (vp->p.domain == -1)
 			return (0);
 		return (-1);
 	case VM_POLICY_FIXED_DOMAIN:
 	case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN:
+#ifdef VM_NUMA_ALLOC
 		if (vp->p.domain >= 0 && vp->p.domain < vm_ndomains)
 			return (0);
+#else
+		if (vp->p.domain == 0)
+			return (0);
+#endif
 		return (-1);
 	default:
 		return (-1);
 	}
 	return (-1);
 }
 
 int
 vm_domain_policy_cleanup(struct vm_domain_policy *vp)
 {
 
 	/* For now, empty */
 	return (0);
 }
 
 int
 vm_domain_iterator_init(struct vm_domain_iterator *vi)
 {
 
 	/* Nothing to do for now */
 	return (0);
 }
 
 /*
  * Manually setup an iterator with the given details.
  */
 int
 vm_domain_iterator_set(struct vm_domain_iterator *vi,
     vm_domain_policy_type_t vt, int domain)
 {
 
+#ifdef VM_NUMA_ALLOC
 	switch (vt) {
 	case VM_POLICY_FIXED_DOMAIN:
 		vi->policy = VM_POLICY_FIXED_DOMAIN;
 		vi->domain = domain;
 		vi->n = 1;
 		break;
 	case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN:
 		vi->policy = VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN;
 		vi->domain = domain;
 		vi->n = vm_ndomains;
 		break;
 	case VM_POLICY_FIRST_TOUCH:
 		vi->policy = VM_POLICY_FIRST_TOUCH;
 		vi->domain = PCPU_GET(domain);
 		vi->n = 1;
 		break;
 	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
 		vi->policy = VM_POLICY_FIRST_TOUCH_ROUND_ROBIN;
 		vi->domain = PCPU_GET(domain);
 		vi->n = vm_ndomains;
 		break;
 	case VM_POLICY_ROUND_ROBIN:
 	default:
 		vi->policy = VM_POLICY_ROUND_ROBIN;
 		vi->domain = -1;
 		vi->n = vm_ndomains;
 		break;
 	}
+#else
+	vi->domain = 0;
+	vi->n = 1;
+#endif
 	return (0);
 }
 
 /*
  * Setup an iterator based on the given policy.
  */
 static inline void
 _vm_domain_iterator_set_policy(struct vm_domain_iterator *vi,
     const struct vm_domain_policy *vt)
 {
+
+#ifdef VM_NUMA_ALLOC
 	/*
 	 * Initialise the iterator.
 	 *
 	 * For first-touch, the initial domain is set
 	 * via the current thread CPU domain.
 	 *
 	 * For fixed-domain, it's assumed that the
 	 * caller has initialised the specific domain
 	 * it is after.
 	 */
 	switch (vt->p.policy) {
 	case VM_POLICY_FIXED_DOMAIN:
 		vi->policy = vt->p.policy;
 		vi->domain = vt->p.domain;
 		vi->n = 1;
 		break;
 	case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN:
 		vi->policy = vt->p.policy;
 		vi->domain = vt->p.domain;
 		vi->n = vm_ndomains;
 		break;
 	case VM_POLICY_FIRST_TOUCH:
 		vi->policy = vt->p.policy;
 		vi->domain = PCPU_GET(domain);
 		vi->n = 1;
 		break;
 	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
 		vi->policy = vt->p.policy;
 		vi->domain = PCPU_GET(domain);
 		vi->n = vm_ndomains;
 		break;
 	case VM_POLICY_ROUND_ROBIN:
 	default:
 		/*
 		 * Default to round-robin policy.
 		 */
 		vi->policy = VM_POLICY_ROUND_ROBIN;
 		vi->domain = -1;
 		vi->n = vm_ndomains;
 		break;
 	}
+#else
+	vi->domain = 0;
+	vi->n = 1;
+#endif
 }
 
 void
 vm_domain_iterator_set_policy(struct vm_domain_iterator *vi,
     const struct vm_domain_policy *vt)
 {
 	seq_t seq;
 	struct vm_domain_policy vt_lcl;
 
 	for (;;) {
 		seq = seq_read(&vt->seq);
 		vt_lcl = *vt;
 		if (seq_consistent(&vt->seq, seq)) {
 			_vm_domain_iterator_set_policy(vi, &vt_lcl);
 			return;
 		}
 		cpu_spinwait();
 	}
 }
 
 /*
  * Return the next VM domain to use.
  *
  * Returns 0 w/ domain set to the next domain to use, or
  * -1 to indicate no more domains are available.
  */
 int
 vm_domain_iterator_run(struct vm_domain_iterator *vi, int *domain)
 {
 
 	/* General catch-all */
 	if (vi->n <= 0)
 		return (-1);
 
+#ifdef VM_NUMA_ALLOC
 	switch (vi->policy) {
 	case VM_POLICY_FIXED_DOMAIN:
 	case VM_POLICY_FIRST_TOUCH:
 		*domain = vi->domain;
 		vi->n--;
 		break;
 	case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN:
 	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
 		/*
 		 * XXX TODO: skip over the rr'ed domain
 		 * if it equals the one we started with.
 		 */
 		if (vi->n == vm_ndomains)
 			*domain = vi->domain;
 		else
 			*domain = vm_domain_rr_selectdomain(vi->domain);
 		vi->n--;
 		break;
 	case VM_POLICY_ROUND_ROBIN:
 	default:
 		*domain = vm_domain_rr_selectdomain(-1);
 		vi->n--;
 		break;
 	}
+#else
+	*domain = 0;
+	vi->n--;
+#endif
 
 	return (0);
 }
 
 /*
  * Returns 1 if the iteration is done, or 0 if it has not.
 
  * This can only be called after at least one loop through
  * the iterator.  Ie, it's designed to be used as a tail
  * check of a loop, not the head check of a loop.
  */
 int
 vm_domain_iterator_isdone(struct vm_domain_iterator *vi)
 {
 
 	return (vi->n <= 0);
 }
 
 int
 vm_domain_iterator_cleanup(struct vm_domain_iterator *vi)
 {
 
 	return (0);
 }
Index: head/sys/vm/vm_pageout.c
===================================================================
--- head/sys/vm/vm_pageout.c	(revision 297747)
+++ head/sys/vm/vm_pageout.c	(revision 297748)
@@ -1,1850 +1,1850 @@
 /*-
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  * Copyright (c) 2005 Yahoo! Technologies Norway AS
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /*
  *	The proverbial page-out daemon.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/kthread.h>
 #include <sys/ktr.h>
 #include <sys/mount.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/smp.h>
 #include <sys/time.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_phys.h>
 #include <vm/swap_pager.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 /*
  * System initialization
  */
 
 /* the kernel process "vm_pageout"*/
 static void vm_pageout(void);
 static void vm_pageout_init(void);
 static int vm_pageout_clean(vm_page_t m);
 static int vm_pageout_cluster(vm_page_t m);
 static void vm_pageout_scan(struct vm_domain *vmd, int pass);
 static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
     int starting_page_shortage);
 
 SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init,
     NULL);
 
 struct proc *pageproc;
 
 static struct kproc_desc page_kp = {
 	"pagedaemon",
 	vm_pageout,
 	&pageproc
 };
 SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start,
     &page_kp);
 
 SDT_PROVIDER_DEFINE(vm);
 SDT_PROBE_DEFINE(vm, , , vm__lowmem_cache);
 SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
 
 #if !defined(NO_SWAPPING)
 /* the kernel process "vm_daemon"*/
 static void vm_daemon(void);
 static struct	proc *vmproc;
 
 static struct kproc_desc vm_kp = {
 	"vmdaemon",
 	vm_daemon,
 	&vmproc
 };
 SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp);
 #endif
 
 
 int vm_pages_needed;		/* Event on which pageout daemon sleeps */
 int vm_pageout_deficit;		/* Estimated number of pages deficit */
 int vm_pageout_wakeup_thresh;
 static int vm_pageout_oom_seq = 12;
 
 #if !defined(NO_SWAPPING)
 static int vm_pageout_req_swapout;	/* XXX */
 static int vm_daemon_needed;
 static struct mtx vm_daemon_mtx;
 /* Allow for use by vm_pageout before vm_daemon is initialized. */
 MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF);
 #endif
 static int vm_max_launder = 32;
 static int vm_pageout_update_period;
 static int defer_swap_pageouts;
 static int disable_swap_pageouts;
 static int lowmem_period = 10;
 static time_t lowmem_uptime;
 
 #if defined(NO_SWAPPING)
 static int vm_swap_enabled = 0;
 static int vm_swap_idle_enabled = 0;
 #else
 static int vm_swap_enabled = 1;
 static int vm_swap_idle_enabled = 0;
 #endif
 
 static int vm_panic_on_oom = 0;
 
 SYSCTL_INT(_vm, OID_AUTO, panic_on_oom,
 	CTLFLAG_RWTUN, &vm_panic_on_oom, 0,
 	"panic on out of memory instead of killing the largest process");
 
 SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh,
 	CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0,
 	"free page threshold for waking up the pageout daemon");
 
 SYSCTL_INT(_vm, OID_AUTO, max_launder,
 	CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout");
 
 SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
 	CTLFLAG_RW, &vm_pageout_update_period, 0,
 	"Maximum active LRU update period");
   
 SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0,
 	"Low memory callback period");
 
 #if defined(NO_SWAPPING)
 SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
 	CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout");
 SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
 	CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
 #else
 SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
 	CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout");
 SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
 	CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
 #endif
 
 SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts,
 	CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem");
 
 SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
 	CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
 
 static int pageout_lock_miss;
 SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
 	CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout");
 
 SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq,
 	CTLFLAG_RW, &vm_pageout_oom_seq, 0,
 	"back-to-back calls to oom detector to start OOM");
 
 #define VM_PAGEOUT_PAGE_COUNT 16
 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
 
 int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
 SYSCTL_INT(_vm, OID_AUTO, max_wired,
 	CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
 
 static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
 #if !defined(NO_SWAPPING)
 static void vm_pageout_map_deactivate_pages(vm_map_t, long);
 static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
 static void vm_req_vmdaemon(int req);
 #endif
 static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
 
 /*
  * Initialize a dummy page for marking the caller's place in the specified
  * paging queue.  In principle, this function only needs to set the flag
  * PG_MARKER.  Nonetheless, it wirte busies and initializes the hold count
  * to one as safety precautions.
  */ 
 static void
 vm_pageout_init_marker(vm_page_t marker, u_short queue)
 {
 
 	bzero(marker, sizeof(*marker));
 	marker->flags = PG_MARKER;
 	marker->busy_lock = VPB_SINGLE_EXCLUSIVER;
 	marker->queue = queue;
 	marker->hold_count = 1;
 }
 
 /*
  * vm_pageout_fallback_object_lock:
  * 
  * Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is
  * known to have failed and page queue must be either PQ_ACTIVE or
  * PQ_INACTIVE.  To avoid lock order violation, unlock the page queues
  * while locking the vm object.  Use marker page to detect page queue
  * changes and maintain notion of next page on page queue.  Return
  * TRUE if no changes were detected, FALSE otherwise.  vm object is
  * locked on return.
  * 
  * This function depends on both the lock portion of struct vm_object
  * and normal struct vm_page being type stable.
  */
 static boolean_t
 vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
 {
 	struct vm_page marker;
 	struct vm_pagequeue *pq;
 	boolean_t unchanged;
 	u_short queue;
 	vm_object_t object;
 
 	queue = m->queue;
 	vm_pageout_init_marker(&marker, queue);
 	pq = vm_page_pagequeue(m);
 	object = m->object;
 	
 	TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
 	vm_pagequeue_unlock(pq);
 	vm_page_unlock(m);
 	VM_OBJECT_WLOCK(object);
 	vm_page_lock(m);
 	vm_pagequeue_lock(pq);
 
 	/*
 	 * The page's object might have changed, and/or the page might
 	 * have moved from its original position in the queue.  If the
 	 * page's object has changed, then the caller should abandon
 	 * processing the page because the wrong object lock was
 	 * acquired.  Use the marker's plinks.q, not the page's, to
 	 * determine if the page has been moved.  The state of the
 	 * page's plinks.q can be indeterminate; whereas, the marker's
 	 * plinks.q must be valid.
 	 */
 	*next = TAILQ_NEXT(&marker, plinks.q);
 	unchanged = m->object == object &&
 	    m == TAILQ_PREV(&marker, pglist, plinks.q);
 	KASSERT(!unchanged || m->queue == queue,
 	    ("page %p queue %d %d", m, queue, m->queue));
 	TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
 	return (unchanged);
 }
 
 /*
  * Lock the page while holding the page queue lock.  Use marker page
  * to detect page queue changes and maintain notion of next page on
  * page queue.  Return TRUE if no changes were detected, FALSE
  * otherwise.  The page is locked on return. The page queue lock might
  * be dropped and reacquired.
  *
  * This function depends on normal struct vm_page being type stable.
  */
 static boolean_t
 vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
 {
 	struct vm_page marker;
 	struct vm_pagequeue *pq;
 	boolean_t unchanged;
 	u_short queue;
 
 	vm_page_lock_assert(m, MA_NOTOWNED);
 	if (vm_page_trylock(m))
 		return (TRUE);
 
 	queue = m->queue;
 	vm_pageout_init_marker(&marker, queue);
 	pq = vm_page_pagequeue(m);
 
 	TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
 	vm_pagequeue_unlock(pq);
 	vm_page_lock(m);
 	vm_pagequeue_lock(pq);
 
 	/* Page queue might have changed. */
 	*next = TAILQ_NEXT(&marker, plinks.q);
 	unchanged = m == TAILQ_PREV(&marker, pglist, plinks.q);
 	KASSERT(!unchanged || m->queue == queue,
 	    ("page %p queue %d %d", m, queue, m->queue));
 	TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
 	return (unchanged);
 }
 
 /*
  * vm_pageout_clean:
  *
  * Clean the page and remove it from the laundry.
  * 
  * We set the busy bit to cause potential page faults on this page to
  * block.  Note the careful timing, however, the busy bit isn't set till
  * late and we cannot do anything that will mess with the page.
  */
 static int
 vm_pageout_cluster(vm_page_t m)
 {
 	vm_object_t object;
 	vm_page_t mc[2*vm_pageout_page_count], pb, ps;
 	int pageout_count;
 	int ib, is, page_base;
 	vm_pindex_t pindex = m->pindex;
 
 	vm_page_lock_assert(m, MA_OWNED);
 	object = m->object;
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	/*
 	 * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP
 	 * with the new swapper, but we could have serious problems paging
 	 * out other object types if there is insufficient memory.  
 	 *
 	 * Unfortunately, checking free memory here is far too late, so the
 	 * check has been moved up a procedural level.
 	 */
 
 	/*
 	 * Can't clean the page if it's busy or held.
 	 */
 	vm_page_assert_unbusied(m);
 	KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m));
 	vm_page_unlock(m);
 
 	mc[vm_pageout_page_count] = pb = ps = m;
 	pageout_count = 1;
 	page_base = vm_pageout_page_count;
 	ib = 1;
 	is = 1;
 
 	/*
 	 * Scan object for clusterable pages.
 	 *
 	 * We can cluster ONLY if: ->> the page is NOT
 	 * clean, wired, busy, held, or mapped into a
 	 * buffer, and one of the following:
 	 * 1) The page is inactive, or a seldom used
 	 *    active page.
 	 * -or-
 	 * 2) we force the issue.
 	 *
 	 * During heavy mmap/modification loads the pageout
 	 * daemon can really fragment the underlying file
 	 * due to flushing pages out of order and not trying
 	 * align the clusters (which leave sporatic out-of-order
 	 * holes).  To solve this problem we do the reverse scan
 	 * first and attempt to align our cluster, then do a 
 	 * forward scan if room remains.
 	 */
 more:
 	while (ib && pageout_count < vm_pageout_page_count) {
 		vm_page_t p;
 
 		if (ib > pindex) {
 			ib = 0;
 			break;
 		}
 
 		if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) {
 			ib = 0;
 			break;
 		}
 		vm_page_test_dirty(p);
 		if (p->dirty == 0) {
 			ib = 0;
 			break;
 		}
 		vm_page_lock(p);
 		if (p->queue != PQ_INACTIVE ||
 		    p->hold_count != 0) {	/* may be undergoing I/O */
 			vm_page_unlock(p);
 			ib = 0;
 			break;
 		}
 		vm_page_unlock(p);
 		mc[--page_base] = pb = p;
 		++pageout_count;
 		++ib;
 		/*
 		 * alignment boundry, stop here and switch directions.  Do
 		 * not clear ib.
 		 */
 		if ((pindex - (ib - 1)) % vm_pageout_page_count == 0)
 			break;
 	}
 
 	while (pageout_count < vm_pageout_page_count && 
 	    pindex + is < object->size) {
 		vm_page_t p;
 
 		if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
 			break;
 		vm_page_test_dirty(p);
 		if (p->dirty == 0)
 			break;
 		vm_page_lock(p);
 		if (p->queue != PQ_INACTIVE ||
 		    p->hold_count != 0) {	/* may be undergoing I/O */
 			vm_page_unlock(p);
 			break;
 		}
 		vm_page_unlock(p);
 		mc[page_base + pageout_count] = ps = p;
 		++pageout_count;
 		++is;
 	}
 
 	/*
 	 * If we exhausted our forward scan, continue with the reverse scan
 	 * when possible, even past a page boundry.  This catches boundry
 	 * conditions.
 	 */
 	if (ib && pageout_count < vm_pageout_page_count)
 		goto more;
 
 	/*
 	 * we allow reads during pageouts...
 	 */
 	return (vm_pageout_flush(&mc[page_base], pageout_count, 0, 0, NULL,
 	    NULL));
 }
 
 /*
  * vm_pageout_flush() - launder the given pages
  *
  *	The given pages are laundered.  Note that we setup for the start of
  *	I/O ( i.e. busy the page ), mark it read-only, and bump the object
  *	reference count all in here rather then in the parent.  If we want
  *	the parent to do more sophisticated things we may have to change
  *	the ordering.
  *
  *	Returned runlen is the count of pages between mreq and first
  *	page after mreq with status VM_PAGER_AGAIN.
  *	*eio is set to TRUE if pager returned VM_PAGER_ERROR or VM_PAGER_FAIL
  *	for any page in runlen set.
  */
 int
 vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
     boolean_t *eio)
 {
 	vm_object_t object = mc[0]->object;
 	int pageout_status[count];
 	int numpagedout = 0;
 	int i, runlen;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	/*
 	 * Initiate I/O.  Bump the vm_page_t->busy counter and
 	 * mark the pages read-only.
 	 *
 	 * We do not have to fixup the clean/dirty bits here... we can
 	 * allow the pager to do it after the I/O completes.
 	 *
 	 * NOTE! mc[i]->dirty may be partial or fragmented due to an
 	 * edge case with file fragments.
 	 */
 	for (i = 0; i < count; i++) {
 		KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL,
 		    ("vm_pageout_flush: partially invalid page %p index %d/%d",
 			mc[i], i, count));
 		vm_page_sbusy(mc[i]);
 		pmap_remove_write(mc[i]);
 	}
 	vm_object_pip_add(object, count);
 
 	vm_pager_put_pages(object, mc, count, flags, pageout_status);
 
 	runlen = count - mreq;
 	if (eio != NULL)
 		*eio = FALSE;
 	for (i = 0; i < count; i++) {
 		vm_page_t mt = mc[i];
 
 		KASSERT(pageout_status[i] == VM_PAGER_PEND ||
 		    !pmap_page_is_write_mapped(mt),
 		    ("vm_pageout_flush: page %p is not write protected", mt));
 		switch (pageout_status[i]) {
 		case VM_PAGER_OK:
 		case VM_PAGER_PEND:
 			numpagedout++;
 			break;
 		case VM_PAGER_BAD:
 			/*
 			 * Page outside of range of object. Right now we
 			 * essentially lose the changes by pretending it
 			 * worked.
 			 */
 			vm_page_undirty(mt);
 			break;
 		case VM_PAGER_ERROR:
 		case VM_PAGER_FAIL:
 			/*
 			 * If page couldn't be paged out, then reactivate the
 			 * page so it doesn't clog the inactive list.  (We
 			 * will try paging out it again later).
 			 */
 			vm_page_lock(mt);
 			vm_page_activate(mt);
 			vm_page_unlock(mt);
 			if (eio != NULL && i >= mreq && i - mreq < runlen)
 				*eio = TRUE;
 			break;
 		case VM_PAGER_AGAIN:
 			if (i >= mreq && i - mreq < runlen)
 				runlen = i - mreq;
 			break;
 		}
 
 		/*
 		 * If the operation is still going, leave the page busy to
 		 * block all other accesses. Also, leave the paging in
 		 * progress indicator set so that we don't attempt an object
 		 * collapse.
 		 */
 		if (pageout_status[i] != VM_PAGER_PEND) {
 			vm_object_pip_wakeup(object);
 			vm_page_sunbusy(mt);
 		}
 	}
 	if (prunlen != NULL)
 		*prunlen = runlen;
 	return (numpagedout);
 }
 
 #if !defined(NO_SWAPPING)
 /*
  *	vm_pageout_object_deactivate_pages
  *
  *	Deactivate enough pages to satisfy the inactive target
  *	requirements.
  *
  *	The object and map must be locked.
  */
 static void
 vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
     long desired)
 {
 	vm_object_t backing_object, object;
 	vm_page_t p;
 	int act_delta, remove_mode;
 
 	VM_OBJECT_ASSERT_LOCKED(first_object);
 	if ((first_object->flags & OBJ_FICTITIOUS) != 0)
 		return;
 	for (object = first_object;; object = backing_object) {
 		if (pmap_resident_count(pmap) <= desired)
 			goto unlock_return;
 		VM_OBJECT_ASSERT_LOCKED(object);
 		if ((object->flags & OBJ_UNMANAGED) != 0 ||
 		    object->paging_in_progress != 0)
 			goto unlock_return;
 
 		remove_mode = 0;
 		if (object->shadow_count > 1)
 			remove_mode = 1;
 		/*
 		 * Scan the object's entire memory queue.
 		 */
 		TAILQ_FOREACH(p, &object->memq, listq) {
 			if (pmap_resident_count(pmap) <= desired)
 				goto unlock_return;
 			if (vm_page_busied(p))
 				continue;
 			PCPU_INC(cnt.v_pdpages);
 			vm_page_lock(p);
 			if (p->wire_count != 0 || p->hold_count != 0 ||
 			    !pmap_page_exists_quick(pmap, p)) {
 				vm_page_unlock(p);
 				continue;
 			}
 			act_delta = pmap_ts_referenced(p);
 			if ((p->aflags & PGA_REFERENCED) != 0) {
 				if (act_delta == 0)
 					act_delta = 1;
 				vm_page_aflag_clear(p, PGA_REFERENCED);
 			}
 			if (p->queue != PQ_ACTIVE && act_delta != 0) {
 				vm_page_activate(p);
 				p->act_count += act_delta;
 			} else if (p->queue == PQ_ACTIVE) {
 				if (act_delta == 0) {
 					p->act_count -= min(p->act_count,
 					    ACT_DECLINE);
 					if (!remove_mode && p->act_count == 0) {
 						pmap_remove_all(p);
 						vm_page_deactivate(p);
 					} else
 						vm_page_requeue(p);
 				} else {
 					vm_page_activate(p);
 					if (p->act_count < ACT_MAX -
 					    ACT_ADVANCE)
 						p->act_count += ACT_ADVANCE;
 					vm_page_requeue(p);
 				}
 			} else if (p->queue == PQ_INACTIVE)
 				pmap_remove_all(p);
 			vm_page_unlock(p);
 		}
 		if ((backing_object = object->backing_object) == NULL)
 			goto unlock_return;
 		VM_OBJECT_RLOCK(backing_object);
 		if (object != first_object)
 			VM_OBJECT_RUNLOCK(object);
 	}
 unlock_return:
 	if (object != first_object)
 		VM_OBJECT_RUNLOCK(object);
 }
 
 /*
  * deactivate some number of pages in a map, try to do it fairly, but
  * that is really hard to do.
  */
 static void
 vm_pageout_map_deactivate_pages(map, desired)
 	vm_map_t map;
 	long desired;
 {
 	vm_map_entry_t tmpe;
 	vm_object_t obj, bigobj;
 	int nothingwired;
 
 	if (!vm_map_trylock(map))
 		return;
 
 	bigobj = NULL;
 	nothingwired = TRUE;
 
 	/*
 	 * first, search out the biggest object, and try to free pages from
 	 * that.
 	 */
 	tmpe = map->header.next;
 	while (tmpe != &map->header) {
 		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 			obj = tmpe->object.vm_object;
 			if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) {
 				if (obj->shadow_count <= 1 &&
 				    (bigobj == NULL ||
 				     bigobj->resident_page_count < obj->resident_page_count)) {
 					if (bigobj != NULL)
 						VM_OBJECT_RUNLOCK(bigobj);
 					bigobj = obj;
 				} else
 					VM_OBJECT_RUNLOCK(obj);
 			}
 		}
 		if (tmpe->wired_count > 0)
 			nothingwired = FALSE;
 		tmpe = tmpe->next;
 	}
 
 	if (bigobj != NULL) {
 		vm_pageout_object_deactivate_pages(map->pmap, bigobj, desired);
 		VM_OBJECT_RUNLOCK(bigobj);
 	}
 	/*
 	 * Next, hunt around for other pages to deactivate.  We actually
 	 * do this search sort of wrong -- .text first is not the best idea.
 	 */
 	tmpe = map->header.next;
 	while (tmpe != &map->header) {
 		if (pmap_resident_count(vm_map_pmap(map)) <= desired)
 			break;
 		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 			obj = tmpe->object.vm_object;
 			if (obj != NULL) {
 				VM_OBJECT_RLOCK(obj);
 				vm_pageout_object_deactivate_pages(map->pmap, obj, desired);
 				VM_OBJECT_RUNLOCK(obj);
 			}
 		}
 		tmpe = tmpe->next;
 	}
 
 	/*
 	 * Remove all mappings if a process is swapped out, this will free page
 	 * table pages.
 	 */
 	if (desired == 0 && nothingwired) {
 		pmap_remove(vm_map_pmap(map), vm_map_min(map),
 		    vm_map_max(map));
 	}
 
 	vm_map_unlock(map);
 }
 #endif		/* !defined(NO_SWAPPING) */
 
 /*
  * Attempt to acquire all of the necessary locks to launder a page and
  * then call through the clustering layer to PUTPAGES.  Wait a short
  * time for a vnode lock.
  *
  * Requires the page and object lock on entry, releases both before return.
  * Returns 0 on success and an errno otherwise.
  */
 static int
 vm_pageout_clean(vm_page_t m)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	vm_object_t object;
 	vm_pindex_t pindex;
 	int error, lockmode;
 
 	vm_page_assert_locked(m);
 	object = m->object;
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	error = 0;
 	vp = NULL;
 	mp = NULL;
 
 	/*
 	 * The object is already known NOT to be dead.   It
 	 * is possible for the vget() to block the whole
 	 * pageout daemon, but the new low-memory handling
 	 * code should prevent it.
 	 *
 	 * We can't wait forever for the vnode lock, we might
 	 * deadlock due to a vn_read() getting stuck in
 	 * vm_wait while holding this vnode.  We skip the 
 	 * vnode if we can't get it in a reasonable amount
 	 * of time.
 	 */
 	if (object->type == OBJT_VNODE) {
 		vm_page_unlock(m);
 		vp = object->handle;
 		if (vp->v_type == VREG &&
 		    vn_start_write(vp, &mp, V_NOWAIT) != 0) {
 			mp = NULL;
 			error = EDEADLK;
 			goto unlock_all;
 		}
 		KASSERT(mp != NULL,
 		    ("vp %p with NULL v_mount", vp));
 		vm_object_reference_locked(object);
 		pindex = m->pindex;
 		VM_OBJECT_WUNLOCK(object);
 		lockmode = MNT_SHARED_WRITES(vp->v_mount) ?
 		    LK_SHARED : LK_EXCLUSIVE;
 		if (vget(vp, lockmode | LK_TIMELOCK, curthread)) {
 			vp = NULL;
 			error = EDEADLK;
 			goto unlock_mp;
 		}
 		VM_OBJECT_WLOCK(object);
 		vm_page_lock(m);
 		/*
 		 * While the object and page were unlocked, the page
 		 * may have been:
 		 * (1) moved to a different queue,
 		 * (2) reallocated to a different object,
 		 * (3) reallocated to a different offset, or
 		 * (4) cleaned.
 		 */
 		if (m->queue != PQ_INACTIVE || m->object != object ||
 		    m->pindex != pindex || m->dirty == 0) {
 			vm_page_unlock(m);
 			error = ENXIO;
 			goto unlock_all;
 		}
 
 		/*
 		 * The page may have been busied or held while the object
 		 * and page locks were released.
 		 */
 		if (vm_page_busied(m) || m->hold_count != 0) {
 			vm_page_unlock(m);
 			error = EBUSY;
 			goto unlock_all;
 		}
 	}
 
 	/*
 	 * If a page is dirty, then it is either being washed
 	 * (but not yet cleaned) or it is still in the
 	 * laundry.  If it is still in the laundry, then we
 	 * start the cleaning operation. 
 	 */
 	if (vm_pageout_cluster(m) == 0)
 		error = EIO;
 
 unlock_all:
 	VM_OBJECT_WUNLOCK(object);
 
 unlock_mp:
 	vm_page_lock_assert(m, MA_NOTOWNED);
 	if (mp != NULL) {
 		if (vp != NULL)
 			vput(vp);
 		vm_object_deallocate(object);
 		vn_finished_write(mp);
 	}
 
 	return (error);
 }
 
 /*
  *	vm_pageout_scan does the dirty work for the pageout daemon.
  *
  *	pass 0 - Update active LRU/deactivate pages
  *	pass 1 - Move inactive to cache or free
  *	pass 2 - Launder dirty pages
  */
 static void
 vm_pageout_scan(struct vm_domain *vmd, int pass)
 {
 	vm_page_t m, next;
 	struct vm_pagequeue *pq;
 	vm_object_t object;
 	long min_scan;
 	int act_delta, addl_page_shortage, deficit, error, maxlaunder, maxscan;
 	int page_shortage, scan_tick, scanned, starting_page_shortage;
 	int vnodes_skipped;
 	boolean_t pageout_ok, queues_locked;
 
 	/*
 	 * If we need to reclaim memory ask kernel caches to return
 	 * some.  We rate limit to avoid thrashing.
 	 */
 	if (vmd == &vm_dom[0] && pass > 0 &&
 	    (time_uptime - lowmem_uptime) >= lowmem_period) {
 		/*
 		 * Decrease registered cache sizes.
 		 */
 		SDT_PROBE0(vm, , , vm__lowmem_scan);
 		EVENTHANDLER_INVOKE(vm_lowmem, 0);
 		/*
 		 * We do this explicitly after the caches have been
 		 * drained above.
 		 */
 		uma_reclaim();
 		lowmem_uptime = time_uptime;
 	}
 
 	/*
 	 * The addl_page_shortage is the number of temporarily
 	 * stuck pages in the inactive queue.  In other words, the
 	 * number of pages from the inactive count that should be
 	 * discounted in setting the target for the active queue scan.
 	 */
 	addl_page_shortage = 0;
 
 	/*
 	 * Calculate the number of pages we want to either free or move
 	 * to the cache.
 	 */
 	if (pass > 0) {
 		deficit = atomic_readandclear_int(&vm_pageout_deficit);
 		page_shortage = vm_paging_target() + deficit;
 	} else
 		page_shortage = deficit = 0;
 	starting_page_shortage = page_shortage;
 
 	/*
 	 * maxlaunder limits the number of dirty pages we flush per scan.
 	 * For most systems a smaller value (16 or 32) is more robust under
 	 * extreme memory and disk pressure because any unnecessary writes
 	 * to disk can result in extreme performance degredation.  However,
 	 * systems with excessive dirty pages (especially when MAP_NOSYNC is
 	 * used) will die horribly with limited laundering.  If the pageout
 	 * daemon cannot clean enough pages in the first pass, we let it go
 	 * all out in succeeding passes.
 	 */
 	if ((maxlaunder = vm_max_launder) <= 1)
 		maxlaunder = 1;
 	if (pass > 1)
 		maxlaunder = 10000;
 
 	vnodes_skipped = 0;
 
 	/*
 	 * Start scanning the inactive queue for pages we can move to the
 	 * cache or free.  The scan will stop when the target is reached or
 	 * we have scanned the entire inactive queue.  Note that m->act_count
 	 * is not used to form decisions for the inactive queue, only for the
 	 * active queue.
 	 */
 	pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
 	maxscan = pq->pq_cnt;
 	vm_pagequeue_lock(pq);
 	queues_locked = TRUE;
 	for (m = TAILQ_FIRST(&pq->pq_pl);
 	     m != NULL && maxscan-- > 0 && page_shortage > 0;
 	     m = next) {
 		vm_pagequeue_assert_locked(pq);
 		KASSERT(queues_locked, ("unlocked queues"));
 		KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m));
 
 		PCPU_INC(cnt.v_pdpages);
 		next = TAILQ_NEXT(m, plinks.q);
 
 		/*
 		 * skip marker pages
 		 */
 		if (m->flags & PG_MARKER)
 			continue;
 
 		KASSERT((m->flags & PG_FICTITIOUS) == 0,
 		    ("Fictitious page %p cannot be in inactive queue", m));
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 		    ("Unmanaged page %p cannot be in inactive queue", m));
 
 		/*
 		 * The page or object lock acquisitions fail if the
 		 * page was removed from the queue or moved to a
 		 * different position within the queue.  In either
 		 * case, addl_page_shortage should not be incremented.
 		 */
 		if (!vm_pageout_page_lock(m, &next))
 			goto unlock_page;
 		else if (m->hold_count != 0) {
 			/*
 			 * Held pages are essentially stuck in the
 			 * queue.  So, they ought to be discounted
 			 * from the inactive count.  See the
 			 * calculation of the page_shortage for the
 			 * loop over the active queue below.
 			 */
 			addl_page_shortage++;
 			goto unlock_page;
 		}
 		object = m->object;
 		if (!VM_OBJECT_TRYWLOCK(object)) {
 			if (!vm_pageout_fallback_object_lock(m, &next))
 				goto unlock_object;
 			else if (m->hold_count != 0) {
 				addl_page_shortage++;
 				goto unlock_object;
 			}
 		}
 		if (vm_page_busied(m)) {
 			/*
 			 * Don't mess with busy pages.  Leave them at
 			 * the front of the queue.  Most likely, they
 			 * are being paged out and will leave the
 			 * queue shortly after the scan finishes.  So,
 			 * they ought to be discounted from the
 			 * inactive count.
 			 */
 			addl_page_shortage++;
 unlock_object:
 			VM_OBJECT_WUNLOCK(object);
 unlock_page:
 			vm_page_unlock(m);
 			continue;
 		}
 		KASSERT(m->hold_count == 0, ("Held page %p", m));
 
 		/*
 		 * We unlock the inactive page queue, invalidating the
 		 * 'next' pointer.  Use our marker to remember our
 		 * place.
 		 */
 		TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q);
 		vm_pagequeue_unlock(pq);
 		queues_locked = FALSE;
 
 		/*
 		 * Invalid pages can be easily freed. They cannot be
 		 * mapped, vm_page_free() asserts this.
 		 */
 		if (m->valid == 0)
 			goto free_page;
 
 		/*
 		 * If the page has been referenced and the object is not dead,
 		 * reactivate or requeue the page depending on whether the
 		 * object is mapped.
 		 */
 		if ((m->aflags & PGA_REFERENCED) != 0) {
 			vm_page_aflag_clear(m, PGA_REFERENCED);
 			act_delta = 1;
 		} else
 			act_delta = 0;
 		if (object->ref_count != 0) {
 			act_delta += pmap_ts_referenced(m);
 		} else {
 			KASSERT(!pmap_page_is_mapped(m),
 			    ("vm_pageout_scan: page %p is mapped", m));
 		}
 		if (act_delta != 0) {
 			if (object->ref_count != 0) {
 				vm_page_activate(m);
 
 				/*
 				 * Increase the activation count if the page
 				 * was referenced while in the inactive queue.
 				 * This makes it less likely that the page will
 				 * be returned prematurely to the inactive
 				 * queue.
  				 */
 				m->act_count += act_delta + ACT_ADVANCE;
 				goto drop_page;
 			} else if ((object->flags & OBJ_DEAD) == 0)
 				goto requeue_page;
 		}
 
 		/*
 		 * If the page appears to be clean at the machine-independent
 		 * layer, then remove all of its mappings from the pmap in
 		 * anticipation of placing it onto the cache queue.  If,
 		 * however, any of the page's mappings allow write access,
 		 * then the page may still be modified until the last of those
 		 * mappings are removed.
 		 */
 		if (object->ref_count != 0) {
 			vm_page_test_dirty(m);
 			if (m->dirty == 0)
 				pmap_remove_all(m);
 		}
 
 		if (m->dirty == 0) {
 			/*
 			 * Clean pages can be freed.
 			 */
 free_page:
 			vm_page_free(m);
 			PCPU_INC(cnt.v_dfree);
 			--page_shortage;
 		} else if ((object->flags & OBJ_DEAD) != 0) {
 			/*
 			 * Leave dirty pages from dead objects at the front of
 			 * the queue.  They are being paged out and freed by
 			 * the thread that destroyed the object.  They will
 			 * leave the queue shortly after the scan finishes, so 
 			 * they should be discounted from the inactive count.
 			 */
 			addl_page_shortage++;
 		} else if ((m->flags & PG_WINATCFLS) == 0 && pass < 2) {
 			/*
 			 * Dirty pages need to be paged out, but flushing
 			 * a page is extremely expensive versus freeing
 			 * a clean page.  Rather then artificially limiting
 			 * the number of pages we can flush, we instead give
 			 * dirty pages extra priority on the inactive queue
 			 * by forcing them to be cycled through the queue
 			 * twice before being flushed, after which the
 			 * (now clean) page will cycle through once more
 			 * before being freed.  This significantly extends
 			 * the thrash point for a heavily loaded machine.
 			 */
 			m->flags |= PG_WINATCFLS;
 requeue_page:
 			vm_pagequeue_lock(pq);
 			queues_locked = TRUE;
 			vm_page_requeue_locked(m);
 		} else if (maxlaunder > 0) {
 			/*
 			 * We always want to try to flush some dirty pages if
 			 * we encounter them, to keep the system stable.
 			 * Normally this number is small, but under extreme
 			 * pressure where there are insufficient clean pages
 			 * on the inactive queue, we may have to go all out.
 			 */
 
 			if (object->type != OBJT_SWAP &&
 			    object->type != OBJT_DEFAULT)
 				pageout_ok = TRUE;
 			else if (disable_swap_pageouts)
 				pageout_ok = FALSE;
 			else if (defer_swap_pageouts)
 				pageout_ok = vm_page_count_min();
 			else
 				pageout_ok = TRUE;
 			if (!pageout_ok)
 				goto requeue_page;
 			error = vm_pageout_clean(m);
 			/*
 			 * Decrement page_shortage on success to account for
 			 * the (future) cleaned page.  Otherwise we could wind
 			 * up laundering or cleaning too many pages.
 			 */
 			if (error == 0) {
 				page_shortage--;
 				maxlaunder--;
 			} else if (error == EDEADLK) {
 				pageout_lock_miss++;
 				vnodes_skipped++;
 			} else if (error == EBUSY) {
 				addl_page_shortage++;
 			}
 			vm_page_lock_assert(m, MA_NOTOWNED);
 			goto relock_queues;
 		}
 drop_page:
 		vm_page_unlock(m);
 		VM_OBJECT_WUNLOCK(object);
 relock_queues:
 		if (!queues_locked) {
 			vm_pagequeue_lock(pq);
 			queues_locked = TRUE;
 		}
 		next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q);
 		TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
 	}
 	vm_pagequeue_unlock(pq);
 
 #if !defined(NO_SWAPPING)
 	/*
 	 * Wakeup the swapout daemon if we didn't cache or free the targeted
 	 * number of pages. 
 	 */
 	if (vm_swap_enabled && page_shortage > 0)
 		vm_req_vmdaemon(VM_SWAP_NORMAL);
 #endif
 
 	/*
 	 * Wakeup the sync daemon if we skipped a vnode in a writeable object
 	 * and we didn't cache or free enough pages.
 	 */
 	if (vnodes_skipped > 0 && page_shortage > vm_cnt.v_free_target -
 	    vm_cnt.v_free_min)
 		(void)speedup_syncer();
 
 	/*
 	 * If the inactive queue scan fails repeatedly to meet its
 	 * target, kill the largest process.
 	 */
 	vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage);
 
 	/*
 	 * Compute the number of pages we want to try to move from the
 	 * active queue to the inactive queue.
 	 */
 	page_shortage = vm_cnt.v_inactive_target - vm_cnt.v_inactive_count +
 	    vm_paging_target() + deficit + addl_page_shortage;
 
 	pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
 	vm_pagequeue_lock(pq);
 	maxscan = pq->pq_cnt;
 
 	/*
 	 * If we're just idle polling attempt to visit every
 	 * active page within 'update_period' seconds.
 	 */
 	scan_tick = ticks;
 	if (vm_pageout_update_period != 0) {
 		min_scan = pq->pq_cnt;
 		min_scan *= scan_tick - vmd->vmd_last_active_scan;
 		min_scan /= hz * vm_pageout_update_period;
 	} else
 		min_scan = 0;
 	if (min_scan > 0 || (page_shortage > 0 && maxscan > 0))
 		vmd->vmd_last_active_scan = scan_tick;
 
 	/*
 	 * Scan the active queue for pages that can be deactivated.  Update
 	 * the per-page activity counter and use it to identify deactivation
 	 * candidates.
 	 */
 	for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <
 	    min_scan || (page_shortage > 0 && scanned < maxscan)); m = next,
 	    scanned++) {
 
 		KASSERT(m->queue == PQ_ACTIVE,
 		    ("vm_pageout_scan: page %p isn't active", m));
 
 		next = TAILQ_NEXT(m, plinks.q);
 		if ((m->flags & PG_MARKER) != 0)
 			continue;
 		KASSERT((m->flags & PG_FICTITIOUS) == 0,
 		    ("Fictitious page %p cannot be in active queue", m));
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 		    ("Unmanaged page %p cannot be in active queue", m));
 		if (!vm_pageout_page_lock(m, &next)) {
 			vm_page_unlock(m);
 			continue;
 		}
 
 		/*
 		 * The count for pagedaemon pages is done after checking the
 		 * page for eligibility...
 		 */
 		PCPU_INC(cnt.v_pdpages);
 
 		/*
 		 * Check to see "how much" the page has been used.
 		 */
 		if ((m->aflags & PGA_REFERENCED) != 0) {
 			vm_page_aflag_clear(m, PGA_REFERENCED);
 			act_delta = 1;
 		} else
 			act_delta = 0;
 
 		/*
 		 * Unlocked object ref count check.  Two races are possible.
 		 * 1) The ref was transitioning to zero and we saw non-zero,
 		 *    the pmap bits will be checked unnecessarily.
 		 * 2) The ref was transitioning to one and we saw zero. 
 		 *    The page lock prevents a new reference to this page so
 		 *    we need not check the reference bits.
 		 */
 		if (m->object->ref_count != 0)
 			act_delta += pmap_ts_referenced(m);
 
 		/*
 		 * Advance or decay the act_count based on recent usage.
 		 */
 		if (act_delta != 0) {
 			m->act_count += ACT_ADVANCE + act_delta;
 			if (m->act_count > ACT_MAX)
 				m->act_count = ACT_MAX;
 		} else
 			m->act_count -= min(m->act_count, ACT_DECLINE);
 
 		/*
 		 * Move this page to the tail of the active or inactive
 		 * queue depending on usage.
 		 */
 		if (m->act_count == 0) {
 			/* Dequeue to avoid later lock recursion. */
 			vm_page_dequeue_locked(m);
 			vm_page_deactivate(m);
 			page_shortage--;
 		} else
 			vm_page_requeue_locked(m);
 		vm_page_unlock(m);
 	}
 	vm_pagequeue_unlock(pq);
 #if !defined(NO_SWAPPING)
 	/*
 	 * Idle process swapout -- run once per second.
 	 */
 	if (vm_swap_idle_enabled) {
 		static long lsec;
 		if (time_second != lsec) {
 			vm_req_vmdaemon(VM_SWAP_IDLE);
 			lsec = time_second;
 		}
 	}
 #endif
 }
 
 static int vm_pageout_oom_vote;
 
 /*
  * The pagedaemon threads randlomly select one to perform the
  * OOM.  Trying to kill processes before all pagedaemons
  * failed to reach free target is premature.
  */
 static void
 vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
     int starting_page_shortage)
 {
 	int old_vote;
 
 	if (starting_page_shortage <= 0 || starting_page_shortage !=
 	    page_shortage)
 		vmd->vmd_oom_seq = 0;
 	else
 		vmd->vmd_oom_seq++;
 	if (vmd->vmd_oom_seq < vm_pageout_oom_seq) {
 		if (vmd->vmd_oom) {
 			vmd->vmd_oom = FALSE;
 			atomic_subtract_int(&vm_pageout_oom_vote, 1);
 		}
 		return;
 	}
 
 	/*
 	 * Do not follow the call sequence until OOM condition is
 	 * cleared.
 	 */
 	vmd->vmd_oom_seq = 0;
 
 	if (vmd->vmd_oom)
 		return;
 
 	vmd->vmd_oom = TRUE;
 	old_vote = atomic_fetchadd_int(&vm_pageout_oom_vote, 1);
 	if (old_vote != vm_ndomains - 1)
 		return;
 
 	/*
 	 * The current pagedaemon thread is the last in the quorum to
 	 * start OOM.  Initiate the selection and signaling of the
 	 * victim.
 	 */
 	vm_pageout_oom(VM_OOM_MEM);
 
 	/*
 	 * After one round of OOM terror, recall our vote.  On the
 	 * next pass, current pagedaemon would vote again if the low
 	 * memory condition is still there, due to vmd_oom being
 	 * false.
 	 */
 	vmd->vmd_oom = FALSE;
 	atomic_subtract_int(&vm_pageout_oom_vote, 1);
 }
 
 /*
  * The OOM killer is the page daemon's action of last resort when
  * memory allocation requests have been stalled for a prolonged period
  * of time because it cannot reclaim memory.  This function computes
  * the approximate number of physical pages that could be reclaimed if
  * the specified address space is destroyed.
  *
  * Private, anonymous memory owned by the address space is the
  * principal resource that we expect to recover after an OOM kill.
  * Since the physical pages mapped by the address space's COW entries
  * are typically shared pages, they are unlikely to be released and so
  * they are not counted.
  *
  * To get to the point where the page daemon runs the OOM killer, its
  * efforts to write-back vnode-backed pages may have stalled.  This
  * could be caused by a memory allocation deadlock in the write path
  * that might be resolved by an OOM kill.  Therefore, physical pages
  * belonging to vnode-backed objects are counted, because they might
  * be freed without being written out first if the address space holds
  * the last reference to an unlinked vnode.
  *
  * Similarly, physical pages belonging to OBJT_PHYS objects are
  * counted because the address space might hold the last reference to
  * the object.
  */
 static long
 vm_pageout_oom_pagecount(struct vmspace *vmspace)
 {
 	vm_map_t map;
 	vm_map_entry_t entry;
 	vm_object_t obj;
 	long res;
 
 	map = &vmspace->vm_map;
 	KASSERT(!map->system_map, ("system map"));
 	sx_assert(&map->lock, SA_LOCKED);
 	res = 0;
 	for (entry = map->header.next; entry != &map->header;
 	    entry = entry->next) {
 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
 			continue;
 		obj = entry->object.vm_object;
 		if (obj == NULL)
 			continue;
 		if ((entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0 &&
 		    obj->ref_count != 1)
 			continue;
 		switch (obj->type) {
 		case OBJT_DEFAULT:
 		case OBJT_SWAP:
 		case OBJT_PHYS:
 		case OBJT_VNODE:
 			res += obj->resident_page_count;
 			break;
 		}
 	}
 	return (res);
 }
 
 void
 vm_pageout_oom(int shortage)
 {
 	struct proc *p, *bigproc;
 	vm_offset_t size, bigsize;
 	struct thread *td;
 	struct vmspace *vm;
 
 	/*
 	 * We keep the process bigproc locked once we find it to keep anyone
 	 * from messing with it; however, there is a possibility of
 	 * deadlock if process B is bigproc and one of it's child processes
 	 * attempts to propagate a signal to B while we are waiting for A's
 	 * lock while walking this list.  To avoid this, we don't block on
 	 * the process lock but just skip a process if it is already locked.
 	 */
 	bigproc = NULL;
 	bigsize = 0;
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		int breakout;
 
 		PROC_LOCK(p);
 
 		/*
 		 * If this is a system, protected or killed process, skip it.
 		 */
 		if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC |
 		    P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 ||
 		    p->p_pid == 1 || P_KILLED(p) ||
 		    (p->p_pid < 48 && swap_pager_avail != 0)) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		/*
 		 * If the process is in a non-running type state,
 		 * don't touch it.  Check all the threads individually.
 		 */
 		breakout = 0;
 		FOREACH_THREAD_IN_PROC(p, td) {
 			thread_lock(td);
 			if (!TD_ON_RUNQ(td) &&
 			    !TD_IS_RUNNING(td) &&
 			    !TD_IS_SLEEPING(td) &&
 			    !TD_IS_SUSPENDED(td) &&
 			    !TD_IS_SWAPPED(td)) {
 				thread_unlock(td);
 				breakout = 1;
 				break;
 			}
 			thread_unlock(td);
 		}
 		if (breakout) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		/*
 		 * get the process size
 		 */
 		vm = vmspace_acquire_ref(p);
 		if (vm == NULL) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		_PHOLD(p);
 		if (!vm_map_trylock_read(&vm->vm_map)) {
 			_PRELE(p);
 			PROC_UNLOCK(p);
 			vmspace_free(vm);
 			continue;
 		}
 		PROC_UNLOCK(p);
 		size = vmspace_swap_count(vm);
 		if (shortage == VM_OOM_MEM)
 			size += vm_pageout_oom_pagecount(vm);
 		vm_map_unlock_read(&vm->vm_map);
 		vmspace_free(vm);
 
 		/*
 		 * If this process is bigger than the biggest one,
 		 * remember it.
 		 */
 		if (size > bigsize) {
 			if (bigproc != NULL)
 				PRELE(bigproc);
 			bigproc = p;
 			bigsize = size;
 		} else {
 			PRELE(p);
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	if (bigproc != NULL) {
 		if (vm_panic_on_oom != 0)
 			panic("out of swap space");
 		PROC_LOCK(bigproc);
 		killproc(bigproc, "out of swap space");
 		sched_nice(bigproc, PRIO_MIN);
 		_PRELE(bigproc);
 		PROC_UNLOCK(bigproc);
 		wakeup(&vm_cnt.v_free_count);
 	}
 }
 
 static void
 vm_pageout_worker(void *arg)
 {
 	struct vm_domain *domain;
 	int domidx;
 
 	domidx = (uintptr_t)arg;
 	domain = &vm_dom[domidx];
 
 	/*
 	 * XXXKIB It could be useful to bind pageout daemon threads to
 	 * the cores belonging to the domain, from which vm_page_array
 	 * is allocated.
 	 */
 
 	KASSERT(domain->vmd_segs != 0, ("domain without segments"));
 	domain->vmd_last_active_scan = ticks;
 	vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE);
 	vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE);
 	TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl,
 	    &domain->vmd_inacthead, plinks.q);
 
 	/*
 	 * The pageout daemon worker is never done, so loop forever.
 	 */
 	while (TRUE) {
 		/*
 		 * If we have enough free memory, wakeup waiters.  Do
 		 * not clear vm_pages_needed until we reach our target,
 		 * otherwise we may be woken up over and over again and
 		 * waste a lot of cpu.
 		 */
 		mtx_lock(&vm_page_queue_free_mtx);
 		if (vm_pages_needed && !vm_page_count_min()) {
 			if (!vm_paging_needed())
 				vm_pages_needed = 0;
 			wakeup(&vm_cnt.v_free_count);
 		}
 		if (vm_pages_needed) {
 			/*
 			 * We're still not done.  Either vm_pages_needed was
 			 * set by another thread during the previous scan
 			 * (typically, this happens during a level 0 scan) or
 			 * vm_pages_needed was already set and the scan failed
 			 * to free enough pages.  If we haven't yet performed
 			 * a level >= 2 scan (unlimited dirty cleaning), then
 			 * upgrade the level and scan again now.  Otherwise,
 			 * sleep a bit and try again later.  While sleeping,
 			 * vm_pages_needed can be cleared.
 			 */
 			if (domain->vmd_pass > 1)
 				msleep(&vm_pages_needed,
 				    &vm_page_queue_free_mtx, PVM, "psleep",
 				    hz / 2);
 		} else {
 			/*
 			 * Good enough, sleep until required to refresh
 			 * stats.
 			 */
 			msleep(&vm_pages_needed, &vm_page_queue_free_mtx,
 			    PVM, "psleep", hz);
 		}
 		if (vm_pages_needed) {
 			vm_cnt.v_pdwakeups++;
 			domain->vmd_pass++;
 		} else
 			domain->vmd_pass = 0;
 		mtx_unlock(&vm_page_queue_free_mtx);
 		vm_pageout_scan(domain, domain->vmd_pass);
 	}
 }
 
 /*
  *	vm_pageout_init initialises basic pageout daemon settings.
  */
 static void
 vm_pageout_init(void)
 {
 	/*
 	 * Initialize some paging parameters.
 	 */
 	vm_cnt.v_interrupt_free_min = 2;
 	if (vm_cnt.v_page_count < 2000)
 		vm_pageout_page_count = 8;
 
 	/*
 	 * v_free_reserved needs to include enough for the largest
 	 * swap pager structures plus enough for any pv_entry structs
 	 * when paging. 
 	 */
 	if (vm_cnt.v_page_count > 1024)
 		vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200;
 	else
 		vm_cnt.v_free_min = 4;
 	vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
 	    vm_cnt.v_interrupt_free_min;
 	vm_cnt.v_free_reserved = vm_pageout_page_count +
 	    vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768);
 	vm_cnt.v_free_severe = vm_cnt.v_free_min / 2;
 	vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved;
 	vm_cnt.v_free_min += vm_cnt.v_free_reserved;
 	vm_cnt.v_free_severe += vm_cnt.v_free_reserved;
 	vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2;
 	if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3)
 		vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3;
 
 	/*
 	 * Set the default wakeup threshold to be 10% above the minimum
 	 * page limit.  This keeps the steady state out of shortfall.
 	 */
 	vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11;
 
 	/*
 	 * Set interval in seconds for active scan.  We want to visit each
 	 * page at least once every ten minutes.  This is to prevent worst
 	 * case paging behaviors with stale active LRU.
 	 */
 	if (vm_pageout_update_period == 0)
 		vm_pageout_update_period = 600;
 
 	/* XXX does not really belong here */
 	if (vm_page_max_wired == 0)
 		vm_page_max_wired = vm_cnt.v_free_count / 3;
 }
 
 /*
  *     vm_pageout is the high level pageout daemon.
  */
 static void
 vm_pageout(void)
 {
 	int error;
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	int i;
 #endif
 
 	swap_pager_swap_init();
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	for (i = 1; i < vm_ndomains; i++) {
 		error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
 		    curproc, NULL, 0, 0, "dom%d", i);
 		if (error != 0) {
 			panic("starting pageout for domain %d, error %d\n",
 			    i, error);
 		}
 	}
 #endif
 	error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL,
 	    0, 0, "uma");
 	if (error != 0)
 		panic("starting uma_reclaim helper, error %d\n", error);
 	vm_pageout_worker((void *)(uintptr_t)0);
 }
 
 /*
  * Unless the free page queue lock is held by the caller, this function
  * should be regarded as advisory.  Specifically, the caller should
  * not msleep() on &vm_cnt.v_free_count following this function unless
  * the free page queue lock is held until the msleep() is performed.
  */
 void
 pagedaemon_wakeup(void)
 {
 
 	if (!vm_pages_needed && curthread->td_proc != pageproc) {
 		vm_pages_needed = 1;
 		wakeup(&vm_pages_needed);
 	}
 }
 
 #if !defined(NO_SWAPPING)
 static void
 vm_req_vmdaemon(int req)
 {
 	static int lastrun = 0;
 
 	mtx_lock(&vm_daemon_mtx);
 	vm_pageout_req_swapout |= req;
 	if ((ticks > (lastrun + hz)) || (ticks < lastrun)) {
 		wakeup(&vm_daemon_needed);
 		lastrun = ticks;
 	}
 	mtx_unlock(&vm_daemon_mtx);
 }
 
 static void
 vm_daemon(void)
 {
 	struct rlimit rsslim;
 	struct proc *p;
 	struct thread *td;
 	struct vmspace *vm;
 	int breakout, swapout_flags, tryagain, attempts;
 #ifdef RACCT
 	uint64_t rsize, ravailable;
 #endif
 
 	while (TRUE) {
 		mtx_lock(&vm_daemon_mtx);
 		msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep",
 #ifdef RACCT
 		    racct_enable ? hz : 0
 #else
 		    0
 #endif
 		);
 		swapout_flags = vm_pageout_req_swapout;
 		vm_pageout_req_swapout = 0;
 		mtx_unlock(&vm_daemon_mtx);
 		if (swapout_flags)
 			swapout_procs(swapout_flags);
 
 		/*
 		 * scan the processes for exceeding their rlimits or if
 		 * process is swapped out -- deactivate pages
 		 */
 		tryagain = 0;
 		attempts = 0;
 again:
 		attempts++;
 		sx_slock(&allproc_lock);
 		FOREACH_PROC_IN_SYSTEM(p) {
 			vm_pindex_t limit, size;
 
 			/*
 			 * if this is a system process or if we have already
 			 * looked at this process, skip it.
 			 */
 			PROC_LOCK(p);
 			if (p->p_state != PRS_NORMAL ||
 			    p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			/*
 			 * if the process is in a non-running type state,
 			 * don't touch it.
 			 */
 			breakout = 0;
 			FOREACH_THREAD_IN_PROC(p, td) {
 				thread_lock(td);
 				if (!TD_ON_RUNQ(td) &&
 				    !TD_IS_RUNNING(td) &&
 				    !TD_IS_SLEEPING(td) &&
 				    !TD_IS_SUSPENDED(td)) {
 					thread_unlock(td);
 					breakout = 1;
 					break;
 				}
 				thread_unlock(td);
 			}
 			if (breakout) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			/*
 			 * get a limit
 			 */
 			lim_rlimit_proc(p, RLIMIT_RSS, &rsslim);
 			limit = OFF_TO_IDX(
 			    qmin(rsslim.rlim_cur, rsslim.rlim_max));
 
 			/*
 			 * let processes that are swapped out really be
 			 * swapped out set the limit to nothing (will force a
 			 * swap-out.)
 			 */
 			if ((p->p_flag & P_INMEM) == 0)
 				limit = 0;	/* XXX */
 			vm = vmspace_acquire_ref(p);
 			PROC_UNLOCK(p);
 			if (vm == NULL)
 				continue;
 
 			size = vmspace_resident_count(vm);
 			if (size >= limit) {
 				vm_pageout_map_deactivate_pages(
 				    &vm->vm_map, limit);
 			}
 #ifdef RACCT
 			if (racct_enable) {
 				rsize = IDX_TO_OFF(size);
 				PROC_LOCK(p);
 				racct_set(p, RACCT_RSS, rsize);
 				ravailable = racct_get_available(p, RACCT_RSS);
 				PROC_UNLOCK(p);
 				if (rsize > ravailable) {
 					/*
 					 * Don't be overly aggressive; this
 					 * might be an innocent process,
 					 * and the limit could've been exceeded
 					 * by some memory hog.  Don't try
 					 * to deactivate more than 1/4th
 					 * of process' resident set size.
 					 */
 					if (attempts <= 8) {
 						if (ravailable < rsize -
 						    (rsize / 4)) {
 							ravailable = rsize -
 							    (rsize / 4);
 						}
 					}
 					vm_pageout_map_deactivate_pages(
 					    &vm->vm_map,
 					    OFF_TO_IDX(ravailable));
 					/* Update RSS usage after paging out. */
 					size = vmspace_resident_count(vm);
 					rsize = IDX_TO_OFF(size);
 					PROC_LOCK(p);
 					racct_set(p, RACCT_RSS, rsize);
 					PROC_UNLOCK(p);
 					if (rsize > ravailable)
 						tryagain = 1;
 				}
 			}
 #endif
 			vmspace_free(vm);
 		}
 		sx_sunlock(&allproc_lock);
 		if (tryagain != 0 && attempts <= 10)
 			goto again;
 	}
 }
 #endif			/* !defined(NO_SWAPPING) */
Index: head/sys/vm/vm_phys.c
===================================================================
--- head/sys/vm/vm_phys.c	(revision 297747)
+++ head/sys/vm/vm_phys.c	(revision 297748)
@@ -1,1525 +1,1529 @@
 /*-
  * Copyright (c) 2002-2006 Rice University
  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Alan L. Cox,
  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  *	Physical memory system implementation
  *
  * Any external functions defined by this module are only to be used by the
  * virtual memory system.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
-#if MAXMEMDOM > 1
 #include <sys/proc.h>
-#endif
 #include <sys/queue.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/tree.h>
 #include <sys/vmmeter.h>
 #include <sys/seq.h>
 
 #include <ddb/ddb.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_phys.h>
 
 #include <vm/vm_domain.h>
 
 _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
     "Too many physsegs.");
 
+#ifdef VM_NUMA_ALLOC
 struct mem_affinity *mem_affinity;
 int *mem_locality;
+#endif
 
 int vm_ndomains = 1;
 
 struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
 int vm_phys_nsegs;
 
 struct vm_phys_fictitious_seg;
 static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
     struct vm_phys_fictitious_seg *);
 
 RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
     RB_INITIALIZER(_vm_phys_fictitious_tree);
 
 struct vm_phys_fictitious_seg {
 	RB_ENTRY(vm_phys_fictitious_seg) node;
 	/* Memory region data */
 	vm_paddr_t	start;
 	vm_paddr_t	end;
 	vm_page_t	first_page;
 };
 
 RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
     vm_phys_fictitious_cmp);
 
 static struct rwlock vm_phys_fictitious_reg_lock;
 MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
 
 static struct vm_freelist
     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
 
 static int vm_nfreelists;
 
 /*
  * Provides the mapping from VM_FREELIST_* to free list indices (flind).
  */
 static int vm_freelist_to_flind[VM_NFREELIST];
 
 CTASSERT(VM_FREELIST_DEFAULT == 0);
 
 #ifdef VM_FREELIST_ISADMA
 #define	VM_ISADMA_BOUNDARY	16777216
 #endif
 #ifdef VM_FREELIST_DMA32
 #define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
 #endif
 
 /*
  * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
  * the ordering of the free list boundaries.
  */
 #if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY)
 CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY);
 #endif
 #if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
 CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
 #endif
 
 static int cnt_prezero;
 SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
     &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");
 
 static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
 SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
     NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
 
 static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
 
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
 SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
     NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
 #endif
 
 SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
     &vm_ndomains, 0, "Number of physical memory domains available.");
 
 /*
  * Default to first-touch + round-robin.
  */
 static struct mtx vm_default_policy_mtx;
 MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
     MTX_DEF);
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 static struct vm_domain_policy vm_default_policy =
     VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
 #else
 /* Use round-robin so the domain policy code will only try once per allocation */
 static struct vm_domain_policy vm_default_policy =
     VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0);
 #endif
 
 static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
     int order);
 static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary);
 static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
 static int vm_phys_paddr_to_segind(vm_paddr_t pa);
 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
     int order);
 
 static int
 sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
 {
 	char policy_name[32];
 	int error;
 
 	mtx_lock(&vm_default_policy_mtx);
 
 	/* Map policy to output string */
 	switch (vm_default_policy.p.policy) {
 	case VM_POLICY_FIRST_TOUCH:
 		strcpy(policy_name, "first-touch");
 		break;
 	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
 		strcpy(policy_name, "first-touch-rr");
 		break;
 	case VM_POLICY_ROUND_ROBIN:
 	default:
 		strcpy(policy_name, "rr");
 		break;
 	}
 	mtx_unlock(&vm_default_policy_mtx);
 
 	error = sysctl_handle_string(oidp, &policy_name[0],
 	    sizeof(policy_name), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	mtx_lock(&vm_default_policy_mtx);
 	/* Set: match on the subset of policies that make sense as a default */
 	if (strcmp("first-touch-rr", policy_name) == 0) {
 		vm_domain_policy_set(&vm_default_policy,
 		    VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
 	} else if (strcmp("first-touch", policy_name) == 0) {
 		vm_domain_policy_set(&vm_default_policy,
 		    VM_POLICY_FIRST_TOUCH, 0);
 	} else if (strcmp("rr", policy_name) == 0) {
 		vm_domain_policy_set(&vm_default_policy,
 		    VM_POLICY_ROUND_ROBIN, 0);
 	} else {
 		error = EINVAL;
 		goto finish;
 	}
 
 	error = 0;
 finish:
 	mtx_unlock(&vm_default_policy_mtx);
 	return (error);
 }
 
 SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW,
     0, 0, sysctl_vm_default_policy, "A",
     "Default policy (rr, first-touch, first-touch-rr");
 
 /*
  * Red-black tree helpers for vm fictitious range management.
  */
 static inline int
 vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
     struct vm_phys_fictitious_seg *range)
 {
 
 	KASSERT(range->start != 0 && range->end != 0,
 	    ("Invalid range passed on search for vm_fictitious page"));
 	if (p->start >= range->end)
 		return (1);
 	if (p->start < range->start)
 		return (-1);
 
 	return (0);
 }
 
 static int
 vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
     struct vm_phys_fictitious_seg *p2)
 {
 
 	/* Check if this is a search for a page */
 	if (p1->end == 0)
 		return (vm_phys_fictitious_in_range(p1, p2));
 
 	KASSERT(p2->end != 0,
     ("Invalid range passed as second parameter to vm fictitious comparison"));
 
 	/* Searching to add a new range */
 	if (p1->end <= p2->start)
 		return (-1);
 	if (p1->start >= p2->end)
 		return (1);
 
 	panic("Trying to add overlapping vm fictitious ranges:\n"
 	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
 	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
 }
 
 static __inline int
 vm_rr_selectdomain(void)
 {
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	struct thread *td;
 
 	td = curthread;
 
 	td->td_dom_rr_idx++;
 	td->td_dom_rr_idx %= vm_ndomains;
 	return (td->td_dom_rr_idx);
 #else
 	return (0);
 #endif
 }
 
 /*
  * Initialise a VM domain iterator.
  *
  * Check the thread policy, then the proc policy,
  * then default to the system policy.
  *
  * Later on the various layers will have this logic
  * plumbed into them and the phys code will be explicitly
  * handed a VM domain policy to use.
  */
 static void
 vm_policy_iterator_init(struct vm_domain_iterator *vi)
 {
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	struct vm_domain_policy lcl;
 #endif
 
 	vm_domain_iterator_init(vi);
 
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	/* Copy out the thread policy */
 	vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy);
 	if (lcl.p.policy != VM_POLICY_NONE) {
 		/* Thread policy is present; use it */
 		vm_domain_iterator_set_policy(vi, &lcl);
 		return;
 	}
 
 	vm_domain_policy_localcopy(&lcl,
 	    &curthread->td_proc->p_vm_dom_policy);
 	if (lcl.p.policy != VM_POLICY_NONE) {
 		/* Process policy is present; use it */
 		vm_domain_iterator_set_policy(vi, &lcl);
 		return;
 	}
 #endif
 	/* Use system default policy */
 	vm_domain_iterator_set_policy(vi, &vm_default_policy);
 }
 
 static void
 vm_policy_iterator_finish(struct vm_domain_iterator *vi)
 {
 
 	vm_domain_iterator_cleanup(vi);
 }
 
 boolean_t
 vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
 {
 	struct vm_phys_seg *s;
 	int idx;
 
 	while ((idx = ffsl(mask)) != 0) {
 		idx--;	/* ffsl counts from 1 */
 		mask &= ~(1UL << idx);
 		s = &vm_phys_segs[idx];
 		if (low < s->end && high > s->start)
 			return (TRUE);
 	}
 	return (FALSE);
 }
 
 /*
  * Outputs the state of the physical memory allocator, specifically,
  * the amount of physical memory in each free list.
  */
 static int
 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
 	struct vm_freelist *fl;
 	int dom, error, flind, oind, pind;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
 	for (dom = 0; dom < vm_ndomains; dom++) {
 		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
 		for (flind = 0; flind < vm_nfreelists; flind++) {
 			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
 			    "\n  ORDER (SIZE)  |  NUMBER"
 			    "\n              ", flind);
 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
 				sbuf_printf(&sbuf, "  |  POOL %d", pind);
 			sbuf_printf(&sbuf, "\n--            ");
 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
 				sbuf_printf(&sbuf, "-- --      ");
 			sbuf_printf(&sbuf, "--\n");
 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
 				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
 				    1 << (PAGE_SHIFT - 10 + oind));
 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 				fl = vm_phys_free_queues[dom][flind][pind];
 					sbuf_printf(&sbuf, "  |  %6d",
 					    fl[oind].lcnt);
 				}
 				sbuf_printf(&sbuf, "\n");
 			}
 		}
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 /*
  * Outputs the set of physical memory segments.
  */
 static int
 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
 	struct vm_phys_seg *seg;
 	int error, segind;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
 		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
 		seg = &vm_phys_segs[segind];
 		sbuf_printf(&sbuf, "start:     %#jx\n",
 		    (uintmax_t)seg->start);
 		sbuf_printf(&sbuf, "end:       %#jx\n",
 		    (uintmax_t)seg->end);
 		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
 		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 /*
  * Return affinity, or -1 if there's no affinity information.
  */
 int
 vm_phys_mem_affinity(int f, int t)
 {
 
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	if (mem_locality == NULL)
 		return (-1);
 	if (f >= vm_ndomains || t >= vm_ndomains)
 		return (-1);
 	return (mem_locality[f * vm_ndomains + t]);
 #else
 	return (-1);
 #endif
 }
 
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 /*
  * Outputs the VM locality table.
  */
 static int
 sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
 	int error, i, j;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 
 	sbuf_printf(&sbuf, "\n");
 
 	for (i = 0; i < vm_ndomains; i++) {
 		sbuf_printf(&sbuf, "%d: ", i);
 		for (j = 0; j < vm_ndomains; j++) {
 			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
 		}
 		sbuf_printf(&sbuf, "\n");
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 #endif
 
 static void
 vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
 {
 
 	m->order = order;
 	if (tail)
 		TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
 	else
 		TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
 	fl[order].lcnt++;
 }
 
 static void
 vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
 {
 
 	TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
 	fl[order].lcnt--;
 	m->order = VM_NFREEORDER;
 }
 
 /*
  * Create a physical memory segment.
  */
 static void
 _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
 {
 	struct vm_phys_seg *seg;
 
 	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
 	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
 	KASSERT(domain < vm_ndomains,
 	    ("vm_phys_create_seg: invalid domain provided"));
 	seg = &vm_phys_segs[vm_phys_nsegs++];
 	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
 		*seg = *(seg - 1);
 		seg--;
 	}
 	seg->start = start;
 	seg->end = end;
 	seg->domain = domain;
 }
 
 static void
 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
 {
+#ifdef VM_NUMA_ALLOC
 	int i;
 
 	if (mem_affinity == NULL) {
 		_vm_phys_create_seg(start, end, 0);
 		return;
 	}
 
 	for (i = 0;; i++) {
 		if (mem_affinity[i].end == 0)
 			panic("Reached end of affinity info");
 		if (mem_affinity[i].end <= start)
 			continue;
 		if (mem_affinity[i].start > start)
 			panic("No affinity info for start %jx",
 			    (uintmax_t)start);
 		if (mem_affinity[i].end >= end) {
 			_vm_phys_create_seg(start, end,
 			    mem_affinity[i].domain);
 			break;
 		}
 		_vm_phys_create_seg(start, mem_affinity[i].end,
 		    mem_affinity[i].domain);
 		start = mem_affinity[i].end;
 	}
+#else
+	_vm_phys_create_seg(start, end, 0);
+#endif
 }
 
 /*
  * Add a physical memory segment.
  */
 void
 vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
 {
 	vm_paddr_t paddr;
 
 	KASSERT((start & PAGE_MASK) == 0,
 	    ("vm_phys_define_seg: start is not page aligned"));
 	KASSERT((end & PAGE_MASK) == 0,
 	    ("vm_phys_define_seg: end is not page aligned"));
 
 	/*
 	 * Split the physical memory segment if it spans two or more free
 	 * list boundaries.
 	 */
 	paddr = start;
 #ifdef	VM_FREELIST_ISADMA
 	if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) {
 		vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY);
 		paddr = VM_ISADMA_BOUNDARY;
 	}
 #endif
 #ifdef	VM_FREELIST_LOWMEM
 	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
 		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
 		paddr = VM_LOWMEM_BOUNDARY;
 	}
 #endif
 #ifdef	VM_FREELIST_DMA32
 	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
 		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
 		paddr = VM_DMA32_BOUNDARY;
 	}
 #endif
 	vm_phys_create_seg(paddr, end);
 }
 
 /*
  * Initialize the physical memory allocator.
  *
  * Requires that vm_page_array is initialized!
  */
 void
 vm_phys_init(void)
 {
 	struct vm_freelist *fl;
 	struct vm_phys_seg *seg;
 	u_long npages;
 	int dom, flind, freelist, oind, pind, segind;
 
 	/*
 	 * Compute the number of free lists, and generate the mapping from the
 	 * manifest constants VM_FREELIST_* to the free list indices.
 	 *
 	 * Initially, the entries of vm_freelist_to_flind[] are set to either
 	 * 0 or 1 to indicate which free lists should be created.
 	 */
 	npages = 0;
 	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
 		seg = &vm_phys_segs[segind];
 #ifdef	VM_FREELIST_ISADMA
 		if (seg->end <= VM_ISADMA_BOUNDARY)
 			vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1;
 		else
 #endif
 #ifdef	VM_FREELIST_LOWMEM
 		if (seg->end <= VM_LOWMEM_BOUNDARY)
 			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
 		else
 #endif
 #ifdef	VM_FREELIST_DMA32
 		if (
 #ifdef	VM_DMA32_NPAGES_THRESHOLD
 		    /*
 		     * Create the DMA32 free list only if the amount of
 		     * physical memory above physical address 4G exceeds the
 		     * given threshold.
 		     */
 		    npages > VM_DMA32_NPAGES_THRESHOLD &&
 #endif
 		    seg->end <= VM_DMA32_BOUNDARY)
 			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
 		else
 #endif
 		{
 			npages += atop(seg->end - seg->start);
 			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
 		}
 	}
 	/* Change each entry into a running total of the free lists. */
 	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
 		vm_freelist_to_flind[freelist] +=
 		    vm_freelist_to_flind[freelist - 1];
 	}
 	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
 	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
 	/* Change each entry into a free list index. */
 	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
 		vm_freelist_to_flind[freelist]--;
 
 	/*
 	 * Initialize the first_page and free_queues fields of each physical
 	 * memory segment.
 	 */
 #ifdef VM_PHYSSEG_SPARSE
 	npages = 0;
 #endif
 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
 		seg = &vm_phys_segs[segind];
 #ifdef VM_PHYSSEG_SPARSE
 		seg->first_page = &vm_page_array[npages];
 		npages += atop(seg->end - seg->start);
 #else
 		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
 #endif
 #ifdef	VM_FREELIST_ISADMA
 		if (seg->end <= VM_ISADMA_BOUNDARY) {
 			flind = vm_freelist_to_flind[VM_FREELIST_ISADMA];
 			KASSERT(flind >= 0,
 			    ("vm_phys_init: ISADMA flind < 0"));
 		} else
 #endif
 #ifdef	VM_FREELIST_LOWMEM
 		if (seg->end <= VM_LOWMEM_BOUNDARY) {
 			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
 			KASSERT(flind >= 0,
 			    ("vm_phys_init: LOWMEM flind < 0"));
 		} else
 #endif
 #ifdef	VM_FREELIST_DMA32
 		if (seg->end <= VM_DMA32_BOUNDARY) {
 			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
 			KASSERT(flind >= 0,
 			    ("vm_phys_init: DMA32 flind < 0"));
 		} else
 #endif
 		{
 			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
 			KASSERT(flind >= 0,
 			    ("vm_phys_init: DEFAULT flind < 0"));
 		}
 		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
 	}
 
 	/*
 	 * Initialize the free queues.
 	 */
 	for (dom = 0; dom < vm_ndomains; dom++) {
 		for (flind = 0; flind < vm_nfreelists; flind++) {
 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 				fl = vm_phys_free_queues[dom][flind][pind];
 				for (oind = 0; oind < VM_NFREEORDER; oind++)
 					TAILQ_INIT(&fl[oind].pl);
 			}
 		}
 	}
 
 	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
 }
 
 /*
  * Split a contiguous, power of two-sized set of physical pages.
  */
 static __inline void
 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
 {
 	vm_page_t m_buddy;
 
 	while (oind > order) {
 		oind--;
 		m_buddy = &m[1 << oind];
 		KASSERT(m_buddy->order == VM_NFREEORDER,
 		    ("vm_phys_split_pages: page %p has unexpected order %d",
 		    m_buddy, m_buddy->order));
 		vm_freelist_add(fl, m_buddy, oind, 0);
         }
 }
 
 /*
  * Initialize a physical page and add it to the free lists.
  */
 void
 vm_phys_add_page(vm_paddr_t pa)
 {
 	vm_page_t m;
 	struct vm_domain *vmd;
 
 	vm_cnt.v_page_count++;
 	m = vm_phys_paddr_to_vm_page(pa);
 	m->phys_addr = pa;
 	m->queue = PQ_NONE;
 	m->segind = vm_phys_paddr_to_segind(pa);
 	vmd = vm_phys_domain(m);
 	vmd->vmd_page_count++;
 	vmd->vmd_segs |= 1UL << m->segind;
 	KASSERT(m->order == VM_NFREEORDER,
 	    ("vm_phys_add_page: page %p has unexpected order %d",
 	    m, m->order));
 	m->pool = VM_FREEPOOL_DEFAULT;
 	pmap_page_init(m);
 	mtx_lock(&vm_page_queue_free_mtx);
 	vm_phys_freecnt_adj(m, 1);
 	vm_phys_free_pages(m, 0);
 	mtx_unlock(&vm_page_queue_free_mtx);
 }
 
 /*
  * Allocate a contiguous, power of two-sized set of physical pages
  * from the free lists.
  *
  * The free page queues must be locked.
  */
 vm_page_t
 vm_phys_alloc_pages(int pool, int order)
 {
 	vm_page_t m;
 	int domain, flind;
 	struct vm_domain_iterator vi;
 
 	KASSERT(pool < VM_NFREEPOOL,
 	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
 	KASSERT(order < VM_NFREEORDER,
 	    ("vm_phys_alloc_pages: order %d is out of range", order));
 
 	vm_policy_iterator_init(&vi);
 
 	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
 		for (flind = 0; flind < vm_nfreelists; flind++) {
 			m = vm_phys_alloc_domain_pages(domain, flind, pool,
 			    order);
 			if (m != NULL)
 				return (m);
 		}
 	}
 
 	vm_policy_iterator_finish(&vi);
 	return (NULL);
 }
 
 /*
  * Allocate a contiguous, power of two-sized set of physical pages from the
  * specified free list.  The free list must be specified using one of the
  * manifest constants VM_FREELIST_*.
  *
  * The free page queues must be locked.
  */
 vm_page_t
 vm_phys_alloc_freelist_pages(int freelist, int pool, int order)
 {
 	vm_page_t m;
 	struct vm_domain_iterator vi;
 	int domain;
 
 	KASSERT(freelist < VM_NFREELIST,
 	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
 	    freelist));
 	KASSERT(pool < VM_NFREEPOOL,
 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
 	KASSERT(order < VM_NFREEORDER,
 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
 
 	vm_policy_iterator_init(&vi);
 
 	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
 		m = vm_phys_alloc_domain_pages(domain,
 		    vm_freelist_to_flind[freelist], pool, order);
 		if (m != NULL)
 			return (m);
 	}
 
 	vm_policy_iterator_finish(&vi);
 	return (NULL);
 }
 
 static vm_page_t
 vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
 {	
 	struct vm_freelist *fl;
 	struct vm_freelist *alt;
 	int oind, pind;
 	vm_page_t m;
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	fl = &vm_phys_free_queues[domain][flind][pool][0];
 	for (oind = order; oind < VM_NFREEORDER; oind++) {
 		m = TAILQ_FIRST(&fl[oind].pl);
 		if (m != NULL) {
 			vm_freelist_rem(fl, m, oind);
 			vm_phys_split_pages(m, oind, fl, order);
 			return (m);
 		}
 	}
 
 	/*
 	 * The given pool was empty.  Find the largest
 	 * contiguous, power-of-two-sized set of pages in any
 	 * pool.  Transfer these pages to the given pool, and
 	 * use them to satisfy the allocation.
 	 */
 	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
 		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 			alt = &vm_phys_free_queues[domain][flind][pind][0];
 			m = TAILQ_FIRST(&alt[oind].pl);
 			if (m != NULL) {
 				vm_freelist_rem(alt, m, oind);
 				vm_phys_set_pool(pool, m, oind);
 				vm_phys_split_pages(m, oind, fl, order);
 				return (m);
 			}
 		}
 	}
 	return (NULL);
 }
 
 /*
  * Find the vm_page corresponding to the given physical address.
  */
 vm_page_t
 vm_phys_paddr_to_vm_page(vm_paddr_t pa)
 {
 	struct vm_phys_seg *seg;
 	int segind;
 
 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
 		seg = &vm_phys_segs[segind];
 		if (pa >= seg->start && pa < seg->end)
 			return (&seg->first_page[atop(pa - seg->start)]);
 	}
 	return (NULL);
 }
 
 vm_page_t
 vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
 {
 	struct vm_phys_fictitious_seg tmp, *seg;
 	vm_page_t m;
 
 	m = NULL;
 	tmp.start = pa;
 	tmp.end = 0;
 
 	rw_rlock(&vm_phys_fictitious_reg_lock);
 	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
 	rw_runlock(&vm_phys_fictitious_reg_lock);
 	if (seg == NULL)
 		return (NULL);
 
 	m = &seg->first_page[atop(pa - seg->start)];
 	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));
 
 	return (m);
 }
 
 static inline void
 vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
     long page_count, vm_memattr_t memattr)
 {
 	long i;
 
 	for (i = 0; i < page_count; i++) {
 		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
 		range[i].oflags &= ~VPO_UNMANAGED;
 		range[i].busy_lock = VPB_UNBUSIED;
 	}
 }
 
 int
 vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
     vm_memattr_t memattr)
 {
 	struct vm_phys_fictitious_seg *seg;
 	vm_page_t fp;
 	long page_count;
 #ifdef VM_PHYSSEG_DENSE
 	long pi, pe;
 	long dpage_count;
 #endif
 
 	KASSERT(start < end,
 	    ("Start of segment isn't less than end (start: %jx end: %jx)",
 	    (uintmax_t)start, (uintmax_t)end));
 
 	page_count = (end - start) / PAGE_SIZE;
 
 #ifdef VM_PHYSSEG_DENSE
 	pi = atop(start);
 	pe = atop(end);
 	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
 		fp = &vm_page_array[pi - first_page];
 		if ((pe - first_page) > vm_page_array_size) {
 			/*
 			 * We have a segment that starts inside
 			 * of vm_page_array, but ends outside of it.
 			 *
 			 * Use vm_page_array pages for those that are
 			 * inside of the vm_page_array range, and
 			 * allocate the remaining ones.
 			 */
 			dpage_count = vm_page_array_size - (pi - first_page);
 			vm_phys_fictitious_init_range(fp, start, dpage_count,
 			    memattr);
 			page_count -= dpage_count;
 			start += ptoa(dpage_count);
 			goto alloc;
 		}
 		/*
 		 * We can allocate the full range from vm_page_array,
 		 * so there's no need to register the range in the tree.
 		 */
 		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
 		return (0);
 	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
 		/*
 		 * We have a segment that ends inside of vm_page_array,
 		 * but starts outside of it.
 		 */
 		fp = &vm_page_array[0];
 		dpage_count = pe - first_page;
 		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
 		    memattr);
 		end -= ptoa(dpage_count);
 		page_count -= dpage_count;
 		goto alloc;
 	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
 		/*
 		 * Trying to register a fictitious range that expands before
 		 * and after vm_page_array.
 		 */
 		return (EINVAL);
 	} else {
 alloc:
 #endif
 		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
 		    M_WAITOK | M_ZERO);
 #ifdef VM_PHYSSEG_DENSE
 	}
 #endif
 	vm_phys_fictitious_init_range(fp, start, page_count, memattr);
 
 	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
 	seg->start = start;
 	seg->end = end;
 	seg->first_page = fp;
 
 	rw_wlock(&vm_phys_fictitious_reg_lock);
 	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
 	rw_wunlock(&vm_phys_fictitious_reg_lock);
 
 	return (0);
 }
 
 void
 vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
 {
 	struct vm_phys_fictitious_seg *seg, tmp;
 #ifdef VM_PHYSSEG_DENSE
 	long pi, pe;
 #endif
 
 	KASSERT(start < end,
 	    ("Start of segment isn't less than end (start: %jx end: %jx)",
 	    (uintmax_t)start, (uintmax_t)end));
 
 #ifdef VM_PHYSSEG_DENSE
 	pi = atop(start);
 	pe = atop(end);
 	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
 		if ((pe - first_page) <= vm_page_array_size) {
 			/*
 			 * This segment was allocated using vm_page_array
 			 * only, there's nothing to do since those pages
 			 * were never added to the tree.
 			 */
 			return;
 		}
 		/*
 		 * We have a segment that starts inside
 		 * of vm_page_array, but ends outside of it.
 		 *
 		 * Calculate how many pages were added to the
 		 * tree and free them.
 		 */
 		start = ptoa(first_page + vm_page_array_size);
 	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
 		/*
 		 * We have a segment that ends inside of vm_page_array,
 		 * but starts outside of it.
 		 */
 		end = ptoa(first_page);
 	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
 		/* Since it's not possible to register such a range, panic. */
 		panic(
 		    "Unregistering not registered fictitious range [%#jx:%#jx]",
 		    (uintmax_t)start, (uintmax_t)end);
 	}
 #endif
 	tmp.start = start;
 	tmp.end = 0;
 
 	rw_wlock(&vm_phys_fictitious_reg_lock);
 	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
 	if (seg->start != start || seg->end != end) {
 		rw_wunlock(&vm_phys_fictitious_reg_lock);
 		panic(
 		    "Unregistering not registered fictitious range [%#jx:%#jx]",
 		    (uintmax_t)start, (uintmax_t)end);
 	}
 	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
 	rw_wunlock(&vm_phys_fictitious_reg_lock);
 	free(seg->first_page, M_FICT_PAGES);
 	free(seg, M_FICT_PAGES);
 }
 
 /*
  * Find the segment containing the given physical address.
  */
 static int
 vm_phys_paddr_to_segind(vm_paddr_t pa)
 {
 	struct vm_phys_seg *seg;
 	int segind;
 
 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
 		seg = &vm_phys_segs[segind];
 		if (pa >= seg->start && pa < seg->end)
 			return (segind);
 	}
 	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" ,
 	    (uintmax_t)pa);
 }
 
 /*
  * Free a contiguous, power of two-sized set of physical pages.
  *
  * The free page queues must be locked.
  */
 void
 vm_phys_free_pages(vm_page_t m, int order)
 {
 	struct vm_freelist *fl;
 	struct vm_phys_seg *seg;
 	vm_paddr_t pa;
 	vm_page_t m_buddy;
 
 	KASSERT(m->order == VM_NFREEORDER,
 	    ("vm_phys_free_pages: page %p has unexpected order %d",
 	    m, m->order));
 	KASSERT(m->pool < VM_NFREEPOOL,
 	    ("vm_phys_free_pages: page %p has unexpected pool %d",
 	    m, m->pool));
 	KASSERT(order < VM_NFREEORDER,
 	    ("vm_phys_free_pages: order %d is out of range", order));
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	seg = &vm_phys_segs[m->segind];
 	if (order < VM_NFREEORDER - 1) {
 		pa = VM_PAGE_TO_PHYS(m);
 		do {
 			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
 			if (pa < seg->start || pa >= seg->end)
 				break;
 			m_buddy = &seg->first_page[atop(pa - seg->start)];
 			if (m_buddy->order != order)
 				break;
 			fl = (*seg->free_queues)[m_buddy->pool];
 			vm_freelist_rem(fl, m_buddy, order);
 			if (m_buddy->pool != m->pool)
 				vm_phys_set_pool(m->pool, m_buddy, order);
 			order++;
 			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
 			m = &seg->first_page[atop(pa - seg->start)];
 		} while (order < VM_NFREEORDER - 1);
 	}
 	fl = (*seg->free_queues)[m->pool];
 	vm_freelist_add(fl, m, order, 1);
 }
 
 /*
  * Free a contiguous, arbitrarily sized set of physical pages.
  *
  * The free page queues must be locked.
  */
 void
 vm_phys_free_contig(vm_page_t m, u_long npages)
 {
 	u_int n;
 	int order;
 
 	/*
 	 * Avoid unnecessary coalescing by freeing the pages in the largest
 	 * possible power-of-two-sized subsets.
 	 */
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	for (;; npages -= n) {
 		/*
 		 * Unsigned "min" is used here so that "order" is assigned
 		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
 		 * or the low-order bits of its physical address are zero
 		 * because the size of a physical address exceeds the size of
 		 * a long.
 		 */
 		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
 		    VM_NFREEORDER - 1);
 		n = 1 << order;
 		if (npages < n)
 			break;
 		vm_phys_free_pages(m, order);
 		m += n;
 	}
 	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
 	for (; npages > 0; npages -= n) {
 		order = flsl(npages) - 1;
 		n = 1 << order;
 		vm_phys_free_pages(m, order);
 		m += n;
 	}
 }
 
 /*
  * Scan physical memory between the specified addresses "low" and "high" for a
  * run of contiguous physical pages that satisfy the specified conditions, and
  * return the lowest page in the run.  The specified "alignment" determines
  * the alignment of the lowest physical page in the run.  If the specified
  * "boundary" is non-zero, then the run of physical pages cannot span a
  * physical address that is a multiple of "boundary".
  *
  * "npages" must be greater than zero.  Both "alignment" and "boundary" must
  * be a power of two.
  */
 vm_page_t
 vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary, int options)
 {
 	vm_paddr_t pa_end;
 	vm_page_t m_end, m_run, m_start;
 	struct vm_phys_seg *seg;
 	int segind;
 
 	KASSERT(npages > 0, ("npages is 0"));
 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
 	if (low >= high)
 		return (NULL);
 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
 		seg = &vm_phys_segs[segind];
 		if (seg->start >= high)
 			break;
 		if (low >= seg->end)
 			continue;
 		if (low <= seg->start)
 			m_start = seg->first_page;
 		else
 			m_start = &seg->first_page[atop(low - seg->start)];
 		if (high < seg->end)
 			pa_end = high;
 		else
 			pa_end = seg->end;
 		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
 			continue;
 		m_end = &seg->first_page[atop(pa_end - seg->start)];
 		m_run = vm_page_scan_contig(npages, m_start, m_end,
 		    alignment, boundary, options);
 		if (m_run != NULL)
 			return (m_run);
 	}
 	return (NULL);
 }
 
 /*
  * Set the pool for a contiguous, power of two-sized set of physical pages. 
  */
 void
 vm_phys_set_pool(int pool, vm_page_t m, int order)
 {
 	vm_page_t m_tmp;
 
 	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
 		m_tmp->pool = pool;
 }
 
 /*
  * Search for the given physical page "m" in the free lists.  If the search
  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
  * FALSE, indicating that "m" is not in the free lists.
  *
  * The free page queues must be locked.
  */
 boolean_t
 vm_phys_unfree_page(vm_page_t m)
 {
 	struct vm_freelist *fl;
 	struct vm_phys_seg *seg;
 	vm_paddr_t pa, pa_half;
 	vm_page_t m_set, m_tmp;
 	int order;
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 
 	/*
 	 * First, find the contiguous, power of two-sized set of free
 	 * physical pages containing the given physical page "m" and
 	 * assign it to "m_set".
 	 */
 	seg = &vm_phys_segs[m->segind];
 	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
 	    order < VM_NFREEORDER - 1; ) {
 		order++;
 		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
 		if (pa >= seg->start)
 			m_set = &seg->first_page[atop(pa - seg->start)];
 		else
 			return (FALSE);
 	}
 	if (m_set->order < order)
 		return (FALSE);
 	if (m_set->order == VM_NFREEORDER)
 		return (FALSE);
 	KASSERT(m_set->order < VM_NFREEORDER,
 	    ("vm_phys_unfree_page: page %p has unexpected order %d",
 	    m_set, m_set->order));
 
 	/*
 	 * Next, remove "m_set" from the free lists.  Finally, extract
 	 * "m" from "m_set" using an iterative algorithm: While "m_set"
 	 * is larger than a page, shrink "m_set" by returning the half
 	 * of "m_set" that does not contain "m" to the free lists.
 	 */
 	fl = (*seg->free_queues)[m_set->pool];
 	order = m_set->order;
 	vm_freelist_rem(fl, m_set, order);
 	while (order > 0) {
 		order--;
 		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
 		if (m->phys_addr < pa_half)
 			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
 		else {
 			m_tmp = m_set;
 			m_set = &seg->first_page[atop(pa_half - seg->start)];
 		}
 		vm_freelist_add(fl, m_tmp, order, 0);
 	}
 	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
 	return (TRUE);
 }
 
 /*
  * Try to zero one physical page.  Used by an idle priority thread.
  */
 boolean_t
 vm_phys_zero_pages_idle(void)
 {
 	static struct vm_freelist *fl;
 	static int flind, oind, pind;
 	vm_page_t m, m_tmp;
 	int domain;
 
 	domain = vm_rr_selectdomain();
 	fl = vm_phys_free_queues[domain][0][0];
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	for (;;) {
 		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) {
 			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
 				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
 					vm_phys_unfree_page(m_tmp);
 					vm_phys_freecnt_adj(m, -1);
 					mtx_unlock(&vm_page_queue_free_mtx);
 					pmap_zero_page_idle(m_tmp);
 					m_tmp->flags |= PG_ZERO;
 					mtx_lock(&vm_page_queue_free_mtx);
 					vm_phys_freecnt_adj(m, 1);
 					vm_phys_free_pages(m_tmp, 0);
 					vm_page_zero_count++;
 					cnt_prezero++;
 					return (TRUE);
 				}
 			}
 		}
 		oind++;
 		if (oind == VM_NFREEORDER) {
 			oind = 0;
 			pind++;
 			if (pind == VM_NFREEPOOL) {
 				pind = 0;
 				flind++;
 				if (flind == vm_nfreelists)
 					flind = 0;
 			}
 			fl = vm_phys_free_queues[domain][flind][pind];
 		}
 	}
 }
 
 /*
  * Allocate a contiguous set of physical pages of the given size
  * "npages" from the free lists.  All of the physical pages must be at
  * or above the given physical address "low" and below the given
  * physical address "high".  The given value "alignment" determines the
  * alignment of the first physical page in the set.  If the given value
  * "boundary" is non-zero, then the set of physical pages cannot cross
  * any physical address boundary that is a multiple of that value.  Both
  * "alignment" and "boundary" must be a power of two.
  */
 vm_page_t
 vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary)
 {
 	vm_paddr_t pa_end, pa_start;
 	vm_page_t m_run;
 	struct vm_domain_iterator vi;
 	struct vm_phys_seg *seg;
 	int domain, segind;
 
 	KASSERT(npages > 0, ("npages is 0"));
 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	if (low >= high)
 		return (NULL);
 	vm_policy_iterator_init(&vi);
 restartdom:
 	if (vm_domain_iterator_run(&vi, &domain) != 0) {
 		vm_policy_iterator_finish(&vi);
 		return (NULL);
 	}
 	m_run = NULL;
 	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
 		seg = &vm_phys_segs[segind];
 		if (seg->start >= high || seg->domain != domain)
 			continue;
 		if (low >= seg->end)
 			break;
 		if (low <= seg->start)
 			pa_start = seg->start;
 		else
 			pa_start = low;
 		if (high < seg->end)
 			pa_end = high;
 		else
 			pa_end = seg->end;
 		if (pa_end - pa_start < ptoa(npages))
 			continue;
 		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
 		    alignment, boundary);
 		if (m_run != NULL)
 			break;
 	}
 	if (m_run == NULL && !vm_domain_iterator_isdone(&vi))
 		goto restartdom;
 	vm_policy_iterator_finish(&vi);
 	return (m_run);
 }
 
 /*
  * Allocate a run of contiguous physical pages from the free list for the
  * specified segment.
  */
 static vm_page_t
 vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
 	struct vm_freelist *fl;
 	vm_paddr_t pa, pa_end, size;
 	vm_page_t m, m_ret;
 	u_long npages_end;
 	int oind, order, pind;
 
 	KASSERT(npages > 0, ("npages is 0"));
 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	/* Compute the queue that is the best fit for npages. */
 	for (order = 0; (1 << order) < npages; order++);
 	/* Search for a run satisfying the specified conditions. */
 	size = npages << PAGE_SHIFT;
 	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
 	    oind++) {
 		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 			fl = (*seg->free_queues)[pind];
 			TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
 				/*
 				 * Is the size of this allocation request
 				 * larger than the largest block size?
 				 */
 				if (order >= VM_NFREEORDER) {
 					/*
 					 * Determine if a sufficient number of
 					 * subsequent blocks to satisfy the
 					 * allocation request are free.
 					 */
 					pa = VM_PAGE_TO_PHYS(m_ret);
 					pa_end = pa + size;
 					for (;;) {
 						pa += 1 << (PAGE_SHIFT +
 						    VM_NFREEORDER - 1);
 						if (pa >= pa_end ||
 						    pa < seg->start ||
 						    pa >= seg->end)
 							break;
 						m = &seg->first_page[atop(pa -
 						    seg->start)];
 						if (m->order != VM_NFREEORDER -
 						    1)
 							break;
 					}
 					/* If not, go to the next block. */
 					if (pa < pa_end)
 						continue;
 				}
 
 				/*
 				 * Determine if the blocks are within the
 				 * given range, satisfy the given alignment,
 				 * and do not cross the given boundary.
 				 */
 				pa = VM_PAGE_TO_PHYS(m_ret);
 				pa_end = pa + size;
 				if (pa >= low && pa_end <= high && (pa &
 				    (alignment - 1)) == 0 && ((pa ^ (pa_end -
 				    1)) & ~(boundary - 1)) == 0)
 					goto done;
 			}
 		}
 	}
 	return (NULL);
 done:
 	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
 		fl = (*seg->free_queues)[m->pool];
 		vm_freelist_rem(fl, m, m->order);
 	}
 	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
 		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
 	fl = (*seg->free_queues)[m_ret->pool];
 	vm_phys_split_pages(m_ret, oind, fl, order);
 	/* Return excess pages to the free lists. */
 	npages_end = roundup2(npages, 1 << imin(oind, order));
 	if (npages < npages_end)
 		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
 	return (m_ret);
 }
 
 #ifdef DDB
 /*
  * Show the number of physical pages in each of the free lists.
  */
 DB_SHOW_COMMAND(freepages, db_show_freepages)
 {
 	struct vm_freelist *fl;
 	int flind, oind, pind, dom;
 
 	for (dom = 0; dom < vm_ndomains; dom++) {
 		db_printf("DOMAIN: %d\n", dom);
 		for (flind = 0; flind < vm_nfreelists; flind++) {
 			db_printf("FREE LIST %d:\n"
 			    "\n  ORDER (SIZE)  |  NUMBER"
 			    "\n              ", flind);
 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
 				db_printf("  |  POOL %d", pind);
 			db_printf("\n--            ");
 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
 				db_printf("-- --      ");
 			db_printf("--\n");
 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
 				db_printf("  %2.2d (%6.6dK)", oind,
 				    1 << (PAGE_SHIFT - 10 + oind));
 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 				fl = vm_phys_free_queues[dom][flind][pind];
 					db_printf("  |  %6.6d", fl[oind].lcnt);
 				}
 				db_printf("\n");
 			}
 			db_printf("\n");
 		}
 		db_printf("\n");
 	}
 }
 #endif
Index: head/sys/vm/vm_phys.h
===================================================================
--- head/sys/vm/vm_phys.h	(revision 297747)
+++ head/sys/vm/vm_phys.h	(revision 297748)
@@ -1,126 +1,126 @@
 /*-
  * Copyright (c) 2002-2006 Rice University
  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Alan L. Cox,
  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  *	Physical memory system definitions
  */
 
 #ifndef	_VM_PHYS_H_
 #define	_VM_PHYS_H_
 
 #ifdef _KERNEL
 
 /* Domains must be dense (non-sparse) and zero-based. */
 struct mem_affinity {
 	vm_paddr_t start;
 	vm_paddr_t end;
 	int domain;
 };
 
 struct vm_freelist {
 	struct pglist pl;
 	int lcnt;
 };
 
 struct vm_phys_seg {
 	vm_paddr_t	start;
 	vm_paddr_t	end;
 	vm_page_t	first_page;
 	int		domain;
 	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
 };
 
 extern struct mem_affinity *mem_affinity;
 extern int *mem_locality;
 extern int vm_ndomains;
 extern struct vm_phys_seg vm_phys_segs[];
 extern int vm_phys_nsegs;
 
 /*
  * The following functions are only to be used by the virtual memory system.
  */
 void vm_phys_add_page(vm_paddr_t pa);
 void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end);
 vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary);
 vm_page_t vm_phys_alloc_freelist_pages(int freelist, int pool, int order);
 vm_page_t vm_phys_alloc_pages(int pool, int order);
 boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high);
 int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
     vm_memattr_t memattr);
 void vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end);
 vm_page_t vm_phys_fictitious_to_vm_page(vm_paddr_t pa);
 void vm_phys_free_contig(vm_page_t m, u_long npages);
 void vm_phys_free_pages(vm_page_t m, int order);
 void vm_phys_init(void);
 vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa);
 vm_page_t vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary, int options);
 void vm_phys_set_pool(int pool, vm_page_t m, int order);
 boolean_t vm_phys_unfree_page(vm_page_t m);
 boolean_t vm_phys_zero_pages_idle(void);
 int vm_phys_mem_affinity(int f, int t);
 
 /*
  *	vm_phys_domain:
  *
  * 	Return the memory domain the page belongs to.
  */
 static inline struct vm_domain *
 vm_phys_domain(vm_page_t m)
 {
-#if MAXMEMDOM > 1
+#ifdef VM_NUMA_ALLOC
 	int domn, segind;
 
 	/* XXXKIB try to assert that the page is managed */
 	segind = m->segind;
 	KASSERT(segind < vm_phys_nsegs, ("segind %d m %p", segind, m));
 	domn = vm_phys_segs[segind].domain;
 	KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
 	return (&vm_dom[domn]);
 #else
 	return (&vm_dom[0]);
 #endif
 }
 
 static inline void
 vm_phys_freecnt_adj(vm_page_t m, int adj)
 {
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	vm_cnt.v_free_count += adj;
 	vm_phys_domain(m)->vmd_free_count += adj;
 }
 
 #endif	/* _KERNEL */
 #endif	/* !_VM_PHYS_H_ */
Index: head/sys/x86/acpica/srat.c
===================================================================
--- head/sys/x86/acpica/srat.c	(revision 297747)
+++ head/sys/x86/acpica/srat.c	(revision 297748)
@@ -1,506 +1,521 @@
 /*-
  * Copyright (c) 2010 Hudson River Trading LLC
  * Written by: John H. Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_vm.h"
+
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/smp.h>
 #include <sys/vmmeter.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 #include <vm/vm_page.h>
 #include <vm/vm_phys.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 #include <contrib/dev/acpica/include/actables.h>
 
 #include <machine/intr_machdep.h>
 #include <x86/apicvar.h>
 
 #include <dev/acpica/acpivar.h>
 
 #if MAXMEMDOM > 1
 struct cpu_info {
 	int enabled:1;
 	int has_memory:1;
 	int domain;
 } cpus[MAX_APIC_ID + 1];
 
 struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1];
 int num_mem;
 
 static ACPI_TABLE_SRAT *srat;
 static vm_paddr_t srat_physaddr;
 
-static int vm_domains[VM_PHYSSEG_MAX];
+static int domain_pxm[MAXMEMDOM];
+static int ndomain;
 
 static ACPI_TABLE_SLIT *slit;
 static vm_paddr_t slit_physaddr;
 static int vm_locality_table[MAXMEMDOM * MAXMEMDOM];
 
 static void	srat_walk_table(acpi_subtable_handler *handler, void *arg);
 
 /*
  * SLIT parsing.
  */
 
 static void
 slit_parse_table(ACPI_TABLE_SLIT *s)
 {
 	int i, j;
 	int i_domain, j_domain;
 	int offset = 0;
 	uint8_t e;
 
 	/*
 	 * This maps the SLIT data into the VM-domain centric view.
 	 * There may be sparse entries in the PXM namespace, so
 	 * remap them to a VM-domain ID and if it doesn't exist,
 	 * skip it.
 	 *
 	 * It should result in a packed 2d array of VM-domain
 	 * locality information entries.
 	 */
 
 	if (bootverbose)
 		printf("SLIT.Localities: %d\n", (int) s->LocalityCount);
 	for (i = 0; i < s->LocalityCount; i++) {
 		i_domain = acpi_map_pxm_to_vm_domainid(i);
 		if (i_domain < 0)
 			continue;
 
 		if (bootverbose)
 			printf("%d: ", i);
 		for (j = 0; j < s->LocalityCount; j++) {
 			j_domain = acpi_map_pxm_to_vm_domainid(j);
 			if (j_domain < 0)
 				continue;
 			e = s->Entry[i * s->LocalityCount + j];
 			if (bootverbose)
 				printf("%d ", (int) e);
 			/* 255 == "no locality information" */
 			if (e == 255)
 				vm_locality_table[offset] = -1;
 			else
 				vm_locality_table[offset] = e;
 			offset++;
 		}
 		if (bootverbose)
 			printf("\n");
 	}
 }
 
 /*
  * Look for an ACPI System Locality Distance Information Table ("SLIT")
  */
 static int
 parse_slit(void)
 {
 
 	if (resource_disabled("slit", 0)) {
 		return (-1);
 	}
 
 	slit_physaddr = acpi_find_table(ACPI_SIG_SLIT);
 	if (slit_physaddr == 0) {
 		return (-1);
 	}
 
 	/*
 	 * Make a pass over the table to populate the cpus[] and
 	 * mem_info[] tables.
 	 */
 	slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT);
 	slit_parse_table(slit);
 	acpi_unmap_table(slit);
 	slit = NULL;
 
+#ifdef VM_NUMA_ALLOC
 	/* Tell the VM about it! */
 	mem_locality = vm_locality_table;
+#endif
 	return (0);
 }
 
 /*
  * SRAT parsing.
  */
 
 /*
  * Returns true if a memory range overlaps with at least one range in
  * phys_avail[].
  */
 static int
 overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end)
 {
 	int i;
 
 	for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) {
 		if (phys_avail[i + 1] < start)
 			continue;
 		if (phys_avail[i] < end)
 			return (1);
 		break;
 	}
 	return (0);
 	
 }
 
 static void
 srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg)
 {
 	ACPI_SRAT_CPU_AFFINITY *cpu;
 	ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic;
 	ACPI_SRAT_MEM_AFFINITY *mem;
 	int domain, i, slot;
 
 	switch (entry->Type) {
 	case ACPI_SRAT_TYPE_CPU_AFFINITY:
 		cpu = (ACPI_SRAT_CPU_AFFINITY *)entry;
 		domain = cpu->ProximityDomainLo |
 		    cpu->ProximityDomainHi[0] << 8 |
 		    cpu->ProximityDomainHi[1] << 16 |
 		    cpu->ProximityDomainHi[2] << 24;
 		if (bootverbose)
 			printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
 			    cpu->ApicId, domain,
 			    (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ?
 			    "enabled" : "disabled");
 		if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED))
 			break;
 		KASSERT(!cpus[cpu->ApicId].enabled,
 		    ("Duplicate local APIC ID %u", cpu->ApicId));
 		cpus[cpu->ApicId].domain = domain;
 		cpus[cpu->ApicId].enabled = 1;
 		break;
 	case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
 		x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry;
 		if (bootverbose)
 			printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
 			    x2apic->ApicId, x2apic->ProximityDomain,
 			    (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ?
 			    "enabled" : "disabled");
 		if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED))
 			break;
 		KASSERT(!cpus[x2apic->ApicId].enabled,
 		    ("Duplicate local APIC ID %u", x2apic->ApicId));
 		cpus[x2apic->ApicId].domain = x2apic->ProximityDomain;
 		cpus[x2apic->ApicId].enabled = 1;
 		break;
 	case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
 		mem = (ACPI_SRAT_MEM_AFFINITY *)entry;
 		if (bootverbose)
 			printf(
 		    "SRAT: Found memory domain %d addr %jx len %jx: %s\n",
 			    mem->ProximityDomain, (uintmax_t)mem->BaseAddress,
 			    (uintmax_t)mem->Length,
 			    (mem->Flags & ACPI_SRAT_MEM_ENABLED) ?
 			    "enabled" : "disabled");
 		if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED))
 			break;
 		if (!overlaps_phys_avail(mem->BaseAddress,
 		    mem->BaseAddress + mem->Length)) {
 			printf("SRAT: Ignoring memory at addr %jx\n",
 			    (uintmax_t)mem->BaseAddress);
 			break;
 		}
 		if (num_mem == VM_PHYSSEG_MAX) {
 			printf("SRAT: Too many memory regions\n");
 			*(int *)arg = ENXIO;
 			break;
 		}
 		slot = num_mem;
 		for (i = 0; i < num_mem; i++) {
 			if (mem_info[i].end <= mem->BaseAddress)
 				continue;
 			if (mem_info[i].start <
 			    (mem->BaseAddress + mem->Length)) {
 				printf("SRAT: Overlapping memory entries\n");
 				*(int *)arg = ENXIO;
 				return;
 			}
 			slot = i;
 		}
 		for (i = num_mem; i > slot; i--)
 			mem_info[i] = mem_info[i - 1];
 		mem_info[slot].start = mem->BaseAddress;
 		mem_info[slot].end = mem->BaseAddress + mem->Length;
 		mem_info[slot].domain = mem->ProximityDomain;
 		num_mem++;
 		break;
 	}
 }
 
 /*
  * Ensure each memory domain has at least one CPU and that each CPU
  * has at least one memory domain.
  */
 static int
 check_domains(void)
 {
 	int found, i, j;
 
 	for (i = 0; i < num_mem; i++) {
 		found = 0;
 		for (j = 0; j <= MAX_APIC_ID; j++)
 			if (cpus[j].enabled &&
 			    cpus[j].domain == mem_info[i].domain) {
 				cpus[j].has_memory = 1;
 				found++;
 			}
 		if (!found) {
 			printf("SRAT: No CPU found for memory domain %d\n",
 			    mem_info[i].domain);
 			return (ENXIO);
 		}
 	}
 	for (i = 0; i <= MAX_APIC_ID; i++)
 		if (cpus[i].enabled && !cpus[i].has_memory) {
 			printf("SRAT: No memory found for CPU %d\n", i);
 			return (ENXIO);
 		}
 	return (0);
 }
 
 /*
  * Check that the SRAT memory regions cover all of the regions in
  * phys_avail[].
  */
 static int
 check_phys_avail(void)
 {
 	vm_paddr_t address;
 	int i, j;
 
 	/* j is the current offset into phys_avail[]. */
 	address = phys_avail[0];
 	j = 0;
 	for (i = 0; i < num_mem; i++) {
 		/*
 		 * Consume as many phys_avail[] entries as fit in this
 		 * region.
 		 */
 		while (address >= mem_info[i].start &&
 		    address <= mem_info[i].end) {
 			/*
 			 * If we cover the rest of this phys_avail[] entry,
 			 * advance to the next entry.
 			 */
 			if (phys_avail[j + 1] <= mem_info[i].end) {
 				j += 2;
 				if (phys_avail[j] == 0 &&
 				    phys_avail[j + 1] == 0) {
 					return (0);
 				}
 				address = phys_avail[j];
 			} else
 				address = mem_info[i].end + 1;
 		}
 	}
 	printf("SRAT: No memory region found for %jx - %jx\n",
 	    (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]);
 	return (ENXIO);
 }
 
 /*
  * Renumber the memory domains to be compact and zero-based if not
  * already.  Returns an error if there are too many domains.
  */
 static int
 renumber_domains(void)
 {
 	int i, j, slot;
 
 	/* Enumerate all the domains. */
-	vm_ndomains = 0;
+	ndomain = 0;
 	for (i = 0; i < num_mem; i++) {
 		/* See if this domain is already known. */
-		for (j = 0; j < vm_ndomains; j++) {
-			if (vm_domains[j] >= mem_info[i].domain)
+		for (j = 0; j < ndomain; j++) {
+			if (domain_pxm[j] >= mem_info[i].domain)
 				break;
 		}
-		if (j < vm_ndomains && vm_domains[j] == mem_info[i].domain)
+		if (j < ndomain && domain_pxm[j] == mem_info[i].domain)
 			continue;
 
 		/* Insert the new domain at slot 'j'. */
 		slot = j;
-		for (j = vm_ndomains; j > slot; j--)
-			vm_domains[j] = vm_domains[j - 1];
-		vm_domains[slot] = mem_info[i].domain;
-		vm_ndomains++;
-		if (vm_ndomains > MAXMEMDOM) {
-			vm_ndomains = 1;
+		for (j = ndomain; j > slot; j--)
+			domain_pxm[j] = domain_pxm[j - 1];
+		domain_pxm[slot] = mem_info[i].domain;
+		ndomain++;
+		if (ndomain > MAXMEMDOM) {
+			ndomain = 1;
 			printf("SRAT: Too many memory domains\n");
 			return (EFBIG);
 		}
 	}
 
-	/* Renumber each domain to its index in the sorted 'domains' list. */
-	for (i = 0; i < vm_ndomains; i++) {
+	/* Renumber each domain to its index in the sorted 'domain_pxm' list. */
+	for (i = 0; i < ndomain; i++) {
 		/*
 		 * If the domain is already the right value, no need
 		 * to renumber.
 		 */
-		if (vm_domains[i] == i)
+		if (domain_pxm[i] == i)
 			continue;
 
 		/* Walk the cpu[] and mem_info[] arrays to renumber. */
 		for (j = 0; j < num_mem; j++)
-			if (mem_info[j].domain == vm_domains[i])
+			if (mem_info[j].domain == domain_pxm[i])
 				mem_info[j].domain = i;
 		for (j = 0; j <= MAX_APIC_ID; j++)
-			if (cpus[j].enabled && cpus[j].domain == vm_domains[i])
+			if (cpus[j].enabled && cpus[j].domain == domain_pxm[i])
 				cpus[j].domain = i;
 	}
-	KASSERT(vm_ndomains > 0,
-	    ("renumber_domains: invalid final vm_ndomains setup"));
 
 	return (0);
 }
 
 /*
  * Look for an ACPI System Resource Affinity Table ("SRAT")
  */
 static int
 parse_srat(void)
 {
 	int error;
 
 	if (resource_disabled("srat", 0))
 		return (-1);
 
 	srat_physaddr = acpi_find_table(ACPI_SIG_SRAT);
 	if (srat_physaddr == 0)
 		return (-1);
 
 	/*
 	 * Make a pass over the table to populate the cpus[] and
 	 * mem_info[] tables.
 	 */
 	srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT);
 	error = 0;
 	srat_walk_table(srat_parse_entry, &error);
 	acpi_unmap_table(srat);
 	srat = NULL;
 	if (error || check_domains() != 0 || check_phys_avail() != 0 ||
 	    renumber_domains() != 0) {
 		srat_physaddr = 0;
 		return (-1);
 	}
 
+#ifdef VM_NUMA_ALLOC
 	/* Point vm_phys at our memory affinity table. */
+	vm_ndomains = ndomain;
 	mem_affinity = mem_info;
+#endif
 
 	return (0);
 }
 
 static void
 init_mem_locality(void)
 {
 	int i;
 
 	/*
 	 * For now, assume -1 == "no locality information for
 	 * this pairing.
 	 */
 	for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++)
 		vm_locality_table[i] = -1;
 }
 
 static void
 parse_acpi_tables(void *dummy)
 {
 
 	if (parse_srat() < 0)
 		return;
 	init_mem_locality();
 	(void) parse_slit();
 }
 SYSINIT(parse_acpi_tables, SI_SUB_VM - 1, SI_ORDER_FIRST, parse_acpi_tables,
     NULL);
 
 static void
 srat_walk_table(acpi_subtable_handler *handler, void *arg)
 {
 
 	acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length,
 	    handler, arg);
 }
 
 /*
  * Setup per-CPU domain IDs.
  */
 static void
 srat_set_cpus(void *dummy)
 {
 	struct cpu_info *cpu;
 	struct pcpu *pc;
 	u_int i;
 
 	if (srat_physaddr == 0)
 		return;
 	for (i = 0; i < MAXCPU; i++) {
 		if (CPU_ABSENT(i))
 			continue;
 		pc = pcpu_find(i);
 		KASSERT(pc != NULL, ("no pcpu data for CPU %u", i));
 		cpu = &cpus[pc->pc_apic_id];
 		if (!cpu->enabled)
 			panic("SRAT: CPU with APIC ID %u is not known",
 			    pc->pc_apic_id);
 		pc->pc_domain = cpu->domain;
 		CPU_SET(i, &cpuset_domain[cpu->domain]);
 		if (bootverbose)
 			printf("SRAT: CPU %u has memory domain %d\n", i,
 			    cpu->domain);
 	}
 }
 SYSINIT(srat_set_cpus, SI_SUB_CPU, SI_ORDER_ANY, srat_set_cpus, NULL);
 
 /*
  * Map a _PXM value to a VM domain ID.
  *
  * Returns the domain ID, or -1 if no domain ID was found.
  */
 int
 acpi_map_pxm_to_vm_domainid(int pxm)
 {
 	int i;
 
-	for (i = 0; i < vm_ndomains; i++) {
-		if (vm_domains[i] == pxm)
+	for (i = 0; i < ndomain; i++) {
+		if (domain_pxm[i] == pxm)
 			return (i);
 	}
+
+	return (-1);
+}
+
+#else /* MAXMEMDOM == 1 */
+
+int
+acpi_map_pxm_to_vm_domainid(int pxm)
+{
 
 	return (-1);
 }
 
 #endif /* MAXMEMDOM > 1 */