Index: head/sys/amd64/conf/GENERIC
===================================================================
--- head/sys/amd64/conf/GENERIC	(revision 327953)
+++ head/sys/amd64/conf/GENERIC	(revision 327954)
@@ -1,371 +1,370 @@
 #
 # GENERIC -- Generic kernel configuration file for FreeBSD/amd64
 #
 # For more information on this file, please read the config(5) manual page,
 # and/or the handbook section on Kernel Configuration Files:
 #
 #    https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
 #
 # The handbook is also available locally in /usr/share/doc/handbook
 # if you've installed the doc distribution, otherwise always see the
 # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the
 # latest information.
 #
 # An exhaustive list of options and more detailed explanations of the
 # device lines is also present in the ../../conf/NOTES and NOTES files.
 # If you are in doubt as to the purpose or necessity of a line, check first
 # in NOTES.
 #
 # $FreeBSD$
 
 cpu		HAMMER
 ident		GENERIC
 
 makeoptions	DEBUG=-g		# Build kernel with gdb(1) debug symbols
 makeoptions	WITH_CTF=1		# Run ctfconvert(1) for DTrace support
 
 options 	SCHED_ULE		# ULE scheduler
 options 	PREEMPTION		# Enable kernel thread preemption
 options 	VIMAGE			# Subsystem virtualization, e.g. VNET
 options 	INET			# InterNETworking
 options 	INET6			# IPv6 communications protocols
 options 	IPSEC			# IP (v4/v6) security
 options 	IPSEC_SUPPORT		# Allow kldload of ipsec and tcpmd5
 options 	TCP_OFFLOAD		# TCP offload
 options 	TCP_HHOOK		# hhook(9) framework for TCP
 options 	SCTP			# Stream Control Transmission Protocol
 options 	FFS			# Berkeley Fast Filesystem
 options 	SOFTUPDATES		# Enable FFS soft updates support
 options 	UFS_ACL			# Support for access control lists
 options 	UFS_DIRHASH		# Improve performance on big directories
 options 	UFS_GJOURNAL		# Enable gjournal-based UFS journaling
 options 	QUOTA			# Enable disk quotas for UFS
 options 	MD_ROOT			# MD is a potential root device
 options 	NFSCL			# Network Filesystem Client
 options 	NFSD			# Network Filesystem Server
 options 	NFSLOCKD		# Network Lock Manager
 options 	NFS_ROOT		# NFS usable as /, requires NFSCL
 options 	MSDOSFS			# MSDOS Filesystem
 options 	CD9660			# ISO 9660 Filesystem
 options 	PROCFS			# Process filesystem (requires PSEUDOFS)
 options 	PSEUDOFS		# Pseudo-filesystem framework
 options 	GEOM_PART_GPT		# GUID Partition Tables.
 options 	GEOM_RAID		# Soft RAID functionality.
 options 	GEOM_LABEL		# Provides labelization
 options 	COMPAT_FREEBSD32	# Compatible with i386 binaries
 options 	COMPAT_FREEBSD4		# Compatible with FreeBSD4
 options 	COMPAT_FREEBSD5		# Compatible with FreeBSD5
 options 	COMPAT_FREEBSD6		# Compatible with FreeBSD6
 options 	COMPAT_FREEBSD7		# Compatible with FreeBSD7
 options 	COMPAT_FREEBSD9		# Compatible with FreeBSD9
 options 	COMPAT_FREEBSD10	# Compatible with FreeBSD10
 options 	COMPAT_FREEBSD11	# Compatible with FreeBSD11
 options 	SCSI_DELAY=5000		# Delay (in ms) before probing SCSI
 options 	KTRACE			# ktrace(1) support
 options 	STACK			# stack(9) support
 options 	SYSVSHM			# SYSV-style shared memory
 options 	SYSVMSG			# SYSV-style message queues
 options 	SYSVSEM			# SYSV-style semaphores
 options 	_KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
 options 	PRINTF_BUFR_SIZE=128	# Prevent printf output being interspersed.
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
 options 	HWPMC_HOOKS		# Necessary kernel hooks for hwpmc(4)
 options 	AUDIT			# Security event auditing
 options 	CAPABILITY_MODE		# Capsicum capability mode
 options 	CAPABILITIES		# Capsicum capabilities
 options 	MAC			# TrustedBSD MAC Framework
 options 	KDTRACE_FRAME		# Ensure frames are compiled in
 options 	KDTRACE_HOOKS		# Kernel DTrace hooks
 options 	DDB_CTF			# Kernel ELF linker loads CTF data
 options 	INCLUDE_CONFIG_FILE	# Include this file in kernel
 options 	RACCT			# Resource accounting framework
 options 	RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default
 options 	RCTL			# Resource limits
 
 # Debugging support.  Always need this:
 options 	KDB			# Enable kernel debugger support.
 options 	KDB_TRACE		# Print a stack trace for a panic.
 # For full debugger support use (turn off in stable branch):
 options 	BUF_TRACKING		# Track buffer history
 options 	DDB			# Support DDB.
 options 	FULL_BUF_TRACKING	# Track more buffer history
 options 	GDB			# Support remote GDB.
 options 	DEADLKRES		# Enable the deadlock resolver
 options 	INVARIANTS		# Enable calls of extra sanity checking
 options 	INVARIANT_SUPPORT	# Extra sanity checks of internal structures, required by INVARIANTS
 options 	WITNESS			# Enable checks to detect deadlocks and cycles
 options 	WITNESS_SKIPSPIN	# Don't run witness on spinlocks for speed
 options 	MALLOC_DEBUG_MAXZONES=8	# Separate malloc(9) zones
 
 # Make an SMP-capable kernel by default
 options 	SMP			# Symmetric MultiProcessor Kernel
-options 	DEVICE_NUMA		# I/O Device Affinity
 options 	EARLY_AP_STARTUP
 
 # CPU frequency control
 device		cpufreq
 
 # Bus support.
 device		acpi
 options 	ACPI_DMAR
 device		pci
 options 	PCI_HP			# PCI-Express native HotPlug
 options		PCI_IOV			# PCI SR-IOV support
 
 # Floppy drives
 device		fdc
 
 # ATA controllers
 device		ahci			# AHCI-compatible SATA controllers
 device		ata			# Legacy ATA/SATA controllers
 device		mvs			# Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA
 device		siis			# SiliconImage SiI3124/SiI3132/SiI3531 SATA
 
 # SCSI Controllers
 device		ahc			# AHA2940 and onboard AIC7xxx devices
 device		ahd			# AHA39320/29320 and onboard AIC79xx devices
 device		esp			# AMD Am53C974 (Tekram DC-390(T))
 device		hptiop			# Highpoint RocketRaid 3xxx series
 device		isp			# Qlogic family
 #device		ispfw			# Firmware for QLogic HBAs- normally a module
 device		mpt			# LSI-Logic MPT-Fusion
 device		mps			# LSI-Logic MPT-Fusion 2
 device		mpr			# LSI-Logic MPT-Fusion 3
 #device		ncr			# NCR/Symbios Logic
 device		sym			# NCR/Symbios Logic (newer chipsets + those of `ncr')
 device		trm			# Tekram DC395U/UW/F DC315U adapters
 
 device		adv			# Advansys SCSI adapters
 device		adw			# Advansys wide SCSI adapters
 device		aic			# Adaptec 15[012]x SCSI adapters, AIC-6[23]60.
 device		bt			# Buslogic/Mylex MultiMaster SCSI adapters
 device		isci			# Intel C600 SAS controller
 
 # ATA/SCSI peripherals
 device		scbus			# SCSI bus (required for ATA/SCSI)
 device		ch			# SCSI media changers
 device		da			# Direct Access (disks)
 device		sa			# Sequential Access (tape etc)
 device		cd			# CD
 device		pass			# Passthrough device (direct ATA/SCSI access)
 device		ses			# Enclosure Services (SES and SAF-TE)
 #device		ctl			# CAM Target Layer
 
 # RAID controllers interfaced to the SCSI subsystem
 device		amr			# AMI MegaRAID
 device		arcmsr			# Areca SATA II RAID
 device		ciss			# Compaq Smart RAID 5*
 device		dpt			# DPT Smartcache III, IV - See NOTES for options
 device		hptmv			# Highpoint RocketRAID 182x
 device		hptnr			# Highpoint DC7280, R750
 device		hptrr			# Highpoint RocketRAID 17xx, 22xx, 23xx, 25xx
 device		hpt27xx			# Highpoint RocketRAID 27xx
 device		iir			# Intel Integrated RAID
 device		ips			# IBM (Adaptec) ServeRAID
 device		mly			# Mylex AcceleRAID/eXtremeRAID
 device		twa			# 3ware 9000 series PATA/SATA RAID
 device		tws			# LSI 3ware 9750 SATA+SAS 6Gb/s RAID controller
 
 # RAID controllers
 device		aac			# Adaptec FSA RAID
 device		aacp			# SCSI passthrough for aac (requires CAM)
 device		aacraid			# Adaptec by PMC RAID
 device		ida			# Compaq Smart RAID
 device		mfi			# LSI MegaRAID SAS
 device		mlx			# Mylex DAC960 family
 device		mrsas			# LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s
 device		pmspcv			# PMC-Sierra SAS/SATA Controller driver
 #XXX pointer/int warnings
 #device		pst			# Promise Supertrak SX6000
 device		twe			# 3ware ATA RAID
 
 # NVM Express (NVMe) support
 device		nvme			# base NVMe driver
 device		nvd			# expose NVMe namespaces as disks, depends on nvme
 
 # atkbdc0 controls both the keyboard and the PS/2 mouse
 device		atkbdc			# AT keyboard controller
 device		atkbd			# AT keyboard
 device		psm			# PS/2 mouse
 
 device		kbdmux			# keyboard multiplexer
 
 device		vga			# VGA video card driver
 options 	VESA			# Add support for VESA BIOS Extensions (VBE)
 
 device		splash			# Splash screen and screen saver support
 
 # syscons is the default console driver, resembling an SCO console
 device		sc
 options 	SC_PIXEL_MODE		# add support for the raster text mode
 
 # vt is the new video console driver
 device		vt
 device		vt_vga
 device		vt_efifb
 
 device		agp			# support several AGP chipsets
 
 # PCCARD (PCMCIA) support
 # PCMCIA and cardbus bridge support
 device		cbb			# cardbus (yenta) bridge
 device		pccard			# PC Card (16-bit) bus
 device		cardbus			# CardBus (32-bit) bus
 
 # Serial (COM) ports
 device		uart			# Generic UART driver
 
 # Parallel port
 device		ppc
 device		ppbus			# Parallel port bus (required)
 device		lpt			# Printer
 device		ppi			# Parallel port interface device
 #device		vpo			# Requires scbus and da
 
 device		puc			# Multi I/O cards and multi-channel UARTs
 
 # PCI Ethernet NICs.
 device		bxe			# Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE
 device		de			# DEC/Intel DC21x4x (``Tulip'')
 device		em			# Intel PRO/1000 Gigabit Ethernet Family
 device		ix			# Intel PRO/10GbE PCIE PF Ethernet
 device		ixv			# Intel PRO/10GbE PCIE VF Ethernet
 device		ixl			# Intel XL710 40Gbe PCIE Ethernet
 options		IXL_IW			# Enable iWARP Client Interface in ixl(4)
 device		ixlv			# Intel XL710 40Gbe VF PCIE Ethernet
 device		le			# AMD Am7900 LANCE and Am79C9xx PCnet
 device		ti			# Alteon Networks Tigon I/II gigabit Ethernet
 device		txp			# 3Com 3cR990 (``Typhoon'')
 device		vx			# 3Com 3c590, 3c595 (``Vortex'')
 
 # PCI Ethernet NICs that use the common MII bus controller code.
 # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs!
 device		miibus			# MII bus support
 device		ae			# Attansic/Atheros L2 FastEthernet
 device		age			# Attansic/Atheros L1 Gigabit Ethernet
 device		alc			# Atheros AR8131/AR8132 Ethernet
 device		ale			# Atheros AR8121/AR8113/AR8114 Ethernet
 device		bce			# Broadcom BCM5706/BCM5708 Gigabit Ethernet
 device		bfe			# Broadcom BCM440x 10/100 Ethernet
 device		bge			# Broadcom BCM570xx Gigabit Ethernet
 device		cas			# Sun Cassini/Cassini+ and NS DP83065 Saturn
 device		dc			# DEC/Intel 21143 and various workalikes
 device		et			# Agere ET1310 10/100/Gigabit Ethernet
 device		fxp			# Intel EtherExpress PRO/100B (82557, 82558)
 device		gem			# Sun GEM/Sun ERI/Apple GMAC
 device		hme			# Sun HME (Happy Meal Ethernet)
 device		jme			# JMicron JMC250 Gigabit/JMC260 Fast Ethernet
 device		lge			# Level 1 LXT1001 gigabit Ethernet
 device		msk			# Marvell/SysKonnect Yukon II Gigabit Ethernet
 device		nfe			# nVidia nForce MCP on-board Ethernet
 device		nge			# NatSemi DP83820 gigabit Ethernet
 device		pcn			# AMD Am79C97x PCI 10/100 (precedence over 'le')
 device		re			# RealTek 8139C+/8169/8169S/8110S
 device		rl			# RealTek 8129/8139
 device		sf			# Adaptec AIC-6915 (``Starfire'')
 device		sge			# Silicon Integrated Systems SiS190/191
 device		sis			# Silicon Integrated Systems SiS 900/SiS 7016
 device		sk			# SysKonnect SK-984x & SK-982x gigabit Ethernet
 device		ste			# Sundance ST201 (D-Link DFE-550TX)
 device		stge			# Sundance/Tamarack TC9021 gigabit Ethernet
 device		tl			# Texas Instruments ThunderLAN
 device		tx			# SMC EtherPower II (83c170 ``EPIC'')
 device		vge			# VIA VT612x gigabit Ethernet
 device		vr			# VIA Rhine, Rhine II
 device		wb			# Winbond W89C840F
 device		xl			# 3Com 3c90x (``Boomerang'', ``Cyclone'')
 
 # Wireless NIC cards
 device		wlan			# 802.11 support
 options 	IEEE80211_DEBUG		# enable debug msgs
 options 	IEEE80211_AMPDU_AGE	# age frames in AMPDU reorder q's
 options 	IEEE80211_SUPPORT_MESH	# enable 802.11s draft support
 device		wlan_wep		# 802.11 WEP support
 device		wlan_ccmp		# 802.11 CCMP support
 device		wlan_tkip		# 802.11 TKIP support
 device		wlan_amrr		# AMRR transmit rate control algorithm
 device		an			# Aironet 4500/4800 802.11 wireless NICs.
 device		ath			# Atheros NICs
 device		ath_pci			# Atheros pci/cardbus glue
 device		ath_hal			# pci/cardbus chip support
 options 	AH_SUPPORT_AR5416	# enable AR5416 tx/rx descriptors
 options 	AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation
 options 	ATH_ENABLE_11N		# Enable 802.11n support for AR5416 and later
 device		ath_rate_sample		# SampleRate tx rate control for ath
 #device		bwi			# Broadcom BCM430x/BCM431x wireless NICs.
 #device		bwn			# Broadcom BCM43xx wireless NICs.
 device		ipw			# Intel 2100 wireless NICs.
 device		iwi			# Intel 2200BG/2225BG/2915ABG wireless NICs.
 device		iwn			# Intel 4965/1000/5000/6000 wireless NICs.
 device		malo			# Marvell Libertas wireless NICs.
 device		mwl			# Marvell 88W8363 802.11n wireless NICs.
 device		ral			# Ralink Technology RT2500 wireless NICs.
 device		wi			# WaveLAN/Intersil/Symbol 802.11 wireless NICs.
 device		wpi			# Intel 3945ABG wireless NICs.
 
 # Pseudo devices.
 device		loop			# Network loopback
 device		random			# Entropy device
 device		padlock_rng		# VIA Padlock RNG
 device		rdrand_rng		# Intel Bull Mountain RNG
 device		ether			# Ethernet support
 device		vlan			# 802.1Q VLAN support
 device		tun			# Packet tunnel.
 device		md			# Memory "disks"
 device		gif			# IPv6 and IPv4 tunneling
 device		firmware		# firmware assist module
 
 # The `bpf' device enables the Berkeley Packet Filter.
 # Be aware of the administrative consequences of enabling this!
 # Note that 'bpf' is required for DHCP.
 device		bpf			# Berkeley packet filter
 
 # USB support
 options 	USB_DEBUG		# enable debug msgs
 device		uhci			# UHCI PCI->USB interface
 device		ohci			# OHCI PCI->USB interface
 device		ehci			# EHCI PCI->USB interface (USB 2.0)
 device		xhci			# XHCI PCI->USB interface (USB 3.0)
 device		usb			# USB Bus (required)
 device		ukbd			# Keyboard
 device		umass			# Disks/Mass storage - Requires scbus and da
 
 # Sound support
 device		sound			# Generic sound driver (required)
 device		snd_cmi			# CMedia CMI8338/CMI8738
 device		snd_csa			# Crystal Semiconductor CS461x/428x
 device		snd_emu10kx		# Creative SoundBlaster Live! and Audigy
 device		snd_es137x		# Ensoniq AudioPCI ES137x
 device		snd_hda			# Intel High Definition Audio
 device		snd_ich			# Intel, NVidia and other ICH AC'97 Audio
 device		snd_via8233		# VIA VT8233x Audio
 
 # MMC/SD
 device		mmc			# MMC/SD bus
 device		mmcsd			# MMC/SD memory card
 device		sdhci			# Generic PCI SD Host Controller
 
 # VirtIO support
 device		virtio			# Generic VirtIO bus (required)
 device		virtio_pci		# VirtIO PCI device
 device		vtnet			# VirtIO Ethernet device
 device		virtio_blk		# VirtIO Block device
 device		virtio_scsi		# VirtIO SCSI device
 device		virtio_balloon		# VirtIO Memory Balloon device
 
 # HyperV drivers and enhancement support
 device		hyperv			# HyperV drivers 
 
 # Xen HVM Guest Optimizations
 # NOTE: XENHVM depends on xenpci.  They must be added or removed together.
 options 	XENHVM			# Xen HVM kernel infrastructure
 device		xenpci			# Xen HVM Hypervisor services driver
 
 # VMware support
 device		vmx			# VMware VMXNET3 Ethernet
 
 # Netmap provides direct access to TX/RX rings on supported NICs
 device		netmap			# netmap(4) support
 
 # The crypto framework is required by IPSEC
 device		crypto			# Required by IPSEC
Index: head/sys/amd64/conf/MINIMAL
===================================================================
--- head/sys/amd64/conf/MINIMAL	(revision 327953)
+++ head/sys/amd64/conf/MINIMAL	(revision 327954)
@@ -1,148 +1,147 @@
 #
 # MINIMAL -- Mostly Minimal kernel configuration file for FreeBSD/amd64
 #
 # Many definitions of minimal are possible. The one this file follows is
 # GENERIC, minus all functionality that can be replaced by loading kernel
 # modules.
 #
 # Exceptions:
 # o While UFS is buildable as a module, the current module lacks
 #   some features (ACL, GJOURNAL) that GENERIC includes.
 # o acpi as a module has been reported flakey and not well tested, so
 #   is included in the kernel.
-# o random is included due to uncertaty...
+# o random is included due to uncertainty...
 # o Many networking things are included
 #
-# For now, please run changes to these list past imp@freebsd.org
+# For now, please run changes to this list past imp@freebsd.org
 #
 # For more information on this file, please read the config(5) manual page,
 # and/or the handbook section on Kernel Configuration Files:
 #
 #    https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
 #
 # The handbook is also available locally in /usr/share/doc/handbook
 # if you've installed the doc distribution, otherwise always see the
 # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the
 # latest information.
 #
 # An exhaustive list of options and more detailed explanations of the
 # device lines is also present in the ../../conf/NOTES and NOTES files.
 # If you are in doubt as to the purpose or necessity of a line, check first
 # in NOTES.
 #
 # $FreeBSD$
 
 cpu		HAMMER
 ident		MINIMAL
 
 makeoptions	DEBUG=-g		# Build kernel with gdb(1) debug symbols
 makeoptions	WITH_CTF=1		# Run ctfconvert(1) for DTrace support
 
 options 	SCHED_ULE		# ULE scheduler
 options 	PREEMPTION		# Enable kernel thread preemption
 options 	INET			# InterNETworking
 options 	INET6			# IPv6 communications protocols
 options 	TCP_OFFLOAD		# TCP offload
 options 	SCTP			# Stream Control Transmission Protocol
 options 	FFS			# Berkeley Fast Filesystem
 options 	SOFTUPDATES		# Enable FFS soft updates support
 options 	UFS_ACL			# Support for access control lists
 options 	UFS_DIRHASH		# Improve performance on big directories
 options 	UFS_GJOURNAL		# Enable gjournal-based UFS journaling
 options 	QUOTA			# Enable disk quotas for UFS
 options 	MD_ROOT			# MD is a potential root device
 options 	COMPAT_FREEBSD32	# Compatible with i386 binaries
 options 	COMPAT_FREEBSD4		# Compatible with FreeBSD4
 options 	COMPAT_FREEBSD5		# Compatible with FreeBSD5
 options 	COMPAT_FREEBSD6		# Compatible with FreeBSD6
 options 	COMPAT_FREEBSD7		# Compatible with FreeBSD7
 options 	COMPAT_FREEBSD9		# Compatible with FreeBSD9
 options 	COMPAT_FREEBSD10	# Compatible with FreeBSD10
 options 	COMPAT_FREEBSD11	# Compatible with FreeBSD11
 options 	SCSI_DELAY=5000		# Delay (in ms) before probing SCSI
 options 	KTRACE			# ktrace(1) support
 options 	STACK			# stack(9) support
 options 	SYSVSHM			# SYSV-style shared memory
 options 	SYSVMSG			# SYSV-style message queues
 options 	SYSVSEM			# SYSV-style semaphores
 options 	_KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
 options 	PRINTF_BUFR_SIZE=128	# Prevent printf output being interspersed.
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
 options 	HWPMC_HOOKS		# Necessary kernel hooks for hwpmc(4)
 options 	AUDIT			# Security event auditing
 options 	CAPABILITY_MODE		# Capsicum capability mode
 options 	CAPABILITIES		# Capsicum capabilities
 options 	MAC			# TrustedBSD MAC Framework
 options 	KDTRACE_FRAME		# Ensure frames are compiled in
 options 	KDTRACE_HOOKS		# Kernel DTrace hooks
 options 	DDB_CTF			# Kernel ELF linker loads CTF data
 options 	INCLUDE_CONFIG_FILE	# Include this file in kernel
 
 # Debugging support.  Always need this:
 options 	KDB			# Enable kernel debugger support.
 options 	KDB_TRACE		# Print a stack trace for a panic.
 # For full debugger support use (turn off in stable branch):
 options 	DDB			# Support DDB.
 options 	GDB			# Support remote GDB.
 options 	DEADLKRES		# Enable the deadlock resolver
 options 	INVARIANTS		# Enable calls of extra sanity checking
 options 	INVARIANT_SUPPORT	# Extra sanity checks of internal structures, required by INVARIANTS
 options 	WITNESS			# Enable checks to detect deadlocks and cycles
 options 	WITNESS_SKIPSPIN	# Don't run witness on spinlocks for speed
 options 	MALLOC_DEBUG_MAXZONES=8	# Separate malloc(9) zones
 
 # Make an SMP-capable kernel by default
 options 	SMP			# Symmetric MultiProcessor Kernel
-options 	DEVICE_NUMA		# I/O Device Affinity
 options 	EARLY_AP_STARTUP
 
 # CPU frequency control
 device		cpufreq
 
 # Bus support.
 device		acpi
 options 	ACPI_DMAR
 device		pci
 
 # atkbdc0 controls both the keyboard and the PS/2 mouse
 device		atkbdc			# AT keyboard controller
 device		atkbd			# AT keyboard
 device		psm			# PS/2 mouse
 
 device		kbdmux			# keyboard multiplexer
 
 device		vga			# VGA video card driver
 options 	VESA			# Add support for VESA BIOS Extensions (VBE)
 
 device		splash			# Splash screen and screen saver support
 
 # syscons is the default console driver, resembling an SCO console
 device		sc
 options 	SC_PIXEL_MODE		# add support for the raster text mode
 
 # vt is the new video console driver
 device		vt
 device		vt_vga
 device		vt_efifb
 
 device		agp			# support several AGP chipsets
 
 # Pseudo devices.
 device		loop			# Network loopback
 device		random			# Entropy device
 device		padlock_rng		# VIA Padlock RNG
 device		rdrand_rng		# Intel Bull Mountain RNG
 device		ether			# Ethernet support
 device		vlan			# 802.1Q VLAN support
 device		tun			# Packet tunnel.
 device		gif			# IPv6 and IPv4 tunneling
 
 # The `bpf' device enables the Berkeley Packet Filter.
 # Be aware of the administrative consequences of enabling this!
 # Note that 'bpf' is required for DHCP.
 device		bpf			# Berkeley packet filter
 
 # Xen HVM Guest Optimizations
 # NOTE: XENHVM depends on xenpci.  They must be added or removed together.
 options 	XENHVM			# Xen HVM kernel infrastructure
 device		xenpci			# Xen HVM Hypervisor services driver
Index: head/sys/conf/NOTES
===================================================================
--- head/sys/conf/NOTES	(revision 327953)
+++ head/sys/conf/NOTES	(revision 327954)
@@ -1,3036 +1,3032 @@
 # $FreeBSD$
 #
 # NOTES -- Lines that can be cut/pasted into kernel and hints configs.
 #
 # Lines that begin with 'device', 'options', 'machine', 'ident', 'maxusers',
 # 'makeoptions', 'hints', etc. go into the kernel configuration that you
 # run config(8) with.
 #
 # Lines that begin with 'hint.' are NOT for config(8), they go into your
 # hints file.  See /boot/device.hints and/or the 'hints' config(8) directive.
 #
 # Please use ``make LINT'' to create an old-style LINT file if you want to
 # do kernel test-builds.
 #
 # This file contains machine independent kernel configuration notes.  For
 # machine dependent notes, look in /sys/<arch>/conf/NOTES.
 #
 
 #
 # NOTES conventions and style guide:
 #
 # Large block comments should begin and end with a line containing only a
 # comment character.
 #
 # To describe a particular object, a block comment (if it exists) should
 # come first.  Next should come device, options, and hints lines in that
 # order.  All device and option lines must be described by a comment that
 # doesn't just expand the device or option name.  Use only a concise
 # comment on the same line if possible.  Very detailed descriptions of
 # devices and subsystems belong in man pages.
 #
 # A space followed by a tab separates 'options' from an option name.  Two
 # spaces followed by a tab separate 'device' from a device name.  Comments
 # after an option or device should use one space after the comment character.
 # To comment out a negative option that disables code and thus should not be
 # enabled for LINT builds, precede 'options' with "#!".
 #
 
 #
 # This is the ``identification'' of the kernel.  Usually this should
 # be the same as the name of your kernel.
 #
 ident		LINT
 
 #
 # The `maxusers' parameter controls the static sizing of a number of
 # internal system tables by a formula defined in subr_param.c.
 # Omitting this parameter or setting it to 0 will cause the system to
 # auto-size based on physical memory.
 #
 maxusers	10
 
 # To statically compile in device wiring instead of /boot/device.hints
 #hints		"LINT.hints"		# Default places to look for devices.
 
 # Use the following to compile in values accessible to the kernel
 # through getenv() (or kenv(1) in userland). The format of the file
 # is 'variable=value', see kenv(1)
 #
 #env		"LINT.env"
 
 #
 # The `makeoptions' parameter allows variables to be passed to the
 # generated Makefile in the build area.
 #
 # CONF_CFLAGS gives some extra compiler flags that are added to ${CFLAGS}
 # after most other flags.  Here we use it to inhibit use of non-optimal
 # gcc built-in functions (e.g., memcmp).
 #
 # DEBUG happens to be magic.
 # The following is equivalent to 'config -g KERNELNAME' and creates
 # 'kernel.debug' compiled with -g debugging as well as a normal
 # 'kernel'.  Use 'make install.debug' to install the debug kernel
 # but that isn't normally necessary as the debug symbols are not loaded
 # by the kernel and are not useful there anyway.
 #
 # KERNEL can be overridden so that you can change the default name of your
 # kernel.
 #
 # MODULES_OVERRIDE can be used to limit modules built to a specific list.
 #
 makeoptions	CONF_CFLAGS=-fno-builtin  #Don't allow use of memcmp, etc.
 #makeoptions	DEBUG=-g		#Build kernel with gdb(1) debug symbols
 #makeoptions	KERNEL=foo		#Build kernel "foo" and install "/foo"
 # Only build ext2fs module plus those parts of the sound system I need.
 #makeoptions	MODULES_OVERRIDE="ext2fs sound/sound sound/driver/maestro3"
 makeoptions	DESTDIR=/tmp
 
 #
 # FreeBSD processes are subject to certain limits to their consumption
 # of system resources.  See getrlimit(2) for more details.  Each
 # resource limit has two values, a "soft" limit and a "hard" limit.
 # The soft limits can be modified during normal system operation, but
 # the hard limits are set at boot time.  Their default values are
 # in sys/<arch>/include/vmparam.h.  There are two ways to change them:
 # 
 # 1.  Set the values at kernel build time.  The options below are one
 #     way to allow that limit to grow to 1GB.  They can be increased
 #     further by changing the parameters:
 #	
 # 2.  In /boot/loader.conf, set the tunables kern.maxswzone,
 #     kern.maxbcache, kern.maxtsiz, kern.dfldsiz, kern.maxdsiz,
 #     kern.dflssiz, kern.maxssiz and kern.sgrowsiz.
 #
 # The options in /boot/loader.conf override anything in the kernel
 # configuration file.  See the function init_param1 in
 # sys/kern/subr_param.c for more details.
 #
 
 options 	MAXDSIZ=(1024UL*1024*1024)
 options 	MAXSSIZ=(128UL*1024*1024)
 options 	DFLDSIZ=(1024UL*1024*1024)
 
 #
 # BLKDEV_IOSIZE sets the default block size used in user block
 # device I/O.  Note that this value will be overridden by the label
 # when specifying a block device from a label with a non-0
 # partition blocksize.  The default is PAGE_SIZE.
 #
 options 	BLKDEV_IOSIZE=8192
 
 #
 # MAXPHYS and DFLTPHYS
 #
 # These are the maximal and safe 'raw' I/O block device access sizes.
 # Reads and writes will be split into MAXPHYS chunks for known good
 # devices and DFLTPHYS for the rest. Some applications have better
 # performance with larger raw I/O access sizes. Note that certain VM
 # parameters are derived from these values and making them too large
 # can make an unbootable kernel.
 #
-# The defaults are 64K and 128K respectively.
+# The defaults are 64K for DFLTPHYS and 128K for MAXPHYS.
 options 	DFLTPHYS=(64*1024)
 options 	MAXPHYS=(128*1024)
 
 
 # This allows you to actually store this configuration file into
 # the kernel binary itself. See config(8) for more details.
 #
 options 	INCLUDE_CONFIG_FILE     # Include this file in kernel
 
 #
 # Compile-time defaults for various boot parameters
 #
 options 	BOOTVERBOSE=1
 options 	BOOTHOWTO=RB_MULTIPLE
 
 options 	GEOM_AES		# Don't use, use GEOM_BDE
 options 	GEOM_BDE		# Disk encryption.
 options 	GEOM_BSD		# BSD disklabels (obsolete, gone in 12)
 options 	GEOM_CACHE		# Disk cache.
 options 	GEOM_CONCAT		# Disk concatenation.
 options 	GEOM_ELI		# Disk encryption.
 options 	GEOM_FOX		# Redundant path mitigation (obsolete, gone in 12)
 options 	GEOM_GATE		# Userland services.
 options 	GEOM_JOURNAL		# Journaling.
 options 	GEOM_LABEL		# Providers labelization.
 options 	GEOM_LINUX_LVM		# Linux LVM2 volumes
 options 	GEOM_MAP		# Map based partitioning
 options 	GEOM_MBR		# DOS/MBR partitioning (obsolete, gone in 12)
 options 	GEOM_MIRROR		# Disk mirroring.
 options 	GEOM_MULTIPATH		# Disk multipath
 options 	GEOM_NOP		# Test class.
 options 	GEOM_PART_APM		# Apple partitioning
 options 	GEOM_PART_BSD		# BSD disklabel
 options 	GEOM_PART_BSD64		# BSD disklabel64
 options 	GEOM_PART_EBR		# Extended Boot Records
 options 	GEOM_PART_EBR_COMPAT	# Backward compatible partition names
 options 	GEOM_PART_GPT		# GPT partitioning
 options 	GEOM_PART_LDM		# Logical Disk Manager
 options 	GEOM_PART_MBR		# MBR partitioning
 options 	GEOM_PART_VTOC8		# SMI VTOC8 disk label
 options 	GEOM_RAID		# Soft RAID functionality.
 options 	GEOM_RAID3		# RAID3 functionality.
 options 	GEOM_SHSEC		# Shared secret.
 options 	GEOM_STRIPE		# Disk striping.
 options 	GEOM_SUNLABEL		# Sun/Solaris partitioning (obsolete, gone in 12)
 options 	GEOM_UZIP		# Read-only compressed disks
 options 	GEOM_VINUM		# Vinum logical volume manager
 options 	GEOM_VIRSTOR		# Virtual storage.
 options 	GEOM_VOL		# Volume names from UFS superblock (obsolete, gone in 12)
 options 	GEOM_ZERO		# Performance testing helper.
 
 #
 # The root device and filesystem type can be compiled in;
 # this provides a fallback option if the root device cannot
 # be correctly guessed by the bootstrap code, or an override if
 # the RB_DFLTROOT flag (-r) is specified when booting the kernel.
 #
 options 	ROOTDEVNAME=\"ufs:da0s2e\"
 
 
 #####################################################################
 # Scheduler options:
 #
 # Specifying one of SCHED_4BSD or SCHED_ULE is mandatory.  These options
 # select which scheduler is compiled in.
 #
 # SCHED_4BSD is the historical, proven, BSD scheduler.  It has a global run
 # queue and no CPU affinity which makes it suboptimal for SMP.  It has very
 # good interactivity and priority selection.
 #
 # SCHED_ULE provides significant performance advantages over 4BSD on many
 # workloads on SMP machines.  It supports cpu-affinity, per-cpu runqueues
 # and scheduler locks.  It also has a stronger notion of interactivity 
 # which leads to better responsiveness even on uniprocessor machines.  This
 # is the default scheduler.
 #
 # SCHED_STATS is a debugging option which keeps some stats in the sysctl
 # tree at 'kern.sched.stats' and is useful for debugging scheduling decisions.
 #
 options 	SCHED_4BSD
 options 	SCHED_STATS
 #options 	SCHED_ULE
 
 #####################################################################
 # SMP OPTIONS:
 #
 # SMP enables building of a Symmetric MultiProcessor Kernel.
 
 # Mandatory:
 options 	SMP			# Symmetric MultiProcessor Kernel
 
 # EARLY_AP_STARTUP releases the Application Processors earlier in the
 # kernel startup process (before devices are probed) rather than at the
 # end.  This is a temporary option for use during the transition from
 # late to early AP startup.
 options		EARLY_AP_STARTUP
 
 # MAXCPU defines the maximum number of CPUs that can boot in the system.
 # A default value should be already present, for every architecture.
 options 	MAXCPU=32
 
+# NUMA enables Non-Uniform Memory Access support in the kernel, including
+# memory domain-aware VM allocation and I/O device domain affinity reporting.
+options 	NUMA
+
 # MAXMEMDOM defines the maximum number of memory domains that can boot in the
 # system.  A default value should already be defined by every architecture.
 options 	MAXMEMDOM=2
-
-# VM_NUMA_ALLOC enables use of memory domain-aware allocation in the VM
-# system.
-options 	VM_NUMA_ALLOC
-
-# DEVICE_NUMA enables reporting of domain affinity of I/O devices via
-# bus_get_domain(), etc.
-options 	DEVICE_NUMA
 
 # ADAPTIVE_MUTEXES changes the behavior of blocking mutexes to spin
 # if the thread that currently owns the mutex is executing on another
 # CPU.  This behavior is enabled by default, so this option can be used
 # to disable it.
 options 	NO_ADAPTIVE_MUTEXES
 
 # ADAPTIVE_RWLOCKS changes the behavior of reader/writer locks to spin
 # if the thread that currently owns the rwlock is executing on another
 # CPU.  This behavior is enabled by default, so this option can be used
 # to disable it.
 options 	NO_ADAPTIVE_RWLOCKS
 
 # ADAPTIVE_SX changes the behavior of sx locks to spin if the thread that
 # currently owns the sx lock is executing on another CPU.
 # This behavior is enabled by default, so this option can be used to
 # disable it.
 options 	NO_ADAPTIVE_SX
 
 # MUTEX_NOINLINE forces mutex operations to call functions to perform each
 # operation rather than inlining the simple cases.  This can be used to
 # shrink the size of the kernel text segment.  Note that this behavior is
 # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
 # and WITNESS options.
 options 	MUTEX_NOINLINE
 
 # RWLOCK_NOINLINE forces rwlock operations to call functions to perform each
 # operation rather than inlining the simple cases.  This can be used to
 # shrink the size of the kernel text segment.  Note that this behavior is
 # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
 # and WITNESS options.
 options 	RWLOCK_NOINLINE
 
 # SX_NOINLINE forces sx lock operations to call functions to perform each
 # operation rather than inlining the simple cases.  This can be used to
 # shrink the size of the kernel text segment.  Note that this behavior is
 # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
 # and WITNESS options.
 options 	SX_NOINLINE
 
 # SMP Debugging Options:
 #
 # CALLOUT_PROFILING enables rudimentary profiling of the callwheel data
 #	  structure used as backend in callout(9).
 # PREEMPTION allows the threads that are in the kernel to be preempted by
 #	  higher priority [interrupt] threads.  It helps with interactivity
 #	  and allows interrupt threads to run sooner rather than waiting.
 #	  WARNING! Only tested on amd64 and i386.
 # FULL_PREEMPTION instructs the kernel to preempt non-realtime kernel
 #	  threads.  Its sole use is to expose race conditions and other
 #	  bugs during development.  Enabling this option will reduce
 #	  performance and increase the frequency of kernel panics by
 #	  design.  If you aren't sure that you need it then you don't.
 #	  Relies on the PREEMPTION option.  DON'T TURN THIS ON.
 # SLEEPQUEUE_PROFILING enables rudimentary profiling of the hash table
 #	  used to hold active sleep queues as well as sleep wait message
 #	  frequency.
 # TURNSTILE_PROFILING enables rudimentary profiling of the hash table
 #	  used to hold active lock queues.
 # UMTX_PROFILING enables rudimentary profiling of the hash table used 
 #	  to hold active lock queues.
 # WITNESS enables the witness code which detects deadlocks and cycles
 #         during locking operations.
 # WITNESS_KDB causes the witness code to drop into the kernel debugger if
 #	  a lock hierarchy violation occurs or if locks are held when going to
 #	  sleep.
 # WITNESS_SKIPSPIN disables the witness checks on spin mutexes.
 options 	PREEMPTION
 options 	FULL_PREEMPTION
 options 	WITNESS
 options 	WITNESS_KDB
 options 	WITNESS_SKIPSPIN
 
 # LOCK_PROFILING - Profiling locks.  See LOCK_PROFILING(9) for details.
 options 	LOCK_PROFILING
 # Set the number of buffers and the hash size.  The hash size MUST be larger
 # than the number of buffers.  Hash size should be prime.
 options 	MPROF_BUFFERS="1536"
 options 	MPROF_HASH_SIZE="1543"
 
 # Profiling for the callout(9) backend.
 options 	CALLOUT_PROFILING
 
 # Profiling for internal hash tables.
 options 	SLEEPQUEUE_PROFILING
 options 	TURNSTILE_PROFILING
 options 	UMTX_PROFILING
 
 
 #####################################################################
 # COMPATIBILITY OPTIONS
 
 #
 # Implement system calls compatible with 4.3BSD and older versions of
 # FreeBSD.  You probably do NOT want to remove this as much current code
 # still relies on the 4.3 emulation.  Note that some architectures that
 # are supported by FreeBSD do not include support for certain important
 # aspects of this compatibility option, namely those related to the
 # signal delivery mechanism.
 #
 options 	COMPAT_43
 
 # Old tty interface.
 options 	COMPAT_43TTY
 
 # Note that as a general rule, COMPAT_FREEBSD<n> depends on
 # COMPAT_FREEBSD<n+1>, COMPAT_FREEBSD<n+2>, etc.
 
 # Enable FreeBSD4 compatibility syscalls
 options 	COMPAT_FREEBSD4
 
 # Enable FreeBSD5 compatibility syscalls
 options 	COMPAT_FREEBSD5
 
 # Enable FreeBSD6 compatibility syscalls
 options 	COMPAT_FREEBSD6
 
 # Enable FreeBSD7 compatibility syscalls
 options 	COMPAT_FREEBSD7
 
 # Enable FreeBSD9 compatibility syscalls
 options 	COMPAT_FREEBSD9
 
 # Enable FreeBSD10 compatibility syscalls
 options 	COMPAT_FREEBSD10
 
 # Enable FreeBSD11 compatibility syscalls
 options 	COMPAT_FREEBSD11
 
 # Enable Linux Kernel Programming Interface
 options 	COMPAT_LINUXKPI
 
 #
 # These three options provide support for System V Interface
 # Definition-style interprocess communication, in the form of shared
 # memory, semaphores, and message queues, respectively.
 #
 options 	SYSVSHM
 options 	SYSVSEM
 options 	SYSVMSG
 
 
 #####################################################################
 # DEBUGGING OPTIONS
 
 #
 # Compile with kernel debugger related code.
 #
 options 	KDB
 
 #
 # Print a stack trace of the current thread on the console for a panic.
 #
 options 	KDB_TRACE
 
 #
 # Don't enter the debugger for a panic. Intended for unattended operation
 # where you may want to enter the debugger from the console, but still want
 # the machine to recover from a panic.
 #
 options 	KDB_UNATTENDED
 
 #
 # Enable the ddb debugger backend.
 #
 options 	DDB
 
 #
 # Print the numerical value of symbols in addition to the symbolic
 # representation.
 #
 options 	DDB_NUMSYM
 
 #
 # Enable the remote gdb debugger backend.
 #
 options 	GDB
 
 #
 # SYSCTL_DEBUG enables a 'sysctl' debug tree that can be used to dump the
 # contents of the registered sysctl nodes on the console.  It is disabled by
 # default because it generates excessively verbose console output that can
 # interfere with serial console operation.
 #
 options 	SYSCTL_DEBUG
 
 #
 # Enable textdump by default; this disables kernel core dumps.
 #
 options		TEXTDUMP_PREFERRED
 
 #
 # Enable extra debug messages while performing textdumps.
 #
 options		TEXTDUMP_VERBOSE
 
 #
 # NO_SYSCTL_DESCR omits the sysctl node descriptions to save space in the
 # resulting kernel.
 options		NO_SYSCTL_DESCR
 
 #
 # MALLOC_DEBUG_MAXZONES enables multiple uma zones for malloc(9)
 # allocations that are smaller than a page.  The purpose is to isolate
 # different malloc types into hash classes, so that any buffer
 # overruns or use-after-free will usually only affect memory from
 # malloc types in that hash class.  This is purely a debugging tool;
 # by varying the hash function and tracking which hash class was
 # corrupted, the intersection of the hash classes from each instance
 # will point to a single malloc type that is being misused.  At this
 # point inspection or memguard(9) can be used to catch the offending
 # code.
 #
 options 	MALLOC_DEBUG_MAXZONES=8
 
 #
 # DEBUG_MEMGUARD builds and enables memguard(9), a replacement allocator
 # for the kernel used to detect modify-after-free scenarios.  See the
 # memguard(9) man page for more information on usage.
 #
 options 	DEBUG_MEMGUARD
 
 #
 # DEBUG_REDZONE enables detection of buffer underflows and buffer overflows
 # for malloc(9).
 #
 options 	DEBUG_REDZONE
 
 #
 # EARLY_PRINTF enables support for calling a special printf (eprintf)
 # very early in the kernel (before cn_init() has been called).  This
 # should only be used for debugging purposes early in boot.  Normally,
 # it is not defined.  It is commented out here because this feature
 # isn't generally available and the required eputc() isn't defined.
 #
 #options	EARLY_PRINTF
 
 #
 # KTRACE enables the system-call tracing facility ktrace(2).  To be more
 # SMP-friendly, KTRACE uses a worker thread to process most trace events
 # asynchronously to the thread generating the event.  This requires a
 # pre-allocated store of objects representing trace events.  The
 # KTRACE_REQUEST_POOL option specifies the initial size of this store.
 # The size of the pool can be adjusted both at boottime and runtime via
 # the kern.ktrace_request_pool tunable and sysctl.
 #
 options 	KTRACE			#kernel tracing
 options 	KTRACE_REQUEST_POOL=101
 
 #
 # KTR is a kernel tracing facility imported from BSD/OS.  It is
 # enabled with the KTR option.  KTR_ENTRIES defines the number of
 # entries in the circular trace buffer; it may be an arbitrary number.
 # KTR_BOOT_ENTRIES defines the number of entries during the early boot,
 # before malloc(9) is functional.
 # KTR_COMPILE defines the mask of events to compile into the kernel as
 # defined by the KTR_* constants in <sys/ktr.h>.  KTR_MASK defines the
 # initial value of the ktr_mask variable which determines at runtime
 # what events to trace.  KTR_CPUMASK determines which CPUs log
 # events, with bit X corresponding to CPU X.  The layout of the string
 # passed as KTR_CPUMASK must match a series of bitmasks, each of them
 # separated by the "," character (e.g.,
 # KTR_CPUMASK=0xAF,0xFFFFFFFFFFFFFFFF).  KTR_VERBOSE enables
 # dumping of KTR events to the console by default.  This functionality
 # can be toggled via the debug.ktr_verbose sysctl and defaults to off
 # if KTR_VERBOSE is not defined.  See ktr(4) and ktrdump(8) for details.
 #
 options 	KTR
 options 	KTR_BOOT_ENTRIES=1024
 options 	KTR_ENTRIES=(128*1024)
 options 	KTR_COMPILE=(KTR_ALL)
 options 	KTR_MASK=KTR_INTR
 options 	KTR_CPUMASK=0x3
 options 	KTR_VERBOSE
 
 #
 # ALQ(9) is a facility for the asynchronous queuing of records from the kernel
 # to a vnode, and is employed by services such as ktr(4) to produce trace
 # files based on a kernel event stream.  Records are written asynchronously
 # in a worker thread.
 #
 options 	ALQ
 options 	KTR_ALQ
 
 #
 # The INVARIANTS option is used in a number of source files to enable
 # extra sanity checking of internal structures.  This support is not
 # enabled by default because of the extra time it would take to check
 # for these conditions, which can only occur as a result of
 # programming errors.
 #
 options 	INVARIANTS
 
 #
 # The INVARIANT_SUPPORT option makes us compile in support for
 # verifying some of the internal structures.  It is a prerequisite for
 # 'INVARIANTS', as enabling 'INVARIANTS' will make these functions be
 # called.  The intent is that you can set 'INVARIANTS' for single
 # source files (by changing the source file or specifying it on the
 # command line) if you have 'INVARIANT_SUPPORT' enabled.  Also, if you
 # wish to build a kernel module with 'INVARIANTS', then adding
 # 'INVARIANT_SUPPORT' to your kernel will provide all the necessary
 # infrastructure without the added overhead.
 #
 options 	INVARIANT_SUPPORT
 
 #
 # The DIAGNOSTIC option is used to enable extra debugging information
 # from some parts of the kernel.  As this makes everything more noisy,
 # it is disabled by default.
 #
 options 	DIAGNOSTIC
 
 #
 # REGRESSION causes optional kernel interfaces necessary only for regression
 # testing to be enabled.  These interfaces may constitute security risks
 # when enabled, as they permit processes to easily modify aspects of the
 # run-time environment to reproduce unlikely or unusual (possibly normally
 # impossible) scenarios.
 #
 options 	REGRESSION
 
 #
 # This option lets some drivers co-exist that can't co-exist in a running
 # system.  This is used to be able to compile all kernel code in one go for
 # quality assurance purposes (like this file, which the option takes its name
 # from.)
 #
 options 	COMPILING_LINT
 
 #
 # STACK enables the stack(9) facility, allowing the capture of kernel stack
 # for the purpose of procinfo(1), etc.  stack(9) will also be compiled in
 # automatically if DDB(4) is compiled into the kernel.
 #
 options 	STACK
 
 #
 # The NUM_CORE_FILES option specifies the limit for the number of core
 # files generated by a particular process, when the core file format
 # specifier includes the %I pattern. Since we only have 1 character for
 # the core count in the format string, meaning the range will be 0-9, the
 # maximum value allowed for this option is 10.
 # This core file limit can be adjusted at runtime via the debug.ncores
 # sysctl.
 #
 options 	NUM_CORE_FILES=5
 
 #
 # The TSLOG option enables timestamped logging of events, especially
 # function entries/exits, in order to track the time spent by the kernel.
 # In particular, this is useful when investigating the early boot process,
 # before it is possible to use more sophisticated tools like DTrace.
 # The TSLOGSIZE option controls the size of the (preallocated, fixed
 # length) buffer used for storing these events (default: 262144 records).
 #
 # For security reasons the TSLOG option should not be enabled on systems
 # used in production.
 #
 options 	TSLOG
 options 	TSLOGSIZE=262144
 
 
 #####################################################################
 # PERFORMANCE MONITORING OPTIONS
 
 #
 # The hwpmc driver allows the use of in-CPU performance monitoring
 # counters for performance monitoring.  The base kernel needs to be configured
 # with the 'options' line, while the hwpmc device can be either compiled
 # in or loaded as a loadable kernel module.
 #
 # Additional configuration options may be required on specific architectures,
 # please see hwpmc(4).
 
 device		hwpmc			# Driver (also a loadable module)
 options 	HWPMC_DEBUG
 options 	HWPMC_HOOKS		# Other necessary kernel hooks
 
 
 #####################################################################
 # NETWORKING OPTIONS
 
 #
 # Protocol families
 #
 options 	INET			#Internet communications protocols
 options 	INET6			#IPv6 communications protocols
 
 options		RATELIMIT		# TX rate limiting support
 
 options 	ROUTETABLES=2		# allocated fibs; default is 1.  Up to 65536
 					# is allowed, but a bad idea as they are large.
 
 options 	TCP_OFFLOAD		# TCP offload support.
 
 # In order to enable IPSEC you MUST also add device crypto to 
 # your kernel configuration
 options 	IPSEC			#IP security (requires device crypto)
 
 # Option IPSEC_SUPPORT does not enable IPsec, but makes it possible to 
 # load it as a kernel module. You still MUST add device crypto to your kernel
 # configuration.
 options		IPSEC_SUPPORT
 #options 	IPSEC_DEBUG		#debug for IP security
 
 #
 # SMB/CIFS requester
 # NETSMB enables support for SMB protocol, it requires LIBMCHAIN and LIBICONV
 # options.
 options 	NETSMB			#SMB/CIFS requester
 
 # mchain library. It can be either loaded as KLD or compiled into kernel
 options 	LIBMCHAIN
 
 # libalias library, performing NAT
 options 	LIBALIAS
 
 #
 # SCTP is a NEW transport protocol defined by
 # RFC2960 updated by RFC3309 and RFC3758.. and
 # soon to have a new base RFC and many many more
 # extensions. This release supports all the extensions
 # including many drafts (most about to become RFC's).
 # It is the reference implementation of SCTP
 # and is quite well tested.
 #
 # Note YOU MUST have both INET and INET6 defined.
 # You don't have to enable V6, but SCTP is 
 # dual stacked and so far we have not torn apart
 # the V6 and V4.. since an association can span
 # both a V6 and V4 address at the SAME time :-)
 #
 options 	SCTP
 # There are bunches of options:
 # this one turns on all sorts of
 # nasty printing that you can
 # do. It's all controlled by a
 # bit mask (settable by socket opt and
 # by sysctl). Including will not cause
 # logging until you set the bits.. but it
 # can be quite verbose.. so without this
 # option we don't do any of the tests for
 # bits and prints.. which makes the code run
 # faster.. if you are not debugging don't use.
 options 	SCTP_DEBUG
 #
 # All the options after that turn on specific types of
 # logging. You can monitor CWND growth, flight size
 # and all sorts of things. Go look at the code and
 # see. I have used this to produce interesting 
 # charts and graphs as well :->
 # 
 # I have not yet committed the tools to get and print
 # the logs, I will do that eventually .. before then
 # if you want them send me an email rrs@freebsd.org
 # You basically must have ktr(4) enabled for these
 # and you then set the sysctl to turn on/off various
 # logging bits. Use ktrdump(8) to pull the log and run
 # it through a display program.. and graphs and other
 # things too.
 #
 options 	SCTP_LOCK_LOGGING
 options 	SCTP_MBUF_LOGGING
 options 	SCTP_MBCNT_LOGGING
 options 	SCTP_PACKET_LOGGING
 options 	SCTP_LTRACE_CHUNKS
 options 	SCTP_LTRACE_ERRORS
 
 
 # altq(9). Enable the base part of the hooks with the ALTQ option.
 # Individual disciplines must be built into the base system and can not be
 # loaded as modules at this point. ALTQ requires a stable TSC so if yours is
 # broken or changes with CPU throttling then you must also have the ALTQ_NOPCC
 # option.
 options 	ALTQ
 options 	ALTQ_CBQ	# Class Based Queueing
 options 	ALTQ_RED	# Random Early Detection
 options 	ALTQ_RIO	# RED In/Out
 options 	ALTQ_CODEL	# CoDel Active Queueing
 options 	ALTQ_HFSC	# Hierarchical Packet Scheduler
 options 	ALTQ_FAIRQ	# Fair Packet Scheduler
 options 	ALTQ_CDNR	# Traffic conditioner
 options 	ALTQ_PRIQ	# Priority Queueing
 options 	ALTQ_NOPCC	# Required if the TSC is unusable
 options 	ALTQ_DEBUG
 
 # netgraph(4). Enable the base netgraph code with the NETGRAPH option.
 # Individual node types can be enabled with the corresponding option
 # listed below; however, this is not strictly necessary as netgraph
 # will automatically load the corresponding KLD module if the node type
 # is not already compiled into the kernel. Each type below has a
 # corresponding man page, e.g., ng_async(4).
 options 	NETGRAPH		# netgraph(4) system
 options 	NETGRAPH_DEBUG		# enable extra debugging, this
 					# affects netgraph(4) and nodes
 # Node types
 options 	NETGRAPH_ASYNC
 options 	NETGRAPH_ATMLLC
 options 	NETGRAPH_ATM_ATMPIF
 options 	NETGRAPH_BLUETOOTH		# ng_bluetooth(4)
 options 	NETGRAPH_BLUETOOTH_BT3C		# ng_bt3c(4)
 options 	NETGRAPH_BLUETOOTH_HCI		# ng_hci(4)
 options 	NETGRAPH_BLUETOOTH_L2CAP	# ng_l2cap(4)
 options 	NETGRAPH_BLUETOOTH_SOCKET	# ng_btsocket(4)
 options 	NETGRAPH_BLUETOOTH_UBT		# ng_ubt(4)
 options 	NETGRAPH_BLUETOOTH_UBTBCMFW	# ubtbcmfw(4)
 options 	NETGRAPH_BPF
 options 	NETGRAPH_BRIDGE
 options 	NETGRAPH_CAR
 options 	NETGRAPH_CISCO
 options 	NETGRAPH_DEFLATE
 options 	NETGRAPH_DEVICE
 options 	NETGRAPH_ECHO
 options 	NETGRAPH_EIFACE
 options 	NETGRAPH_ETHER
 options 	NETGRAPH_FRAME_RELAY
 options 	NETGRAPH_GIF
 options 	NETGRAPH_GIF_DEMUX
 options 	NETGRAPH_HOLE
 options 	NETGRAPH_IFACE
 options 	NETGRAPH_IP_INPUT
 options 	NETGRAPH_IPFW
 options 	NETGRAPH_KSOCKET
 options 	NETGRAPH_L2TP
 options 	NETGRAPH_LMI
 options 	NETGRAPH_MPPC_COMPRESSION
 options 	NETGRAPH_MPPC_ENCRYPTION
 options 	NETGRAPH_NETFLOW
 options 	NETGRAPH_NAT
 options 	NETGRAPH_ONE2MANY
 options 	NETGRAPH_PATCH
 options 	NETGRAPH_PIPE
 options 	NETGRAPH_PPP
 options 	NETGRAPH_PPPOE
 options 	NETGRAPH_PPTPGRE
 options 	NETGRAPH_PRED1
 options 	NETGRAPH_RFC1490
 options 	NETGRAPH_SOCKET
 options 	NETGRAPH_SPLIT
 options 	NETGRAPH_SPPP
 options 	NETGRAPH_TAG
 options 	NETGRAPH_TCPMSS
 options 	NETGRAPH_TEE
 options 	NETGRAPH_UI
 options 	NETGRAPH_VJC
 options 	NETGRAPH_VLAN
 
 # NgATM - Netgraph ATM
 options 	NGATM_ATM
 options 	NGATM_ATMBASE
 options 	NGATM_SSCOP
 options 	NGATM_SSCFU
 options 	NGATM_UNI
 options 	NGATM_CCATM
 
 device		mn	# Munich32x/Falc54 Nx64kbit/sec cards.
 
 # Network stack virtualization.
 options	VIMAGE
 options	VNET_DEBUG	# debug for VIMAGE
 
 #
 # Network interfaces:
 #  The `loop' device is MANDATORY when networking is enabled.
 device		loop
 
 #  The `ether' device provides generic code to handle
 #  Ethernets; it is MANDATORY when an Ethernet device driver is
 #  configured or token-ring is enabled.
 device		ether
 
 #  The `vlan' device implements the VLAN tagging of Ethernet frames
 #  according to IEEE 802.1Q.
 device		vlan
 
 # The `vxlan' device implements the VXLAN encapsulation of Ethernet
 # frames in UDP packets according to RFC7348.
 device		vxlan
 
 #  The `wlan' device provides generic code to support 802.11
 #  drivers, including host AP mode; it is MANDATORY for the wi
 #  and ath drivers and will eventually be required by all 802.11 drivers.
 device		wlan
 options 	IEEE80211_DEBUG		#enable debugging msgs
 options 	IEEE80211_AMPDU_AGE	#age frames in AMPDU reorder q's
 options 	IEEE80211_SUPPORT_MESH	#enable 802.11s D3.0 support
 options 	IEEE80211_SUPPORT_TDMA	#enable TDMA support
 
 #  The `wlan_wep', `wlan_tkip', and `wlan_ccmp' devices provide
 #  support for WEP, TKIP, and AES-CCMP crypto protocols optionally
 #  used with 802.11 devices that depend on the `wlan' module.
 device		wlan_wep
 device		wlan_ccmp
 device		wlan_tkip
 
 #  The `wlan_xauth' device provides support for external (i.e. user-mode)
 #  authenticators for use with 802.11 drivers that use the `wlan'
 #  module and support 802.1x and/or WPA security protocols.
 device		wlan_xauth
 
 #  The `wlan_acl' device provides a MAC-based access control mechanism
 #  for use with 802.11 drivers operating in ap mode and using the
 #  `wlan' module.
 #  The 'wlan_amrr' device provides the AMRR transmit rate control algorithm.
 device		wlan_acl
 device		wlan_amrr
 
 # Generic TokenRing
 device		token
 
 #  The `fddi' device provides generic code to support FDDI.
 device		fddi
 
 #  The `arcnet' device provides generic code to support Arcnet.
 device		arcnet
 
 #  The `sppp' device serves a similar role for certain types
 #  of synchronous PPP links (like `cx', `ar').
 device		sppp
 
 #  The `bpf' device enables the Berkeley Packet Filter.  Be
 #  aware of the legal and administrative consequences of enabling this
 #  option.  DHCP requires bpf.
 device		bpf
 
 #  The `netmap' device implements memory-mapped access to network
 #  devices from userspace, enabling wire-speed packet capture and
 #  generation even at 10Gbit/s. Requires support in the device
 #  driver. Supported drivers are ixgbe, e1000, re.
 device		netmap
 
 #  The `disc' device implements a minimal network interface,
 #  which throws away all packets sent and never receives any.  It is
 #  included for testing and benchmarking purposes.
 device		disc
 
 # The `epair' device implements a virtual back-to-back connected Ethernet
 # like interface pair.
 device		epair
 
 #  The `edsc' device implements a minimal Ethernet interface,
 #  which discards all packets sent and receives none.
 device		edsc
 
 #  The `tap' device is a pty-like virtual Ethernet interface
 device		tap
 
 #  The `tun' device implements (user-)ppp and nos-tun(8)
 device		tun
 
 #  The `gif' device implements IPv6 over IPv4 tunneling,
 #  IPv4 over IPv6 tunneling, IPv4 over IPv4 tunneling and
 #  IPv6 over IPv6 tunneling.
 #  The `gre' device implements GRE (Generic Routing Encapsulation) tunneling,
 #  as specified in the RFC 2784 and RFC 2890.
 #  The `me' device implements Minimal Encapsulation within IPv4 as
 #  specified in the RFC 2004.
 #  The XBONEHACK option allows the same pair of addresses to be configured on
 #  multiple gif interfaces.
 device		gif
 device		gre
 device		me
 options 	XBONEHACK
 
 #  The `stf' device implements 6to4 encapsulation.
 device		stf
 
 # The pf packet filter consists of three devices:
 #  The `pf' device provides /dev/pf and the firewall code itself.
 #  The `pflog' device provides the pflog0 interface which logs packets.
 #  The `pfsync' device provides the pfsync0 interface used for
 #   synchronization of firewall state tables (over the net).
 device		pf
 device		pflog
 device		pfsync
 
 # Bridge interface.
 device		if_bridge
 
 # Common Address Redundancy Protocol. See carp(4) for more details.
 device		carp
 
 # IPsec interface.
 device		enc
 
 # Link aggregation interface.
 device		lagg
 
 #
 # Internet family options:
 #
 # MROUTING enables the kernel multicast packet forwarder, which works
 # with mrouted and XORP.
 #
 # IPFIREWALL enables support for IP firewall construction, in
 # conjunction with the `ipfw' program.  IPFIREWALL_VERBOSE sends
 # logged packets to the system logger.  IPFIREWALL_VERBOSE_LIMIT
 # limits the number of times a matching entry can be logged.
 #
 # WARNING:  IPFIREWALL defaults to a policy of "deny ip from any to any"
 # and if you do not add other rules during startup to allow access,
 # YOU WILL LOCK YOURSELF OUT.  It is suggested that you set firewall_type=open
 # in /etc/rc.conf when first enabling this feature, then refining the
 # firewall rules in /etc/rc.firewall after you've tested that the new kernel
 # feature works properly.
 #
 # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to
 # allow everything.  Use with care, if a cracker can crash your
 # firewall machine, they can get to your protected machines.  However,
 # if you are using it as an as-needed filter for specific problems as
 # they arise, then this may be for you.  Changing the default to 'allow'
 # means that you won't get stuck if the kernel and /sbin/ipfw binary get
 # out of sync.
 #
 # IPDIVERT enables the divert IP sockets, used by ``ipfw divert''.  It
 # depends on IPFIREWALL if compiled into the kernel.
 #
 # IPFIREWALL_NAT adds support for in kernel nat in ipfw, and it requires
 # LIBALIAS.
 #
 # IPFIREWALL_NAT64 adds support for in kernel NAT64 in ipfw.
 #
 # IPFIREWALL_NPTV6 adds support for in kernel NPTv6 in ipfw.
 #
 # IPFIREWALL_PMOD adds support for the protocol modification module.
 # Currently it supports only TCP MSS modification.
 #
 # IPSTEALTH enables code to support stealth forwarding (i.e., forwarding
 # packets without touching the TTL).  This can be useful to hide firewalls
 # from traceroute and similar tools.
 #
 # PF_DEFAULT_TO_DROP causes the default pf(4) rule to deny everything.
 #
 # TCPDEBUG enables code which keeps traces of the TCP state machine
 # for sockets with the SO_DEBUG option set, which can then be examined
 # using the trpt(8) utility.
 #
 # TCPPCAP enables code which keeps the last n packets sent and received
 # on a TCP socket.
 #
 # TCP_HHOOK enables the hhook(9) framework hooks for the TCP stack.
 #
 # RADIX_MPATH provides support for equal-cost multi-path routing.
 #
 options 	MROUTING		# Multicast routing
 options 	IPFIREWALL		#firewall
 options 	IPFIREWALL_VERBOSE	#enable logging to syslogd(8)
 options 	IPFIREWALL_VERBOSE_LIMIT=100	#limit verbosity
 options 	IPFIREWALL_DEFAULT_TO_ACCEPT	#allow everything by default
 options 	IPFIREWALL_NAT		#ipfw kernel nat support
 options 	IPFIREWALL_NAT64	#ipfw kernel NAT64 support
 options 	IPFIREWALL_NPTV6	#ipfw kernel IPv6 NPT support
 options 	IPDIVERT		#divert sockets
 options 	IPFILTER		#ipfilter support
 options 	IPFILTER_LOG		#ipfilter logging
 options 	IPFILTER_LOOKUP		#ipfilter pools
 options 	IPFILTER_DEFAULT_BLOCK	#block all packets by default
 options 	IPSTEALTH		#support for stealth forwarding
 options 	PF_DEFAULT_TO_DROP	#drop everything by default
 options 	TCPDEBUG
 options 	TCPPCAP
 options 	TCP_HHOOK
 options 	RADIX_MPATH
 
 # The MBUF_STRESS_TEST option enables options which create
 # various random failures / extreme cases related to mbuf
 # functions.  See mbuf(9) for a list of available test cases.
 # MBUF_PROFILING enables code to profile the mbuf chains
 # exiting the system (via participating interfaces) and
 # return a logarithmic histogram of monitored parameters
 # (e.g. packet size, wasted space, number of mbufs in chain).
 options 	MBUF_STRESS_TEST
 options 	MBUF_PROFILING
 
 # Statically link in accept filters
 options 	ACCEPT_FILTER_DATA
 options 	ACCEPT_FILTER_DNS
 options 	ACCEPT_FILTER_HTTP
 
 # TCP_SIGNATURE adds support for RFC 2385 (TCP-MD5) digests. These are
 # carried in TCP option 19. This option is commonly used to protect
 # TCP sessions (e.g. BGP) where IPSEC is not available or not desirable.
 # This is enabled on a per-socket basis using the TCP_MD5SIG socket option.
 # This requires the use of 'device crypto' and either 'options IPSEC' or
 # 'options IPSEC_SUPPORT'.
 options 	TCP_SIGNATURE		#include support for RFC 2385
 
 # DUMMYNET enables the "dummynet" bandwidth limiter.  You need IPFIREWALL
 # as well.  See dummynet(4) and ipfw(8) for more info.  When you run
 # DUMMYNET it is advisable to also have at least "options HZ=1000" to achieve
 # a smooth scheduling of the traffic.
 options 	DUMMYNET
 
 #####################################################################
 # FILESYSTEM OPTIONS
 
 #
 # Only the root filesystem needs to be statically compiled or preloaded
 # as module; everything else will be automatically loaded at mount
 # time.  Some people still prefer to statically compile other
 # filesystems as well.
 #
 # NB: The UNION filesystem was known to be buggy in the past.  It is now
 # being actively maintained, although there are still some issues being
 # resolved.
 #
 
 # One of these is mandatory:
 options 	FFS			#Fast filesystem
 options 	NFSCL			#Network File System client
 
 # The rest are optional:
 options 	AUTOFS			#Automounter filesystem
 options 	CD9660			#ISO 9660 filesystem
 options 	FDESCFS			#File descriptor filesystem
 options 	FUSE			#FUSE support module
 options 	MSDOSFS			#MS DOS File System (FAT, FAT32)
 options 	NFSLOCKD		#Network Lock Manager
 options 	NFSD			#Network Filesystem Server
 options 	KGSSAPI			#Kernel GSSAPI implementation
 
 options 	NULLFS			#NULL filesystem
 options 	PROCFS			#Process filesystem (requires PSEUDOFS)
 options 	PSEUDOFS		#Pseudo-filesystem framework
 options 	PSEUDOFS_TRACE		#Debugging support for PSEUDOFS
 options 	SMBFS			#SMB/CIFS filesystem
 options 	TMPFS			#Efficient memory filesystem
 options 	UDF			#Universal Disk Format
 options 	UNIONFS			#Union filesystem
 # The xFS_ROOT options REQUIRE the associated ``options xFS''
 options 	NFS_ROOT		#NFS usable as root device
 
 # Soft updates is a technique for improving filesystem speed and
 # making abrupt shutdown less risky.
 #
 options 	SOFTUPDATES
 
 # Extended attributes allow additional data to be associated with files,
 # and is used for ACLs, Capabilities, and MAC labels.
 # See src/sys/ufs/ufs/README.extattr for more information.
 options 	UFS_EXTATTR
 options 	UFS_EXTATTR_AUTOSTART
 
 # Access Control List support for UFS filesystems.  The current ACL
 # implementation requires extended attribute support, UFS_EXTATTR,
 # for the underlying filesystem.
 # See src/sys/ufs/ufs/README.acls for more information.
 options 	UFS_ACL
 
 # Directory hashing improves the speed of operations on very large
 # directories at the expense of some memory.
 options 	UFS_DIRHASH
 
 # Gjournal-based UFS journaling support.
 options 	UFS_GJOURNAL
 
 # Make space in the kernel for a root filesystem on a md device.
 # Define to the number of kilobytes to reserve for the filesystem.
 # This is now optional.
 # If not defined, the root filesystem passed in as the MFS_IMAGE makeoption
 # will be automatically embedded in the kernel during linking. Its exact size
 # will be consumed within the kernel.
 # If defined, the old way of embedding the filesystem in the kernel will be
 # used. That is to say MD_ROOT_SIZE KB will be allocated in the kernel and
 # later, the filesystem image passed in as the MFS_IMAGE makeoption will be
 # dd'd into the reserved space if it fits.
 options 	MD_ROOT_SIZE=10
 
 # Make the md device a potential root device, either with preloaded
 # images of type mfs_root or md_root.
 options 	MD_ROOT
 
 # Write-protect the md root device so that it may not be mounted writeable.
 options 	MD_ROOT_READONLY
 
 # Disk quotas are supported when this option is enabled.
 options 	QUOTA			#enable disk quotas
 
 # If you are running a machine just as a fileserver for PC and Mac
 # users, using Samba, you may consider setting this option
 # and keeping all those users' directories on a filesystem that is
 # mounted with the suiddir option. This gives new files the same
 # ownership as the directory (similar to group). It's a security hole
 # if you let these users run programs, so confine it to file-servers
 # (but it'll save you lots of headaches in those cases). Root owned
 # directories are exempt and X bits are cleared. The suid bit must be
 # set on the directory as well; see chmod(1). PC owners can't see/set
 # ownerships so they keep getting their toes trodden on. This saves
 # you all the support calls as the filesystem it's used on will act as
 # they expect: "It's my dir so it must be my file".
 #
 options 	SUIDDIR
 
 # NFS options:
 options 	NFS_MINATTRTIMO=3	# VREG attrib cache timeout in sec
 options 	NFS_MAXATTRTIMO=60
 options 	NFS_MINDIRATTRTIMO=30	# VDIR attrib cache timeout in sec
 options 	NFS_MAXDIRATTRTIMO=60
 options 	NFS_DEBUG		# Enable NFS Debugging
 
 #
 # Add support for the EXT2FS filesystem of Linux fame.  Be a bit
 # careful with this - the ext2fs code has a tendency to lag behind
 # changes and not be exercised very much, so mounting read/write could
 # be dangerous (and even mounting read only could result in panics.)
 #
 options 	EXT2FS
 
 # Cryptographically secure random number generator; /dev/random
 device		random
 
 # The system memory devices; /dev/mem, /dev/kmem
 device		mem
 
 # The kernel symbol table device; /dev/ksyms
 device		ksyms
 
 # Optional character code conversion support with LIBICONV.
 # Each option requires its base file system and LIBICONV.
 options 	CD9660_ICONV
 options 	MSDOSFS_ICONV
 options 	UDF_ICONV
 
 
 #####################################################################
 # POSIX P1003.1B
 
 # Real-time extensions added in the 1993 POSIX standard
 # _KPOSIX_PRIORITY_SCHEDULING: Build in _POSIX_PRIORITY_SCHEDULING
 
 options 	_KPOSIX_PRIORITY_SCHEDULING
 # p1003_1b_semaphores are very experimental,
 # user should be ready to assist in debugging if problems arise.
 options 	P1003_1B_SEMAPHORES
 
 # POSIX message queue
 options 	P1003_1B_MQUEUE
 
 #####################################################################
 # SECURITY POLICY PARAMETERS
 
 # Support for BSM audit
 options 	AUDIT
 
 # Support for Mandatory Access Control (MAC):
 options 	MAC
 options 	MAC_BIBA
 options 	MAC_BSDEXTENDED
 options 	MAC_IFOFF
 options 	MAC_LOMAC
 options 	MAC_MLS
 options 	MAC_NONE
 options 	MAC_PARTITION
 options 	MAC_PORTACL
 options 	MAC_SEEOTHERUIDS
 options 	MAC_STUB
 options 	MAC_TEST
 
 # Support for Capsicum
 options 	CAPABILITIES	# fine-grained rights on file descriptors
 options 	CAPABILITY_MODE	# sandboxes with no global namespace access
 
 
 #####################################################################
 # CLOCK OPTIONS
 
 # The granularity of operation is controlled by the kernel option HZ whose
 # default value (1000 on most architectures) means a granularity of 1ms
 # (1s/HZ).  Historically, the default was 100, but finer granularity is
 # required for DUMMYNET and other systems on modern hardware.  There are
 # reasonable arguments that HZ should, in fact, be 100 still; consider,
 # that reducing the granularity too much might cause excessive overhead in
 # clock interrupt processing, potentially causing ticks to be missed and thus
 # actually reducing the accuracy of operation.
 
 options 	HZ=100
 
 # Enable support for the kernel PLL to use an external PPS signal,
 # under supervision of [x]ntpd(8)
 # More info in ntpd documentation: http://www.eecis.udel.edu/~ntp
 
 options 	PPS_SYNC
 
 # Enable support for generic feed-forward clocks in the kernel.
 # The feed-forward clock support is an alternative to the feedback oriented
 # ntpd/system clock approach, and is to be used with a feed-forward
 # synchronization algorithm such as the RADclock:
 # More info here: http://www.synclab.org/radclock
 
 options 	FFCLOCK
 
 
 #####################################################################
 # SCSI DEVICES
 
 # SCSI DEVICE CONFIGURATION
 
 # The SCSI subsystem consists of the `base' SCSI code, a number of
 # high-level SCSI device `type' drivers, and the low-level host-adapter
 # device drivers.  The host adapters are listed in the ISA and PCI
 # device configuration sections below.
 #
 # It is possible to wire down your SCSI devices so that a given bus,
 # target, and LUN always come on line as the same device unit.  In
 # earlier versions the unit numbers were assigned in the order that
 # the devices were probed on the SCSI bus.  This means that if you
 # removed a disk drive, you may have had to rewrite your /etc/fstab
 # file, and also that you had to be careful when adding a new disk
 # as it may have been probed earlier and moved your device configuration
 # around.  (See also option GEOM_VOL for a different solution to this
 # problem.)
 
 # This old behavior is maintained as the default behavior.  The unit
 # assignment begins with the first non-wired down unit for a device
 # type.  For example, if you wire a disk as "da3" then the first
 # non-wired disk will be assigned da4.
 
 # The syntax for wiring down devices is:
 
 hint.scbus.0.at="ahc0"
 hint.scbus.1.at="ahc1"
 hint.scbus.1.bus="0"
 hint.scbus.3.at="ahc2"
 hint.scbus.3.bus="0"
 hint.scbus.2.at="ahc2"
 hint.scbus.2.bus="1"
 hint.da.0.at="scbus0"
 hint.da.0.target="0"
 hint.da.0.unit="0"
 hint.da.1.at="scbus3"
 hint.da.1.target="1"
 hint.da.2.at="scbus2"
 hint.da.2.target="3"
 hint.sa.1.at="scbus1"
 hint.sa.1.target="6"
 
 # "units" (SCSI logical unit number) that are not specified are
 # treated as if specified as LUN 0.
 
 # All SCSI devices allocate as many units as are required.
 
 # The ch driver drives SCSI Media Changer ("jukebox") devices.
 #
 # The da driver drives SCSI Direct Access ("disk") and Optical Media
 # ("WORM") devices.
 #
 # The sa driver drives SCSI Sequential Access ("tape") devices.
 #
 # The cd driver drives SCSI Read Only Direct Access ("cd") devices.
 #
 # The ses driver drives SCSI Environment Services ("ses") and
 # SAF-TE ("SCSI Accessible Fault-Tolerant Enclosure") devices.
 #
 # The pt driver drives SCSI Processor devices.
 #
 # The sg driver provides a passthrough API that is compatible with the
 # Linux SG driver.  It will work in conjunction with the COMPAT_LINUX
 # option to run linux SG apps.  It can also stand on its own and provide
 # source level API compatibility for porting apps to FreeBSD.
 #
 # Target Mode support is provided here but also requires that a SIM
 # (SCSI Host Adapter Driver) provide support as well.
 #
 # The targ driver provides target mode support as a Processor type device.
 # It exists to give the minimal context necessary to respond to Inquiry
 # commands. There is a sample user application that shows how the rest
 # of the command support might be done in /usr/share/examples/scsi_target.
 #
 # The targbh driver provides target mode support and exists to respond
 # to incoming commands that do not otherwise have a logical unit assigned
 # to them.
 #
 # The pass driver provides a passthrough API to access the CAM subsystem.
 
 device		scbus		#base SCSI code
 device		ch		#SCSI media changers
 device		da		#SCSI direct access devices (aka disks)
 device		sa		#SCSI tapes
 device		cd		#SCSI CD-ROMs
 device		ses		#Enclosure Services (SES and SAF-TE)
 device		pt		#SCSI processor
 device		targ		#SCSI Target Mode Code
 device		targbh		#SCSI Target Mode Blackhole Device
 device		pass		#CAM passthrough driver
 device		sg		#Linux SCSI passthrough
 device		ctl		#CAM Target Layer
 
 # CAM OPTIONS:
 # debugging options:
 # CAMDEBUG		Compile in all possible debugging.
 # CAM_DEBUG_COMPILE	Debug levels to compile in.
 # CAM_DEBUG_FLAGS	Debug levels to enable on boot.
 # CAM_DEBUG_BUS		Limit debugging to the given bus.
 # CAM_DEBUG_TARGET	Limit debugging to the given target.
 # CAM_DEBUG_LUN		Limit debugging to the given lun.
 # CAM_DEBUG_DELAY	Delay in us after printing each debug line.
 #
 # CAM_MAX_HIGHPOWER: Maximum number of concurrent high power (start unit) cmds
 # SCSI_NO_SENSE_STRINGS: When defined disables sense descriptions
 # SCSI_NO_OP_STRINGS: When defined disables opcode descriptions
 # SCSI_DELAY: The number of MILLISECONDS to freeze the SIM (scsi adapter)
 #             queue after a bus reset, and the number of milliseconds to
 #             freeze the device queue after a bus device reset.  This
 #             can be changed at boot and runtime with the
 #             kern.cam.scsi_delay tunable/sysctl.
 options 	CAMDEBUG
 options 	CAM_DEBUG_COMPILE=-1
 options 	CAM_DEBUG_FLAGS=(CAM_DEBUG_INFO|CAM_DEBUG_PROBE|CAM_DEBUG_PERIPH)
 options 	CAM_DEBUG_BUS=-1
 options 	CAM_DEBUG_TARGET=-1
 options 	CAM_DEBUG_LUN=-1
 options 	CAM_DEBUG_DELAY=1
 options 	CAM_MAX_HIGHPOWER=4
 options 	SCSI_NO_SENSE_STRINGS
 options 	SCSI_NO_OP_STRINGS
 options 	SCSI_DELAY=5000	# Be pessimistic about Joe SCSI device
 options 	CAM_IOSCHED_DYNAMIC
 
 # Options for the CAM CDROM driver:
 # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN
 # CHANGER_MAX_BUSY_SECONDS: Maximum time quantum per changer LUN, only
 #                           enforced if there is I/O waiting for another LUN
 # The compiled in defaults for these variables are 2 and 10 seconds,
 # respectively.
 #
 # These can also be changed on the fly with the following sysctl variables:
 # kern.cam.cd.changer.min_busy_seconds
 # kern.cam.cd.changer.max_busy_seconds
 #
 options 	CHANGER_MIN_BUSY_SECONDS=2
 options 	CHANGER_MAX_BUSY_SECONDS=10
 
 # Options for the CAM sequential access driver:
 # SA_IO_TIMEOUT: Timeout for read/write/wfm  operations, in minutes
 # SA_SPACE_TIMEOUT: Timeout for space operations, in minutes
 # SA_REWIND_TIMEOUT: Timeout for rewind operations, in minutes
 # SA_ERASE_TIMEOUT: Timeout for erase operations, in minutes
 # SA_1FM_AT_EOD: Default to model which only has a default one filemark at EOT.
 options 	SA_IO_TIMEOUT=4
 options 	SA_SPACE_TIMEOUT=60
 options 	SA_REWIND_TIMEOUT=(2*60)
 options 	SA_ERASE_TIMEOUT=(4*60)
 options 	SA_1FM_AT_EOD
 
 # Optional timeout for the CAM processor target (pt) device
 # This is specified in seconds.  The default is 60 seconds.
 options 	SCSI_PT_DEFAULT_TIMEOUT=60
 
 # Optional enable of doing SES passthrough on other devices (e.g., disks)
 #
 # Normally disabled because a lot of newer SCSI disks report themselves
 # as having SES capabilities, but this can then clot up attempts to build
 # a topology with the SES device that's on the box these drives are in....
 options 	SES_ENABLE_PASSTHROUGH
 
 
 #####################################################################
 # MISCELLANEOUS DEVICES AND OPTIONS
 
 device		pty		#BSD-style compatibility pseudo ttys
 device		nmdm		#back-to-back tty devices
 device		md		#Memory/malloc disk
 device		snp		#Snoop device - to look at pty/vty/etc..
 device		ccd		#Concatenated disk driver
 device		firmware	#firmware(9) support
 
 # Kernel side iconv library
 options 	LIBICONV
 
 # Size of the kernel message buffer.  Should be N * pagesize.
 options 	MSGBUF_SIZE=40960
 
 
 #####################################################################
 # HARDWARE BUS CONFIGURATION
 
 #
 # PCI bus & PCI options:
 #
 device		pci
 options 	PCI_HP			# PCI-Express native HotPlug
 options 	PCI_IOV			# PCI SR-IOV support
 
 
 #####################################################################
 # HARDWARE DEVICE CONFIGURATION
 
 # For ISA the required hints are listed.
 # PCI, CardBus, SD/MMC and pccard are self identifying buses, so
 # no hints are needed.
 
 #
 # Mandatory devices:
 #
 
 # These options are valid for other keyboard drivers as well.
 options 	KBD_DISABLE_KEYMAP_LOAD	# refuse to load a keymap
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
 
 device		kbdmux			# keyboard multiplexer
 options		KBDMUX_DFLT_KEYMAP	# specify the built-in keymap
 makeoptions	KBDMUX_DFLT_KEYMAP=it.iso
 
 options 	FB_DEBUG		# Frame buffer debugging
 
 device		splash			# Splash screen and screen saver support
 
 # Various screen savers.
 device		blank_saver
 device		daemon_saver
 device		dragon_saver
 device		fade_saver
 device		fire_saver
 device		green_saver
 device		logo_saver
 device		rain_saver
 device		snake_saver
 device		star_saver
 device		warp_saver
 
 # The syscons console driver (SCO color console compatible).
 device		sc
 hint.sc.0.at="isa"
 options 	MAXCONS=16		# number of virtual consoles
 options 	SC_ALT_MOUSE_IMAGE	# simplified mouse cursor in text mode
 options 	SC_DFLT_FONT		# compile font in
 makeoptions	SC_DFLT_FONT=cp850
 options 	SC_DISABLE_KDBKEY	# disable `debug' key
 options 	SC_DISABLE_REBOOT	# disable reboot key sequence
 options 	SC_HISTORY_SIZE=200	# number of history buffer lines
 options 	SC_MOUSE_CHAR=0x3	# char code for text mode mouse cursor
 options 	SC_PIXEL_MODE		# add support for the raster text mode
 
 # The following options will let you change the default colors of syscons.
 options 	SC_NORM_ATTR=(FG_GREEN|BG_BLACK)
 options 	SC_NORM_REV_ATTR=(FG_YELLOW|BG_GREEN)
 options 	SC_KERNEL_CONS_ATTR=(FG_RED|BG_BLACK)
 options 	SC_KERNEL_CONS_REV_ATTR=(FG_BLACK|BG_RED)
 
 # The following options will let you change the default behavior of
 # cut-n-paste feature
 options 	SC_CUT_SPACES2TABS	# convert leading spaces into tabs
 options 	SC_CUT_SEPCHARS=\"x09\"	# set of characters that delimit words
 					# (default is single space - \"x20\")
 
 # If you have a two button mouse, you may want to add the following option
 # to use the right button of the mouse to paste text.
 options 	SC_TWOBUTTON_MOUSE
 
 # You can selectively disable features in syscons.
 options 	SC_NO_CUTPASTE
 options 	SC_NO_FONT_LOADING
 options 	SC_NO_HISTORY
 options 	SC_NO_MODE_CHANGE
 options 	SC_NO_SYSMOUSE
 options 	SC_NO_SUSPEND_VTYSWITCH
 
 # `flags' for sc
 #	0x80	Put the video card in the VESA 800x600 dots, 16 color mode
 #	0x100	Probe for a keyboard device periodically if one is not present
 
 # Enable experimental features of the syscons terminal emulator (teken).
 options 	TEKEN_CONS25		# cons25-style terminal emulation
 options 	TEKEN_UTF8		# UTF-8 output handling
 
 # The vt video console driver.
 device		vt
 options		VT_ALT_TO_ESC_HACK=1	# Prepend ESC sequence to ALT keys
 options		VT_MAXWINDOWS=16	# Number of virtual consoles
 options		VT_TWOBUTTON_MOUSE	# Use right mouse button to paste
 
 # The following options set the default framebuffer size.
 options		VT_FB_DEFAULT_HEIGHT=480
 options		VT_FB_DEFAULT_WIDTH=640
 
 # The following options will let you change the default vt terminal colors.
 options		TERMINAL_NORM_ATTR=(FG_GREEN|BG_BLACK)
 options		TERMINAL_KERN_ATTR=(FG_LIGHTRED|BG_BLACK)
 
 #
 # Optional devices:
 #
 
 #
 # SCSI host adapters:
 #
 # adv: All Narrow SCSI bus AdvanSys controllers.
 # adw: Second Generation AdvanSys controllers including the ADV940UW.
 # aha: Adaptec 154x/1535/1640
 # ahc: Adaptec 274x/284x/2910/293x/294x/394x/3950x/3960x/398X/4944/
 #      19160x/29160x, aic7770/aic78xx
 # ahd: Adaptec 29320/39320 Controllers.
 # aic: Adaptec 6260/6360, APA-1460 (PC Card)
 # bt:  Most Buslogic controllers: including BT-445, BT-54x, BT-64x, BT-74x,
 #      BT-75x, BT-946, BT-948, BT-956, BT-958, SDC3211B, SDC3211F, SDC3222F
 # esp: Emulex ESP, NCR 53C9x and QLogic FAS families based controllers
 #      including the AMD Am53C974 (found on devices such as the Tekram
 #      DC-390(T)) and the Sun ESP and FAS families of controllers
 # isp: Qlogic ISP 1020, 1040 and 1040B PCI SCSI host adapters,
 #      ISP 1240 Dual Ultra SCSI, ISP 1080 and 1280 (Dual) Ultra2,
 #      ISP 12160 Ultra3 SCSI,
 #      Qlogic ISP 2100 and ISP 2200 1Gb Fibre Channel host adapters.
 #      Qlogic ISP 2300 and ISP 2312 2Gb Fibre Channel host adapters.
 #      Qlogic ISP 2322 and ISP 6322 2Gb Fibre Channel host adapters.
 # ispfw: Firmware module for Qlogic host adapters
 # mpt: LSI-Logic MPT/Fusion 53c1020 or 53c1030 Ultra4
 #      or FC9x9 Fibre Channel host adapters.
 # ncr: NCR 53C810, 53C825 self-contained SCSI host adapters.
 # sym: Symbios/Logic 53C8XX family of PCI-SCSI I/O processors:
 #      53C810, 53C810A, 53C815, 53C825,  53C825A, 53C860, 53C875,
 #      53C876, 53C885,  53C895, 53C895A, 53C896,  53C897, 53C1510D,
 #      53C1010-33, 53C1010-66.
 # trm: Tekram DC395U/UW/F DC315U adapters.
 
 #
 # Note that the order is important in order for Buslogic ISA cards to be
 # probed correctly.
 #
 device		bt
 hint.bt.0.at="isa"
 hint.bt.0.port="0x330"
 device		adv
 hint.adv.0.at="isa"
 device		adw
 device		aha
 hint.aha.0.at="isa"
 device		aic
 hint.aic.0.at="isa"
 device		ahc
 device		ahd
 device		esp
 device		iscsi_initiator
 device		isp
 hint.isp.0.disable="1"
 hint.isp.0.role="3"
 hint.isp.0.prefer_iomap="1"
 hint.isp.0.prefer_memmap="1"
 hint.isp.0.fwload_disable="1"
 hint.isp.0.ignore_nvram="1"
 hint.isp.0.fullduplex="1"
 hint.isp.0.topology="lport"
 hint.isp.0.topology="nport"
 hint.isp.0.topology="lport-only"
 hint.isp.0.topology="nport-only"
 # we can't get u_int64_t types, nor can we get strings if it's got
 # a leading 0x, hence this silly dodge.
 hint.isp.0.portwnn="w50000000aaaa0000"
 hint.isp.0.nodewnn="w50000000aaaa0001"
 device		ispfw
 device		mpt
 device		ncr
 device		sym
 device		trm
 
 # The aic7xxx driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set. Unfortunately,
 # this doesn't work on some motherboards, which prevents it from being the
 # default.
 options 	AHC_ALLOW_MEMIO
 
 # Dump the contents of the ahc controller configuration PROM.
 options 	AHC_DUMP_EEPROM
 
 # Bitmap of units to enable targetmode operations.
 options 	AHC_TMODE_ENABLE
 
 # Compile in Aic7xxx Debugging code.
 options 	AHC_DEBUG
 
 # Aic7xxx driver debugging options. See sys/dev/aic7xxx/aic7xxx.h
 options 	AHC_DEBUG_OPTS
 
 # Print register bitfields in debug output.  Adds ~128k to driver.
 # See ahc(4).
 options 	AHC_REG_PRETTY_PRINT
 
 # Compile in aic79xx debugging code.
 options 	AHD_DEBUG
 
 # Aic79xx driver debugging options.  Adds ~215k to driver.  See ahd(4).
 options 	AHD_DEBUG_OPTS=0xFFFFFFFF
 
 # Print human-readable register definitions when debugging
 options 	AHD_REG_PRETTY_PRINT
 
 # Bitmap of units to enable targetmode operations.
 options 	AHD_TMODE_ENABLE
 
 # The adw driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set.
 options 	ADW_ALLOW_MEMIO
 
 # Options used in dev/iscsi (Software iSCSI stack)
 #
 options 	ISCSI_INITIATOR_DEBUG=9
 
 # Options used in dev/isp/ (Qlogic SCSI/FC driver).
 #
 #	ISP_TARGET_MODE		-	enable target mode operation
 #
 options 	ISP_TARGET_MODE=1
 #
 #	ISP_DEFAULT_ROLES	-	default role
 #		none=0
 #		target=1
 #		initiator=2
 #		both=3			(not supported currently)
 #
 #	ISP_INTERNAL_TARGET		(trivial internal disk target, for testing)
 #
 options 	ISP_DEFAULT_ROLES=0
 
 # Options used in dev/sym/ (Symbios SCSI driver).
 #options 	SYM_SETUP_LP_PROBE_MAP	#-Low Priority Probe Map (bits)
 					# Allows the ncr to take precedence
 					# 1 (1<<0) -> 810a, 860
 					# 2 (1<<1) -> 825a, 875, 885, 895
 					# 4 (1<<2) -> 895a, 896, 1510d
 #options 	SYM_SETUP_SCSI_DIFF	#-HVD support for 825a, 875, 885
 					# disabled:0 (default), enabled:1
 #options 	SYM_SETUP_PCI_PARITY	#-PCI parity checking
 					# disabled:0, enabled:1 (default)
 #options 	SYM_SETUP_MAX_LUN	#-Number of LUNs supported
 					# default:8, range:[1..64]
 
 # The 'dpt' driver provides support for old DPT controllers (http://www.dpt.com/).
 # These have hardware RAID-{0,1,5} support, and do multi-initiator I/O.
 # The DPT controllers are commonly re-licensed under other brand-names -
 # some controllers by Olivetti, Dec, HP, AT&T, SNI, AST, Alphatronic, NEC and
 # Compaq are actually DPT controllers.
 #
 # See src/sys/dev/dpt for debugging and other subtle options.
 #  DPT_MEASURE_PERFORMANCE  Enables a set of (semi)invasive metrics. Various
 #                           instruments are enabled.  The tools in
 #                           /usr/sbin/dpt_* assume these to be enabled.
 #  DPT_DEBUG_xxxx           These are controllable from sys/dev/dpt/dpt.h
 #  DPT_RESET_HBA            Make "reset" actually reset the controller
 #                           instead of fudging it.  Only enable this if you
 #			    are 100% certain you need it.
 
 device		dpt
 
 # DPT options
 #!CAM# options 	DPT_MEASURE_PERFORMANCE
 options 	DPT_RESET_HBA
 
 #
 # Compaq "CISS" RAID controllers (SmartRAID 5* series)
 # These controllers have a SCSI-like interface, and require the
 # CAM infrastructure.
 #
 device		ciss
 
 #
 # Intel Integrated RAID controllers.
 # This driver was developed and is maintained by Intel.  Contacts
 # at Intel for this driver are
 # "Kannanthanam, Boji T" <boji.t.kannanthanam@intel.com> and
 # "Leubner, Achim" <achim.leubner@intel.com>.
 #
 device		iir
 
 #
 # Mylex AcceleRAID and eXtremeRAID controllers with v6 and later
 # firmware.  These controllers have a SCSI-like interface, and require
 # the CAM infrastructure.
 #
 device		mly
 
 #
 # Compaq Smart RAID, Mylex DAC960 and AMI MegaRAID controllers.  Only
 # one entry is needed; the code will find and configure all supported
 # controllers.
 #
 device		ida		# Compaq Smart RAID
 device		mlx		# Mylex DAC960
 device		amr		# AMI MegaRAID
 device		amrp		# SCSI Passthrough interface (optional, CAM req.)
 device		mfi		# LSI MegaRAID SAS
 device		mfip		# LSI MegaRAID SAS passthrough, requires CAM
 options 	MFI_DEBUG
 device		mrsas		# LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s
 
 #
 # 3ware ATA RAID
 #
 device		twe		# 3ware ATA RAID
 
 #
 # Serial ATA host controllers:
 #
 # ahci: Advanced Host Controller Interface (AHCI) compatible
 # mvs:  Marvell 88SX50XX/88SX60XX/88SX70XX/SoC controllers
 # siis: SiliconImage SiI3124/SiI3132/SiI3531 controllers
 #
 # These drivers are part of the cam(4) subsystem.  They supersede the
 # less-featured ata(4) subsystem drivers that support the same hardware.
 
 device		ahci
 device		mvs
 device		siis
 
 #
 # The 'ATA' driver supports all legacy ATA/ATAPI controllers, including
 # PC Card devices. You only need one "device ata" for it to find all
 # PCI and PC Card ATA/ATAPI devices on modern machines.
 # Alternatively, individual bus and chipset drivers may be chosen by using
 # the 'atacore' driver then selecting the drivers on a per vendor basis.
 # For example to build a system which only supports a VIA chipset,
 # omit 'ata' and include the 'atacore', 'atapci' and 'atavia' drivers.
 device		ata
 
 # Modular ATA
 #device		atacore		# Core ATA functionality
 #device		atacard		# CARDBUS support
 #device		ataisa		# ISA bus support
 #device		atapci		# PCI bus support; only generic chipset support
 
 # PCI ATA chipsets
 #device		ataacard	# ACARD
 #device		ataacerlabs	# Acer Labs Inc. (ALI)
 #device		ataamd		# American Micro Devices (AMD)
 #device		ataati		# ATI
 #device		atacenatek	# Cenatek
 #device		atacypress	# Cypress
 #device		atacyrix	# Cyrix
 #device		atahighpoint	# HighPoint
 #device		ataintel	# Intel
 #device		ataite		# Integrated Technology Inc. (ITE)
 #device		atajmicron	# JMicron
 #device		atamarvell	# Marvell
 #device		atamicron	# Micron
 #device		atanational	# National
 #device		atanetcell	# NetCell
 #device		atanvidia	# nVidia
 #device		atapromise	# Promise
 #device		ataserverworks	# ServerWorks
 #device		atasiliconimage	# Silicon Image Inc. (SiI) (formerly CMD)
 #device		atasis		# Silicon Integrated Systems Corp.(SiS)
 #device		atavia		# VIA Technologies Inc.
 
 #
 # For older non-PCI, non-PnPBIOS systems, these are the hints lines to add:
 hint.ata.0.at="isa"
 hint.ata.0.port="0x1f0"
 hint.ata.0.irq="14"
 hint.ata.1.at="isa"
 hint.ata.1.port="0x170"
 hint.ata.1.irq="15"
 
 #
 # The following options are valid on the ATA driver:
 #
 # ATA_REQUEST_TIMEOUT:	the number of seconds to wait for an ATA request
 #			before timing out.
 
 #options 	ATA_REQUEST_TIMEOUT=10
 
 #
 # Standard floppy disk controllers and floppy tapes, supports
 # the Y-E DATA External FDD (PC Card)
 #
 device		fdc
 hint.fdc.0.at="isa"
 hint.fdc.0.port="0x3F0"
 hint.fdc.0.irq="6"
 hint.fdc.0.drq="2"
 #
 # FDC_DEBUG enables floppy debugging.  Since the debug output is huge, it
 # must additionally be turned on by setting the variable fd_debug with DDB.
 options 	FDC_DEBUG
 #
 # Activate this line if you happen to have an Insight floppy tape.
 # Probing them proved to be dangerous for people with floppy disks only,
 # so it's "hidden" behind a flag:
 #hint.fdc.0.flags="1"
 
 # Specify floppy devices
 hint.fd.0.at="fdc0"
 hint.fd.0.drive="0"
 hint.fd.1.at="fdc0"
 hint.fd.1.drive="1"
 
 #
 # uart: newbusified driver for serial interfaces.  It consolidates the sio(4),
 #	sab(4) and zs(4) drivers.
 #
 device		uart
 
 # Options for uart(4)
 options 	UART_PPS_ON_CTS		# Do time pulse capturing using CTS
 					# instead of DCD.
 options 	UART_POLL_FREQ		# Set polling rate, used when hw has
 					# no interrupt support (50 Hz default).
 
 # The following hint should only be used for pure ISA devices.  It is not
 # needed otherwise.  Use of hints is strongly discouraged.
 hint.uart.0.at="isa"
 
 # The following 3 hints are used when the UART is a system device (i.e., a
 # console or debug port), but only on platforms that don't have any other
 # means to pass the information to the kernel.  The unit number of the hint
 # is only used to bundle the hints together.  There is no relation to the
 # unit number of the probed UART.
 hint.uart.0.port="0x3f8"
 hint.uart.0.flags="0x10"
 hint.uart.0.baud="115200"
 
 # `flags' for serial drivers that support consoles like sio(4) and uart(4):
 #	0x10	enable console support for this unit.  Other console flags
 #		(if applicable) are ignored unless this is set.  Enabling
 #		console support does not make the unit the preferred console.
 #		Boot with -h or set boot_serial=YES in the loader.  For sio(4)
 #		specifically, the 0x20 flag can also be set (see above).
 #		Currently, at most one unit can have console support; the
 #		first one (in config file order) with this flag set is
 #		preferred.  Setting this flag for sio0 gives the old behavior.
 #	0x80	use this port for serial line gdb support in ddb.  Also known
 #		as debug port.
 #
 
 # Options for serial drivers that support consoles:
 options 	BREAK_TO_DEBUGGER	# A BREAK/DBG on the console goes to
 					# ddb, if available.
 
 # Solaris implements a new BREAK which is initiated by a character
 # sequence CR ~ ^b which is similar to a familiar pattern used on
 # Sun servers by the Remote Console.  There are FreeBSD extensions:
 # CR ~ ^p requests force panic and CR ~ ^r requests a clean reboot.
 options 	ALT_BREAK_TO_DEBUGGER
 
 # Serial Communications Controller
 # Supports the Siemens SAB 82532 and Zilog Z8530 multi-channel
 # communications controllers.
 device		scc
 
 # PCI Universal Communications driver
 # Supports various multi port PCI I/O cards.
 device		puc
 
 #
 # Network interfaces:
 #
 # MII bus support is required for many PCI Ethernet NICs,
 # namely those which use MII-compliant transceivers or implement
 # transceiver control interfaces that operate like an MII.  Adding
 # "device miibus" to the kernel config pulls in support for the generic
 # miibus API, the common support for bit-bang'ing the MII and all
 # of the PHY drivers, including a generic one for PHYs that aren't
 # specifically handled by an individual driver.  Support for specific
 # PHYs may be built by adding "device mii", "device mii_bitbang" if
 # needed by the NIC driver and then adding the appropriate PHY driver.
 device  	mii		# Minimal MII support
 device  	mii_bitbang	# Common module for bit-bang'ing the MII
 device  	miibus		# MII support w/ bit-bang'ing and all PHYs
 
 device  	acphy		# Altima Communications AC101
 device  	amphy		# AMD AM79c873 / Davicom DM910{1,2}
 device  	atphy		# Attansic/Atheros F1
 device  	axphy		# Asix Semiconductor AX88x9x
 device  	bmtphy		# Broadcom BCM5201/BCM5202 and 3Com 3c905C
 device		bnxt		# Broadcom NetXtreme-C/NetXtreme-E
 device  	brgphy		# Broadcom BCM54xx/57xx 1000baseTX
 device  	ciphy		# Cicada/Vitesse CS/VSC8xxx
 device  	e1000phy	# Marvell 88E1000 1000/100/10-BT
 device  	gentbi		# Generic 10-bit 1000BASE-{LX,SX} fiber ifaces
 device  	icsphy		# ICS ICS1889-1893
 device  	ip1000phy	# IC Plus IP1000A/IP1001
 device  	jmphy		# JMicron JMP211/JMP202
 device  	lxtphy		# Level One LXT-970
 device  	mlphy		# Micro Linear 6692
 device  	nsgphy		# NatSemi DP8361/DP83865/DP83891
 device  	nsphy		# NatSemi DP83840A
 device  	nsphyter	# NatSemi DP83843/DP83815
 device  	pnaphy		# HomePNA
 device  	qsphy		# Quality Semiconductor QS6612
 device  	rdcphy		# RDC Semiconductor R6040
 device  	rgephy		# RealTek 8169S/8110S/8211B/8211C
 device  	rlphy		# RealTek 8139
 device  	rlswitch	# RealTek 8305
 device  	smcphy		# SMSC LAN91C111
 device  	tdkphy		# TDK 89Q2120
 device  	tlphy		# Texas Instruments ThunderLAN
 device  	truephy		# LSI TruePHY
 device		xmphy		# XaQti XMAC II
 
 # an:   Aironet 4500/4800 802.11 wireless adapters. Supports the PCMCIA,
 #       PCI and ISA varieties.
 # ae:   Support for Fast Ethernet adapters based on the Attansic/Atheros
 #       L2 PCI-Express FastEthernet controllers.
 # age:  Support for gigabit ethernet adapters based on the Attansic/Atheros
 #       L1 PCI express gigabit ethernet controllers.
 # alc:  Support for Atheros AR8131/AR8132 PCIe ethernet controllers.
 # ale:  Support for Atheros AR8121/AR8113/AR8114 PCIe ethernet controllers.
 # ath:  Atheros a/b/g WiFi adapters (requires ath_hal and wlan)
 # bce:	Broadcom NetXtreme II (BCM5706/BCM5708) PCI/PCIe Gigabit Ethernet
 #       adapters.
 # bfe:	Broadcom BCM4401 Ethernet adapter.
 # bge:	Support for gigabit ethernet adapters based on the Broadcom
 #	BCM570x family of controllers, including the 3Com 3c996-T,
 #	the Netgear GA302T, the SysKonnect SK-9D21 and SK-9D41, and
 #	the embedded gigE NICs on Dell PowerEdge 2550 servers.
 # bnxt:	Broadcom NetXtreme-C and NetXtreme-E PCIe 10/25/50G Ethernet adapters.
 # bxe:	Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet
 #       adapters.
 # bwi:	Broadcom BCM430* and BCM431* family of wireless adapters.
 # bwn:	Broadcom BCM43xx family of wireless adapters.
 # cas:	Sun Cassini/Cassini+ and National Semiconductor DP83065 Saturn
 # cm:	Arcnet SMC COM90c26 / SMC COM90c56
 #	(and SMC COM90c66 in '56 compatibility mode) adapters.
 # cxgb: Chelsio T3 based 1GbE/10GbE PCIe Ethernet adapters.
 # cxgbe:Chelsio T4, T5, and T6-based 1/10/25/40/100GbE PCIe Ethernet
 #	adapters.
 # cxgbev: Chelsio T4, T5, and T6-based PCIe Virtual Functions.
 # dc:   Support for PCI fast ethernet adapters based on the DEC/Intel 21143
 #       and various workalikes including:
 #       the ADMtek AL981 Comet and AN985 Centaur, the ASIX Electronics
 #       AX88140A and AX88141, the Davicom DM9100 and DM9102, the Lite-On
 #       82c168 and 82c169 PNIC, the Lite-On/Macronix LC82C115 PNIC II
 #       and the Macronix 98713/98713A/98715/98715A/98725 PMAC. This driver
 #       replaces the old al, ax, dm, pn and mx drivers.  List of brands:
 #       Digital DE500-BA, Kingston KNE100TX, D-Link DFE-570TX, SOHOware SFA110,
 #       SVEC PN102-TX, CNet Pro110B, 120A, and 120B, Compex RL100-TX,
 #       LinkSys LNE100TX, LNE100TX V2.0, Jaton XpressNet, Alfa Inc GFC2204,
 #       KNE110TX.
 # de:   Digital Equipment DC21040
 # em:   Intel Pro/1000 Gigabit Ethernet 82542, 82543, 82544 based adapters.
 # ep:   3Com 3C509, 3C529, 3C556, 3C562D, 3C563D, 3C572, 3C574X, 3C579, 3C589
 #       and PC Card devices using these chipsets.
 # ex:   Intel EtherExpress Pro/10 and other i82595-based adapters,
 #       Olicom Ethernet PC Card devices.
 # fe:   Fujitsu MB86960A/MB86965A Ethernet
 # fpa:  Support for the Digital DEFPA PCI FDDI. `device fddi' is also needed.
 # fxp:  Intel EtherExpress Pro/100B
 #	(hint of prefer_iomap can be done to prefer I/O instead of Mem mapping)
 # gem:  Apple GMAC/Sun ERI/Sun GEM
 # hme:  Sun HME (Happy Meal Ethernet)
 # jme:  JMicron JMC260 Fast Ethernet/JMC250 Gigabit Ethernet based adapters.
 # le:   AMD Am7900 LANCE and Am79C9xx PCnet
 # lge:	Support for PCI gigabit ethernet adapters based on the Level 1
 #	LXT1001 NetCellerator chipset. This includes the D-Link DGE-500SX,
 #	SMC TigerCard 1000 (SMC9462SX), and some Addtron cards.
 # lio:  Support for Cavium 23XX Ethernet adapters
 # malo: Marvell Libertas wireless NICs.
 # mwl:  Marvell 88W8363 802.11n wireless NICs.
 #	Requires the mwl firmware module
 # mwlfw: Marvell 88W8363 firmware
 # msk:	Support for gigabit ethernet adapters based on the Marvell/SysKonnect
 #	Yukon II Gigabit controllers, including 88E8021, 88E8022, 88E8061,
 #	88E8062, 88E8035, 88E8036, 88E8038, 88E8050, 88E8052, 88E8053,
 #	88E8055, 88E8056 and D-Link 560T/550SX.
 # lmc:	Support for the LMC/SBE wide-area network interface cards.
 # mlx5:	Mellanox ConnectX-4 and ConnectX-4 LX IB and Eth shared code module.
 # mlx5en:Mellanox ConnectX-4 and ConnectX-4 LX PCIe Ethernet adapters.
 # my:	Myson Fast Ethernet (MTD80X, MTD89X)
 # nge:	Support for PCI gigabit ethernet adapters based on the National
 #	Semiconductor DP83820 and DP83821 chipset. This includes the
 #	SMC EZ Card 1000 (SMC9462TX), D-Link DGE-500T, Asante FriendlyNet
 #	GigaNIX 1000TA and 1000TPC, the Addtron AEG320T, the Surecom
 #	EP-320G-TX and the Netgear GA622T.
 # oce:	Emulex 10 Gbit adapters (OneConnect Ethernet)
 # pcn:	Support for PCI fast ethernet adapters based on the AMD Am79c97x
 #	PCnet-FAST, PCnet-FAST+, PCnet-FAST III, PCnet-PRO and PCnet-Home
 #	chipsets. These can also be handled by the le(4) driver if the
 #	pcn(4) driver is left out of the kernel. The le(4) driver does not
 #	support the additional features like the MII bus and burst mode of
 #	the PCnet-FAST and greater chipsets though.
 # ral:	Ralink Technology IEEE 802.11 wireless adapter
 # re:   RealTek 8139C+/8169/816xS/811xS/8101E PCI/PCIe Ethernet adapter
 # rl:   Support for PCI fast ethernet adapters based on the RealTek 8129/8139
 #       chipset.  Note that the RealTek driver defaults to using programmed
 #       I/O to do register accesses because memory mapped mode seems to cause
 #       severe lockups on SMP hardware.  This driver also supports the
 #       Accton EN1207D `Cheetah' adapter, which uses a chip called
 #       the MPX 5030/5038, which is either a RealTek in disguise or a
 #       RealTek workalike.  Note that the D-Link DFE-530TX+ uses the RealTek
 #       chipset and is supported by this driver, not the 'vr' driver.
 # rtwn: RealTek wireless adapters.
 # rtwnfw: RealTek wireless firmware.
 # sf:   Support for Adaptec Duralink PCI fast ethernet adapters based on the
 #       Adaptec AIC-6915 "starfire" controller.
 #       This includes dual and quad port cards, as well as one 100baseFX card.
 #       Most of these are 64-bit PCI devices, except for one single port
 #       card which is 32-bit.
 # sge:  Silicon Integrated Systems SiS190/191 Fast/Gigabit Ethernet adapter
 # sis:  Support for NICs based on the Silicon Integrated Systems SiS 900,
 #       SiS 7016 and NS DP83815 PCI fast ethernet controller chips.
 # sk:   Support for the SysKonnect SK-984x series PCI gigabit ethernet NICs.
 #       This includes the SK-9841 and SK-9842 single port cards (single mode
 #       and multimode fiber) and the SK-9843 and SK-9844 dual port cards
 #       (also single mode and multimode).
 #       The driver will autodetect the number of ports on the card and
 #       attach each one as a separate network interface.
 # sn:   Support for ISA and PC Card Ethernet devices using the
 #       SMC91C90/92/94/95 chips.
 # ste:  Sundance Technologies ST201 PCI fast ethernet controller, includes
 #       the D-Link DFE-550TX.
 # stge: Support for gigabit ethernet adapters based on the Sundance/Tamarack
 #       TC9021 family of controllers, including the Sundance ST2021/ST2023,
 #       the Sundance/Tamarack TC9021, the D-Link DL-4000 and ASUS NX1101.
 # ti:   Support for PCI gigabit ethernet NICs based on the Alteon Networks
 #       Tigon 1 and Tigon 2 chipsets.  This includes the Alteon AceNIC, the
 #       3Com 3c985, the Netgear GA620 and various others.  Note that you will
 #       probably want to bump up kern.ipc.nmbclusters a lot to use this driver.
 # tl:   Support for the Texas Instruments TNETE100 series 'ThunderLAN'
 #       cards and integrated ethernet controllers.  This includes several
 #       Compaq Netelligent 10/100 cards and the built-in ethernet controllers
 #       in several Compaq Prosignia, Proliant and Deskpro systems.  It also
 #       supports several Olicom 10Mbps and 10/100 boards.
 # tx:   SMC 9432 TX, BTX and FTX cards. (SMC EtherPower II series)
 # txp:	Support for 3Com 3cR990 cards with the "Typhoon" chipset
 # vr:   Support for various fast ethernet adapters based on the VIA
 #       Technologies VT3043 `Rhine I' and VT86C100A `Rhine II' chips,
 #       including the D-Link DFE520TX and D-Link DFE530TX (see 'rl' for
 #       DFE530TX+), the Hawking Technologies PN102TX, and the AOpen/Acer ALN-320.
 # vte:  DM&P Vortex86 RDC R6040 Fast Ethernet
 # vx:   3Com 3C590 and 3C595
 # wb:   Support for fast ethernet adapters based on the Winbond W89C840F chip.
 #       Note: this is not the same as the Winbond W89C940F, which is a
 #       NE2000 clone.
 # wi:   Lucent WaveLAN/IEEE 802.11 PCMCIA adapters. Note: this supports both
 #       the PCMCIA and ISA cards: the ISA card is really a PCMCIA to ISA
 #       bridge with a PCMCIA adapter plugged into it.
 # xe:   Xircom/Intel EtherExpress Pro100/16 PC Card ethernet controller,
 #       Accton Fast EtherCard-16, Compaq Netelligent 10/100 PC Card,
 #       Toshiba 10/100 Ethernet PC Card, Xircom 16-bit Ethernet + Modem 56
 # xl:   Support for the 3Com 3c900, 3c905, 3c905B and 3c905C (Fast)
 #       Etherlink XL cards and integrated controllers.  This includes the
 #       integrated 3c905B-TX chips in certain Dell Optiplex and Dell
 #       Precision desktop machines and the integrated 3c905-TX chips
 #       in Dell Latitude laptop docking stations.
 #       Also supported: 3Com 3c980(C)-TX, 3Com 3cSOHO100-TX, 3Com 3c450-TX
 
 # Order for ISA devices is important here
 
 device		cm
 hint.cm.0.at="isa"
 hint.cm.0.port="0x2e0"
 hint.cm.0.irq="9"
 hint.cm.0.maddr="0xdc000"
 device		ep
 device		ex
 device		fe
 hint.fe.0.at="isa"
 hint.fe.0.port="0x300"
 device		sn
 hint.sn.0.at="isa"
 hint.sn.0.port="0x300"
 hint.sn.0.irq="10"
 device		an
 device		wi
 device		xe
 
 # PCI Ethernet NICs that use the common MII bus controller code.
 device		ae		# Attansic/Atheros L2 FastEthernet
 device		age		# Attansic/Atheros L1 Gigabit Ethernet
 device		alc		# Atheros AR8131/AR8132 Ethernet
 device		ale		# Atheros AR8121/AR8113/AR8114 Ethernet
 device		bce		# Broadcom BCM5706/BCM5708 Gigabit Ethernet
 device		bfe		# Broadcom BCM440x 10/100 Ethernet
 device		bge		# Broadcom BCM570xx Gigabit Ethernet
 device		cas		# Sun Cassini/Cassini+ and NS DP83065 Saturn
 device		dc		# DEC/Intel 21143 and various workalikes
 device		et		# Agere ET1310 10/100/Gigabit Ethernet
 device		fxp		# Intel EtherExpress PRO/100B (82557, 82558)
 hint.fxp.0.prefer_iomap="0"
 device		gem		# Apple GMAC/Sun ERI/Sun GEM
 device		hme		# Sun HME (Happy Meal Ethernet)
 device		jme		# JMicron JMC250 Gigabit/JMC260 Fast Ethernet
 device		lge		# Level 1 LXT1001 gigabit Ethernet
 device		mlx5		# Shared code module between IB and Ethernet
 device		mlx5en		# Mellanox ConnectX-4 and ConnectX-4 LX
 device		msk		# Marvell/SysKonnect Yukon II Gigabit Ethernet
 device		my		# Myson Fast Ethernet (MTD80X, MTD89X)
 device		nge		# NatSemi DP83820 gigabit Ethernet
 device		re		# RealTek 8139C+/8169/8169S/8110S
 device		rl		# RealTek 8129/8139
 device		pcn		# AMD Am79C97x PCI 10/100 NICs
 device		sf		# Adaptec AIC-6915 (``Starfire'')
 device		sge		# Silicon Integrated Systems SiS190/191
 device		sis		# Silicon Integrated Systems SiS 900/SiS 7016
 device		sk		# SysKonnect SK-984x & SK-982x gigabit Ethernet
 device		ste		# Sundance ST201 (D-Link DFE-550TX)
 device		stge		# Sundance/Tamarack TC9021 gigabit Ethernet
 device		tl		# Texas Instruments ThunderLAN
 device		tx		# SMC EtherPower II (83c170 ``EPIC'')
 device		vr		# VIA Rhine, Rhine II
 device		vte		# DM&P Vortex86 RDC R6040 Fast Ethernet
 device		wb		# Winbond W89C840F
 device		xl		# 3Com 3c90x (``Boomerang'', ``Cyclone'')
 
 # PCI Ethernet NICs.
 device		cxgb		# Chelsio T3 10 Gigabit Ethernet
 device		cxgb_t3fw	# Chelsio T3 10 Gigabit Ethernet firmware
 device		cxgbe		# Chelsio T4-T6 1/10/25/40/100 Gigabit Ethernet
 device		cxgbev		# Chelsio T4-T6 Virtual Functions
 device		de		# DEC/Intel DC21x4x (``Tulip'')
 device		em		# Intel Pro/1000 Gigabit Ethernet
 device		ixgb		# Intel Pro/10Gbe PCI-X Ethernet
 device		ix		# Intel Pro/10Gbe PCIE Ethernet
 device		ixv		# Intel Pro/10Gbe PCIE Ethernet VF
 device		le		# AMD Am7900 LANCE and Am79C9xx PCnet
 device		mxge		# Myricom Myri-10G 10GbE NIC
 device		nxge		# Neterion Xframe 10GbE Server/Storage Adapter
 device		oce		# Emulex 10 GbE (OneConnect Ethernet)
 device		ti		# Alteon Networks Tigon I/II gigabit Ethernet
 device		txp		# 3Com 3cR990 (``Typhoon'')
 device		vx		# 3Com 3c590, 3c595 (``Vortex'')
 device		vxge		# Exar/Neterion XFrame 3100 10GbE
 
 # PCI FDDI NICs.
 device		fpa
 
 # PCI WAN adapters.
 device		lmc
 
 # PCI IEEE 802.11 Wireless NICs
 device		ath		# Atheros pci/cardbus NIC's
 device		ath_hal		# pci/cardbus chip support
 #device		ath_ar5210	# AR5210 chips
 #device		ath_ar5211	# AR5211 chips
 #device		ath_ar5212	# AR5212 chips
 #device		ath_rf2413
 #device		ath_rf2417
 #device		ath_rf2425
 #device		ath_rf5111
 #device		ath_rf5112
 #device		ath_rf5413
 #device		ath_ar5416	# AR5416 chips
 options 	AH_SUPPORT_AR5416	# enable AR5416 tx/rx descriptors
 # All of the AR5212 parts have a problem when paired with the AR71xx
 # CPUs.  These parts have a bug that triggers a fatal bus error on the AR71xx
 # only.  Details of the exact nature of the bug are sketchy, but some can be
 # found at https://forum.openwrt.org/viewtopic.php?pid=70060 on pages 4, 5 and
 # 6.  The option below enables a workaround.  There is a performance penalty
 # for this workaround, but without it things don't work at all.  The DMA
 # from the card usually bursts 128 bytes, but on the affected CPUs, only
 # 4 are safe.
 options	   	AH_RXCFG_SDMAMW_4BYTES
 #device		ath_ar9160	# AR9160 chips
 #device		ath_ar9280	# AR9280 chips
 #device		ath_ar9285	# AR9285 chips
 device		ath_rate_sample	# SampleRate tx rate control for ath
 device		bwi		# Broadcom BCM430* BCM431*
 device		bwn		# Broadcom BCM43xx
 device		malo		# Marvell Libertas wireless NICs.
 device		mwl		# Marvell 88W8363 802.11n wireless NICs.
 device		mwlfw
 device		ral		# Ralink Technology RT2500 wireless NICs.
 device		rtwn		# Realtek wireless NICs
 device		rtwnfw
 
 # Use sf_buf(9) interface for jumbo buffers on ti(4) controllers.
 #options 	TI_SF_BUF_JUMBO
 # Turn on the header splitting option for the ti(4) driver firmware.  This
 # only works for Tigon II chips, and has no effect for Tigon I chips.
 # This option requires the TI_SF_BUF_JUMBO option above.
 #options 	TI_JUMBO_HDRSPLIT
 
 # These two options allow manipulating the mbuf cluster size and mbuf size,
 # respectively.  Be very careful with NIC driver modules when changing
 # these from their default values, because that can potentially cause a
 # mismatch between the mbuf size assumed by the kernel and the mbuf size
 # assumed by a module.  The only driver that currently has the ability to
 # detect a mismatch is ti(4).
 options 	MCLSHIFT=12	# mbuf cluster shift in bits, 12 == 4KB
 options 	MSIZE=512	# mbuf size in bytes
 
 #
 # Sound drivers
 #
 # sound: The generic sound driver.
 #
 
 device		sound
 
 #
 # snd_*: Device-specific drivers.
 #
 # The flags of the device tell the device a bit more info about the
 # device that normally is obtained through the PnP interface.
 #	bit  2..0   secondary DMA channel;
 #	bit  4      set if the board uses two dma channels;
 #	bit 15..8   board type, overrides autodetection; leave it
 #		    zero if you don't know what to put in (and you don't,
 #		    since this is unsupported at the moment...).
 #
 # snd_ad1816:		Analog Devices AD1816 ISA PnP/non-PnP.
 # snd_als4000:		Avance Logic ALS4000 PCI.
 # snd_atiixp:		ATI IXP 200/300/400 PCI.
 # snd_audiocs:		Crystal Semiconductor CS4231 SBus/EBus. Only
 #			for sparc64.
 # snd_cmi:		CMedia CMI8338/CMI8738 PCI.
 # snd_cs4281:		Crystal Semiconductor CS4281 PCI.
 # snd_csa:		Crystal Semiconductor CS461x/428x PCI. (except
 #			4281)
 # snd_ds1:		Yamaha DS-1 PCI.
 # snd_emu10k1:		Creative EMU10K1 PCI and EMU10K2 (Audigy) PCI.
 # snd_emu10kx:		Creative SoundBlaster Live! and Audigy
 # snd_envy24:		VIA Envy24 and compatible, needs snd_spicds.
 # snd_envy24ht:		VIA Envy24HT and compatible, needs snd_spicds.
 # snd_es137x:		Ensoniq AudioPCI ES137x PCI.
 # snd_ess:		Ensoniq ESS ISA PnP/non-PnP, to be used in
 #			conjunction with snd_sbc.
 # snd_fm801:		Forte Media FM801 PCI.
 # snd_gusc:		Gravis UltraSound ISA PnP/non-PnP.
 # snd_hda:		Intel High Definition Audio (Controller) and
 #			compatible.
 # snd_hdspe:		RME HDSPe AIO and RayDAT.
 # snd_ich:		Intel ICH AC'97 and some more audio controllers
 #			embedded in a chipset, for example nVidia
 #			nForce controllers.
 # snd_maestro:		ESS Technology Maestro-1/2x PCI.
 # snd_maestro3:		ESS Technology Maestro-3/Allegro PCI.
 # snd_mss:		Microsoft Sound System ISA PnP/non-PnP.
 # snd_neomagic:		Neomagic 256 AV/ZX PCI.
 # snd_sb16:		Creative SoundBlaster16, to be used in
 #			conjunction with snd_sbc.
 # snd_sb8:		Creative SoundBlaster (pre-16), to be used in
 #			conjunction with snd_sbc.
 # snd_sbc:		Creative SoundBlaster ISA PnP/non-PnP.
 #			Supports ESS and Avance ISA chips as well.
 # snd_solo:		ESS Solo-1x PCI.
 # snd_spicds:		SPI codec driver, needed by Envy24/Envy24HT drivers.
 # snd_t4dwave:		Trident 4DWave DX/NX PCI, Sis 7018 PCI and Acer Labs
 #			M5451 PCI.
 # snd_uaudio:		USB audio.
 # snd_via8233:		VIA VT8233x PCI.
 # snd_via82c686:	VIA VT82C686A PCI.
 # snd_vibes:		S3 Sonicvibes PCI.
 
 device		snd_ad1816
 device		snd_als4000
 device		snd_atiixp
 #device		snd_audiocs
 device		snd_cmi
 device		snd_cs4281
 device		snd_csa
 device		snd_ds1
 device		snd_emu10k1
 device		snd_emu10kx
 device		snd_envy24
 device		snd_envy24ht
 device		snd_es137x
 device		snd_ess
 device		snd_fm801
 device		snd_gusc
 device		snd_hda
 device		snd_hdspe
 device		snd_ich
 device		snd_maestro
 device		snd_maestro3
 device		snd_mss
 device		snd_neomagic
 device		snd_sb16
 device		snd_sb8
 device		snd_sbc
 device		snd_solo
 device		snd_spicds
 device		snd_t4dwave
 device		snd_uaudio
 device		snd_via8233
 device		snd_via82c686
 device		snd_vibes
 
 # For non-PnP sound cards:
 hint.pcm.0.at="isa"
 hint.pcm.0.irq="10"
 hint.pcm.0.drq="1"
 hint.pcm.0.flags="0x0"
 hint.sbc.0.at="isa"
 hint.sbc.0.port="0x220"
 hint.sbc.0.irq="5"
 hint.sbc.0.drq="1"
 hint.sbc.0.flags="0x15"
 hint.gusc.0.at="isa"
 hint.gusc.0.port="0x220"
 hint.gusc.0.irq="5"
 hint.gusc.0.drq="1"
 hint.gusc.0.flags="0x13"
 
 #
 # Following options are intended for debugging/testing purposes:
 #
 # SND_DEBUG                    Enable extra debugging code that includes
 #                              sanity checking and possible increase of
 #                              verbosity.
 #
 # SND_DIAGNOSTIC               Similar in spirit to INVARIANTS/DIAGNOSTIC,
 #                              zero tolerance against inconsistencies.
 #
 # SND_FEEDER_MULTIFORMAT       By default, only 16/32 bit feeders are compiled
 #                              in. This option enables most feeder converters
 #                              except for 8bit. WARNING: May bloat the kernel.
 #
 # SND_FEEDER_FULL_MULTIFORMAT  Ditto, but includes 8bit feeders as well.
 #
 # SND_FEEDER_RATE_HP           (feeder_rate) High precision 64bit arithmetic
 #                              as much as possible (the default tries to
 #                              avoid it). Possible slowdown.
 #
 # SND_PCM_64                   (Only applicable for i386/32bit arch)
 #                              Process 32bit samples through 64bit
 #                              integer/arithmetic. Slight increase of dynamic
 #                              range at a cost of possible slowdown.
 #
 # SND_OLDSTEREO                Only 2 channels are allowed, effectively
 #                              disabling multichannel processing.
 #
 options		SND_DEBUG
 options		SND_DIAGNOSTIC
 options		SND_FEEDER_MULTIFORMAT
 options		SND_FEEDER_FULL_MULTIFORMAT
 options		SND_FEEDER_RATE_HP
 options		SND_PCM_64
 options		SND_OLDSTEREO
 
 #
 # Miscellaneous hardware:
 #
 # bktr: Brooktree bt848/848a/849a/878/879 video capture and TV Tuner board
 # joy: joystick (including IO DATA PCJOY PC Card joystick)
 # cmx: OmniKey CardMan 4040 pccard smartcard reader
 
 device		joy			# PnP aware, hints for non-PnP only
 hint.joy.0.at="isa"
 hint.joy.0.port="0x201"
 device		cmx
 
 #
 # The 'bktr' device is a PCI video capture device using the Brooktree
 # bt848/bt848a/bt849a/bt878/bt879 chipset. When used with a TV Tuner it forms a
 # TV card, e.g. Miro PC/TV, Hauppauge WinCast/TV WinTV, VideoLogic Captivator,
 # Intel Smart Video III, AverMedia, IMS Turbo, FlyVideo.
 #
 # options 	OVERRIDE_CARD=xxx
 # options 	OVERRIDE_TUNER=xxx
 # options 	OVERRIDE_MSP=1
 # options 	OVERRIDE_DBX=1
 # These options can be used to override the auto detection.
 # The current values for xxx are found in src/sys/dev/bktr/bktr_card.h.
 # Run-time overrides on a per-card basis can be made using sysctl(8).
 #
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_PAL
 # or
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_NTSC
 # Specifies the default video capture mode.
 # This is required for Dual Crystal (28&35MHz) boards where PAL is used
 # to prevent hangs during initialization, e.g. VideoLogic Captivator PCI.
 #
 # options 	BKTR_USE_PLL
 # This is required for PAL or SECAM boards with a 28MHz crystal and no 35MHz
 # crystal, e.g. some new Bt878 cards.
 #
 # options 	BKTR_GPIO_ACCESS
 # This enables IOCTLs which give user level access to the GPIO port.
 #
 # options 	BKTR_NO_MSP_RESET
 # Prevents the MSP34xx reset. Good if you initialize the MSP in another OS first
 #
 # options 	BKTR_430_FX_MODE
 # Switch Bt878/879 cards into Intel 430FX chipset compatibility mode.
 #
 # options 	BKTR_SIS_VIA_MODE
 # Switch Bt878/879 cards into SIS/VIA chipset compatibility mode which is
 # needed for some old SiS and VIA chipset motherboards.
 # This also allows Bt878/879 chips to work on old OPTi (<1997) chipset
 # motherboards and motherboards with bad or incomplete PCI 2.1 support.
 # As a rough guess, old = before 1998
 #
 # options 	BKTR_NEW_MSP34XX_DRIVER
 # Use new, more complete initialization scheme for the msp34* soundchip.
 # Should fix stereo autodetection if the old driver only outputs
 # mono sound.
 
 #
 # options 	BKTR_USE_FREEBSD_SMBUS
 # Compile with FreeBSD SMBus implementation
 #
 # Brooktree driver has been ported to the new I2C framework. Thus,
 # you'll need to have the following 4 lines in the kernel config.
 #     device smbus
 #     device iicbus
 #     device iicbb
 #     device iicsmb
 # The iic and smb devices are only needed if you want to control other
 # I2C slaves connected to the external connector of some cards.
 #
 device		bktr
  
 #
 # PC Card/PCMCIA and Cardbus
 #
 # cbb: pci/cardbus bridge implementing YENTA interface
 # pccard: pccard slots
 # cardbus: cardbus slots
 device		cbb
 device		pccard
 device		cardbus
 
 #
 # MMC/SD
 #
 # mmc 		MMC/SD bus
 # mmcsd		MMC/SD memory card
 # sdhci		Generic PCI SD Host Controller
 #
 device		mmc
 device		mmcsd
 device		sdhci
 
 #
 # SMB bus
 #
 # System Management Bus support is provided by the 'smbus' device.
 # Access to the SMBus device is via the 'smb' device (/dev/smb*),
 # which is a child of the 'smbus' device.
 #
 # Supported devices:
 # smb		standard I/O through /dev/smb*
 #
 # Supported SMB interfaces:
 # iicsmb	I2C to SMB bridge with any iicbus interface
 # bktr		brooktree848 I2C hardware interface
 # intpm		Intel PIIX4 (82371AB, 82443MX) Power Management Unit
 # alpm		Acer Aladdin-IV/V/Pro2 Power Management Unit
 # ichsmb	Intel ICH SMBus controller chips (82801AA, 82801AB, 82801BA)
 # viapm		VIA VT82C586B/596B/686A and VT8233 Power Management Unit
 # amdpm		AMD 756 Power Management Unit
 # amdsmb	AMD 8111 SMBus 2.0 Controller
 # nfpm		NVIDIA nForce Power Management Unit
 # nfsmb		NVIDIA nForce2/3/4 MCP SMBus 2.0 Controller
 # ismt		Intel SMBus 2.0 controller chips (on Atom S1200, C2000)
 #
 device		smbus		# Bus support, required for smb below.
 
 device		intpm
 device		alpm
 device		ichsmb
 device		viapm
 device		amdpm
 device		amdsmb
 device		nfpm
 device		nfsmb
 device		ismt
 
 device		smb
 
 # SMBus peripheral devices
 #
 # jedec_ts	Temperature Sensor compliant with JEDEC Standard 21-C
 #
 device		jedec_ts
 
 # I2C Bus
 #
 # Philips i2c bus support is provided by the `iicbus' device.
 #
 # Supported devices:
 # ic	i2c network interface
 # iic	i2c standard io
 # iicsmb i2c to smb bridge. Allow i2c i/o with smb commands.
 # iicoc simple polling driver for OpenCores I2C controller
 #
 # Supported interfaces:
 # bktr	brooktree848 I2C software interface
 #
 # Other:
 # iicbb	generic I2C bit-banging code (needed by lpbb, bktr)
 #
 device		iicbus		# Bus support, required for ic/iic/iicsmb below.
 device		iicbb
 
 device		ic
 device		iic
 device		iicsmb		# smb over i2c bridge
 device		iicoc		# OpenCores I2C controller support
 
 # I2C peripheral devices
 #
 device		ds1307		# Dallas DS1307 RTC and compatible
 device		ds13rtc		# All Dallas/Maxim ds13xx chips
 device		ds1672		# Dallas DS1672 RTC
 device		ds3231		# Dallas DS3231 RTC + temperature
 device		icee		# AT24Cxxx and compatible EEPROMs
 device		lm75		# LM75 compatible temperature sensor
 device		nxprtc		# NXP RTCs: PCA/PFC212x PCA/PCF85xx
 device		s35390a		# Seiko Instruments S-35390A RTC
 
 # Parallel-Port Bus
 #
 # Parallel port bus support is provided by the `ppbus' device.
 # Multiple devices may be attached to the parallel port, devices
 # are automatically probed and attached when found.
 #
 # Supported devices:
 # vpo	Iomega Zip Drive
 #	Requires SCSI disk support ('scbus' and 'da'), best
 #	performance is achieved with ports in EPP 1.9 mode.
 # lpt	Parallel Printer
 # plip	Parallel network interface
 # ppi	General-purpose I/O ("Geek Port") + IEEE1284 I/O
 # pps	Pulse per second Timing Interface
 # lpbb	Philips official parallel port I2C bit-banging interface
 # pcfclock Parallel port clock driver.
 #
 # Supported interfaces:
 # ppc	ISA-bus parallel port interfaces.
 #
 
 options 	PPC_PROBE_CHIPSET # Enable chipset specific detection
 				  # (see flags in ppc(4))
 options 	DEBUG_1284	# IEEE1284 signaling protocol debug
 options 	PERIPH_1284	# Makes your computer act as an IEEE1284
 				# compliant peripheral
 options 	DONTPROBE_1284	# Avoid boot detection of PnP parallel devices
 options 	VP0_DEBUG	# ZIP/ZIP+ debug
 options 	LPT_DEBUG	# Printer driver debug
 options 	PPC_DEBUG	# Parallel chipset level debug
 options 	PLIP_DEBUG	# Parallel network IP interface debug
 options 	PCFCLOCK_VERBOSE         # Verbose pcfclock driver
 options 	PCFCLOCK_MAX_RETRIES=5   # Maximum read tries (default 10)
 
 device		ppc
 hint.ppc.0.at="isa"
 hint.ppc.0.irq="7"
 device		ppbus
 device		vpo
 device		lpt
 device		plip
 device		ppi
 device		pps
 device		lpbb
 device		pcfclock
 
 #
 # Etherswitch framework and drivers
 #
 # etherswitch	The etherswitch(4) framework
 # miiproxy	Proxy device for miibus(4) functionality
 # 
 # Switch hardware support:
 # arswitch	Atheros switches
 # ip17x 	IC+ 17x family switches
 # rtl8366rb	Realtek RTL8366 switches
 # ukswitch	Multi-PHY switches
 #
 device		etherswitch
 device		miiproxy
 device		arswitch
 device		ip17x
 device		rtl8366rb
 device		ukswitch
 
 # Kernel BOOTP support
 
 options 	BOOTP		# Use BOOTP to obtain IP address/hostname
 				# Requires NFSCL and NFS_ROOT
 options 	BOOTP_NFSROOT	# NFS mount root filesystem using BOOTP info
 options 	BOOTP_NFSV3	# Use NFS v3 to NFS mount root
 options 	BOOTP_COMPAT	# Workaround for broken bootp daemons.
 options 	BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP
 options 	BOOTP_BLOCKSIZE=8192 # Override NFS block size
 
 #
 # Enable software watchdog routines, even if hardware watchdog is present.
 # By default, software watchdog timer is enabled only if no hardware watchdog
 # is present.
 #
 options 	SW_WATCHDOG
 
 #
 # Add the software deadlock resolver thread.
 #
 options 	DEADLKRES
 
 #
 # Disable swapping of stack pages.  This option removes all
 # code which actually performs swapping, so it's not possible to turn
 # it back on at run-time.
 #
 # This is sometimes usable for systems which don't have any swap space
 # (see also sysctl "vm.disable_swapspace_pageouts")
 #
 #options 	NO_SWAPPING
 
 # Set the number of sf_bufs to allocate. sf_bufs are virtual buffers
 # for sendfile(2) that are used to map file VM pages, and normally
 # default to a quantity that is roughly 16*MAXUSERS+512. You would
 # typically want about 4 of these for each simultaneous file send.
 #
 options 	NSFBUFS=1024
 
 #
 # Enable extra debugging code for locks.  This stores the filename and
 # line of whatever acquired the lock in the lock itself, and changes a
 # number of function calls to pass around the relevant data.  This is
 # not at all useful unless you are debugging lock code.  Note that
 # modules should be recompiled as this option modifies KBI.
 #
 options 	DEBUG_LOCKS
 
 
 #####################################################################
 # USB support
 # UHCI controller
 device		uhci
 # OHCI controller
 device		ohci
 # EHCI controller
 device		ehci
 # XHCI controller
 device		xhci
 # SL811 Controller
 #device		slhci
 # General USB code (mandatory for USB)
 device		usb
 #
 # USB Double Bulk Pipe devices
 device		udbp
 # USB Fm Radio
 device		ufm
 # USB temperature meter
 device		ugold
 # USB LED
 device		uled
 # Human Interface Device (anything with buttons and dials)
 device		uhid
 # USB keyboard
 device		ukbd
 # USB printer
 device		ulpt
 # USB mass storage driver (Requires scbus and da)
 device		umass
 # USB mass storage driver for device-side mode
 device		usfs
 # USB support for Belkin F5U109 and Magic Control Technology serial adapters
 device		umct
 # USB modem support
 device		umodem
 # USB mouse
 device		ums
 # USB touchpad(s)
 device		atp
 device		wsp
 # eGalax USB touch screen
 device		uep
 # Diamond Rio 500 MP3 player
 device		urio
 #
 # USB serial support
 device		ucom
 # USB support for 3G modem cards by Option, Novatel, Huawei and Sierra
 device		u3g
 # USB support for Arkmicro Technologies ARK3116 based serial adapters
 device		uark
 # USB support for Belkin F5U103 and compatible serial adapters
 device		ubsa
 # USB support for serial adapters based on the FT8U100AX and FT8U232AM
 device		uftdi
 # USB support for some Windows CE based serial communication.
 device		uipaq
 # USB support for Prolific PL-2303 serial adapters
 device		uplcom
 # USB support for Silicon Laboratories CP2101/CP2102 based USB serial adapters
 device		uslcom
 # USB Visor and Palm devices
 device		uvisor
 # USB serial support for DDI pocket's PHS
 device		uvscom
 #
 # USB ethernet support
 device		uether
 # ADMtek USB ethernet. Supports the LinkSys USB100TX,
 # the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX
 # and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus
 # eval board.
 device		aue
 
 # ASIX Electronics AX88172 USB 2.0 ethernet driver. Used in the
 # LinkSys USB200M and various other adapters.
 device		axe
 # ASIX Electronics AX88178A/AX88179 USB 2.0/3.0 gigabit ethernet driver.
 device		axge
 
 #
 # Devices which communicate using Ethernet over USB, particularly
 # Communication Device Class (CDC) Ethernet specification. Supports
 # Sharp Zaurus PDAs, some DOCSIS cable modems and so on.
 device		cdce
 #
 # CATC USB-EL1201A USB ethernet. Supports the CATC Netmate
 # and Netmate II, and the Belkin F5U111.
 device		cue
 #
 # Kawasaki LSI ethernet. Supports the LinkSys USB10T,
 # Entrega USB-NET-E45, Peracom Ethernet Adapter, the
 # 3Com 3c19250, the ADS Technologies USB-10BT, the ATen UC10T,
 # the Netgear EA101, the D-Link DSB-650, the SMC 2102USB
 # and 2104USB, and the Corega USB-T.
 device		kue
 #
 # RealTek RTL8150 USB to fast ethernet. Supports the Melco LUA-KTX
 # and the GREEN HOUSE GH-USB100B.
 device		rue
 #
 # Davicom DM9601E USB to fast ethernet. Supports the Corega FEther USB-TXC.
 device		udav
 #
 # RealTek RTL8152/RTL8153 USB Ethernet driver
 device		ure
 #
 # Moschip MCS7730/MCS7830 USB to fast ethernet. Supports the Sitecom LN030.
 device		mos
 #
 # HSxPA devices from Option N.V
 device		uhso
 
 # Realtek RTL8188SU/RTL8191SU/RTL8192SU wireless driver
 device		rsu
 #
 # Ralink Technology RT2501USB/RT2601USB wireless driver
 device		rum
 # Ralink Technology RT2700U/RT2800U/RT3000U wireless driver
 device		run
 #
 # Atheros AR5523 wireless driver
 device		uath
 #
 # Conexant/Intersil PrismGT wireless driver
 device		upgt
 #
 # Ralink Technology RT2500USB wireless driver
 device		ural
 #
 # RNDIS USB ethernet driver
 device		urndis
 # Realtek RTL8187B/L wireless driver
 device		urtw
 #
 # ZyDas ZD1211/ZD1211B wireless driver
 device		zyd
 #
 # Sierra USB wireless driver
 device		usie
 
 # 
 # debugging options for the USB subsystem
 #
 options 	USB_DEBUG
 options 	U3G_DEBUG
 
 # options for ukbd:
 options 	UKBD_DFLT_KEYMAP	# specify the built-in keymap
 makeoptions	UKBD_DFLT_KEYMAP=jp
 
 # options for uplcom:
 options 	UPLCOM_INTR_INTERVAL=100	# interrupt pipe interval
 						# in milliseconds
 
 # options for uvscom:
 options 	UVSCOM_DEFAULT_OPKTSIZE=8	# default output packet size
 options 	UVSCOM_INTR_INTERVAL=100	# interrupt pipe interval
 						# in milliseconds
 
 #####################################################################
 # FireWire support
 
 device		firewire	# FireWire bus code
 device		sbp		# SCSI over Firewire (Requires scbus and da)
 device		sbp_targ	# SBP-2 Target mode  (Requires scbus and targ)
 device		fwe		# Ethernet over FireWire (non-standard!)
 device		fwip		# IP over FireWire (RFC2734 and RFC3146)
 
 #####################################################################
 # dcons support (Dumb Console Device)
 
 device		dcons			# dumb console driver
 device		dcons_crom		# FireWire attachment
 options 	DCONS_BUF_SIZE=16384	# buffer size
 options 	DCONS_POLL_HZ=100	# polling rate
 options 	DCONS_FORCE_CONSOLE=0	# force to be the primary console
 options 	DCONS_FORCE_GDB=1	# force to be the gdb device
 
 #####################################################################
 # crypto subsystem
 #
 # This is a port of the OpenBSD crypto framework.  Include this when
 # configuring IPSEC and when you have a h/w crypto device to accelerate
 # user applications that link to OpenSSL.
 #
 # Drivers are ports from OpenBSD with some simple enhancements that have
 # been fed back to OpenBSD.
 
 device		crypto		# core crypto support
 
 # Only install the cryptodev device if you are running tests, or know
 # specifically why you need it.  In most cases, it is not needed and
 # will make things slower.
 device		cryptodev	# /dev/crypto for access to h/w
 
 device		rndtest		# FIPS 140-2 entropy tester
 
 device		ccr		# Chelsio T6
 
 device		hifn		# Hifn 7951, 7781, etc.
 options 	HIFN_DEBUG	# enable debugging support: hw.hifn.debug
 options 	HIFN_RNDTEST	# enable rndtest support
 
 device		ubsec		# Broadcom 5501, 5601, 58xx
 options 	UBSEC_DEBUG	# enable debugging support: hw.ubsec.debug
 options 	UBSEC_RNDTEST	# enable rndtest support
 
 #####################################################################
 
 
 #
 # Embedded system options:
 #
 # An embedded system might want to run something other than init.
 options 	INIT_PATH=/sbin/init:/rescue/init
 
 # Debug options
 options 	BUS_DEBUG	# enable newbus debugging
 options 	DEBUG_VFS_LOCKS	# enable VFS lock debugging
 options 	SOCKBUF_DEBUG	# enable sockbuf last record/mb tail checking
 options 	IFMEDIA_DEBUG	# enable debugging in net/if_media.c
 
 #
 # Verbose SYSINIT
 #
 # Make the SYSINIT process performed by mi_startup() verbose.  This is very
 # useful when porting to a new architecture.  If DDB is also enabled, this
 # will print function names instead of addresses.
 options 	VERBOSE_SYSINIT
 
 #####################################################################
 # SYSV IPC KERNEL PARAMETERS
 #
 # Maximum number of System V semaphores that can be used on the system at
 # one time.
 options 	SEMMNI=11
 
 # Total number of semaphores system wide
 options 	SEMMNS=61
 
 # Total number of undo structures in system
 options 	SEMMNU=31
 
 # Maximum number of System V semaphores that can be used by a single process
 # at one time.
 options 	SEMMSL=61
 
 # Maximum number of operations that can be outstanding on a single System V
 # semaphore at one time.
 options 	SEMOPM=101
 
 # Maximum number of undo operations that can be outstanding on a single
 # System V semaphore at one time.
 options 	SEMUME=11
 
 # Maximum number of shared memory pages system wide.
 options 	SHMALL=1025
 
 # Maximum size, in bytes, of a single System V shared memory region.
 options 	SHMMAX=(SHMMAXPGS*PAGE_SIZE+1)
 options 	SHMMAXPGS=1025
 
 # Minimum size, in bytes, of a single System V shared memory region.
 options 	SHMMIN=2
 
 # Maximum number of shared memory regions that can be used on the system
 # at one time.
 options 	SHMMNI=33
 
 # Maximum number of System V shared memory regions that can be attached to
 # a single process at one time.
 options 	SHMSEG=9
 
 # Set the amount of time (in seconds) the system will wait before
 # rebooting automatically when a kernel panic occurs.  If set to (-1),
 # the system will wait indefinitely until a key is pressed on the
 # console.
 options 	PANIC_REBOOT_WAIT_TIME=16
 
 # Attempt to bypass the buffer cache and put data directly into the
 # userland buffer for read operation when O_DIRECT flag is set on the
 # file.  Both offset and length of the read operation must be
 # multiples of the physical media sector size.
 #
 options 	DIRECTIO
 
 # Specify a lower limit for the number of swap I/O buffers.  They are
 # (among other things) used when bypassing the buffer cache due to
 # DIRECTIO kernel option enabled and O_DIRECT flag set on file.
 #
 options 	NSWBUF_MIN=120
 
 #####################################################################
 
 # More undocumented options for linting.
 # Note that documenting these is not considered an affront.
 
 options 	CAM_DEBUG_DELAY
 
 # VFS cluster debugging.
 options 	CLUSTERDEBUG
 
 options 	DEBUG
 
 # Kernel filelock debugging.
 options 	LOCKF_DEBUG
 
 # System V compatible message queues
 # Please note that the values provided here are used to test kernel
 # building.  The defaults in the sources provide almost the same numbers.
 # MSGSSZ must be a power of 2 between 8 and 1024.
 options 	MSGMNB=2049	# Max number of chars in queue
 options 	MSGMNI=41	# Max number of message queue identifiers
 options 	MSGSEG=2049	# Max number of message segments
 options 	MSGSSZ=16	# Size of a message segment
 options 	MSGTQL=41	# Max number of messages in system
 
 options 	NBUF=512	# Number of buffer headers
 
 options 	SCSI_NCR_DEBUG
 options 	SCSI_NCR_MAX_SYNC=10000
 options 	SCSI_NCR_MAX_WIDE=1
 options 	SCSI_NCR_MYADDR=7
 
 options 	SC_DEBUG_LEVEL=5	# Syscons debug level
 options 	SC_RENDER_DEBUG	# syscons rendering debugging
 
 options 	VFS_BIO_DEBUG	# VFS buffer I/O debugging
 
 options 	KSTACK_MAX_PAGES=32 # Maximum pages to give the kernel stack
 options 	KSTACK_USAGE_PROF
 
 # Adaptec Array Controller driver options
 options 	AAC_DEBUG	# Debugging levels:
 				# 0 - quiet, only emit warnings
 				# 1 - noisy, emit major function
 				#     points and things done
 				# 2 - extremely noisy, emit trace
 				#     items in loops, etc.
 
 # Resource Accounting
 options 	RACCT
 
 # Resource Limits
 options 	RCTL
 
 # Yet more undocumented options for linting.
 # BKTR_ALLOC_PAGES has no effect except to cause warnings, and
 # BROOKTREE_ALLOC_PAGES hasn't actually been superseded by it, since the
 # driver still mostly spells this option BROOKTREE_ALLOC_PAGES.
 ##options 	BKTR_ALLOC_PAGES=(217*4+1)
 options 	BROOKTREE_ALLOC_PAGES=(217*4+1)
 options 	MAXFILES=999
 
 # Random number generator
 # Only ONE of the below two may be used; they are mutually exclusive.
 # If neither is present, then the Fortuna algorithm is selected.
 #options 	RANDOM_YARROW	# Yarrow CSPRNG (old default)
 #options 	RANDOM_LOADABLE	# Allow the algorithm to be loaded as
 				# a module.
 # Select this to allow high-rate but potentially expensive
 # harvesting of Slab-Allocator entropy. In very high-rate
 # situations the value of doing this is dubious at best.
 options 	RANDOM_ENABLE_UMA	# slab allocator
 
 # Module to enable execution of application via emulators like QEMU
 options         IMAGACT_BINMISC
 
 # zlib I/O stream support
 # This enables support for compressed core dumps.
 options 	GZIO
 
 # BHND(4) drivers
 options		BHND_LOGLEVEL	# Logging threshold level
 
 # evdev interface 
 device		evdev		# input event device support
 options 	EVDEV_SUPPORT	# evdev support in legacy drivers
 options 	EVDEV_DEBUG	# enable event debug msgs
 device		uinput		# install /dev/uinput cdev
 options 	UINPUT_DEBUG	# enable uinput debug msgs
 
 # Encrypted kernel crash dumps.
 options 	EKCD
Index: head/sys/conf/options
===================================================================
--- head/sys/conf/options	(revision 327953)
+++ head/sys/conf/options	(revision 327954)
@@ -1,1001 +1,1000 @@
 # $FreeBSD$
 #
 #        On the handling of kernel options
 #
 # All kernel options should be listed in NOTES, with suitable
 # descriptions.  Negative options (options that make some code not
 # compile) should be commented out; LINT (generated from NOTES) should
 # compile as much code as possible.  Try to structure option-using
 # code so that a single option only switch code on, or only switch
 # code off, to make it possible to have a full compile-test.  If
 # necessary, you can check for COMPILING_LINT to get maximum code
 # coverage.
 #
 # All new options shall also be listed in either "conf/options" or
 # "conf/options.<machine>".  Options that affect a single source-file
 # <xxx>.[c|s] should be directed into "opt_<xxx>.h", while options
 # that affect multiple files should either go in "opt_global.h" if
 # this is a kernel-wide option (used just about everywhere), or in
 # "opt_<option-name-in-lower-case>.h" if it affects only some files.
 # Note that the effect of listing only an option without a
 # header-file-name in conf/options (and cousins) is that the last
 # convention is followed.
 #
 # This handling scheme is not yet fully implemented.
 #
 #
 # Format of this file:
 # Option name	filename
 #
 # If filename is missing, the default is
 # opt_<name-of-option-in-lower-case>.h
 
 AAC_DEBUG		opt_aac.h
 AACRAID_DEBUG		opt_aacraid.h
 AHC_ALLOW_MEMIO		opt_aic7xxx.h
 AHC_TMODE_ENABLE	opt_aic7xxx.h
 AHC_DUMP_EEPROM		opt_aic7xxx.h
 AHC_DEBUG		opt_aic7xxx.h
 AHC_DEBUG_OPTS		opt_aic7xxx.h
 AHC_REG_PRETTY_PRINT	opt_aic7xxx.h
 AHD_DEBUG		opt_aic79xx.h
 AHD_DEBUG_OPTS		opt_aic79xx.h
 AHD_TMODE_ENABLE	opt_aic79xx.h	
 AHD_REG_PRETTY_PRINT	opt_aic79xx.h
 ADW_ALLOW_MEMIO		opt_adw.h
 
 TWA_DEBUG		opt_twa.h
 
 # Debugging options.
 ALT_BREAK_TO_DEBUGGER	opt_kdb.h
 BREAK_TO_DEBUGGER	opt_kdb.h
 BUF_TRACKING		opt_global.h
 DDB
 DDB_BUFR_SIZE	opt_ddb.h
 DDB_CAPTURE_DEFAULTBUFSIZE	opt_ddb.h
 DDB_CAPTURE_MAXBUFSIZE	opt_ddb.h
 DDB_CTF		opt_ddb.h
 DDB_NUMSYM	opt_ddb.h
 FULL_BUF_TRACKING	opt_global.h
 GDB
 KDB		opt_global.h
 KDB_TRACE	opt_kdb.h
 KDB_UNATTENDED	opt_kdb.h
 KLD_DEBUG	opt_kld.h
 SYSCTL_DEBUG	opt_sysctl.h
 EARLY_PRINTF	opt_global.h
 TEXTDUMP_PREFERRED	opt_ddb.h
 TEXTDUMP_VERBOSE	opt_ddb.h
 NUM_CORE_FILES	opt_global.h
 TSLOG	opt_global.h
 TSLOGSIZE	opt_global.h
 
 # Miscellaneous options.
 ALQ
 ALTERA_SDCARD_FAST_SIM	opt_altera_sdcard.h
 ATSE_CFI_HACK	opt_cfi.h
 AUDIT		opt_global.h
 BOOTHOWTO	opt_global.h
 BOOTVERBOSE	opt_global.h
 CALLOUT_PROFILING
 CAPABILITIES	opt_capsicum.h
 CAPABILITY_MODE	opt_capsicum.h
 COMPAT_43	opt_compat.h
 COMPAT_43TTY	opt_compat.h
 COMPAT_FREEBSD4	opt_compat.h
 COMPAT_FREEBSD5	opt_compat.h
 COMPAT_FREEBSD6	opt_compat.h
 COMPAT_FREEBSD7	opt_compat.h
 COMPAT_FREEBSD9	opt_compat.h
 COMPAT_FREEBSD10	opt_compat.h
 COMPAT_FREEBSD11	opt_compat.h
 COMPAT_CLOUDABI32	opt_dontuse.h
 COMPAT_CLOUDABI64	opt_dontuse.h
 COMPAT_LINUXKPI	opt_compat.h
 COMPILING_LINT	opt_global.h
 CY_PCI_FASTINTR
 DEADLKRES	opt_watchdog.h
-DEVICE_NUMA
 EXT_RESOURCES	opt_global.h
 DIRECTIO
 FILEMON		opt_dontuse.h
 FFCLOCK
 FULL_PREEMPTION	opt_sched.h
 GZIO		opt_gzio.h
 IMAGACT_BINMISC		opt_dontuse.h
 IPI_PREEMPTION	opt_sched.h
 GEOM_AES	opt_geom.h
 GEOM_BDE	opt_geom.h
 GEOM_BSD	opt_geom.h
 GEOM_CACHE	opt_geom.h
 GEOM_CONCAT	opt_geom.h
 GEOM_ELI	opt_geom.h
 GEOM_FOX	opt_geom.h
 GEOM_GATE	opt_geom.h
 GEOM_JOURNAL	opt_geom.h
 GEOM_LABEL	opt_geom.h
 GEOM_LABEL_GPT	opt_geom.h
 GEOM_LINUX_LVM	opt_geom.h
 GEOM_MAP	opt_geom.h
 GEOM_MBR	opt_geom.h
 GEOM_MIRROR	opt_geom.h
 GEOM_MOUNTVER	opt_geom.h
 GEOM_MULTIPATH	opt_geom.h
 GEOM_NOP	opt_geom.h
 GEOM_PART_APM	opt_geom.h
 GEOM_PART_BSD	opt_geom.h
 GEOM_PART_BSD64	opt_geom.h
 GEOM_PART_EBR	opt_geom.h
 GEOM_PART_EBR_COMPAT	opt_geom.h
 GEOM_PART_GPT	opt_geom.h
 GEOM_PART_LDM	opt_geom.h
 GEOM_PART_MBR	opt_geom.h
 GEOM_PART_VTOC8	opt_geom.h
 GEOM_RAID	opt_geom.h
 GEOM_RAID3	opt_geom.h
 GEOM_SHSEC	opt_geom.h
 GEOM_STRIPE	opt_geom.h
 GEOM_SUNLABEL	opt_geom.h
 GEOM_UZIP	opt_geom.h
 GEOM_UZIP_DEBUG	opt_geom.h
 GEOM_VINUM	opt_geom.h
 GEOM_VIRSTOR	opt_geom.h
 GEOM_VOL	opt_geom.h
 GEOM_ZERO	opt_geom.h
 IFLIB		opt_iflib.h
 KDTRACE_HOOKS	opt_global.h
 KDTRACE_FRAME	opt_kdtrace.h
 KN_HASHSIZE	opt_kqueue.h
 KSTACK_MAX_PAGES
 KSTACK_PAGES
 KSTACK_USAGE_PROF
 KTRACE
 KTRACE_REQUEST_POOL	opt_ktrace.h
 LIBICONV
 MAC		opt_global.h
 MAC_BIBA	opt_dontuse.h
 MAC_BSDEXTENDED	opt_dontuse.h
 MAC_IFOFF	opt_dontuse.h
 MAC_LOMAC	opt_dontuse.h
 MAC_MLS		opt_dontuse.h
 MAC_NONE	opt_dontuse.h
 MAC_PARTITION	opt_dontuse.h
 MAC_PORTACL	opt_dontuse.h
 MAC_SEEOTHERUIDS	opt_dontuse.h
 MAC_STATIC	opt_mac.h
 MAC_STUB	opt_dontuse.h
 MAC_TEST	opt_dontuse.h
 MD_ROOT		opt_md.h
 MD_ROOT_FSTYPE	opt_md.h
 MD_ROOT_READONLY	opt_md.h
 MD_ROOT_SIZE	opt_md.h
 MFI_DEBUG	opt_mfi.h
 MFI_DECODE_LOG	opt_mfi.h
 MPROF_BUFFERS	opt_mprof.h
 MPROF_HASH_SIZE	opt_mprof.h
 NEW_PCIB	opt_global.h
 NO_ADAPTIVE_MUTEXES	opt_adaptive_mutexes.h
 NO_ADAPTIVE_RWLOCKS
 NO_ADAPTIVE_SX
 NO_EVENTTIMERS		opt_timer.h
 NO_SYSCTL_DESCR	opt_global.h
 NSWBUF_MIN	opt_swap.h
 MBUF_PACKET_ZONE_DISABLE	opt_global.h
 PANIC_REBOOT_WAIT_TIME	opt_panic.h
 PCI_HP		opt_pci.h
 PCI_IOV		opt_global.h
 PPC_DEBUG	opt_ppc.h
 PPC_PROBE_CHIPSET	opt_ppc.h
 PPS_SYNC	opt_ntp.h
 PREEMPTION	opt_sched.h
 QUOTA
 SCHED_4BSD	opt_sched.h
 SCHED_STATS	opt_sched.h
 SCHED_ULE	opt_sched.h
 SLEEPQUEUE_PROFILING
 SLHCI_DEBUG	opt_slhci.h
 SPX_HACK
 STACK		opt_stack.h
 SUIDDIR
 MSGMNB		opt_sysvipc.h
 MSGMNI		opt_sysvipc.h
 MSGSEG		opt_sysvipc.h
 MSGSSZ		opt_sysvipc.h
 MSGTQL		opt_sysvipc.h
 SEMMNI		opt_sysvipc.h
 SEMMNS		opt_sysvipc.h
 SEMMNU		opt_sysvipc.h
 SEMMSL		opt_sysvipc.h
 SEMOPM		opt_sysvipc.h
 SEMUME		opt_sysvipc.h
 SHMALL		opt_sysvipc.h
 SHMMAX		opt_sysvipc.h
 SHMMAXPGS	opt_sysvipc.h
 SHMMIN		opt_sysvipc.h
 SHMMNI		opt_sysvipc.h
 SHMSEG		opt_sysvipc.h
 SYSVMSG		opt_sysvipc.h
 SYSVSEM		opt_sysvipc.h
 SYSVSHM		opt_sysvipc.h
 SW_WATCHDOG	opt_watchdog.h
 TURNSTILE_PROFILING
 UMTX_PROFILING
 UMTX_CHAINS	opt_global.h
 VERBOSE_SYSINIT
 
 # POSIX kernel options
 P1003_1B_MQUEUE			opt_posix.h
 P1003_1B_SEMAPHORES		opt_posix.h
 _KPOSIX_PRIORITY_SCHEDULING	opt_posix.h
 
 # Do we want the config file compiled into the kernel?
 INCLUDE_CONFIG_FILE	opt_config.h
 
 # Options for static filesystems.  These should only be used at config
 # time, since the corresponding lkms cannot work if there are any static
 # dependencies.  Unusability is enforced by hiding the defines for the
 # options in a never-included header.
 AUTOFS		opt_dontuse.h
 CD9660		opt_dontuse.h
 EXT2FS		opt_dontuse.h
 FDESCFS		opt_dontuse.h
 FFS		opt_dontuse.h
 FUSE		opt_dontuse.h
 MSDOSFS		opt_dontuse.h
 NANDFS		opt_dontuse.h
 NULLFS		opt_dontuse.h
 PROCFS		opt_dontuse.h
 PSEUDOFS	opt_dontuse.h
 SMBFS		opt_dontuse.h
 TMPFS		opt_dontuse.h
 UDF		opt_dontuse.h
 UNIONFS		opt_dontuse.h
 ZFS		opt_dontuse.h
 
 # Pseudofs debugging
 PSEUDOFS_TRACE	opt_pseudofs.h
 
 # In-kernel GSS-API
 KGSSAPI		opt_kgssapi.h
 KGSSAPI_DEBUG	opt_kgssapi.h
 
 # These static filesystems have one slightly bogus static dependency in
 # sys/i386/i386/autoconf.c.  If any of these filesystems are
 # statically compiled into the kernel, code for mounting them as root
 # filesystems will be enabled - but look below.
 # NFSCL - client
 # NFSD - server
 NFSCL		opt_nfs.h
 NFSD		opt_nfs.h
 
 # filesystems and libiconv bridge
 CD9660_ICONV	opt_dontuse.h
 MSDOSFS_ICONV	opt_dontuse.h
 UDF_ICONV	opt_dontuse.h
 
 # If you are following the conditions in the copyright,
 # you can enable soft-updates which will speed up a lot of things
 # and make the system safer from crashes at the same time.
 # otherwise a STUB module will be compiled in.
 SOFTUPDATES	opt_ffs.h
 
 # On small, embedded systems, it can be useful to turn off support for
 # snapshots.  It saves about 30-40k for a feature that would be lightly
 # used, if it is used at all.
 NO_FFS_SNAPSHOT	opt_ffs.h
 
 # Enabling this option turns on support for Access Control Lists in UFS,
 # which can be used to support high security configurations.  Depends on
 # UFS_EXTATTR.
 UFS_ACL		opt_ufs.h
 
 # Enabling this option turns on support for extended attributes in UFS-based
 # filesystems, which can be used to support high security configurations
 # as well as new filesystem features.
 UFS_EXTATTR	opt_ufs.h
 UFS_EXTATTR_AUTOSTART	opt_ufs.h
 
 # Enable fast hash lookups for large directories on UFS-based filesystems.
 UFS_DIRHASH	opt_ufs.h
 
 # Enable gjournal-based UFS journal.
 UFS_GJOURNAL	opt_ufs.h
 
 # The below sentence is not in English, and neither is this one.
 # We plan to remove the static dependences above, with a
 # <filesystem>_ROOT option to control if it is usable as root.  This list
 # allows these options to be present in config files already (though
 # they won't make any difference yet).
 NFS_ROOT	opt_nfsroot.h
 
 # SMB/CIFS requester
 NETSMB		opt_netsmb.h
 
 # Options used only in subr_param.c.
 HZ		opt_param.h
 MAXFILES	opt_param.h
 NBUF		opt_param.h
 NSFBUFS		opt_param.h
 VM_BCACHE_SIZE_MAX	opt_param.h
 VM_SWZONE_SIZE_MAX	opt_param.h
 MAXUSERS
 DFLDSIZ		opt_param.h
 MAXDSIZ		opt_param.h
 MAXSSIZ		opt_param.h
 
 # Generic SCSI options.
 CAM_MAX_HIGHPOWER	opt_cam.h
 CAMDEBUG		opt_cam.h
 CAM_DEBUG_COMPILE	opt_cam.h
 CAM_DEBUG_DELAY		opt_cam.h
 CAM_DEBUG_BUS		opt_cam.h
 CAM_DEBUG_TARGET	opt_cam.h
 CAM_DEBUG_LUN		opt_cam.h
 CAM_DEBUG_FLAGS		opt_cam.h
 CAM_BOOT_DELAY		opt_cam.h
 CAM_IOSCHED_DYNAMIC	opt_cam.h
 SCSI_DELAY		opt_scsi.h
 SCSI_NO_SENSE_STRINGS	opt_scsi.h
 SCSI_NO_OP_STRINGS	opt_scsi.h
 
 # Options used only in cam/ata/ata_da.c
 ADA_TEST_FAILURE	opt_ada.h
 ATA_STATIC_ID		opt_ada.h
 
 # Options used only in cam/scsi/scsi_cd.c
 CHANGER_MIN_BUSY_SECONDS	opt_cd.h
 CHANGER_MAX_BUSY_SECONDS	opt_cd.h
 
 # Options used only in cam/scsi/scsi_sa.c.
 SA_IO_TIMEOUT		opt_sa.h
 SA_SPACE_TIMEOUT	opt_sa.h
 SA_REWIND_TIMEOUT	opt_sa.h
 SA_ERASE_TIMEOUT	opt_sa.h
 SA_1FM_AT_EOD		opt_sa.h
 
 # Options used only in cam/scsi/scsi_pt.c
 SCSI_PT_DEFAULT_TIMEOUT	opt_pt.h
 
 # Options used only in cam/scsi/scsi_ses.c
 SES_ENABLE_PASSTHROUGH	opt_ses.h
 
 # Options used in dev/sym/ (Symbios SCSI driver).
 SYM_SETUP_LP_PROBE_MAP	opt_sym.h	#-Low Priority Probe Map (bits)
 					# Allows the ncr to take precedence
 					# 1 (1<<0) -> 810a, 860
 					# 2 (1<<1) -> 825a, 875, 885, 895
 					# 4 (1<<2) -> 895a, 896, 1510d 
 SYM_SETUP_SCSI_DIFF	opt_sym.h	#-HVD support for 825a, 875, 885
 					# disabled:0 (default), enabled:1
 SYM_SETUP_PCI_PARITY	opt_sym.h	#-PCI parity checking
 					# disabled:0, enabled:1 (default)
 SYM_SETUP_MAX_LUN	opt_sym.h	#-Number of LUNs supported
 					# default:8, range:[1..64]
 
 # Options used only in dev/ncr/*
 SCSI_NCR_DEBUG		opt_ncr.h
 SCSI_NCR_MAX_SYNC	opt_ncr.h
 SCSI_NCR_MAX_WIDE	opt_ncr.h
 SCSI_NCR_MYADDR		opt_ncr.h
 
 # Options used only in dev/isp/*
 ISP_TARGET_MODE		opt_isp.h
 ISP_FW_CRASH_DUMP	opt_isp.h
 ISP_DEFAULT_ROLES	opt_isp.h
 ISP_INTERNAL_TARGET	opt_isp.h
 ISP_FCTAPE_OFF		opt_isp.h
 
 # Options used only in dev/iscsi
 ISCSI_INITIATOR_DEBUG	opt_iscsi_initiator.h
 
 # Net stuff.
 ACCEPT_FILTER_DATA
 ACCEPT_FILTER_DNS
 ACCEPT_FILTER_HTTP
 ALTQ			opt_global.h
 ALTQ_CBQ		opt_altq.h
 ALTQ_CDNR		opt_altq.h
 ALTQ_CODEL		opt_altq.h
 ALTQ_DEBUG		opt_altq.h
 ALTQ_HFSC		opt_altq.h
 ALTQ_FAIRQ		opt_altq.h
 ALTQ_NOPCC		opt_altq.h
 ALTQ_PRIQ		opt_altq.h
 ALTQ_RED		opt_altq.h
 ALTQ_RIO		opt_altq.h
 BOOTP			opt_bootp.h
 BOOTP_BLOCKSIZE		opt_bootp.h
 BOOTP_COMPAT		opt_bootp.h
 BOOTP_NFSROOT		opt_bootp.h
 BOOTP_NFSV3		opt_bootp.h
 BOOTP_WIRED_TO		opt_bootp.h
 DEVICE_POLLING
 DUMMYNET		opt_ipdn.h
 RATELIMIT		opt_ratelimit.h
 INET			opt_inet.h
 INET6			opt_inet6.h
 IPDIVERT
 IPFILTER		opt_ipfilter.h
 IPFILTER_DEFAULT_BLOCK	opt_ipfilter.h
 IPFILTER_LOG		opt_ipfilter.h
 IPFILTER_LOOKUP		opt_ipfilter.h
 IPFIREWALL		opt_ipfw.h
 IPFIREWALL_DEFAULT_TO_ACCEPT	opt_ipfw.h
 IPFIREWALL_NAT		opt_ipfw.h
 IPFIREWALL_NAT64	opt_ipfw.h
 IPFIREWALL_NAT64_DIRECT_OUTPUT	opt_ipfw.h
 IPFIREWALL_NPTV6	opt_ipfw.h
 IPFIREWALL_VERBOSE	opt_ipfw.h
 IPFIREWALL_VERBOSE_LIMIT	opt_ipfw.h
 IPFIREWALL_PMOD		opt_ipfw.h
 IPSEC			opt_ipsec.h
 IPSEC_DEBUG		opt_ipsec.h
 IPSEC_SUPPORT		opt_ipsec.h
 IPSTEALTH
 KRPC
 LIBALIAS
 LIBMCHAIN
 MBUF_PROFILING
 MBUF_STRESS_TEST
 MROUTING		opt_mrouting.h
 NFSLOCKD
 PCBGROUP		opt_pcbgroup.h
 PF_DEFAULT_TO_DROP	opt_pf.h
 RADIX_MPATH		opt_mpath.h
 ROUTETABLES		opt_route.h
 RSS			opt_rss.h
 SLIP_IFF_OPTS		opt_slip.h
 TCPDEBUG
 TCPPCAP		opt_global.h
 SIFTR
 TCP_HHOOK		opt_inet.h
 TCP_OFFLOAD		opt_inet.h # Enable code to dispatch TCP offloading
 TCP_RFC7413		opt_inet.h
 TCP_RFC7413_MAX_KEYS	opt_inet.h
 TCP_SIGNATURE		opt_ipsec.h
 VLAN_ARRAY		opt_vlan.h
 XBONEHACK
 
 #
 # SCTP
 #
 SCTP			opt_sctp.h
 SCTP_DEBUG		opt_sctp.h # Enable debug printfs
 SCTP_LOCK_LOGGING	opt_sctp.h # Log to KTR lock activity
 SCTP_MBUF_LOGGING	opt_sctp.h # Log to KTR general mbuf alloc/free
 SCTP_MBCNT_LOGGING	opt_sctp.h # Log to KTR mbcnt activity
 SCTP_PACKET_LOGGING	opt_sctp.h # Log to a packet buffer last N packets
 SCTP_LTRACE_CHUNKS	opt_sctp.h # Log to KTR chunks processed
 SCTP_LTRACE_ERRORS	opt_sctp.h # Log to KTR error returns.
 SCTP_USE_PERCPU_STAT	opt_sctp.h # Use per cpu stats.
 SCTP_MCORE_INPUT	opt_sctp.h # Have multiple input threads for input mbufs
 SCTP_LOCAL_TRACE_BUF	opt_sctp.h # Use tracebuffer exported via sysctl
 SCTP_DETAILED_STR_STATS	opt_sctp.h # Use per PR-SCTP policy stream stats
 #
 #
 #
 
 # Netgraph(4). Use option NETGRAPH to enable the base netgraph code.
 # Each netgraph node type can either be compiled into the kernel
 # or loaded dynamically. To get the former, include the corresponding
 # option below. Each type has its own man page, e.g. ng_async(4).
 NETGRAPH
 NETGRAPH_DEBUG		opt_netgraph.h
 NETGRAPH_ASYNC		opt_netgraph.h
 NETGRAPH_ATMLLC		opt_netgraph.h
 NETGRAPH_ATM_ATMPIF	opt_netgraph.h
 NETGRAPH_BLUETOOTH	opt_netgraph.h
 NETGRAPH_BLUETOOTH_BT3C	opt_netgraph.h
 NETGRAPH_BLUETOOTH_H4	opt_netgraph.h
 NETGRAPH_BLUETOOTH_HCI	opt_netgraph.h
 NETGRAPH_BLUETOOTH_L2CAP	opt_netgraph.h
 NETGRAPH_BLUETOOTH_SOCKET	opt_netgraph.h
 NETGRAPH_BLUETOOTH_UBT	opt_netgraph.h
 NETGRAPH_BLUETOOTH_UBTBCMFW	opt_netgraph.h
 NETGRAPH_BPF		opt_netgraph.h
 NETGRAPH_BRIDGE		opt_netgraph.h
 NETGRAPH_CAR		opt_netgraph.h
 NETGRAPH_CISCO		opt_netgraph.h
 NETGRAPH_DEFLATE	opt_netgraph.h
 NETGRAPH_DEVICE		opt_netgraph.h
 NETGRAPH_ECHO		opt_netgraph.h
 NETGRAPH_EIFACE		opt_netgraph.h
 NETGRAPH_ETHER		opt_netgraph.h
 NETGRAPH_ETHER_ECHO	opt_netgraph.h
 NETGRAPH_FEC		opt_netgraph.h
 NETGRAPH_FRAME_RELAY	opt_netgraph.h
 NETGRAPH_GIF		opt_netgraph.h
 NETGRAPH_GIF_DEMUX	opt_netgraph.h
 NETGRAPH_HOLE		opt_netgraph.h
 NETGRAPH_IFACE		opt_netgraph.h
 NETGRAPH_IP_INPUT	opt_netgraph.h
 NETGRAPH_IPFW		opt_netgraph.h
 NETGRAPH_KSOCKET	opt_netgraph.h
 NETGRAPH_L2TP		opt_netgraph.h
 NETGRAPH_LMI		opt_netgraph.h
 NETGRAPH_MPPC_COMPRESSION	opt_netgraph.h
 NETGRAPH_MPPC_ENCRYPTION	opt_netgraph.h
 NETGRAPH_NAT		opt_netgraph.h
 NETGRAPH_NETFLOW	opt_netgraph.h
 NETGRAPH_ONE2MANY	opt_netgraph.h
 NETGRAPH_PATCH		opt_netgraph.h
 NETGRAPH_PIPE		opt_netgraph.h
 NETGRAPH_PPP		opt_netgraph.h
 NETGRAPH_PPPOE		opt_netgraph.h
 NETGRAPH_PPTPGRE	opt_netgraph.h
 NETGRAPH_PRED1		opt_netgraph.h
 NETGRAPH_RFC1490	opt_netgraph.h
 NETGRAPH_SOCKET		opt_netgraph.h
 NETGRAPH_SPLIT		opt_netgraph.h
 NETGRAPH_SPPP		opt_netgraph.h
 NETGRAPH_TAG		opt_netgraph.h
 NETGRAPH_TCPMSS		opt_netgraph.h
 NETGRAPH_TEE		opt_netgraph.h
 NETGRAPH_TTY		opt_netgraph.h
 NETGRAPH_UI		opt_netgraph.h
 NETGRAPH_VJC		opt_netgraph.h
 NETGRAPH_VLAN		opt_netgraph.h
 
 # NgATM options
 NGATM_ATM		opt_netgraph.h
 NGATM_ATMBASE		opt_netgraph.h
 NGATM_SSCOP		opt_netgraph.h
 NGATM_SSCFU		opt_netgraph.h
 NGATM_UNI		opt_netgraph.h
 NGATM_CCATM		opt_netgraph.h
 
 # DRM options
 DRM_DEBUG		opt_drm.h
 
 TI_SF_BUF_JUMBO		opt_ti.h
 TI_JUMBO_HDRSPLIT	opt_ti.h
 
 # DPT driver debug flags
 DPT_MEASURE_PERFORMANCE	opt_dpt.h
 DPT_RESET_HBA		opt_dpt.h
 
 # Misc debug flags.  Most of these should probably be replaced with
 # 'DEBUG', and then let people recompile just the interesting modules
 # with 'make CC="cc -DDEBUG"'.
 CLUSTERDEBUG		opt_debug_cluster.h
 DEBUG_1284		opt_ppb_1284.h
 VP0_DEBUG		opt_vpo.h
 LPT_DEBUG		opt_lpt.h
 PLIP_DEBUG		opt_plip.h
 LOCKF_DEBUG		opt_debug_lockf.h
 SI_DEBUG		opt_debug_si.h
 IFMEDIA_DEBUG		opt_ifmedia.h
 
 # Fb options
 FB_DEBUG		opt_fb.h
 FB_INSTALL_CDEV		opt_fb.h
 
 # ppbus related options
 PERIPH_1284		opt_ppb_1284.h
 DONTPROBE_1284		opt_ppb_1284.h
 
 # smbus related options
 ENABLE_ALART		opt_intpm.h
 
 # These cause changes all over the kernel
 BLKDEV_IOSIZE		opt_global.h
 BURN_BRIDGES		opt_global.h
 DEBUG			opt_global.h
 DEBUG_LOCKS		opt_global.h
 DEBUG_VFS_LOCKS		opt_global.h
 DFLTPHYS		opt_global.h
 DIAGNOSTIC		opt_global.h
 INVARIANT_SUPPORT	opt_global.h
 INVARIANTS		opt_global.h
 MAXCPU			opt_global.h
 MAXMEMDOM		opt_global.h
 MAXPHYS			opt_global.h
 MCLSHIFT		opt_global.h
 MUTEX_NOINLINE		opt_global.h
 LOCK_PROFILING		opt_global.h
 LOCK_PROFILING_FAST	opt_global.h
 MSIZE			opt_global.h
 REGRESSION		opt_global.h
 RWLOCK_NOINLINE		opt_global.h
 SX_NOINLINE		opt_global.h
 VFS_BIO_DEBUG		opt_global.h
 
 # These are VM related options
 VM_KMEM_SIZE		opt_vm.h
 VM_KMEM_SIZE_SCALE	opt_vm.h
 VM_KMEM_SIZE_MAX	opt_vm.h
 VM_NRESERVLEVEL		opt_vm.h
-VM_NUMA_ALLOC		opt_vm.h
 VM_LEVEL_0_ORDER	opt_vm.h
 NO_SWAPPING		opt_vm.h
 MALLOC_MAKE_FAILURES	opt_vm.h
 MALLOC_PROFILE		opt_vm.h
 MALLOC_DEBUG_MAXZONES	opt_vm.h
 
 # The MemGuard replacement allocator used for tamper-after-free detection
 DEBUG_MEMGUARD		opt_vm.h
 
 # The RedZone malloc(9) protection
 DEBUG_REDZONE		opt_vm.h
 
 # Standard SMP options
 EARLY_AP_STARTUP	opt_global.h
 SMP			opt_global.h
+NUMA			opt_global.h
 
 # Size of the kernel message buffer
 MSGBUF_SIZE		opt_msgbuf.h
 
 # NFS options
 NFS_MINATTRTIMO		opt_nfs.h
 NFS_MAXATTRTIMO		opt_nfs.h
 NFS_MINDIRATTRTIMO	opt_nfs.h
 NFS_MAXDIRATTRTIMO	opt_nfs.h
 NFS_DEBUG		opt_nfs.h
 
 # For the Bt848/Bt848A/Bt849/Bt878/Bt879 driver
 OVERRIDE_CARD			opt_bktr.h
 OVERRIDE_TUNER			opt_bktr.h
 OVERRIDE_DBX			opt_bktr.h
 OVERRIDE_MSP			opt_bktr.h
 BROOKTREE_SYSTEM_DEFAULT	opt_bktr.h
 BROOKTREE_ALLOC_PAGES		opt_bktr.h
 BKTR_OVERRIDE_CARD		opt_bktr.h
 BKTR_OVERRIDE_TUNER		opt_bktr.h
 BKTR_OVERRIDE_DBX		opt_bktr.h
 BKTR_OVERRIDE_MSP		opt_bktr.h
 BKTR_SYSTEM_DEFAULT		opt_bktr.h
 BKTR_ALLOC_PAGES		opt_bktr.h
 BKTR_USE_PLL			opt_bktr.h	
 BKTR_GPIO_ACCESS		opt_bktr.h
 BKTR_NO_MSP_RESET		opt_bktr.h
 BKTR_430_FX_MODE		opt_bktr.h
 BKTR_SIS_VIA_MODE		opt_bktr.h
 BKTR_USE_FREEBSD_SMBUS		opt_bktr.h
 BKTR_NEW_MSP34XX_DRIVER		opt_bktr.h
 
 # Options for uart(4)
 UART_PPS_ON_CTS		opt_uart.h
 UART_POLL_FREQ		opt_uart.h
 UART_DEV_TOLERANCE_PCT	opt_uart.h
 
 # options for bus/device framework
 BUS_DEBUG		opt_bus.h
 
 # options for USB support
 USB_DEBUG		opt_usb.h
 USB_HOST_ALIGN		opt_usb.h
 USB_REQ_DEBUG		opt_usb.h
 USB_TEMPLATE		opt_usb.h
 USB_VERBOSE		opt_usb.h
 USB_DMA_SINGLE_ALLOC	opt_usb.h
 USB_EHCI_BIG_ENDIAN_DESC	opt_usb.h
 U3G_DEBUG		opt_u3g.h
 UKBD_DFLT_KEYMAP	opt_ukbd.h
 UPLCOM_INTR_INTERVAL	opt_uplcom.h
 UVSCOM_DEFAULT_OPKTSIZE	opt_uvscom.h
 UVSCOM_INTR_INTERVAL	opt_uvscom.h
 
 # options for the Realtek rtwn driver
 RTWN_DEBUG		opt_rtwn.h
 RTWN_WITHOUT_UCODE	opt_rtwn.h
 
 # Embedded system options
 INIT_PATH
 
 ROOTDEVNAME
 
 FDC_DEBUG 		opt_fdc.h
 PCFCLOCK_VERBOSE	opt_pcfclock.h
 PCFCLOCK_MAX_RETRIES	opt_pcfclock.h
 
 KTR			opt_global.h
 KTR_ALQ			opt_ktr.h
 KTR_MASK		opt_ktr.h
 KTR_CPUMASK		opt_ktr.h
 KTR_COMPILE		opt_global.h
 KTR_BOOT_ENTRIES	opt_global.h
 KTR_ENTRIES		opt_global.h
 KTR_VERBOSE		opt_ktr.h
 WITNESS			opt_global.h
 WITNESS_KDB		opt_witness.h
 WITNESS_NO_VNODE	opt_witness.h
 WITNESS_SKIPSPIN	opt_witness.h
 WITNESS_COUNT		opt_witness.h
 OPENSOLARIS_WITNESS	opt_global.h
 
 # options for ACPI support
 ACPI_DEBUG		opt_acpi.h
 ACPI_MAX_TASKS		opt_acpi.h
 ACPI_MAX_THREADS	opt_acpi.h
 ACPI_DMAR		opt_acpi.h
 DEV_ACPI		opt_acpi.h
 
 # ISA support
 DEV_ISA			opt_isa.h
 ISAPNP			opt_isa.h
 
 # various 'device presence' options.
 DEV_BPF			opt_bpf.h
 DEV_CARP		opt_carp.h
 DEV_NETMAP		opt_global.h
 DEV_PCI			opt_pci.h
 DEV_PF			opt_pf.h
 DEV_PFLOG		opt_pf.h
 DEV_PFSYNC		opt_pf.h
 DEV_RANDOM		opt_global.h
 DEV_SPLASH		opt_splash.h
 DEV_VLAN		opt_vlan.h
 
 # ed driver
 ED_HPP			opt_ed.h
 ED_3C503		opt_ed.h
 ED_SIC			opt_ed.h
 
 # bce driver
 BCE_DEBUG		opt_bce.h
 BCE_NVRAM_WRITE_SUPPORT	opt_bce.h
 
 SOCKBUF_DEBUG		opt_global.h
 
 
 # options for ubsec driver
 UBSEC_DEBUG		opt_ubsec.h
 UBSEC_RNDTEST		opt_ubsec.h
 UBSEC_NO_RNG		opt_ubsec.h
 
 # options for hifn driver
 HIFN_DEBUG		opt_hifn.h
 HIFN_RNDTEST		opt_hifn.h
 
 # options for safenet driver
 SAFE_DEBUG		opt_safe.h
 SAFE_NO_RNG		opt_safe.h
 SAFE_RNDTEST		opt_safe.h
 
 # syscons/vt options
 MAXCONS			opt_syscons.h
 SC_ALT_MOUSE_IMAGE	opt_syscons.h
 SC_CUT_SPACES2TABS	opt_syscons.h
 SC_CUT_SEPCHARS		opt_syscons.h
 SC_DEBUG_LEVEL		opt_syscons.h
 SC_DFLT_FONT		opt_syscons.h
 SC_DISABLE_KDBKEY	opt_syscons.h
 SC_DISABLE_REBOOT	opt_syscons.h
 SC_HISTORY_SIZE		opt_syscons.h
 SC_KERNEL_CONS_ATTR	opt_syscons.h
 SC_KERNEL_CONS_REV_ATTR	opt_syscons.h
 SC_MOUSE_CHAR		opt_syscons.h
 SC_NO_CUTPASTE		opt_syscons.h
 SC_NO_FONT_LOADING	opt_syscons.h
 SC_NO_HISTORY		opt_syscons.h
 SC_NO_MODE_CHANGE	opt_syscons.h
 SC_NO_SUSPEND_VTYSWITCH	opt_syscons.h
 SC_NO_SYSMOUSE		opt_syscons.h
 SC_NORM_ATTR		opt_syscons.h
 SC_NORM_REV_ATTR	opt_syscons.h
 SC_PIXEL_MODE		opt_syscons.h
 SC_RENDER_DEBUG		opt_syscons.h
 SC_TWOBUTTON_MOUSE	opt_syscons.h
 VT_ALT_TO_ESC_HACK	opt_syscons.h
 VT_FB_DEFAULT_WIDTH	opt_syscons.h
 VT_FB_DEFAULT_HEIGHT	opt_syscons.h
 VT_MAXWINDOWS		opt_syscons.h
 VT_TWOBUTTON_MOUSE	opt_syscons.h
 DEV_SC			opt_syscons.h
 DEV_VT			opt_syscons.h
 
 # teken terminal emulator options
 TEKEN_CONS25		opt_teken.h
 TEKEN_UTF8		opt_teken.h
 TERMINAL_KERN_ATTR	opt_teken.h
 TERMINAL_NORM_ATTR	opt_teken.h
 
 # options for printf
 PRINTF_BUFR_SIZE	opt_printf.h
 
 # kbd options
 KBD_DISABLE_KEYMAP_LOAD	opt_kbd.h
 KBD_INSTALL_CDEV	opt_kbd.h
 KBD_MAXRETRY		opt_kbd.h
 KBD_MAXWAIT		opt_kbd.h
 KBD_RESETDELAY		opt_kbd.h
 KBDIO_DEBUG		opt_kbd.h
 
 KBDMUX_DFLT_KEYMAP	opt_kbdmux.h
 
 # options for the Atheros driver
 ATH_DEBUG		opt_ath.h
 ATH_TXBUF		opt_ath.h
 ATH_RXBUF		opt_ath.h
 ATH_DIAGAPI		opt_ath.h
 ATH_TX99_DIAG		opt_ath.h
 ATH_ENABLE_11N		opt_ath.h
 ATH_ENABLE_DFS		opt_ath.h
 ATH_EEPROM_FIRMWARE	opt_ath.h
 ATH_ENABLE_RADIOTAP_VENDOR_EXT	opt_ath.h
 ATH_DEBUG_ALQ		opt_ath.h
 ATH_KTR_INTR_DEBUG	opt_ath.h
 
 # options for the Atheros hal
 AH_SUPPORT_AR5416	opt_ah.h
 # XXX For now, this breaks non-AR9130 chipsets, so only use it
 # XXX when actually targeting AR9130.
 AH_SUPPORT_AR9130	opt_ah.h
 
 # This is required for AR933x SoC support
 AH_SUPPORT_AR9330	opt_ah.h
 AH_SUPPORT_AR9340	opt_ah.h
 AH_SUPPORT_QCA9530	opt_ah.h
 AH_SUPPORT_QCA9550	opt_ah.h
 
 AH_DEBUG		opt_ah.h
 AH_ASSERT		opt_ah.h
 AH_DEBUG_ALQ		opt_ah.h
 AH_REGOPS_FUNC		opt_ah.h
 AH_WRITE_REGDOMAIN	opt_ah.h
 AH_DEBUG_COUNTRY	opt_ah.h
 AH_WRITE_EEPROM		opt_ah.h
 AH_PRIVATE_DIAG		opt_ah.h
 AH_NEED_DESC_SWAP	opt_ah.h
 AH_USE_INIPDGAIN	opt_ah.h
 AH_MAXCHAN		opt_ah.h
 AH_RXCFG_SDMAMW_4BYTES	opt_ah.h
 AH_INTERRUPT_DEBUGGING	opt_ah.h
 # AR5416 and later interrupt mitigation
 # XXX do not use this for AR9130
 AH_AR5416_INTERRUPT_MITIGATION	opt_ah.h
 
 # options for the Broadcom BCM43xx driver (bwi)
 BWI_DEBUG		opt_bwi.h
 BWI_DEBUG_VERBOSE	opt_bwi.h
 
 # options for the Broadcom BCM43xx driver (bwn)
 BWN_DEBUG		opt_bwn.h
 BWN_GPL_PHY		opt_bwn.h
 BWN_USE_SIBA		opt_bwn.h
 
 # Options for the SIBA driver
 SIBA_DEBUG		opt_siba.h
 
 # options for the Marvell 8335 wireless driver
 MALO_DEBUG		opt_malo.h
 MALO_TXBUF		opt_malo.h
 MALO_RXBUF		opt_malo.h
 
 # options for the Marvell wireless driver
 MWL_DEBUG		opt_mwl.h
 MWL_TXBUF		opt_mwl.h
 MWL_RXBUF		opt_mwl.h
 MWL_DIAGAPI		opt_mwl.h
 MWL_AGGR_SIZE		opt_mwl.h
 MWL_TX_NODROP		opt_mwl.h
 
 # Options for the Marvell NETA driver
 MVNETA_MULTIQUEUE	opt_mvneta.h
 MVNETA_KTR		opt_mvneta.h
 
 # Options for the Intel 802.11ac wireless driver
 IWM_DEBUG		opt_iwm.h
 
 # Options for the Intel 802.11n wireless driver
 IWN_DEBUG		opt_iwn.h
 
 # Options for the Intel 3945ABG wireless driver
 WPI_DEBUG		opt_wpi.h
 
 # dcons options 
 DCONS_BUF_SIZE		opt_dcons.h
 DCONS_POLL_HZ		opt_dcons.h
 DCONS_FORCE_CONSOLE	opt_dcons.h
 DCONS_FORCE_GDB		opt_dcons.h
 
 # HWPMC options
 HWPMC_DEBUG		opt_global.h
 HWPMC_HOOKS
 HWPMC_MIPS_BACKTRACE 	opt_hwpmc_hooks.h
 
 # Interrupt filtering
 INTR_FILTER
 
 # 802.11 support layer
 IEEE80211_DEBUG		opt_wlan.h
 IEEE80211_DEBUG_REFCNT	opt_wlan.h
 IEEE80211_AMPDU_AGE	opt_wlan.h
 IEEE80211_SUPPORT_MESH	opt_wlan.h
 IEEE80211_SUPPORT_SUPERG	opt_wlan.h
 IEEE80211_SUPPORT_TDMA	opt_wlan.h
 IEEE80211_ALQ		opt_wlan.h
 IEEE80211_DFS_DEBUG	opt_wlan.h
 
 # 802.11 TDMA support
 TDMA_SLOTLEN_DEFAULT	opt_tdma.h
 TDMA_SLOTCNT_DEFAULT	opt_tdma.h
 TDMA_BINTVAL_DEFAULT	opt_tdma.h
 TDMA_TXRATE_11B_DEFAULT	opt_tdma.h
 TDMA_TXRATE_11G_DEFAULT	opt_tdma.h
 TDMA_TXRATE_11A_DEFAULT	opt_tdma.h
 TDMA_TXRATE_TURBO_DEFAULT	opt_tdma.h
 TDMA_TXRATE_HALF_DEFAULT	opt_tdma.h
 TDMA_TXRATE_QUARTER_DEFAULT	opt_tdma.h
 TDMA_TXRATE_11NA_DEFAULT	opt_tdma.h
 TDMA_TXRATE_11NG_DEFAULT	opt_tdma.h
 
 # VideoMode
 PICKMODE_DEBUG			opt_videomode.h
 
 # Network stack virtualization options
 VIMAGE			opt_global.h
 VNET_DEBUG		opt_global.h
 
 # Common Flash Interface (CFI) options
 CFI_SUPPORT_STRATAFLASH	opt_cfi.h
 CFI_ARMEDANDDANGEROUS	opt_cfi.h
 CFI_HARDWAREBYTESWAP	opt_cfi.h
 
 # Sound options
 SND_DEBUG		opt_snd.h
 SND_DIAGNOSTIC		opt_snd.h
 SND_FEEDER_MULTIFORMAT	opt_snd.h
 SND_FEEDER_FULL_MULTIFORMAT	opt_snd.h
 SND_FEEDER_RATE_HP	opt_snd.h
 SND_PCM_64		opt_snd.h
 SND_OLDSTEREO		opt_snd.h
 
 X86BIOS
 
 # Flattened device tree options
 FDT		opt_platform.h
 FDT_DTB_STATIC	opt_platform.h
 
 # OFED Infiniband stack
 OFED		opt_ofed.h
 OFED_DEBUG_INIT	opt_ofed.h
 SDP		opt_ofed.h
 SDP_DEBUG	opt_ofed.h
 IPOIB		opt_ofed.h
 IPOIB_DEBUG	opt_ofed.h
 IPOIB_CM	opt_ofed.h
 
 # Resource Accounting
 RACCT		opt_global.h
 RACCT_DEFAULT_TO_DISABLED	opt_global.h
 
 # Resource Limits
 RCTL		opt_global.h
 
 # Random number generator(s)
 # Which CSPRNG hash we get.
 # If Yarrow is not chosen, Fortuna is selected.
 RANDOM_YARROW	opt_global.h
 # With this, no entropy processor is loaded, but the entropy
 # harvesting infrastructure is present. This means an entropy
 # processor may be loaded as a module.
 RANDOM_LOADABLE	opt_global.h
 # This turns on high-rate and potentially expensive harvesting in
 # the uma slab allocator.
 RANDOM_ENABLE_UMA	opt_global.h
 
 # BHND(4) driver
 BHND_LOGLEVEL	opt_global.h
 
 # GPIO and child devices
 GPIO_SPI_DEBUG	opt_gpio.h
 
 # etherswitch(4) driver
 RTL8366_SOFT_RESET opt_etherswitch.h
 
 # evdev protocol support
 EVDEV_SUPPORT	opt_evdev.h
 EVDEV_DEBUG	opt_evdev.h
 UINPUT_DEBUG	opt_evdev.h
 
 # Hyper-V network driver
 HN_DEBUG	opt_hn.h
 
 # CAM-based MMC stack
 MMCCAM
 # Encrypted kernel crash dumps
 EKCD		opt_ekcd.h
 
 # NVME options
 NVME_USE_NVD	opt_nvme.h
Index: head/sys/dev/acpica/acpi.c
===================================================================
--- head/sys/dev/acpica/acpi.c	(revision 327953)
+++ head/sys/dev/acpica/acpi.c	(revision 327954)
@@ -1,4154 +1,4153 @@
 /*-
  * Copyright (c) 2000 Takanori Watanabe <takawata@jp.freebsd.org>
  * Copyright (c) 2000 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
  * Copyright (c) 2000, 2001 Michael Smith
  * Copyright (c) 2000 BSDi
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_acpi.h"
-#include "opt_device_numa.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/ioccom.h>
 #include <sys/reboot.h>
 #include <sys/sysctl.h>
 #include <sys/ctype.h>
 #include <sys/linker.h>
 #include <sys/power.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/timetc.h>
 
 #if defined(__i386__) || defined(__amd64__)
 #include <machine/pci_cfgreg.h>
 #endif
 #include <machine/resource.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
 #include <isa/isavar.h>
 #include <isa/pnpvar.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 #include <contrib/dev/acpica/include/accommon.h>
 #include <contrib/dev/acpica/include/acnamesp.h>
 
 #include <dev/acpica/acpivar.h>
 #include <dev/acpica/acpiio.h>
 
 #include <dev/pci/pcivar.h>
 
 #include <vm/vm_param.h>
 
 static MALLOC_DEFINE(M_ACPIDEV, "acpidev", "ACPI devices");
 
 /* Hooks for the ACPI CA debugging infrastructure */
 #define _COMPONENT	ACPI_BUS
 ACPI_MODULE_NAME("ACPI")
 
 static d_open_t		acpiopen;
 static d_close_t	acpiclose;
 static d_ioctl_t	acpiioctl;
 
 /*
  * Character-device switch for the ACPI control node named "acpi".
  * Only open, close and ioctl entry points are supplied.
  */
 static struct cdevsw acpi_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_open =	acpiopen,
 	.d_close =	acpiclose,
 	.d_ioctl =	acpiioctl,
 	.d_name =	"acpi",
 };
 
 /*
  * An array of ACPI strings together with a count.
  * NOTE(review): presumably holds the _OSI interface names parsed from the
  * hw.acpi.install_interface / remove_interface tunables -- the users of
  * this type are outside this part of the file; confirm there.
  */
 struct acpi_interface {
 	ACPI_STRING	*data;	/* string array */
 	int		num;	/* presumably the number of entries in data */
 };
 
 static char *sysres_ids[] = { "PNP0C01", "PNP0C02", NULL };
 static char *pcilink_ids[] = { "PNP0C0F", NULL };
 
 /* Global mutex for locking access to the ACPI subsystem. */
 struct mtx	acpi_mutex;
 struct callout	acpi_sleep_timer;
 
 /* Bitmap of device quirks. */
 int		acpi_quirks;
 
 /* Supported sleep states. */
 static BOOLEAN	acpi_sleep_states[ACPI_S_STATE_COUNT];
 
 static void	acpi_lookup(void *arg, const char *name, device_t *dev);
 static int	acpi_modevent(struct module *mod, int event, void *junk);
 static int	acpi_probe(device_t dev);
 static int	acpi_attach(device_t dev);
 static int	acpi_suspend(device_t dev);
 static int	acpi_resume(device_t dev);
 static int	acpi_shutdown(device_t dev);
 static device_t	acpi_add_child(device_t bus, u_int order, const char *name,
 			int unit);
 static int	acpi_print_child(device_t bus, device_t child);
 static void	acpi_probe_nomatch(device_t bus, device_t child);
 static void	acpi_driver_added(device_t dev, driver_t *driver);
 static int	acpi_read_ivar(device_t dev, device_t child, int index,
 			uintptr_t *result);
 static int	acpi_write_ivar(device_t dev, device_t child, int index,
 			uintptr_t value);
 static struct resource_list *acpi_get_rlist(device_t dev, device_t child);
 static void	acpi_reserve_resources(device_t dev);
 static int	acpi_sysres_alloc(device_t dev);
 static int	acpi_set_resource(device_t dev, device_t child, int type,
 			int rid, rman_res_t start, rman_res_t count);
 static struct resource *acpi_alloc_resource(device_t bus, device_t child,
 			int type, int *rid, rman_res_t start, rman_res_t end,
 			rman_res_t count, u_int flags);
 static int	acpi_adjust_resource(device_t bus, device_t child, int type,
 			struct resource *r, rman_res_t start, rman_res_t end);
 static int	acpi_release_resource(device_t bus, device_t child, int type,
 			int rid, struct resource *r);
 static void	acpi_delete_resource(device_t bus, device_t child, int type,
 		    int rid);
 static uint32_t	acpi_isa_get_logicalid(device_t dev);
 static int	acpi_isa_get_compatid(device_t dev, uint32_t *cids, int count);
 static char	*acpi_device_id_probe(device_t bus, device_t dev, char **ids);
 static ACPI_STATUS acpi_device_eval_obj(device_t bus, device_t dev,
 		    ACPI_STRING pathname, ACPI_OBJECT_LIST *parameters,
 		    ACPI_BUFFER *ret);
 static ACPI_STATUS acpi_device_scan_cb(ACPI_HANDLE h, UINT32 level,
 		    void *context, void **retval);
 static ACPI_STATUS acpi_device_scan_children(device_t bus, device_t dev,
 		    int max_depth, acpi_scan_cb_t user_fn, void *arg);
 static int	acpi_set_powerstate(device_t child, int state);
 static int	acpi_isa_pnp_probe(device_t bus, device_t child,
 		    struct isa_pnp_id *ids);
 static void	acpi_probe_children(device_t bus);
 static void	acpi_probe_order(ACPI_HANDLE handle, int *order);
 static ACPI_STATUS acpi_probe_child(ACPI_HANDLE handle, UINT32 level,
 		    void *context, void **status);
 static void	acpi_sleep_enable(void *arg);
 static ACPI_STATUS acpi_sleep_disable(struct acpi_softc *sc);
 static ACPI_STATUS acpi_EnterSleepState(struct acpi_softc *sc, int state);
 static void	acpi_shutdown_final(void *arg, int howto);
 static void	acpi_enable_fixed_events(struct acpi_softc *sc);
 static BOOLEAN	acpi_has_hid(ACPI_HANDLE handle);
 static void	acpi_resync_clock(struct acpi_softc *sc);
 static int	acpi_wake_sleep_prep(ACPI_HANDLE handle, int sstate);
 static int	acpi_wake_run_prep(ACPI_HANDLE handle, int sstate);
 static int	acpi_wake_prep_walk(int sstate);
 static int	acpi_wake_sysctl_walk(device_t dev);
 static int	acpi_wake_set_sysctl(SYSCTL_HANDLER_ARGS);
 static void	acpi_system_eventhandler_sleep(void *arg, int state);
 static void	acpi_system_eventhandler_wakeup(void *arg, int state);
 static int	acpi_sname2sstate(const char *sname);
 static const char *acpi_sstate2sname(int sstate);
 static int	acpi_supported_sleep_state_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_sleep_state_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_debug_objects_sysctl(SYSCTL_HANDLER_ARGS);
 static int	acpi_pm_func(u_long cmd, void *arg, ...);
 static int	acpi_child_location_str_method(device_t acdev, device_t child,
 					       char *buf, size_t buflen);
 static int	acpi_child_pnpinfo_str_method(device_t acdev, device_t child,
 					      char *buf, size_t buflen);
 #if defined(__i386__) || defined(__amd64__)
 static void	acpi_enable_pcie(void);
 #endif
 static void	acpi_hint_device_unit(device_t acdev, device_t child,
 		    const char *name, int *unitp);
 static void	acpi_reset_interfaces(device_t dev);
 
 /*
  * Method table for the "acpi" driver.  It binds the generic device
  * interface, the bus interface (resource handling, ivars, child naming),
  * the ACPI-specific bus methods and the ISA PnP emulation hook to their
  * implementations.  Entries that name bus_generic_* functions use the
  * stock newbus implementation instead of an ACPI-specific one.
  */
 static device_method_t acpi_methods[] = {
     /* Device interface */
     DEVMETHOD(device_probe,		acpi_probe),
     DEVMETHOD(device_attach,		acpi_attach),
     DEVMETHOD(device_shutdown,		acpi_shutdown),
     DEVMETHOD(device_detach,		bus_generic_detach),
     DEVMETHOD(device_suspend,		acpi_suspend),
     DEVMETHOD(device_resume,		acpi_resume),
 
     /* Bus interface */
     DEVMETHOD(bus_add_child,		acpi_add_child),
     DEVMETHOD(bus_print_child,		acpi_print_child),
     DEVMETHOD(bus_probe_nomatch,	acpi_probe_nomatch),
     DEVMETHOD(bus_driver_added,		acpi_driver_added),
     DEVMETHOD(bus_read_ivar,		acpi_read_ivar),
     DEVMETHOD(bus_write_ivar,		acpi_write_ivar),
     DEVMETHOD(bus_get_resource_list,	acpi_get_rlist),
     DEVMETHOD(bus_set_resource,		acpi_set_resource),
     DEVMETHOD(bus_get_resource,		bus_generic_rl_get_resource),
     DEVMETHOD(bus_alloc_resource,	acpi_alloc_resource),
     DEVMETHOD(bus_adjust_resource,	acpi_adjust_resource),
     DEVMETHOD(bus_release_resource,	acpi_release_resource),
     DEVMETHOD(bus_delete_resource,	acpi_delete_resource),
     DEVMETHOD(bus_child_pnpinfo_str,	acpi_child_pnpinfo_str_method),
     DEVMETHOD(bus_child_location_str,	acpi_child_location_str_method),
     DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
     DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
     DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
     DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
     DEVMETHOD(bus_hint_device_unit,	acpi_hint_device_unit),
     DEVMETHOD(bus_get_cpus,		acpi_get_cpus),
     DEVMETHOD(bus_get_domain,		acpi_get_domain),
 
     /* ACPI bus */
     DEVMETHOD(acpi_id_probe,		acpi_device_id_probe),
     DEVMETHOD(acpi_evaluate_object,	acpi_device_eval_obj),
     DEVMETHOD(acpi_pwr_for_sleep,	acpi_device_pwr_for_sleep),
     DEVMETHOD(acpi_scan_children,	acpi_device_scan_children),
 
     /* ISA emulation */
     DEVMETHOD(isa_pnp_probe,		acpi_isa_pnp_probe),
 
     DEVMETHOD_END
 };
 
 /*
  * Driver description for "acpi": its name, its method table and the size
  * of the per-instance softc (struct acpi_softc).
  */
 static driver_t acpi_driver = {
     "acpi",
     acpi_methods,
     sizeof(struct acpi_softc),
 };
 
 static devclass_t acpi_devclass;
 DRIVER_MODULE(acpi, nexus, acpi_driver, acpi_devclass, acpi_modevent, 0);
 MODULE_VERSION(acpi, 1);
 
 ACPI_SERIAL_DECL(acpi, "ACPI root bus");
 
 /* Local pools for managing system resources for ACPI child devices. */
 static struct rman acpi_rman_io, acpi_rman_mem;
 
 #define ACPI_MINIMUM_AWAKETIME	5
 
 /* Holds the description of the acpi0 device. */
 static char acpi_desc[ACPI_OEM_ID_SIZE + ACPI_OEM_TABLE_ID_SIZE + 2];
 
 SYSCTL_NODE(_debug, OID_AUTO, acpi, CTLFLAG_RD, NULL, "ACPI debugging");
 static char acpi_ca_version[12];
 SYSCTL_STRING(_debug_acpi, OID_AUTO, acpi_ca_version, CTLFLAG_RD,
 	      acpi_ca_version, 0, "Version of Intel ACPI-CA");
 
 /*
  * Allow overriding _OSI methods.
  */
 static char acpi_install_interface[256];
 TUNABLE_STR("hw.acpi.install_interface", acpi_install_interface,
     sizeof(acpi_install_interface));
 static char acpi_remove_interface[256];
 TUNABLE_STR("hw.acpi.remove_interface", acpi_remove_interface,
     sizeof(acpi_remove_interface));
 
 /* Allow users to dump Debug objects without ACPI debugger. */
 static int acpi_debug_objects;
 TUNABLE_INT("debug.acpi.enable_debug_objects", &acpi_debug_objects);
 SYSCTL_PROC(_debug_acpi, OID_AUTO, enable_debug_objects,
     CTLFLAG_RW | CTLTYPE_INT, NULL, 0, acpi_debug_objects_sysctl, "I",
     "Enable Debug objects");
 
 /* Allow the interpreter to ignore common mistakes in BIOS. */
 static int acpi_interpreter_slack = 1;
 TUNABLE_INT("debug.acpi.interpreter_slack", &acpi_interpreter_slack);
 SYSCTL_INT(_debug_acpi, OID_AUTO, interpreter_slack, CTLFLAG_RDTUN,
     &acpi_interpreter_slack, 1, "Turn on interpreter slack mode.");
 
 /* Ignore register widths set by FADT and use default widths instead. */
 static int acpi_ignore_reg_width = 1;
 TUNABLE_INT("debug.acpi.default_register_width", &acpi_ignore_reg_width);
 SYSCTL_INT(_debug_acpi, OID_AUTO, default_register_width, CTLFLAG_RDTUN,
     &acpi_ignore_reg_width, 1, "Ignore register widths set by FADT");
 
 /* Allow users to override quirks. */
 TUNABLE_INT("debug.acpi.quirks", &acpi_quirks);
 
 static int acpi_susp_bounce;
 SYSCTL_INT(_debug_acpi, OID_AUTO, suspend_bounce, CTLFLAG_RW,
     &acpi_susp_bounce, 0, "Don't actually suspend, just test devices.");
 
 /*
  * Module event handler.
  *
  * The ACPI driver has to come in through the loader: a load request after
  * bootstrap (!cold) is refused with EPERM because activating ACPI that late
  * is not useful and can be fatal.  Unloading is refused with EBUSY once the
  * system is up and ACPI is the registered power-management type, since the
  * whole bus hierarchy hangs off this driver.  Every other event succeeds.
  */
 static int
 acpi_modevent(struct module *mod, int event, void *junk)
 {
 
     if (event == MOD_LOAD && !cold) {
 	printf("The ACPI driver cannot be loaded after boot.\n");
 	return (EPERM);
     }
 
     if (event == MOD_UNLOAD && !cold &&
 	power_pm_get_type() == POWER_PM_TYPE_ACPI)
 	return (EBUSY);
 
     return (0);
 }
 
 /*
  * Early, run-once initialization of ACPICA.
  *
  * Initializes the subsystem and the root tables, then applies the quirk
  * list.  Returns the failing ACPICA status if either initialization step
  * fails, AE_SUPPORT if the machine is blacklisted (and not force-enabled
  * by hint), and the last successful status otherwise.  Calls after the
  * first one return AE_OK immediately.
  */
 ACPI_STATUS
 acpi_Startup(void)
 {
     static int started = 0;
     ACPI_STATUS status;
     int val;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /* The MADT driver calls this as well; do the work only once. */
     if (started)
 	return_VALUE (AE_OK);
     started = 1;
 
     /* Bring up the ACPICA subsystem. */
     status = AcpiInitializeSubsystem();
     if (ACPI_FAILURE(status)) {
 	printf("ACPI: Could not initialize Subsystem: %s\n",
 	    AcpiFormatException(status));
 	return_VALUE (status);
     }
 
     /*
      * Reserve room for two root tables (RSDT/XSDT and DSDT) up front and
      * let ACPICA grow the list if more tables turn up.
      */
     status = AcpiInitializeTables(NULL, 2, TRUE);
     if (ACPI_FAILURE(status)) {
 	printf("ACPI: Table initialisation failed: %s\n",
 	    AcpiFormatException(status));
 	return_VALUE (status);
     }
 
     /* Consult the quirk tables unless the user already supplied quirks. */
     if (acpi_quirks == ACPI_Q_OK)
 	acpi_table_quirks(&acpi_quirks);
 
     /* An explicit "disabled=0" hint overrides the blacklist. */
     if (resource_int_value("acpi", 0, "disabled", &val) == 0 && val == 0)
 	acpi_quirks &= ~ACPI_Q_BROKEN;
 
     if ((acpi_quirks & ACPI_Q_BROKEN) == 0)
 	return_VALUE (status);
 
     printf("ACPI disabled by blacklist.  Contact your BIOS vendor.\n");
     return_VALUE (AE_SUPPORT);
 }
 
 /*
  * Detect ACPI and perform early initialisation.
  *
  * Returns ENXIO when called after boot (!cold), when the "acpi" device is
  * disabled by a hint, when a different power-management type is already
  * registered, when acpi_Startup() fails, or when the root pointer or root
  * table cannot be found or mapped.  On success the OEM ID and OEM table ID
  * of the root table (the XSDT when the RSDP revision is above 1 and gives
  * a non-zero address, otherwise the RSDT) are stored in acpi_desc, the
  * ACPICA version is formatted into acpi_ca_version, and 0 is returned.
  *
  * Every exit goes through return_VALUE so that the function-trace entry
  * opened by ACPI_FUNCTION_TRACE is always balanced.
  */
 int
 acpi_identify(void)
 {
     ACPI_TABLE_RSDP	*rsdp;
     ACPI_TABLE_HEADER	*rsdt;
     ACPI_PHYSICAL_ADDRESS paddr;
     struct sbuf		sb;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (!cold)
 	return_VALUE (ENXIO);
 
     /* Check that we haven't been disabled with a hint. */
     if (resource_disabled("acpi", 0))
 	return_VALUE (ENXIO);
 
     /* Check for other PM systems. */
     if (power_pm_get_type() != POWER_PM_TYPE_NONE &&
 	power_pm_get_type() != POWER_PM_TYPE_ACPI) {
 	printf("ACPI identify failed, other PM system enabled.\n");
 	return_VALUE (ENXIO);
     }
 
     /* Initialize root tables. */
     if (ACPI_FAILURE(acpi_Startup())) {
 	printf("ACPI: Try disabling either ACPI or apic support.\n");
 	return_VALUE (ENXIO);
     }
 
     /* Map the RSDP just long enough to pick the root table address. */
     if ((paddr = AcpiOsGetRootPointer()) == 0 ||
 	(rsdp = AcpiOsMapMemory(paddr, sizeof(ACPI_TABLE_RSDP))) == NULL)
 	return_VALUE (ENXIO);
     if (rsdp->Revision > 1 && rsdp->XsdtPhysicalAddress != 0)
 	paddr = (ACPI_PHYSICAL_ADDRESS)rsdp->XsdtPhysicalAddress;
     else
 	paddr = (ACPI_PHYSICAL_ADDRESS)rsdp->RsdtPhysicalAddress;
     AcpiOsUnmapMemory(rsdp, sizeof(ACPI_TABLE_RSDP));
 
     /* Build "<OemId> <OemTableId>" (each trimmed) in acpi_desc. */
     if ((rsdt = AcpiOsMapMemory(paddr, sizeof(ACPI_TABLE_HEADER))) == NULL)
 	return_VALUE (ENXIO);
     sbuf_new(&sb, acpi_desc, sizeof(acpi_desc), SBUF_FIXEDLEN);
     sbuf_bcat(&sb, rsdt->OemId, ACPI_OEM_ID_SIZE);
     sbuf_trim(&sb);
     sbuf_putc(&sb, ' ');
     sbuf_bcat(&sb, rsdt->OemTableId, ACPI_OEM_TABLE_ID_SIZE);
     sbuf_trim(&sb);
     sbuf_finish(&sb);
     sbuf_delete(&sb);
     AcpiOsUnmapMemory(rsdt, sizeof(ACPI_TABLE_HEADER));
 
     snprintf(acpi_ca_version, sizeof(acpi_ca_version), "%x", ACPI_CA_VERSION);
 
     return_VALUE (0);
 }
 
 /*
  * Probe method: label the device with the description string held in
  * acpi_desc and claim it at BUS_PROBE_NOWILDCARD priority.
  */
 static int
 acpi_probe(device_t dev)
 {
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /* acpi_desc is file-static, so the string outlives this call. */
     device_set_desc(dev, acpi_desc);
     return_VALUE (BUS_PROBE_NOWILDCARD);
 }
 
 /*
  * Attach the ACPI root device.
  *
  * In order: initializes the I/O-port and memory resource managers and the
  * global ACPI mutex, copies tunables into ACPICA globals, loads the
  * namespace, enables the subsystem and initializes objects, builds the
  * hw.<devname> sysctl tree, picks default sleep states for the buttons and
  * lid, registers shutdown/sleep/wakeup event handlers, creates the "acpi"
  * control device, runs the machine-dependent init, probes child devices
  * (unless "bus" is disabled), updates GPEs, and arms a callout that invokes
  * acpi_sleep_enable() after ACPI_MINIMUM_AWAKETIME seconds.
  *
  * Returns 0 on success.  Returns ENXIO if loading the namespace, enabling
  * the subsystem or initializing objects fails, or the error reported by
  * acpi_machdep_init().
  *
  * NOTE(review): every failure path jumps to "out", which only returns the
  * error; nothing set up earlier in this function (resource managers, mutex,
  * sysctl context, event handlers, control device) is undone here.
  */
 static int
 acpi_attach(device_t dev)
 {
     struct acpi_softc	*sc;
     ACPI_STATUS		status;
     int			error, state;
     UINT32		flags;
     UINT8		TypeA, TypeB;
     char		*env;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     sc = device_get_softc(dev);
     sc->acpi_dev = dev;
     callout_init(&sc->susp_force_to, 1);
 
     /* Default result for the "goto out" paths that do not set error. */
     error = ENXIO;
 
     /* Initialize resource manager. */
     acpi_rman_io.rm_type = RMAN_ARRAY;
     acpi_rman_io.rm_start = 0;
     acpi_rman_io.rm_end = 0xffff;
     acpi_rman_io.rm_descr = "ACPI I/O ports";
     if (rman_init(&acpi_rman_io) != 0)
 	panic("acpi rman_init IO ports failed");
     acpi_rman_mem.rm_type = RMAN_ARRAY;
     acpi_rman_mem.rm_descr = "ACPI I/O memory addresses";
     if (rman_init(&acpi_rman_mem) != 0)
 	panic("acpi rman_init memory failed");
 
     /* Initialise the ACPI mutex */
     mtx_init(&acpi_mutex, "ACPI global lock", NULL, MTX_DEF);
 
     /*
      * Set the globals from our tunables.  This is needed because ACPI-CA
      * uses UINT8 for some values and we have no tunable_byte.
      */
     AcpiGbl_EnableInterpreterSlack = acpi_interpreter_slack ? TRUE : FALSE;
     AcpiGbl_EnableAmlDebugObject = acpi_debug_objects ? TRUE : FALSE;
     AcpiGbl_UseDefaultRegisterWidths = acpi_ignore_reg_width ? TRUE : FALSE;
 
 #ifndef ACPI_DEBUG
     /*
      * Disable all debugging layers and levels.
      */
     AcpiDbgLayer = 0;
     AcpiDbgLevel = 0;
 #endif
 
     /* Override OS interfaces if the user requested. */
     acpi_reset_interfaces(dev);
 
     /* Load ACPI name space. */
     status = AcpiLoadTables();
     if (ACPI_FAILURE(status)) {
 	device_printf(dev, "Could not load Namespace: %s\n",
 		      AcpiFormatException(status));
 	goto out;
     }
 
 #if defined(__i386__) || defined(__amd64__)
     /* Handle MCFG table if present. */
     acpi_enable_pcie();
 #endif
 
     /*
      * Note that some systems (specifically, those with namespace evaluation
      * issues that require the avoidance of parts of the namespace) must
      * avoid running _INI and _STA on everything, as well as dodging the final
      * object init pass.
      *
      * For these devices, we set ACPI_NO_DEVICE_INIT and ACPI_NO_OBJECT_INIT.
      *
      * XXX We should arrange for the object init pass after we have attached
      *     all our child devices, but on many systems it works here.
      */
     flags = 0;
     if (testenv("debug.acpi.avoid"))
 	flags = ACPI_NO_DEVICE_INIT | ACPI_NO_OBJECT_INIT;
 
     /* Bring the hardware and basic handlers online. */
     if (ACPI_FAILURE(status = AcpiEnableSubsystem(flags))) {
 	device_printf(dev, "Could not enable ACPI: %s\n",
 		      AcpiFormatException(status));
 	goto out;
     }
 
     /*
      * Call the ECDT probe function to provide EC functionality before
      * the namespace has been evaluated.
      *
      * XXX This happens before the sysresource devices have been probed and
      * attached so its resources come from nexus0.  In practice, this isn't
      * a problem but should be addressed eventually.
      */
     acpi_ec_ecdt_probe(dev);
 
     /* Bring device objects and regions online. */
     if (ACPI_FAILURE(status = AcpiInitializeObjects(flags))) {
 	device_printf(dev, "Could not initialize ACPI objects: %s\n",
 		      AcpiFormatException(status));
 	goto out;
     }
 
     /*
      * Setup our sysctl tree.
      *
      * XXX: This doesn't check to make sure that none of these fail.
      */
     sysctl_ctx_init(&sc->acpi_sysctl_ctx);
     sc->acpi_sysctl_tree = SYSCTL_ADD_NODE(&sc->acpi_sysctl_ctx,
 			       SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
 			       device_get_name(dev), CTLFLAG_RD, 0, "");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "supported_sleep_state", CTLTYPE_STRING | CTLFLAG_RD,
 	0, 0, acpi_supported_sleep_state_sysctl, "A",
 	"List supported ACPI sleep states.");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "power_button_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_power_button_sx, 0, acpi_sleep_state_sysctl, "A",
 	"Power button ACPI sleep state.");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "sleep_button_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_sleep_button_sx, 0, acpi_sleep_state_sysctl, "A",
 	"Sleep button ACPI sleep state.");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "lid_switch_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_lid_switch_sx, 0, acpi_sleep_state_sysctl, "A",
 	"Lid ACPI sleep state. Set to S3 if you want to suspend your laptop when close the Lid.");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "standby_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_standby_sx, 0, acpi_sleep_state_sysctl, "A", "");
     SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "suspend_state", CTLTYPE_STRING | CTLFLAG_RW,
 	&sc->acpi_suspend_sx, 0, acpi_sleep_state_sysctl, "A", "");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "sleep_delay", CTLFLAG_RW, &sc->acpi_sleep_delay, 0,
 	"sleep delay in seconds");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "s4bios", CTLFLAG_RW, &sc->acpi_s4bios, 0, "S4BIOS mode");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "verbose", CTLFLAG_RW, &sc->acpi_verbose, 0, "verbose mode");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "disable_on_reboot", CTLFLAG_RW,
 	&sc->acpi_do_disable, 0, "Disable ACPI when rebooting/halting system");
     SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree),
 	OID_AUTO, "handle_reboot", CTLFLAG_RW,
 	&sc->acpi_handle_reboot, 0, "Use ACPI Reset Register to reboot");
 
     /*
      * Default to 1 second before sleeping to give some machines time to
      * stabilize.
      */
     sc->acpi_sleep_delay = 1;
     if (bootverbose)
 	sc->acpi_verbose = 1;
     /* Any hw.acpi.verbose value other than "0" also turns verbose on. */
     if ((env = kern_getenv("hw.acpi.verbose")) != NULL) {
 	if (strcmp(env, "0") != 0)
 	    sc->acpi_verbose = 1;
 	freeenv(env);
     }
 
     /* Only enable reboot by default if the FADT says it is available. */
     if (AcpiGbl_FADT.Flags & ACPI_FADT_RESET_REGISTER)
 	sc->acpi_handle_reboot = 1;
 
 #if !ACPI_REDUCED_HARDWARE
     /* Only enable S4BIOS by default if the FACS says it is available. */
     if (AcpiGbl_FACS != NULL && AcpiGbl_FACS->Flags & ACPI_FACS_S4_BIOS_PRESENT)
 	sc->acpi_s4bios = 1;
 #endif
 
     /*
      * Probe all supported sleep states.  S0 is always marked supported; a
      * higher state counts only if both its namespace object evaluates and
      * its sleep type data can be fetched.
      */
     acpi_sleep_states[ACPI_STATE_S0] = TRUE;
     for (state = ACPI_STATE_S1; state < ACPI_S_STATE_COUNT; state++)
 	if (ACPI_SUCCESS(AcpiEvaluateObject(ACPI_ROOT_OBJECT,
 	    __DECONST(char *, AcpiGbl_SleepStateNames[state]), NULL, NULL)) &&
 	    ACPI_SUCCESS(AcpiGetSleepTypeData(state, &TypeA, &TypeB)))
 	    acpi_sleep_states[state] = TRUE;
 
     /*
      * Dispatch the default sleep state to devices.  The lid switch is set
      * to UNKNOWN by default to avoid surprising users.
      */
     sc->acpi_power_button_sx = acpi_sleep_states[ACPI_STATE_S5] ?
 	ACPI_STATE_S5 : ACPI_STATE_UNKNOWN;
     sc->acpi_lid_switch_sx = ACPI_STATE_UNKNOWN;
     sc->acpi_standby_sx = acpi_sleep_states[ACPI_STATE_S1] ?
 	ACPI_STATE_S1 : ACPI_STATE_UNKNOWN;
     sc->acpi_suspend_sx = acpi_sleep_states[ACPI_STATE_S3] ?
 	ACPI_STATE_S3 : ACPI_STATE_UNKNOWN;
 
     /* Pick the first valid sleep state for the sleep button default. */
     sc->acpi_sleep_button_sx = ACPI_STATE_UNKNOWN;
     for (state = ACPI_STATE_S1; state <= ACPI_STATE_S4; state++)
 	if (acpi_sleep_states[state]) {
 	    sc->acpi_sleep_button_sx = state;
 	    break;
 	}
 
     acpi_enable_fixed_events(sc);
 
     /*
      * Scan the namespace and attach/initialise children.
      */
 
     /* Register our shutdown handler. */
     EVENTHANDLER_REGISTER(shutdown_final, acpi_shutdown_final, sc,
 	SHUTDOWN_PRI_LAST);
 
     /*
      * Register our acpi event handlers.
      * XXX should be configurable eg. via userland policy manager.
      */
     EVENTHANDLER_REGISTER(acpi_sleep_event, acpi_system_eventhandler_sleep,
 	sc, ACPI_EVENT_PRI_LAST);
     EVENTHANDLER_REGISTER(acpi_wakeup_event, acpi_system_eventhandler_wakeup,
 	sc, ACPI_EVENT_PRI_LAST);
 
     /*
      * Flag our initial states.  Sleeping starts out disabled; the callout
      * armed at the end of this function calls acpi_sleep_enable() later.
      */
     sc->acpi_enabled = TRUE;
     sc->acpi_sstate = ACPI_STATE_S0;
     sc->acpi_sleep_disabled = TRUE;
 
     /* Create the control device */
     sc->acpi_dev_t = make_dev(&acpi_cdevsw, 0, UID_ROOT, GID_WHEEL, 0644,
 			      "acpi");
     sc->acpi_dev_t->si_drv1 = sc;
 
     if ((error = acpi_machdep_init(dev)))
 	goto out;
 
     /* Register ACPI again to pass the correct argument of pm_func. */
     power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, sc);
 
     /* Child enumeration is skipped entirely when "bus" is disabled. */
     if (!acpi_disabled("bus")) {
 	EVENTHANDLER_REGISTER(dev_lookup, acpi_lookup, NULL, 1000);
 	acpi_probe_children(dev);
     }
 
     /* Update all GPEs and enable runtime GPEs. */
     status = AcpiUpdateAllGpes();
     /* A GPE update failure is reported but does not fail the attach. */
     if (ACPI_FAILURE(status))
 	device_printf(dev, "Could not update all GPEs: %s\n",
 	    AcpiFormatException(status));
 
     /* Allow sleep request after a while. */
     callout_init_mtx(&acpi_sleep_timer, &acpi_mutex, 0);
     callout_reset(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME,
 	acpi_sleep_enable, sc);
 
     error = 0;
 
  out:
     return_VALUE (error);
 }
 
 /*
  * Move every attached child of dev to the D-state that suits a sleep
  * transition.  The requested state is the starting point; the bus is asked
  * via acpi_device_pwr_for_sleep() for the state to use, and the child is
  * switched only when that query returns 0.
  */
 static void
 acpi_set_power_children(device_t dev, int state)
 {
 	device_t *children;
 	device_t kid;
 	int count, idx, target;
 
 	if (device_get_children(dev, &children, &count) != 0)
 		return;
 
 	for (idx = 0; idx < count; idx++) {
 		kid = children[idx];
 		target = state;
 
 		/* Unattached children are handled separately. */
 		if (!device_is_attached(kid))
 			continue;
 
 		/* Use the _SxD-derived state when one is present. */
 		if (acpi_device_pwr_for_sleep(dev, kid, &target) != 0)
 			continue;
 
 		acpi_set_powerstate(kid, target);
 	}
 	free(children, M_TEMP);
 }
 
 /*
  * Suspend method: suspend all children through the generic bus code and,
  * if that worked, drop the attached children towards D3.  Returns the
  * result of bus_generic_suspend().
  */
 static int
 acpi_suspend(device_t dev)
 {
     int rv;
 
     GIANT_REQUIRED;
 
     rv = bus_generic_suspend(dev);
     if (rv != 0)
 	return (rv);
 
     acpi_set_power_children(dev, ACPI_STATE_D3);
     return (0);
 }
 
 /*
  * Resume method: bring the attached children back towards D0 first, then
  * let the generic bus code resume them.  Returns the result of
  * bus_generic_resume().
  */
 static int
 acpi_resume(device_t dev)
 {
     int rv;
 
     GIANT_REQUIRED;
 
     acpi_set_power_children(dev, ACPI_STATE_D0);
     rv = bus_generic_resume(dev);
 
     return (rv);
 }
 
 /*
  * Shutdown method.  Children are shut down before the wake GPEs are
  * prepared for S5.  Always returns 0; the results of both steps are
  * ignored.
  */
 static int
 acpi_shutdown(device_t dev)
 {
 
     GIANT_REQUIRED;
 
     /* Children go first. */
     (void)bus_generic_shutdown(dev);
 
     /*
      * Arm the GPEs that may power the system back on (e.g. the RTC) and
      * disable the ones that are not valid for S5, which is most of them.
      */
     (void)acpi_wake_prep_walk(ACPI_STATE_S5);
 
     return (0);
 }
 
 /*
  * bus_add_child method: create a child device and attach a zeroed
  * struct acpi_device (with an initialized resource list) as its ivars.
  * Returns NULL if either the ivars or the device cannot be allocated.
  */
 static device_t
 acpi_add_child(device_t bus, u_int order, const char *name, int unit)
 {
     struct acpi_device	*ivars;
     device_t		newdev;
 
     ivars = malloc(sizeof(*ivars), M_ACPIDEV, M_NOWAIT | M_ZERO);
     if (ivars == NULL)
 	return (NULL);
     resource_list_init(&ivars->ad_rl);
 
     newdev = device_add_child_ordered(bus, order, name, unit);
     if (newdev == NULL) {
 	/* No device to own the ivars, so give them back. */
 	free(ivars, M_ACPIDEV);
 	return (NULL);
     }
 
     device_set_ivars(newdev, ivars);
     return (newdev);
 }
 
 /*
  * bus_print_child method: print the standard header, the child's port,
  * iomem, irq and drq resources, its flags (when non-zero), its domain and
  * the standard footer.  Returns the sum of the values returned by the
  * individual print calls.
  */
 static int
 acpi_print_child(device_t bus, device_t child)
 {
     struct acpi_device	 *ad;
     struct resource_list *reslist;
     int nprinted;
 
     ad = device_get_ivars(child);
     reslist = &ad->ad_rl;
 
     nprinted = bus_print_child_header(bus, child);
     nprinted += resource_list_print_type(reslist, "port", SYS_RES_IOPORT,
 	"%#jx");
     nprinted += resource_list_print_type(reslist, "iomem", SYS_RES_MEMORY,
 	"%#jx");
     nprinted += resource_list_print_type(reslist, "irq", SYS_RES_IRQ, "%jd");
     nprinted += resource_list_print_type(reslist, "drq", SYS_RES_DRQ, "%jd");
     if (device_get_flags(child))
 	nprinted += printf(" flags %#x", device_get_flags(child));
     nprinted += bus_print_child_domain(bus, child);
     nprinted += bus_print_child_footer(bus, child);
 
     return (nprinted);
 }
 
 /*
  * bus_probe_nomatch method: an ACPI child that no driver claimed could be
  * powered down here and powered up again once a driver shows up.
  *
  * XXX The power-down is compiled in only with
  * ACPI_ENABLE_POWERDOWN_NODRIVER.  It is off by default because many
  * necessary devices (fdc and ATA, for example) never claim the devices
  * created for them yet still expect those devices to be powered.
  */
 static void
 acpi_probe_nomatch(device_t bus, device_t child)
 {
 #ifdef ACPI_ENABLE_POWERDOWN_NODRIVER
     acpi_set_powerstate(child, ACPI_STATE_D3);
 #endif
 }
 
 /*
  * bus_driver_added method: let the new driver identify devices, then give
  * every child still in the DS_NOTPRESENT state a probe/attach attempt.
  *
  * XXX Powering the child up before the probe (and back down if the probe
  * fails) is compiled in only with ACPI_ENABLE_POWERDOWN_NODRIVER; see
  * acpi_probe_nomatch for why that is disabled.
  */
 static void
 acpi_driver_added(device_t dev, driver_t *driver)
 {
     device_t *children, kid;
     int count, idx;
 
     DEVICE_IDENTIFY(driver, dev);
     if (device_get_children(dev, &children, &count) != 0)
 	return;
 
     for (idx = 0; idx < count; idx++) {
 	kid = children[idx];
 	if (device_get_state(kid) != DS_NOTPRESENT)
 	    continue;
 #ifdef ACPI_ENABLE_POWERDOWN_NODRIVER
 	acpi_set_powerstate(kid, ACPI_STATE_D0);
 	if (device_probe_and_attach(kid) != 0)
 	    acpi_set_powerstate(kid, ACPI_STATE_D3);
 #else
 	device_probe_and_attach(kid);
 #endif
     }
     free(children, M_TEMP);
 }
 
 /*
  * Location hint for devctl(8).
  *
  * Writes "handle=<name>" for a child that has an ACPI handle, followed by
  * " _PXM=<n>" when the _PXM integer can be read, or "unknown" for a child
  * without a handle.  Always returns 0.
  */
 static int
 acpi_child_location_str_method(device_t cbdev, device_t child, char *buf,
     size_t buflen)
 {
     struct acpi_device *ad;
     char pxmstr[32];
     int pxm;
 
     ad = device_get_ivars(child);
     if (ad->ad_handle == NULL) {
 	snprintf(buf, buflen, "unknown");
 	return (0);
     }
 
     snprintf(buf, buflen, "handle=%s", acpi_name(ad->ad_handle));
     if (ACPI_SUCCESS(acpi_GetInteger(ad->ad_handle, "_PXM", &pxm))) {
 	snprintf(pxmstr, sizeof(pxmstr), " _PXM=%d", pxm);
 	strlcat(buf, pxmstr, buflen);
     }
     return (0);
 }
 
 /*
  * PnP information for devctl(8).
  *
  * Writes "_HID=<id> _UID=<n>" built from the child's ACPI object info:
  * the hardware ID string, or "none" when it is not valid, and the unique
  * ID parsed as a decimal number, or 0 when it is not valid.  Writes
  * "unknown" when the object info cannot be obtained.  Always returns 0.
  */
 static int
 acpi_child_pnpinfo_str_method(device_t cbdev, device_t child, char *buf,
     size_t buflen)
 {
     struct acpi_device *ad;
     ACPI_DEVICE_INFO *info;
     const char *hid;
     unsigned long uid;
 
     ad = device_get_ivars(child);
     if (ACPI_FAILURE(AcpiGetObjectInfo(ad->ad_handle, &info))) {
 	snprintf(buf, buflen, "unknown");
 	return (0);
     }
 
     hid = "none";
     if (info->Valid & ACPI_VALID_HID)
 	hid = info->HardwareId.String;
 
     uid = 0UL;
     if (info->Valid & ACPI_VALID_UID)
 	uid = strtoul(info->UniqueId.String, NULL, 10);
 
     snprintf(buf, buflen, "_HID=%s _UID=%lu", hid, uid);
     AcpiOsFree(info);
 
     return (0);
 }
 
 /*
  * Per-device instance variables: read side.
  *
  * Stores the requested ivar through result and returns 0, or returns
  * ENOENT when the child has no ivars or the index is not handled here.
  * The ISA vendor, serial and compat IDs always read as -1; the PCI class,
  * subclass and programming interface are the three low bytes of
  * ad_cls_class.
  */
 static int
 acpi_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
 {
     struct acpi_device	*adev;
 
     adev = device_get_ivars(child);
     if (adev == NULL) {
 	device_printf(child, "device has no ivars\n");
 	return (ENOENT);
     }
 
     switch (index) {
     /* ACPI ivars */
     case ACPI_IVAR_HANDLE:
 	*(ACPI_HANDLE *)result = adev->ad_handle;
 	return (0);
     case ACPI_IVAR_PRIVATE:
 	*(void **)result = adev->ad_private;
 	return (0);
     case ACPI_IVAR_FLAGS:
 	*(int *)result = adev->ad_flags;
 	return (0);
 
     /* ISA compatibility ivars */
     case ISA_IVAR_VENDORID:
     case ISA_IVAR_SERIAL:
     case ISA_IVAR_COMPATID:
 	*(int *)result = -1;
 	return (0);
     case ISA_IVAR_LOGICALID:
 	*(int *)result = acpi_isa_get_logicalid(child);
 	return (0);
 
     /* PCI class code, one byte per ivar */
     case PCI_IVAR_CLASS:
 	*(uint8_t*)result = (adev->ad_cls_class >> 16) & 0xff;
 	return (0);
     case PCI_IVAR_SUBCLASS:
 	*(uint8_t*)result = (adev->ad_cls_class >> 8) & 0xff;
 	return (0);
     case PCI_IVAR_PROGIF:
 	*(uint8_t*)result = (adev->ad_cls_class >> 0) & 0xff;
 	return (0);
 
     default:
 	return (ENOENT);
     }
 }
 
 /*
  * Per-device instance variables: write side.
  *
  * Only the handle, private pointer and flags are writable.  Returns 0 on
  * success and ENOENT when the child has no ivars; any other index panics.
  */
 static int
 acpi_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
 {
     struct acpi_device	*adev;
 
     adev = device_get_ivars(child);
     if (adev == NULL) {
 	device_printf(child, "device has no ivars\n");
 	return (ENOENT);
     }
 
     switch (index) {
     case ACPI_IVAR_HANDLE:
 	adev->ad_handle = (ACPI_HANDLE)value;
 	return (0);
     case ACPI_IVAR_PRIVATE:
 	adev->ad_private = (void *)value;
 	return (0);
     case ACPI_IVAR_FLAGS:
 	adev->ad_flags = (int)value;
 	return (0);
     default:
 	panic("bad ivar write request (%d)", index);
 	return (ENOENT);
     }
 }
 
 /*
  * Handle child resource allocation/removal
  */
 /* Return the resource list embedded in the child's ACPI ivars. */
 static struct resource_list *
 acpi_get_rlist(device_t dev, device_t child)
 {
     struct acpi_device *ivars = device_get_ivars(child);
 
     return (&ivars->ad_rl);
 }
 
 /*
  * Return 1 if 'value' lies inside any resource of the given type in the
  * device's resource list (bounds inclusive), otherwise 0.
  */
 static int
 acpi_match_resource_hint(device_t dev, int type, long value)
 {
     struct acpi_device *ivars;
     struct resource_list *list;
     struct resource_list_entry *entry;
 
     ivars = device_get_ivars(dev);
     list = &ivars->ad_rl;
     STAILQ_FOREACH(entry, list, link) {
 	if (entry->type == type && entry->start <= value &&
 	    entry->end >= value)
 	    return (1);
     }
     return (0);
 }
 
 /*
  * Wire device unit numbers based on resource matches in hints.
  *
  * Walks the hinted units of driver 'name' and stores in *unitp the first
  * unit whose hinted resources are found in the resource list of 'child'.
  * *unitp is left untouched when no hinted unit qualifies.
  */
 static void
 acpi_hint_device_unit(device_t acdev, device_t child, const char *name,
     int *unitp)
 {
     const char *s;
     long value;
     int line, matches, unit;
 
     /*
      * Iterate over all the hints for the devices with the specified
      * name to see if one's resources are a subset of this device.
      */
     line = 0;
     while (resource_find_dev(&line, name, &unit, "at", NULL) == 0) {
 	/* Must have an "at" for acpi or isa. */
 	resource_string_value(name, unit, "at", &s);
 	if (!(strcmp(s, "acpi0") == 0 || strcmp(s, "acpi") == 0 ||
 	    strcmp(s, "isa0") == 0 || strcmp(s, "isa") == 0))
 	    continue;
 
 	/*
 	 * Check for matching resources.  We must have at least one match.
 	 * Since I/O and memory resources cannot be shared, if we get a
 	 * match on either of those, ignore any mismatches in IRQs or DRQs.
 	 *
 	 * XXX: We may want to revisit this to be more lenient and wire
 	 * as long as it gets one match.
 	 */
 	matches = 0;
 	if (resource_long_value(name, unit, "port", &value) == 0) {
 	    /*
 	     * Floppy drive controllers are notorious for having a
 	     * wide variety of resources not all of which include the
 	     * first port that is specified by the hint (typically
 	     * 0x3f0) (see the comment above fdc_isa_alloc_resources()
 	     * in fdc_isa.c).  However, they do all seem to include
 	     * port + 2 (e.g. 0x3f2) so for a floppy device, look for
 	     * 'value + 2' in the port resources instead of the hint
 	     * value.
 	     */
 	    if (strcmp(name, "fdc") == 0)
 		value += 2;
 	    /* A hinted port that this device lacks rejects the unit. */
 	    if (acpi_match_resource_hint(child, SYS_RES_IOPORT, value))
 		matches++;
 	    else
 		continue;
 	}
 	if (resource_long_value(name, unit, "maddr", &value) == 0) {
 	    /* Likewise for a hinted memory address. */
 	    if (acpi_match_resource_hint(child, SYS_RES_MEMORY, value))
 		matches++;
 	    else
 		continue;
 	}
 	/* A port or memory match is decisive; skip the IRQ/DRQ checks. */
 	if (matches > 0)
 	    goto matched;
 	if (resource_long_value(name, unit, "irq", &value) == 0) {
 	    if (acpi_match_resource_hint(child, SYS_RES_IRQ, value))
 		matches++;
 	    else
 		continue;
 	}
 	if (resource_long_value(name, unit, "drq", &value) == 0) {
 	    if (acpi_match_resource_hint(child, SYS_RES_DRQ, value))
 		matches++;
 	    else
 		continue;
 	}
 
     matched:
 	/* A hint with no resource entries at all never wires a unit. */
 	if (matches > 0) {
 	    /* We have a winner! */
 	    *unitp = unit;
 	    break;
 	}
     }
 }
 
 /*
  * Fetch the NUMA domain for a device by mapping the value returned by
  * _PXM to a NUMA domain.  If the device does not have a _PXM method,
  * -2 is returned.  If any other error occurs, -1 is returned.
  */
 static int
 acpi_parse_pxm(device_t dev)
 {
 	/*
 	 * The _PXM lookup is only compiled in for NUMA kernels; otherwise
 	 * the function unconditionally returns -1.
 	 */
-#ifdef DEVICE_NUMA
+#ifdef NUMA
 	ACPI_HANDLE handle;
 	ACPI_STATUS status;
 	int pxm;
 
 	/* A device without an ACPI handle is treated as having no _PXM. */
 	handle = acpi_get_handle(dev);
 	if (handle == NULL)
 		return (-2);
 	status = acpi_GetInteger(handle, "_PXM", &pxm);
 	if (ACPI_SUCCESS(status))
 		return (acpi_map_pxm_to_vm_domainid(pxm));
 	/* No _PXM object: report -2 so callers can ask the parent. */
 	if (status == AE_NOT_FOUND)
 		return (-2);
 #endif
 	/* Any other evaluation failure (or a non-NUMA kernel). */
 	return (-1);
 }
 
 /*
  * Report the CPU set for a child.  When the child resolves to a NUMA
  * domain via _PXM, LOCAL_CPUS yields that domain's CPU set and INTR_CPUS
  * yields the generic answer intersected with it.  Every other case is
  * delegated to bus_generic_get_cpus().
  */
 int
 acpi_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize,
     cpuset_t *cpuset)
 {
 	int domain, error;
 
 	domain = acpi_parse_pxm(child);
 	if (domain < 0 || (op != LOCAL_CPUS && op != INTR_CPUS))
 		return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
 
 	if (op == LOCAL_CPUS) {
 		if (setsize != sizeof(cpuset_t))
 			return (EINVAL);
 		*cpuset = cpuset_domain[domain];
 		return (0);
 	}
 
 	/* INTR_CPUS: start from the generic set, then restrict to the domain. */
 	error = bus_generic_get_cpus(dev, child, op, setsize, cpuset);
 	if (error != 0)
 		return (error);
 	if (setsize != sizeof(cpuset_t))
 		return (EINVAL);
 	CPU_AND(cpuset, &cpuset_domain[domain]);
 	return (0);
 }
 
 /*
  * Fetch the NUMA domain for the given device 'dev'.
  *
  * A usable _PXM result is stored in *domain and 0 is returned.  If _PXM
  * evaluation failed outright (-1 from acpi_parse_pxm()), ENOENT is
  * returned.  If the device simply has no _PXM, the request is passed up
  * to the parent.
  */
 int
 acpi_get_domain(device_t dev, device_t child, int *domain)
 {
 	int pxm_domain;
 
 	pxm_domain = acpi_parse_pxm(child);
 	if (pxm_domain == -1)
 		return (ENOENT);
 
 	/* No _PXM node; go up a level */
 	if (pxm_domain < 0)
 		return (bus_generic_get_domain(dev, child, domain));
 
 	*domain = pxm_domain;
 	return (0);
 }
 
 /*
  * Pre-allocate/manage all memory and IO resources.  Since rman can't handle
  * duplicates, we merge any in the sysresource attach routine.
  *
  * Returns ENXIO if the list of children cannot be obtained, otherwise 0.
  * Failure to reserve an individual range is not an error; it is only
  * reported when booting verbosely.
  */
 static int
 acpi_sysres_alloc(device_t dev)
 {
     struct resource *res;
     struct resource_list *rl;
     struct resource_list_entry *rle;
     struct rman *rm;
     device_t *children;
     int child_count, i;
 
     /*
      * Probe/attach any sysresource devices.  This would be unnecessary if we
      * had multi-pass probe/attach.
      */
     if (device_get_children(dev, &children, &child_count) != 0)
 	return (ENXIO);
     for (i = 0; i < child_count; i++) {
 	if (ACPI_ID_PROBE(dev, children[i], sysres_ids) != NULL)
 	    device_probe_and_attach(children[i]);
     }
     free(children, M_TEMP);
 
     /* Walk the resource list our parent keeps for this device. */
     rl = BUS_GET_RESOURCE_LIST(device_get_parent(dev), dev);
     STAILQ_FOREACH(rle, rl, link) {
 	/* An entry that already has a resource attached is skipped. */
 	if (rle->res != NULL) {
 	    device_printf(dev, "duplicate resource for %jx\n", rle->start);
 	    continue;
 	}
 
 	/* Only memory and IO resources are valid here. */
 	switch (rle->type) {
 	case SYS_RES_IOPORT:
 	    rm = &acpi_rman_io;
 	    break;
 	case SYS_RES_MEMORY:
 	    rm = &acpi_rman_mem;
 	    break;
 	default:
 	    continue;
 	}
 
 	/* Pre-allocate resource and add to our rman pool. */
 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), dev, rle->type,
 	    &rle->rid, rle->start, rle->start + rle->count - 1, rle->count, 0);
 	if (res != NULL) {
 	    /* Use the bounds actually granted, not the requested ones. */
 	    rman_manage_region(rm, rman_get_start(res), rman_get_end(res));
 	    rle->res = res;
 	} else if (bootverbose)
 	    device_printf(dev, "reservation of %jx, %jx (%d) failed\n",
 		rle->start, rle->count, rle->type);
     }
     return (0);
 }
 
 /*
  * Reserve declared resources for devices found during attach once system
  * resources have been allocated.  On completion the softc is flagged so
  * that later acpi_set_resource() calls reserve immediately.
  */
 static void
 acpi_reserve_resources(device_t dev)
 {
     struct resource_list_entry *entry;
     struct resource_list *list;
     struct acpi_device *ivars;
     struct acpi_softc *softc;
     device_t *kids;
     device_t kid;
     int nkids, idx;
 
     softc = device_get_softc(dev);
     if (device_get_children(dev, &kids, &nkids) != 0)
 	return;
 
     for (idx = 0; idx < nkids; idx++) {
 	kid = kids[idx];
 
 	/* Don't reserve system resources. */
 	if (ACPI_ID_PROBE(dev, kid, sysres_ids) != NULL)
 	    continue;
 
 	ivars = device_get_ivars(kid);
 	list = &ivars->ad_rl;
 	STAILQ_FOREACH(entry, list, link) {
 	    /*
 	     * Two kinds of entries are left alone:
 	     *
 	     * - IRQ resources.  There are many sticky things to get
 	     *   right otherwise (e.g. IRQs for psm, atkbd, and HPET
 	     *   when using legacy routing).
 	     * - Resources that are already allocated.  The acpi_ec(4)
 	     *   driver can allocate its resources early if ECDT is
 	     *   present.
 	     */
 	    if (entry->type == SYS_RES_IRQ || entry->res != NULL)
 		continue;
 
 	    /*
 	     * Try to reserve the resource from our parent.  If this
 	     * fails because the resource is a system resource, just
 	     * let it be.  The resource range is already reserved so
 	     * that other devices will not use it.  If the driver
 	     * needs to allocate the resource, then
 	     * acpi_alloc_resource() will sub-alloc from the system
 	     * resource.
 	     */
 	    resource_list_reserve(list, dev, kid, entry->type, &entry->rid,
 		entry->start, entry->end, entry->count, 0);
 	}
     }
 
     free(kids, M_TEMP);
     softc->acpi_resources_reserved = 1;
 }
 
 /*
  * Record a resource (type, rid, start, count) in a child's resource list
  * and, once system resources have been set up, reserve it from the parent.
  *
  * Returns EBUSY if the (type, rid) slot is currently allocated, otherwise
  * 0.  Requests that are deliberately ignored (PCI link IRQs, most PCI
  * root bridge ranges) also return 0 without touching the list.
  */
 static int
 acpi_set_resource(device_t dev, device_t child, int type, int rid,
     rman_res_t start, rman_res_t count)
 {
     struct acpi_softc *sc = device_get_softc(dev);
     struct acpi_device *ad = device_get_ivars(child);
     struct resource_list *rl = &ad->ad_rl;
 #if defined(__i386__) || defined(__amd64__)
     ACPI_DEVICE_INFO *devinfo;
 #endif
     rman_res_t end;
     
     /* Ignore IRQ resources for PCI link devices. */
     if (type == SYS_RES_IRQ && ACPI_ID_PROBE(dev, child, pcilink_ids) != NULL)
 	return (0);
 
     /*
      * Ignore most resources for PCI root bridges.  Some BIOSes
      * incorrectly enumerate the memory ranges they decode as plain
      * memory resources instead of as ResourceProducer ranges.  Other
      * BIOSes incorrectly list system resource entries for I/O ranges
      * under the PCI bridge.  Do allow the one known-correct case on
      * x86 of a PCI bridge claiming the I/O ports used for PCI config
      * access.
      */
 #if defined(__i386__) || defined(__amd64__)
     if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) {
 	if (ACPI_SUCCESS(AcpiGetObjectInfo(ad->ad_handle, &devinfo))) {
 	    if ((devinfo->Flags & ACPI_PCI_ROOT_BRIDGE) != 0) {
 		if (!(type == SYS_RES_IOPORT && start == CONF1_ADDR_PORT)) {
 		    /* devinfo is freed on both exits from this block. */
 		    AcpiOsFree(devinfo);
 		    return (0);
 		}
 	    }
 	    AcpiOsFree(devinfo);
 	}
     }
 #endif
 
     /* If the resource is already allocated, fail. */
     if (resource_list_busy(rl, type, rid))
 	return (EBUSY);
 
     /* If the resource is already reserved, release it. */
     if (resource_list_reserved(rl, type, rid))
 	resource_list_unreserve(rl, dev, child, type, rid);
 
     /* Add the resource. */
     end = (start + count - 1);
     resource_list_add(rl, type, rid, start, end, count);
 
     /* Don't reserve resources until the system resources are allocated. */
     if (!sc->acpi_resources_reserved)
 	return (0);
 
     /* Don't reserve system resources. */
     if (ACPI_ID_PROBE(dev, child, sysres_ids) != NULL)
 	return (0);
 
     /*
      * Don't reserve IRQ resources.  There are many sticky things to
      * get right otherwise (e.g. IRQs for psm, atkbd, and HPET when
      * using legacy routing).
      */
     if (type == SYS_RES_IRQ)
 	return (0);
 
     /*
      * Reserve the resource.
      *
      * XXX: Ignores failure for now.  Failure here is probably a
      * BIOS/firmware bug?
      */
     resource_list_reserve(rl, dev, child, type, &rid, start, end, count, 0);
     return (0);
 }
 
 /*
  * Allocate a resource on behalf of 'child'.
  *
  * Direct children are served from their resource list; requests from
  * deeper descendants are forwarded to our parent.  If that first attempt
  * fails and the request names an exact range, a suballocation from the
  * system resource regions is tried via acpi_alloc_sysres().  Returns the
  * resource, or NULL if every attempt failed.
  */
 static struct resource *
 acpi_alloc_resource(device_t bus, device_t child, int type, int *rid,
     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
 {
 #ifndef INTRNG
     ACPI_RESOURCE ares;
 #endif
     struct acpi_device *ad;
     struct resource_list_entry *rle;
     struct resource_list *rl;
     struct resource *res;
     int isdefault = RMAN_IS_DEFAULT_RANGE(start, end);
 
     /*
      * First attempt at allocating the resource.  For direct children,
      * use resource_list_alloc() to handle reserved resources.  For
      * other devices, pass the request up to our parent.
      */
     if (bus == device_get_parent(child)) {
 	ad = device_get_ivars(child);
 	rl = &ad->ad_rl;
 
 	/*
 	 * Simulate the behavior of the ISA bus for direct children
 	 * devices.  That is, if a non-default range is specified for
 	 * a resource that doesn't exist, use bus_set_resource() to
 	 * add the resource before allocating it.  Note that these
 	 * resources will not be reserved.
 	 */
 	if (!isdefault && resource_list_find(rl, type, *rid) == NULL)
 		resource_list_add(rl, type, *rid, start, end, count);
 	res = resource_list_alloc(rl, bus, child, type, rid, start, end, count,
 	    flags);
 #ifndef INTRNG
 	if (res != NULL && type == SYS_RES_IRQ) {
 	    /*
 	     * Since bus_config_intr() takes immediate effect, we cannot
 	     * configure the interrupt associated with a device when we
 	     * parse the resources but have to defer it until a driver
 	     * actually allocates the interrupt via bus_alloc_resource().
 	     *
 	     * XXX: Should we handle the lookup failing?
 	     */
 	    if (ACPI_SUCCESS(acpi_lookup_irq_resource(child, *rid, res, &ares)))
 		acpi_config_intr(child, &ares);
 	}
 #endif
 
 	/*
 	 * If this is an allocation of the "default" range for a given
 	 * RID, fetch the exact bounds for this resource from the
 	 * resource list entry to try to allocate the range from the
 	 * system resource regions.
 	 */
 	if (res == NULL && isdefault) {
 	    rle = resource_list_find(rl, type, *rid);
 	    if (rle != NULL) {
 		start = rle->start;
 		end = rle->end;
 		count = rle->count;
 	    }
 	}
     } else
 	res = BUS_ALLOC_RESOURCE(device_get_parent(bus), child, type, rid,
 	    start, end, count, flags);
 
     /*
      * If the first attempt failed and this is an allocation of a
      * specific range, try to satisfy the request via a suballocation
      * from our system resource regions.
      */
     /* "Specific" means [start, end] spans exactly 'count' units. */
     if (res == NULL && start + count - 1 == end)
 	res = acpi_alloc_sysres(child, type, rid, start, end, count, flags);
     return (res);
 }
 
 /*
  * Attempt to allocate a specific resource range from the system
  * resource ranges.  Note that we only handle memory and I/O port
  * system resources; any other type yields NULL.
  *
  * The range is reserved inactive and then, if RF_ACTIVE was requested,
  * activated through bus_activate_resource().  A failed activation
  * releases the reservation and returns NULL.
  */
 struct resource *
 acpi_alloc_sysres(device_t child, int type, int *rid, rman_res_t start,
     rman_res_t end, rman_res_t count, u_int flags)
 {
     struct rman *pool;
     struct resource *r;
 
     if (type == SYS_RES_IOPORT)
 	pool = &acpi_rman_io;
     else if (type == SYS_RES_MEMORY)
 	pool = &acpi_rman_mem;
     else
 	return (NULL);
 
     KASSERT(start + count - 1 == end, ("wildcard resource range"));
     r = rman_reserve_resource(pool, start, end, count, flags & ~RF_ACTIVE,
 	child);
     if (r == NULL)
 	return (NULL);
 
     rman_set_rid(r, *rid);
 
     /* If requested, activate the resource using the parent's method. */
     if ((flags & RF_ACTIVE) != 0 &&
 	bus_activate_resource(child, type, *rid, r) != 0) {
 	rman_release_resource(r);
 	return (NULL);
     }
 
     return (r);
 }
 
 /*
  * Report whether 'r' belongs to one of our rman pools.  We only handle
  * memory and IO resources through rman; every other type yields 0.
  */
 static int
 acpi_is_resource_managed(int type, struct resource *r)
 {
 
     if (type == SYS_RES_IOPORT)
 	return (rman_is_region_manager(r, &acpi_rman_io));
     if (type == SYS_RES_MEMORY)
 	return (rman_is_region_manager(r, &acpi_rman_mem));
     return (0);
 }
 
 /*
  * Adjust the bounds of a resource: directly through rman when it comes
  * from one of our pools, otherwise through the generic bus method.
  */
 static int
 acpi_adjust_resource(device_t bus, device_t child, int type, struct resource *r,
     rman_res_t start, rman_res_t end)
 {
 
     if (!acpi_is_resource_managed(type, r))
 	return (bus_generic_adjust_resource(bus, child, type, r, start, end));
     return (rman_adjust_resource(r, start, end));
 }
 
 /*
  * Release a child's resource.  Resources from our internal managers are
  * deactivated (when active) and handed back to the local pool; all others
  * go through the generic resource-list release.
  */
 static int
 acpi_release_resource(device_t bus, device_t child, int type, int rid,
     struct resource *r)
 {
     int error;
 
     if (!acpi_is_resource_managed(type, r))
 	return (bus_generic_rl_release_resource(bus, child, type, rid, r));
 
     if ((rman_get_flags(r) & RF_ACTIVE) != 0) {
 	error = bus_deactivate_resource(child, type, rid, r);
 	if (error != 0)
 	    return (error);
     }
     return (rman_release_resource(r));
 }
 
 /*
  * Remove a (type, rid) entry from a child's resource list, dropping any
  * reservation first.  A resource the child still holds is left in place
  * and a diagnostic is printed instead.
  */
 static void
 acpi_delete_resource(device_t bus, device_t child, int type, int rid)
 {
     struct resource_list *list;
 
     list = acpi_get_rlist(bus, child);
     if (!resource_list_busy(list, type, rid)) {
 	resource_list_unreserve(list, bus, child, type, rid);
 	resource_list_delete(list, type, rid);
 	return;
     }
 
     device_printf(bus, "delete_resource: Resource still owned by child"
 	" (type=%d, rid=%d)\n", type, rid);
 }
 
 /*
  * Allocate an IO port or memory resource, given its GAS.
  *
  * Returns EINVAL for a NULL argument or a zero address/width, EOPNOTSUPP
  * for an address space other than system memory or system I/O, ENOMEM if
  * the allocation fails, and 0 on success (with *type and *res filled in).
  * Note that a non-zero BitWidth below 8 is rewritten to 8 in the caller's
  * GAS structure.
  */
 int
 acpi_bus_alloc_gas(device_t dev, int *type, int *rid, ACPI_GENERIC_ADDRESS *gas,
     struct resource **res, u_int flags)
 {
     int res_type;
 
     if (type == NULL || rid == NULL || gas == NULL || res == NULL)
 	return (EINVAL);
 
     /* We only support memory and IO spaces. */
     if (gas->SpaceId == ACPI_ADR_SPACE_SYSTEM_MEMORY)
 	res_type = SYS_RES_MEMORY;
     else if (gas->SpaceId == ACPI_ADR_SPACE_SYSTEM_IO)
 	res_type = SYS_RES_IOPORT;
     else
 	return (EOPNOTSUPP);
 
     /*
      * If the register width is less than 8, assume the BIOS author means
      * it is a bit field and just allocate a byte.
      */
     if (gas->BitWidth != 0 && gas->BitWidth < 8)
 	gas->BitWidth = 8;
 
     /* Validate the address after we're sure we support the space. */
     if (gas->Address == 0 || gas->BitWidth == 0)
 	return (EINVAL);
 
     bus_set_resource(dev, res_type, *rid, gas->Address,
 	gas->BitWidth / 8);
     *res = bus_alloc_resource_any(dev, res_type, rid, RF_ACTIVE | flags);
     if (*res == NULL) {
 	/* Undo the bus_set_resource() above before reporting failure. */
 	bus_delete_resource(dev, res_type, *rid);
 	return (ENOMEM);
     }
 
     *type = res_type;
     return (0);
 }
 
 /* Probe _HID and _CID for compatible ISA PNP ids. */
 /*
  * Return the device's _HID encoded with PNP_EISAID(), or 0 when the
  * device has no handle, its object info cannot be read, or it has no
  * _HID of at least ACPI_EISAID_STRING_SIZE.
  */
 static uint32_t
 acpi_isa_get_logicalid(device_t dev)
 {
     ACPI_DEVICE_INFO	*info;
     ACPI_HANDLE		handle;
     uint32_t		id;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /* Fetch and validate the HID. */
     handle = acpi_get_handle(dev);
     if (handle == NULL)
 	return_VALUE (0);
     if (ACPI_FAILURE(AcpiGetObjectInfo(handle, &info)))
 	return_VALUE (0);
 
     id = 0;
     if ((info->Valid & ACPI_VALID_HID) != 0 &&
 	info->HardwareId.Length >= ACPI_EISAID_STRING_SIZE)
 	id = PNP_EISAID(info->HardwareId.String);
     AcpiOsFree(info);
 
     return_VALUE (id);
 }
 
 /*
  * Collect up to 'count' compatible IDs (_CID) that begin with "PNP" and
  * are at least ACPI_EISAID_STRING_SIZE long, storing their PNP_EISAID()
  * encodings in 'cids'.  Returns the number of IDs stored.
  */
 static int
 acpi_isa_get_compatid(device_t dev, uint32_t *cids, int count)
 {
     ACPI_DEVICE_INFO	*info;
     ACPI_PNP_DEVICE_ID	*cid;
     ACPI_HANDLE		handle;
     int			idx, nvalid;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /* Fetch and validate the CID */
     handle = acpi_get_handle(dev);
     if (handle == NULL)
 	return_VALUE (0);
     if (ACPI_FAILURE(AcpiGetObjectInfo(handle, &info)))
 	return_VALUE (0);
 
     nvalid = 0;
     if ((info->Valid & ACPI_VALID_CID) != 0) {
 	/* Never look at more IDs than the device actually reports. */
 	if (info->CompatibleIdList.Count < count)
 	    count = info->CompatibleIdList.Count;
 	for (idx = 0; idx < count; idx++) {
 	    cid = &info->CompatibleIdList.Ids[idx];
 	    if (cid->Length < ACPI_EISAID_STRING_SIZE)
 		continue;
 	    if (strncmp(cid->String, "PNP", 3) != 0)
 		continue;
 	    cids[nvalid] = PNP_EISAID(cid->String);
 	    nvalid++;
 	}
     }
     AcpiOsFree(info);
 
     return_VALUE (nvalid);
 }
 
 /*
  * Return the first entry of the NULL-terminated 'ids' array that
  * acpi_MatchHid() accepts for the device, or NULL if there is none.
  * Only device and processor objects with a handle are considered.
  */
 static char *
 acpi_device_id_probe(device_t bus, device_t dev, char **ids) 
 {
     ACPI_HANDLE handle;
     ACPI_OBJECT_TYPE objtype;
     char **idp;
 
     handle = acpi_get_handle(dev);
     if (ids == NULL || handle == NULL)
 	return (NULL);
 
     objtype = acpi_get_type(dev);
     if (objtype != ACPI_TYPE_DEVICE && objtype != ACPI_TYPE_PROCESSOR)
 	return (NULL);
 
     /* Try to match one of the array of IDs with a HID or CID. */
     for (idp = ids; *idp != NULL; idp++) {
 	if (acpi_MatchHid(handle, *idp))
 	    return (*idp);
     }
     return (NULL);
 }
 
 /*
  * Evaluate 'pathname' relative to the device's handle, or relative to
  * the namespace root when 'dev' is NULL.  A device without a handle
  * yields AE_BAD_PARAMETER.
  */
 static ACPI_STATUS
 acpi_device_eval_obj(device_t bus, device_t dev, ACPI_STRING pathname,
     ACPI_OBJECT_LIST *parameters, ACPI_BUFFER *ret)
 {
     ACPI_HANDLE scope;
 
     scope = ACPI_ROOT_OBJECT;
     if (dev != NULL) {
 	scope = acpi_get_handle(dev);
 	if (scope == NULL)
 	    return (AE_BAD_PARAMETER);
     }
     return (AcpiEvaluateObject(scope, pathname, parameters, ret));
 }
 
 /*
  * Look up the device power state to use for the pending sleep state by
  * evaluating _SxD on the device.  *dstate is only overridden when the
  * method exists.  Returns ENXIO for bad arguments, for the legacy
  * devices listed below, or when _SxD fails for a reason other than being
  * absent; otherwise returns 0.
  */
 int
 acpi_device_pwr_for_sleep(device_t bus, device_t dev, int *dstate)
 {
     struct acpi_softc *softc;
     ACPI_HANDLE h;
     ACPI_STATUS rv;
     char method[8];
 
     h = acpi_get_handle(dev);
     if (dstate == NULL || h == NULL)
 	return (ENXIO);
 
     /*
      * XXX If we find these devices, don't try to power them down.
      * The serial and IRDA ports on my T23 hang the system when
      * set to D3 and it appears that such legacy devices may
      * need special handling in their drivers.
      */
     if (acpi_MatchHid(h, "PNP0500") ||
 	acpi_MatchHid(h, "PNP0501") ||
 	acpi_MatchHid(h, "PNP0502") ||
 	acpi_MatchHid(h, "PNP0510") ||
 	acpi_MatchHid(h, "PNP0511"))
 	return (ENXIO);
 
     /*
      * Override next state with the value from _SxD, if present.
      * Note illegal _S0D is evaluated because some systems expect this.
      */
     softc = device_get_softc(bus);
     snprintf(method, sizeof(method), "_S%dD", softc->acpi_sstate);
     rv = acpi_GetInteger(h, method, dstate);
     if (rv != AE_NOT_FOUND && ACPI_FAILURE(rv)) {
 	device_printf(dev, "failed to get %s on %s: %s\n", method,
 	    acpi_name(h), AcpiFormatException(rv));
 	return (ENXIO);
     }
 
     return (0);
 }
 
 /* Callback arg for our implementation of walking the namespace. */
 struct acpi_device_scan_ctx {
     /* Function acpi_device_scan_cb() invokes for each accepted node. */
     acpi_scan_cb_t	user_fn;
     /* Opaque argument handed back to user_fn on every call. */
     void		*arg;
     /* Handle where the scan began; the callback skips this node. */
     ACPI_HANDLE		parent;
 };
 
 /*
  * AcpiWalkNamespace() callback used by acpi_device_scan_children().
  * Filters out nodes we should not touch, calls the user's function, and
  * keeps the handle <-> device_t association in sync if the user replaced
  * the device.
  */
 static ACPI_STATUS
 acpi_device_scan_cb(ACPI_HANDLE h, UINT32 level, void *arg, void **retval)
 {
     struct acpi_device_scan_ctx *scan;
     device_t before, after;
     ACPI_STATUS rv;
     ACPI_OBJECT_TYPE objtype;
 
     /*
      * Skip this device if we think we'll have trouble with it or it is
      * the parent where the scan began.
      */
     scan = (struct acpi_device_scan_ctx *)arg;
     if (acpi_avoid(h) || h == scan->parent)
 	return (AE_OK);
 
     /* If this is not a valid device type (e.g., a method), skip it. */
     if (ACPI_FAILURE(AcpiGetType(h, &objtype)))
 	return (AE_OK);
     switch (objtype) {
     case ACPI_TYPE_DEVICE:
     case ACPI_TYPE_PROCESSOR:
     case ACPI_TYPE_THERMAL:
     case ACPI_TYPE_POWER:
 	break;
     default:
 	return (AE_OK);
     }
 
     /*
      * Call the user function with the current device.  If it is unchanged
      * afterwards, return.  Otherwise, we update the handle to the new dev.
      */
     before = acpi_get_device(h);
     after = before;
     rv = scan->user_fn(h, &after, level, scan->arg);
     if (ACPI_FAILURE(rv))
 	return (rv);
     if (after == before)
 	return (rv);
 
     /* Remove the old child and its connection to the handle. */
     if (before != NULL) {
 	device_delete_child(device_get_parent(before), before);
 	AcpiDetachData(h, acpi_fake_objhandler);
     }
 
     /* Recreate the handle association if the user created a device. */
     if (after != NULL)
 	AcpiAttachData(h, acpi_fake_objhandler, after);
 
     return (AE_OK);
 }
 
 /*
  * Walk the namespace below 'dev' (or below the root when 'dev' is NULL)
  * to at most 'max_depth' levels, calling 'user_fn' through
  * acpi_device_scan_cb().  Does nothing when "children" is disabled.
  */
 static ACPI_STATUS
 acpi_device_scan_children(device_t bus, device_t dev, int max_depth,
     acpi_scan_cb_t user_fn, void *arg)
 {
     struct acpi_device_scan_ctx scan;
     ACPI_HANDLE start;
 
     if (acpi_disabled("children"))
 	return (AE_OK);
 
     start = ACPI_ROOT_OBJECT;
     if (dev != NULL) {
 	start = acpi_get_handle(dev);
 	if (start == NULL)
 	    return (AE_BAD_PARAMETER);
     }
 
     scan.parent = start;
     scan.user_fn = user_fn;
     scan.arg = arg;
     return (AcpiWalkNamespace(ACPI_TYPE_ANY, start, max_depth,
 	acpi_device_scan_cb, NULL, &scan, NULL));
 }
 
 /*
  * Even though ACPI devices are not PCI, we use the PCI approach for setting
  * device power states since it's close enough to ACPI.
  *
  * Returns EINVAL for a state outside D0..ACPI_D_STATES_MAX and 0 in every
  * other case; a failed switch is only logged.
  */
 static int
 acpi_set_powerstate(device_t child, int state)
 {
     ACPI_HANDLE handle;
     ACPI_STATUS rv;
 
     handle = acpi_get_handle(child);
     if (state < ACPI_STATE_D0 || state > ACPI_D_STATES_MAX)
 	return (EINVAL);
     if (handle == NULL)
 	return (0);
 
     /* Ignore errors if the power methods aren't present. */
     rv = acpi_pwr_switch_consumer(handle, state);
     if (ACPI_FAILURE(rv)) {
 	if (rv != AE_NOT_FOUND)
 	    device_printf(child,
 		"failed to set ACPI power state D%d on %s: %s\n", state,
 		acpi_name(handle), AcpiFormatException(rv));
     } else if (bootverbose) {
 	device_printf(child, "set ACPI power state D%d on %s\n",
 	    state, acpi_name(handle));
     }
 
     return (0);
 }
 
 /*
  * Match a child's _HID/_CID values against an ISA PnP ID table (the table
  * ends at the first entry with a zero ip_id).  On a match the child's
  * description is set from the table entry, if it has one, and 0 is
  * returned; otherwise ENXIO.
  */
 static int
 acpi_isa_pnp_probe(device_t bus, device_t child, struct isa_pnp_id *ids)
 {
     struct isa_pnp_id	*id;
     int			result, ncids, idx, hit;
     uint32_t		hid, cids[8];
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /*
      * ISA-style drivers attached to ACPI may persist and
      * probe manually if we return ENOENT.  We never want
      * that to happen, so don't ever return it.
      */
     result = ENXIO;
 
     /* Scan the supplied IDs for a match */
     hid = acpi_isa_get_logicalid(child);
     ncids = acpi_isa_get_compatid(child, cids, 8);
     for (id = ids; id != NULL && id->ip_id != 0; id++) {
 	hit = (hid == id->ip_id);
 	for (idx = 0; !hit && idx < ncids; idx++)
 	    hit = (cids[idx] == id->ip_id);
 	if (!hit)
 	    continue;
 
 	result = 0;
 	if (id->ip_desc)
 	    device_set_desc(child, id->ip_desc);
 	break;
     }
 
     return_VALUE (result);
 }
 
 #if defined(__i386__) || defined(__amd64__)
 /*
  * Look for a MCFG table.  If it is present, use the settings for
  * domain (segment) 0 to setup PCI config space access via the memory
  * map.
  */
 static void
 acpi_enable_pcie(void)
 {
 	ACPI_TABLE_HEADER *hdr;
 	ACPI_MCFG_ALLOCATION *alloc, *end;
 	ACPI_STATUS status;
 
 	status = AcpiGetTable(ACPI_SIG_MCFG, 1, &hdr);
 	if (ACPI_FAILURE(status))
 		return;
 
 	end = (ACPI_MCFG_ALLOCATION *)((char *)hdr + hdr->Length);
 	alloc = (ACPI_MCFG_ALLOCATION *)((ACPI_TABLE_MCFG *)hdr + 1);
 	while (alloc < end) {
 		if (alloc->PciSegment == 0) {
 			pcie_cfgregopen(alloc->Address, alloc->StartBusNumber,
 			    alloc->EndBusNumber);
 			return;
 		}
 		alloc++;
 	}
 }
 #endif
 
 /*
  * Scan all of the ACPI namespace and attach child devices.
  *
  * We should only expect to find devices in the \_PR, \_TZ, \_SI, and
  * \_SB scopes, and \_PR and \_TZ became obsolete in the ACPI 2.0 spec.
  * However, in violation of the spec, some systems place their PCI link
  * devices in \, so we have to walk the whole namespace.  We check the
  * type of namespace nodes, so this should be ok.
  *
  * The steps below run in a fixed order: namespace scan, system resource
  * pre-allocation, reservation of child resources, identify routines,
  * attach, and finally the wake sysctls.
  */
 static void
 acpi_probe_children(device_t bus)
 {
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     /*
      * Scan the namespace and insert placeholders for all the devices that
      * we find.  We also probe/attach any early devices.
      *
      * Note that we use AcpiWalkNamespace rather than AcpiGetDevices because
      * we want to create nodes for all devices, not just those that are
      * currently present. (This assumes that we don't want to create/remove
      * devices as they appear, which might be smarter.)
      */
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "namespace scan\n"));
     /* The walk is limited to a depth of 100 levels below the root. */
     AcpiWalkNamespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, 100, acpi_probe_child,
 	NULL, bus, NULL);
 
     /* Pre-allocate resources for our rman from any sysresource devices. */
     acpi_sysres_alloc(bus);
 
     /* Reserve resources already allocated to children. */
     acpi_reserve_resources(bus);
 
     /* Create any static children by calling device identify methods. */
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "device identify routines\n"));
     bus_generic_probe(bus);
 
     /* Probe/attach all children, created statically and from the namespace. */
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "acpi bus_generic_attach\n"));
     bus_generic_attach(bus);
 
     /* Attach wake sysctls. */
     acpi_wake_sysctl_walk(bus);
 
     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "done attaching children\n"));
     return_VOID;
 }
 
 /*
  * Determine the probe order for a given device.
  *
  * '*order' is overwritten only when the object falls into one of the
  * special classes listed below; otherwise the caller's value is kept.
  */
 static void
 acpi_probe_order(ACPI_HANDLE handle, int *order)
 {
 	ACPI_OBJECT_TYPE type;
 
 	/*
 	 * 0. CPUs
 	 * 1. I/O port and memory system resource holders
 	 * 2. Clocks and timers (to handle early accesses)
 	 * 3. Embedded controllers (to handle early accesses)
 	 * 4. PCI Link Devices
 	 */
 
 	/*
 	 * Only trust 'type' when AcpiGetType() succeeded; on failure it
 	 * is not filled in, so fall through to the _HID checks instead
 	 * of reading an indeterminate value.
 	 */
 	if (ACPI_SUCCESS(AcpiGetType(handle, &type)) &&
 	    type == ACPI_TYPE_PROCESSOR)
 		*order = 0;
 	else if (acpi_MatchHid(handle, "PNP0C01") ||
 	    acpi_MatchHid(handle, "PNP0C02"))
 		*order = 1;
 	else if (acpi_MatchHid(handle, "PNP0100") ||
 	    acpi_MatchHid(handle, "PNP0103") ||
 	    acpi_MatchHid(handle, "PNP0B00"))
 		*order = 2;
 	else if (acpi_MatchHid(handle, "PNP0C09"))
 		*order = 3;
 	else if (acpi_MatchHid(handle, "PNP0C0F"))
 		*order = 4;
 }
 
 /*
  * Evaluate a child device and determine whether we might attach a device to
  * it.
  *
  * This is the AcpiWalkNamespace() callback used by acpi_probe_children();
  * 'context' carries the acpi bus device_t.  It always returns AE_OK so
  * that the walk continues regardless of what happens to a single node.
  */
 static ACPI_STATUS
 acpi_probe_child(ACPI_HANDLE handle, UINT32 level, void *context, void **status)
 {
     ACPI_DEVICE_INFO *devinfo;
     struct acpi_device	*ad;
     struct acpi_prw_data prw;
     ACPI_OBJECT_TYPE type;
     ACPI_HANDLE h;
     device_t bus, child;
     char *handle_str;
     int order;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (acpi_disabled("children"))
 	return_ACPI_STATUS (AE_OK);
 
     /* Skip this device if we think we'll have trouble with it. */
     if (acpi_avoid(handle))
 	return_ACPI_STATUS (AE_OK);
 
     bus = (device_t)context;
     if (ACPI_SUCCESS(AcpiGetType(handle, &type))) {
 	handle_str = acpi_name(handle);
 	switch (type) {
 	case ACPI_TYPE_DEVICE:
 	    /*
 	     * Since we scan from \, be sure to skip system scope objects.
 	     * \_SB_ and \_TZ_ are defined in ACPICA as devices to work around
 	     * BIOS bugs.  For example, \_SB_ is to allow \_SB_._INI to be run
 	     * during the initialization and \_TZ_ is to support Notify() on it.
 	     */
 	    if (strcmp(handle_str, "\\_SB_") == 0 ||
 		strcmp(handle_str, "\\_TZ_") == 0)
 		break;
 	    /* Set up the wake GPE when the _PRW data parses successfully. */
 	    if (acpi_parse_prw(handle, &prw) == 0)
 		AcpiSetupGpeForWake(handle, prw.gpe_handle, prw.gpe_bit);
 
 	    /*
 	     * Ignore devices that do not have a _HID or _CID.  They should
 	     * be discovered by other buses (e.g. the PCI bus driver).
 	     */
 	    if (!acpi_has_hid(handle))
 		break;
 	    /* FALLTHROUGH */
 	case ACPI_TYPE_PROCESSOR:
 	case ACPI_TYPE_THERMAL:
 	case ACPI_TYPE_POWER:
 	    /*
 	     * Create a placeholder device for this node.  Sort the
 	     * placeholder so that the probe/attach passes will run
 	     * breadth-first.  Orders less than ACPI_DEV_BASE_ORDER
 	     * are reserved for special objects (i.e., system
 	     * resources).
 	     */
 	    ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "scanning '%s'\n", handle_str));
 	    /* Default order by depth; acpi_probe_order() may override it. */
 	    order = level * 10 + ACPI_DEV_BASE_ORDER;
 	    acpi_probe_order(handle, &order);
 	    child = BUS_ADD_CHILD(bus, order, NULL, -1);
 	    if (child == NULL)
 		break;
 
 	    /* Associate the handle with the device_t and vice versa. */
 	    acpi_set_handle(child, handle);
 	    AcpiAttachData(handle, acpi_fake_objhandler, child);
 
 	    /*
 	     * Check that the device is present.  If it's not present,
 	     * leave it disabled (so that we have a device_t attached to
 	     * the handle, but we don't probe it).
 	     *
 	     * XXX PCI link devices sometimes report "present" but not
 	     * "functional" (i.e. if disabled).  Go ahead and probe them
 	     * anyway since we may enable them later.
 	     */
 	    if (type == ACPI_TYPE_DEVICE && !acpi_DeviceIsPresent(child)) {
 		/* Never disable PCI link devices. */
 		if (acpi_MatchHid(handle, "PNP0C0F"))
 		    break;
 		/*
 		 * Docking stations should remain enabled since the system
 		 * may be undocked at boot.
 		 */
 		if (ACPI_SUCCESS(AcpiGetHandle(handle, "_DCK", &h)))
 		    break;
 
 		device_disable(child);
 		break;
 	    }
 
 	    /*
 	     * Get the device's resource settings and attach them.
 	     * Note that if the device has _PRS but no _CRS, we need
 	     * to decide when it's appropriate to try to configure the
 	     * device.  Ignore the return value here; it's OK for the
 	     * device not to have any resources.
 	     */
 	    acpi_parse_resources(child, handle, &acpi_res_parse_set, NULL);
 
 	    /*
 	     * Cache the class code: 0xffffff unless the device reports a
 	     * sufficiently long _CLS value, which is parsed as hexadecimal.
 	     */
 	    ad = device_get_ivars(child);
 	    ad->ad_cls_class = 0xffffff;
 	    if (ACPI_SUCCESS(AcpiGetObjectInfo(handle, &devinfo))) {
 		if ((devinfo->Valid & ACPI_VALID_CLS) != 0 &&
 		    devinfo->ClassCode.Length >= ACPI_PCICLS_STRING_SIZE) {
 		    ad->ad_cls_class = strtoul(devinfo->ClassCode.String,
 			NULL, 16);
 		}
 		AcpiOsFree(devinfo);
 	    }
 	    break;
 	}
     }
 
     return_ACPI_STATUS (AE_OK);
 }
 
 /*
  * AcpiAttachData() requires an object handler but never uses it.  This is a
  * placeholder object handler so we can store a device_t in an ACPI_HANDLE.
  */
 void
 acpi_fake_objhandler(ACPI_HANDLE h, void *data)
 {
     /* Intentionally empty: neither argument is used. */
 }
 
 /*
  * Shutdown handler; arg is the acpi softc and howto carries the RB_* flags.
  *
  * Exactly one of three actions is selected: power off through S5 when
  * RB_POWEROFF is set, reset through AcpiReset() when not halting and the
  * softc asks for ACPI reboot handling, or AcpiTerminate() when the user
  * requested ACPI be disabled and we are not panicking.
  */
 static void
 acpi_shutdown_final(void *arg, int howto)
 {
     struct acpi_softc *softc;
     register_t saved_intr;
     ACPI_STATUS rv;
 
     softc = (struct acpi_softc *)arg;
 
     /*
      * XXX Shutdown code should only run on the BSP (cpuid 0).
      * Some chipsets do not power off the system correctly if called from
      * an AP.
      */
     if ((howto & RB_POWEROFF) != 0) {
 	rv = AcpiEnterSleepStatePrep(ACPI_STATE_S5);
 	if (ACPI_FAILURE(rv)) {
 	    device_printf(softc->acpi_dev,
 		"AcpiEnterSleepStatePrep failed - %s\n",
 		AcpiFormatException(rv));
 	    return;
 	}
 	device_printf(softc->acpi_dev, "Powering system off\n");
 	saved_intr = intr_disable();
 	rv = AcpiEnterSleepState(ACPI_STATE_S5);
 	if (ACPI_SUCCESS(rv)) {
 	    /* The call claimed success; give the hardware time to act. */
 	    DELAY(1000000);
 	    intr_restore(saved_intr);
 	    device_printf(softc->acpi_dev, "power-off failed - timeout\n");
 	} else {
 	    intr_restore(saved_intr);
 	    device_printf(softc->acpi_dev, "power-off failed - %s\n",
 		AcpiFormatException(rv));
 	}
 	return;
     }
 
     if ((howto & RB_HALT) == 0 && softc->acpi_handle_reboot) {
 	/* Reboot using the reset register. */
 	rv = AcpiReset();
 	if (ACPI_SUCCESS(rv)) {
 	    DELAY(1000000);
 	    device_printf(softc->acpi_dev, "reset failed - timeout\n");
 	} else if (rv != AE_NOT_EXIST) {
 	    device_printf(softc->acpi_dev, "reset failed - %s\n",
 		AcpiFormatException(rv));
 	}
 	return;
     }
 
     if (softc->acpi_do_disable && panicstr == NULL) {
 	/*
 	 * Only disable ACPI if the user requested.  On some systems, writing
 	 * the disable value to SMI_CMD hangs the system.
 	 */
 	device_printf(softc->acpi_dev, "Shutting down\n");
 	AcpiTerminate();
     }
 }
 
 /*
  * Clear the fixed power/sleep button events and install their handlers
  * for every button whose FADT flag bit is clear.  The "(fixed)" notice
  * for each button is printed on the first call only.
  */
 static void
 acpi_enable_fixed_events(struct acpi_softc *sc)
 {
     static int	announce = 1;
 
     /* Power button. */
     if (!(AcpiGbl_FADT.Flags & ACPI_FADT_POWER_BUTTON)) {
 	AcpiClearEvent(ACPI_EVENT_POWER_BUTTON);
 	AcpiInstallFixedEventHandler(ACPI_EVENT_POWER_BUTTON,
 	    acpi_event_power_button_sleep, sc);
 	if (announce != 0)
 	    device_printf(sc->acpi_dev, "Power Button (fixed)\n");
     }
 
     /* Sleep button. */
     if (!(AcpiGbl_FADT.Flags & ACPI_FADT_SLEEP_BUTTON)) {
 	AcpiClearEvent(ACPI_EVENT_SLEEP_BUTTON);
 	AcpiInstallFixedEventHandler(ACPI_EVENT_SLEEP_BUTTON,
 	    acpi_event_sleep_button_sleep, sc);
 	if (announce != 0)
 	    device_printf(sc->acpi_dev, "Sleep Button (fixed)\n");
     }
 
     /* Stay quiet on subsequent calls. */
     announce = 0;
 }
 
 /*
  * Report whether the device should be treated as present.
  *
  * Returns FALSE when the device has no ACPI handle or its object info
  * cannot be fetched.  Otherwise returns TRUE when the object has no valid
  * _STA, or when ACPI_DEVICE_PRESENT() accepts its current status.
  */
 BOOLEAN
 acpi_DeviceIsPresent(device_t dev)
 {
     ACPI_DEVICE_INFO	*info;
     ACPI_HANDLE		handle;
     BOOLEAN		result;
 
     handle = acpi_get_handle(dev);
     if (handle == NULL)
 	return (FALSE);
     if (ACPI_FAILURE(AcpiGetObjectInfo(handle, &info)))
 	return (FALSE);
 
     /* If no _STA method, must be present */
     if ((info->Valid & ACPI_VALID_STA) == 0)
 	result = TRUE;
     else if (ACPI_DEVICE_PRESENT(info->CurrentStatus))
 	result = TRUE;
     else
 	result = FALSE;
 
     AcpiOsFree(info);
     return (result);
 }
 
 /*
  * Report whether the battery should be treated as present.
  *
  * Returns FALSE when the device has no ACPI handle or its object info
  * cannot be fetched.  Otherwise returns TRUE when the object has no valid
  * _STA, or when ACPI_BATTERY_PRESENT() accepts its current status.
  */
 BOOLEAN
 acpi_BatteryIsPresent(device_t dev)
 {
     ACPI_DEVICE_INFO	*info;
     ACPI_HANDLE		handle;
     BOOLEAN		result;
 
     handle = acpi_get_handle(dev);
     if (handle == NULL)
 	return (FALSE);
     if (ACPI_FAILURE(AcpiGetObjectInfo(handle, &info)))
 	return (FALSE);
 
     /* If no _STA method, must be present */
     if ((info->Valid & ACPI_VALID_STA) == 0)
 	result = TRUE;
     else if (ACPI_BATTERY_PRESENT(info->CurrentStatus))
 	result = TRUE;
     else
 	result = FALSE;
 
     AcpiOsFree(info);
     return (result);
 }
 
 /*
  * Returns TRUE if the object has a valid _HID, or a valid _CID list with
  * at least one entry.  Returns FALSE for a NULL handle or when the object
  * info cannot be fetched.
  */
 static BOOLEAN
 acpi_has_hid(ACPI_HANDLE h)
 {
     ACPI_DEVICE_INFO	*info;
     BOOLEAN		found;
 
     if (h == NULL)
 	return (FALSE);
     if (ACPI_FAILURE(AcpiGetObjectInfo(h, &info)))
 	return (FALSE);
 
     found = ((info->Valid & ACPI_VALID_HID) != 0 ||
 	((info->Valid & ACPI_VALID_CID) != 0 &&
 	info->CompatibleIdList.Count > 0)) ? TRUE : FALSE;
 
     AcpiOsFree(info);
     return (found);
 }
 
 /*
  * Match a HID string against a handle.
  *
  * Returns TRUE when hid equals the object's _HID string, or, failing
  * that, any entry of its _CID list.  Returns FALSE for NULL arguments or
  * when the object info cannot be fetched.
  */
 BOOLEAN
 acpi_MatchHid(ACPI_HANDLE h, const char *hid) 
 {
     ACPI_DEVICE_INFO	*info;
     BOOLEAN		matched;
     int			idx;
 
     if (hid == NULL || h == NULL)
 	return (FALSE);
     if (ACPI_FAILURE(AcpiGetObjectInfo(h, &info)))
 	return (FALSE);
 
     matched = FALSE;
     if ((info->Valid & ACPI_VALID_HID) != 0 &&
 	strcmp(hid, info->HardwareId.String) == 0) {
 	matched = TRUE;
     } else if ((info->Valid & ACPI_VALID_CID) != 0) {
 	/* Fall back to the compatible IDs; stop at the first hit. */
 	idx = 0;
 	while (!matched && idx < info->CompatibleIdList.Count) {
 	    if (strcmp(hid, info->CompatibleIdList.Ids[idx].String) == 0)
 		matched = TRUE;
 	    idx++;
 	}
     }
 
     AcpiOsFree(info);
     return (matched);
 }
 
 /*
  * Return the handle of a named object within our scope, i.e. that of
  * (parent) or one of its parents.
  *
  * Starting at (parent), (path) is looked up relative to each node on the
  * way up the tree.  On a match the handle is stored in *result and AE_OK
  * is returned.  If AcpiGetParent() fails before a match is found,
  * AE_NOT_FOUND is returned.  Any lookup failure other than AE_NOT_FOUND
  * is returned to the caller unchanged.  *result is only written on
  * success.
  */
 ACPI_STATUS
 acpi_GetHandleInScope(ACPI_HANDLE parent, char *path, ACPI_HANDLE *result)
 {
     ACPI_HANDLE		r;
     ACPI_STATUS		status;
 
     /* Walk back up the tree to the root */
     for (;;) {
 	status = AcpiGetHandle(parent, path, &r);
 	if (ACPI_SUCCESS(status)) {
 	    *result = r;
 	    return (AE_OK);
 	}
 	/*
 	 * Anything other than "not found" is a real error.  Report it
 	 * instead of claiming success while *result was never written.
 	 */
 	if (status != AE_NOT_FOUND)
 	    return (status);
 	if (ACPI_FAILURE(AcpiGetParent(parent, &r)))
 	    return (AE_NOT_FOUND);
 	parent = r;
     }
 }
 
 /*
  * Allocate a buffer with a preset data size.
  *
  * The ACPI_BUFFER header and its data area come from one M_NOWAIT
  * allocation; Pointer is set to the bytes immediately after the header
  * and Length to size.  Returns NULL if size is negative or the
  * allocation fails.
  */
 ACPI_BUFFER *
 acpi_AllocBuffer(int size)
 {
     ACPI_BUFFER	*buf;
 
     /*
      * A negative size would wrap when added to sizeof(*buf), yielding an
      * undersized allocation that advertises a huge Length.
      */
     if (size < 0)
 	return (NULL);
     if ((buf = malloc(size + sizeof(*buf), M_ACPIDEV, M_NOWAIT)) == NULL)
 	return (NULL);
     buf->Length = size;
     buf->Pointer = (void *)(buf + 1);
     return (buf);
 }
 
 /*
  * Evaluate (path) relative to (handle), passing (number) as the single
  * Integer argument.  No return value is requested from the evaluation;
  * the evaluation status is returned.
  */
 ACPI_STATUS
 acpi_SetInteger(ACPI_HANDLE handle, char *path, UINT32 number)
 {
     ACPI_OBJECT value;
     ACPI_OBJECT_LIST arglist;
 
     value.Type = ACPI_TYPE_INTEGER;
     value.Integer.Value = number;
 
     arglist.Pointer = &value;
     arglist.Count = 1;
 
     return (AcpiEvaluateObject(handle, path, &arglist, NULL));
 }
 
 /*
  * Evaluate a path that should return an integer.
  *
  * A NULL handle means the root object.  On success *number receives the
  * value.  AE_TYPE is returned when the result fits the local object but
  * is not an Integer.  When the result does not fit (AE_BUFFER_OVERFLOW),
  * the evaluation is retried with an allocated buffer and the result is
  * handed to acpi_ConvertBufferToInteger().
  */
 ACPI_STATUS
 acpi_GetInteger(ACPI_HANDLE handle, char *path, UINT32 *number)
 {
     ACPI_OBJECT	obj;
     ACPI_BUFFER	out;
     ACPI_STATUS	rv;
 
     if (handle == NULL)
 	handle = ACPI_ROOT_OBJECT;
 
     /*
      * Assume that what we've been pointed at is an Integer object, or
      * a method that will return an Integer.
      */
     out.Length = sizeof(obj);
     out.Pointer = &obj;
     rv = AcpiEvaluateObject(handle, path, NULL, &out);
     if (ACPI_SUCCESS(rv)) {
 	if (obj.Type != ACPI_TYPE_INTEGER)
 	    return (AE_TYPE);
 	*number = obj.Integer.Value;
 	return (rv);
     }
     if (rv != AE_BUFFER_OVERFLOW)
 	return (rv);
 
     /* 
      * In some applications, a method that's expected to return an Integer
      * may instead return a Buffer (probably to simplify some internal
      * arithmetic).  We'll try to fetch whatever it is, and if it's a Buffer,
      * convert it into an Integer as best we can.
      *
      * This is a hack.
      */
     out.Pointer = AcpiOsAllocate(out.Length);
     if (out.Pointer == NULL)
 	return (AE_NO_MEMORY);
     rv = AcpiEvaluateObject(handle, path, NULL, &out);
     if (ACPI_SUCCESS(rv))
 	rv = acpi_ConvertBufferToInteger(&out, number);
     AcpiOsFree(out.Pointer);
     return (rv);
 }
 
 /*
  * Convert the ACPI_OBJECT held in (bufp) into a 32-bit integer.
  *
  * An Integer object is copied directly.  A Buffer object of at most
  * sizeof(int) bytes is assembled with byte 0 as the least significant
  * byte.  Returns AE_TYPE for any other object type and AE_BAD_DATA for a
  * Buffer that is too long; *number is not written in those cases.
  */
 ACPI_STATUS
 acpi_ConvertBufferToInteger(ACPI_BUFFER *bufp, UINT32 *number)
 {
     ACPI_OBJECT	*p;
     UINT8	*val;
     int		i;
 
     p = (ACPI_OBJECT *)bufp->Pointer;
     if (p->Type == ACPI_TYPE_INTEGER) {
 	*number = p->Integer.Value;
 	return (AE_OK);
     }
     if (p->Type != ACPI_TYPE_BUFFER)
 	return (AE_TYPE);
     if (p->Buffer.Length > sizeof(int))
 	return (AE_BAD_DATA);
 
     *number = 0;
     val = p->Buffer.Pointer;
     for (i = 0; i < p->Buffer.Length; i++) {
 	/*
 	 * Widen to unsigned before shifting: a UINT8 promotes to int, and
 	 * shifting a byte >= 0x80 left by 24 would overflow a signed int.
 	 */
 	*number += (UINT32)val[i] << (i * 8);
     }
     return (AE_OK);
 }
 
 /*
  * Iterate over the elements of a package object, calling the supplied
  * function for each element together with the caller's (arg).
  *
  * Returns AE_BAD_PARAMETER if (pkg) is NULL or not a Package, otherwise
  * AE_OK once every element has been visited.
  *
  * XXX possible enhancement might be to abort traversal on error.
  */
 ACPI_STATUS
 acpi_ForeachPackageObject(ACPI_OBJECT *pkg,
 	void (*func)(ACPI_OBJECT *comp, void *arg), void *arg)
 {
     ACPI_OBJECT	*elems;
     int		idx;
 
     if (pkg == NULL || pkg->Type != ACPI_TYPE_PACKAGE)
 	return (AE_BAD_PARAMETER);
 
     /* Visit each component in order. */
     elems = pkg->Package.Elements;
     for (idx = 0; idx < pkg->Package.Count; idx++)
 	func(&elems[idx], arg);
 
     return (AE_OK);
 }
 
 /*
  * Find the (index)th resource object in a set.
  *
  * Walks the resource list in (buf) from its start, stepping over (index)
  * entries.  Returns AE_BAD_PARAMETER if the walk reaches or passes the
  * end of the buffer, and AE_NOT_FOUND if an end tag or a zero-length
  * entry is met first.  On success the resource pointer is stored in
  * *resp (when resp is not NULL) and AE_OK is returned.
  */
 ACPI_STATUS
 acpi_FindIndexedResource(ACPI_BUFFER *buf, int index, ACPI_RESOURCE **resp)
 {
     ACPI_RESOURCE	*rp;
     int			i;
 
     rp = (ACPI_RESOURCE *)buf->Pointer;
     i = index;
     while (i-- > 0) {
 	/*
 	 * Range check.  A pointer equal to the end of the buffer is
 	 * already outside it, so it must not be dereferenced below.
 	 */
 	if (rp >= (ACPI_RESOURCE *)((u_int8_t *)buf->Pointer + buf->Length))
 	    return (AE_BAD_PARAMETER);
 
 	/* Check for terminator */
 	if (rp->Type == ACPI_RESOURCE_TYPE_END_TAG || rp->Length == 0)
 	    return (AE_NOT_FOUND);
 	rp = ACPI_NEXT_RESOURCE(rp);
     }
     if (resp != NULL)
 	*resp = rp;
 
     return (AE_OK);
 }
 
 /*
  * Append an ACPI_RESOURCE to an ACPI_BUFFER.
  *
  * Given a pointer to an ACPI_RESOURCE structure, expand the ACPI_BUFFER
  * provided to contain it.  If the ACPI_BUFFER is empty, allocate a sensible
  * backing block.  If the ACPI_RESOURCE is NULL, return an empty set of
  * resources.
  *
  * Returns AE_OK on success, AE_NO_MEMORY if an allocation fails, and
  * AE_BAD_PARAMETER if no terminator is found inside the existing buffer.
  * When the buffer is grown, the old backing block is freed with
  * AcpiOsFree() and buf->Pointer/buf->Length are updated.
  */
 #define ACPI_INITIAL_RESOURCE_BUFFER_SIZE	512
 
 ACPI_STATUS
 acpi_AppendBufferResource(ACPI_BUFFER *buf, ACPI_RESOURCE *res)
 {
     ACPI_RESOURCE	*rp;
     void		*newp;
 
     /* Initialise the buffer if necessary. */
     if (buf->Pointer == NULL) {
 	buf->Length = ACPI_INITIAL_RESOURCE_BUFFER_SIZE;
 	if ((buf->Pointer = AcpiOsAllocate(buf->Length)) == NULL)
 	    return (AE_NO_MEMORY);
 	/* A fresh buffer holds just the terminating end tag. */
 	rp = (ACPI_RESOURCE *)buf->Pointer;
 	rp->Type = ACPI_RESOURCE_TYPE_END_TAG;
 	rp->Length = ACPI_RS_SIZE_MIN;
     }
     if (res == NULL)
 	return (AE_OK);
 
     /*
      * Scan the current buffer looking for the terminator.
      * This will either find the terminator or hit the end
      * of the buffer and return an error.
      */
     rp = (ACPI_RESOURCE *)buf->Pointer;
     for (;;) {
 	/* Range check, don't go outside the buffer */
 	if (rp >= (ACPI_RESOURCE *)((u_int8_t *)buf->Pointer + buf->Length))
 	    return (AE_BAD_PARAMETER);
 	if (rp->Type == ACPI_RESOURCE_TYPE_END_TAG || rp->Length == 0)
 	    break;
 	rp = ACPI_NEXT_RESOURCE(rp);
     }
 
     /*
      * Check the size of the buffer and expand if required.
      *
      * Required size is:
      *	size of existing resources before terminator +
      *	size of new resource and header +
      * 	size of terminator.
      *
      * Note that this loop should really only run once, unless
      * for some reason we are stuffing a *really* huge resource.
      */
     while ((((u_int8_t *)rp - (u_int8_t *)buf->Pointer) + 
 	    res->Length + ACPI_RS_SIZE_NO_DATA +
 	    ACPI_RS_SIZE_MIN) >= buf->Length) {
 	/* Double the buffer, copy the old contents, and rebase rp. */
 	if ((newp = AcpiOsAllocate(buf->Length * 2)) == NULL)
 	    return (AE_NO_MEMORY);
 	bcopy(buf->Pointer, newp, buf->Length);
 	rp = (ACPI_RESOURCE *)((u_int8_t *)newp +
 			       ((u_int8_t *)rp - (u_int8_t *)buf->Pointer));
 	AcpiOsFree(buf->Pointer);
 	buf->Pointer = newp;
 	buf->Length += buf->Length;
     }
 
     /* Insert the new resource. */
     bcopy(res, rp, res->Length + ACPI_RS_SIZE_NO_DATA);
 
     /* And add the terminator. */
     rp = ACPI_NEXT_RESOURCE(rp);
     rp->Type = ACPI_RESOURCE_TYPE_END_TAG;
     rp->Length = ACPI_RS_SIZE_MIN;
 
     return (AE_OK);
 }
 
 /*
  * Evaluate _OSC on (handle).
  *
  * The method is called with four arguments: the UUID buffer, the
  * revision, the capability count, and the caps_in buffer of (count)
  * 32-bit words.  caps_in[0] is overwritten with 1 for a query and 0
  * otherwise before the call.
  *
  * If caps_out is not NULL, the returned buffer must be exactly
  * count * sizeof(uint32_t) bytes long and is copied into caps_out;
  * otherwise AE_BUFFER_OVERFLOW is returned.  A failure of the evaluation
  * itself is returned as-is.
  */
 ACPI_STATUS
 acpi_EvaluateOSC(ACPI_HANDLE handle, uint8_t *uuid, int revision, int count,
     uint32_t *caps_in, uint32_t *caps_out, bool query)
 {
 	ACPI_OBJECT params[4], *result;
 	ACPI_OBJECT_LIST plist;
 	ACPI_BUFFER out;
 	ACPI_STATUS rv;
 
 	/* The first capability word carries the query flag. */
 	caps_in[0] = query ? 1 : 0;
 
 	params[0].Type = ACPI_TYPE_BUFFER;
 	params[0].Buffer.Length = ACPI_UUID_LENGTH;
 	params[0].Buffer.Pointer = uuid;
 
 	params[1].Type = ACPI_TYPE_INTEGER;
 	params[1].Integer.Value = revision;
 
 	params[2].Type = ACPI_TYPE_INTEGER;
 	params[2].Integer.Value = count;
 
 	params[3].Type = ACPI_TYPE_BUFFER;
 	params[3].Buffer.Length = count * sizeof(*caps_in);
 	params[3].Buffer.Pointer = (uint8_t *)caps_in;
 
 	plist.Count = 4;
 	plist.Pointer = params;
 
 	out.Length = ACPI_ALLOCATE_BUFFER;
 	out.Pointer = NULL;
 	rv = AcpiEvaluateObjectTyped(handle, "_OSC", &plist, &out,
 	    ACPI_TYPE_BUFFER);
 	if (ACPI_FAILURE(rv))
 		return (rv);
 
 	if (caps_out != NULL) {
 		result = out.Pointer;
 		if (result->Buffer.Length == count * sizeof(*caps_out))
 			bcopy(result->Buffer.Pointer, caps_out,
 			    result->Buffer.Length);
 		else
 			rv = AE_BUFFER_OVERFLOW;
 	}
 	AcpiOsFree(out.Pointer);
 	return (rv);
 }
 
 /*
  * Set interrupt model by passing (model) to \_PIC on the root object.
  */
 ACPI_STATUS
 acpi_SetIntrModel(int model)
 {
     ACPI_STATUS status;
 
     status = acpi_SetInteger(ACPI_ROOT_OBJECT, "_PIC", model);
     return (status);
 }
 
 /*
  * Walk subtables of a table and call a callback routine for each
  * subtable.  The caller should provide the first subtable and a
  * pointer to the end of the table.  This can be used to walk tables
  * such as MADT and SRAT that use subtable entries.
  *
  * The walk stops at (end), or as soon as an entry reports a Length
  * smaller than the subtable header itself.
  */
 void
 acpi_walk_subtables(void *first, void *end, acpi_subtable_handler *handler,
     void *arg)
 {
     ACPI_SUBTABLE_HEADER *cur;
 
     cur = first;
     while ((void *)cur < end) {
 	/* Avoid an infinite loop if we hit a bogus entry. */
 	if (cur->Length < sizeof(ACPI_SUBTABLE_HEADER))
 	    break;
 
 	handler(cur, arg);
 	cur = ACPI_ADD_PTR(ACPI_SUBTABLE_HEADER, cur, cur->Length);
     }
 }
 
 /*
  * DEPRECATED.  This interface has serious deficiencies and will be
  * removed.
  *
  * Immediately enter the sleep state.  In the old model, acpiconf(8) ran
  * rc.suspend and rc.resume so we don't have to notify devd(8) to do this.
  *
  * A deprecation warning is printed on the first call only.
  */
 ACPI_STATUS
 acpi_SetSleepState(struct acpi_softc *sc, int state)
 {
     static int warned;
 
     if (warned == 0) {
 	device_printf(sc->acpi_dev,
 "warning: acpi_SetSleepState() deprecated, need to update your software\n");
 	warned = 1;
     }
 
     return (acpi_EnterSleepState(sc, state));
 }
 
 #if defined(__amd64__) || defined(__i386__)
 /*
  * Task body queued by acpi_sleep_force(): enter the pending sleep state
  * recorded in the softc and complain if that fails.
  */
 static void
 acpi_sleep_force_task(void *context)
 {
     struct acpi_softc *softc;
 
     softc = (struct acpi_softc *)context;
     if (!ACPI_FAILURE(acpi_EnterSleepState(softc, softc->acpi_next_sstate)))
 	return;
     device_printf(softc->acpi_dev, "force sleep state S%d failed\n",
 	softc->acpi_next_sstate);
 }
 
 /*
  * Timeout handler for an unacknowledged suspend request; arg is the
  * softc.  The actual suspend is deferred to acpi_sleep_force_task().
  */
 static void
 acpi_sleep_force(void *arg)
 {
     struct acpi_softc *softc;
 
     softc = (struct acpi_softc *)arg;
     device_printf(softc->acpi_dev,
 	"suspend request timed out, forcing sleep now\n");
 
     /*
      * XXX Suspending from callout causes freezes in DEVICE_SUSPEND().
      * Suspend from acpi_task thread instead.
      */
     if (!ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_sleep_force_task, softc)))
 	return;
     device_printf(softc->acpi_dev, "AcpiOsExecute() for sleeping failed\n");
 }
 #endif
 
 /*
  * Request that the system enter the given suspend state.  All /dev/apm
  * devices and devd(8) will be notified.  Userland then has a chance to
  * save state and acknowledge the request.  The system sleeps once all
  * acks are in.
  *
  * Returns EINVAL for a state outside S1..ACPI_S_STATES_MAX, EOPNOTSUPP
  * for a state not marked in acpi_sleep_states[] (or on platforms without
  * suspend support), ENXIO when an immediate acpi_EnterSleepState() call
  * fails, and 0 otherwise, including when the request is ignored because
  * another one is already pending.
  */
 int
 acpi_ReqSleepState(struct acpi_softc *sc, int state)
 {
 #if defined(__amd64__) || defined(__i386__)
     struct apm_clone_data *clone;
     ACPI_STATUS status;
 
     if (state < ACPI_STATE_S1 || state > ACPI_S_STATES_MAX)
 	return (EINVAL);
     if (!acpi_sleep_states[state])
 	return (EOPNOTSUPP);
 
     /*
      * If a reboot/shutdown/suspend request is already in progress or
      * suspend is blocked due to an upcoming shutdown, just return.
      *
      * NOTE(review): acpi_next_sstate is read here before ACPI_LOCK is
      * taken below -- confirm that this unlocked check is intended.
      */
     if (rebooting || sc->acpi_next_sstate != 0 || suspend_blocked) {
 	return (0);
     }
 
     /* Wait until sleep is enabled. */
     while (sc->acpi_sleep_disabled) {
 	AcpiOsSleep(1000);
     }
 
     ACPI_LOCK(acpi);
 
     /* From here on a request is pending until it is acked or aborted. */
     sc->acpi_next_sstate = state;
 
     /* S5 (soft-off) should be entered directly with no waiting. */
     if (state == ACPI_STATE_S5) {
     	ACPI_UNLOCK(acpi);
 	status = acpi_EnterSleepState(sc, state);
 	return (ACPI_SUCCESS(status) ? 0 : ENXIO);
     }
 
     /* Record the pending state and notify all apm devices. */
     STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) {
 	clone->notify_status = APM_EV_NONE;
 	/* Clones without ACPI_EVF_DEVD set are woken via select/kqueue. */
 	if ((clone->flags & ACPI_EVF_DEVD) == 0) {
 	    selwakeuppri(&clone->sel_read, PZERO);
 	    KNOTE_LOCKED(&clone->sel_read.si_note, 0);
 	}
     }
 
     /* If devd(8) is not running, immediately enter the sleep state. */
     if (!devctl_process_running()) {
 	ACPI_UNLOCK(acpi);
 	status = acpi_EnterSleepState(sc, state);
 	return (ACPI_SUCCESS(status) ? 0 : ENXIO);
     }
 
     /*
      * Set a timeout to fire if userland doesn't ack the suspend request
      * in time.  This way we still eventually go to sleep if we were
      * overheating or running low on battery, even if userland is hung.
      * We cancel this timeout once all userland acks are in or the
      * suspend request is aborted.
      */
     callout_reset(&sc->susp_force_to, 10 * hz, acpi_sleep_force, sc);
     ACPI_UNLOCK(acpi);
 
     /* Now notify devd(8) also. */
     acpi_UserNotify("Suspend", ACPI_ROOT_OBJECT, state);
 
     return (0);
 #else
     /* This platform does not support acpi suspend/resume. */
     return (EOPNOTSUPP);
 #endif
 }
 
 /*
  * Acknowledge (or reject) a pending sleep state.  The caller has
  * prepared for suspend and is now ready for it to proceed.  If the
  * error argument is non-zero, it indicates suspend should be cancelled
  * and gives an errno value describing why.  Once all votes are in,
  * we suspend the system.
  *
  * Returns ENXIO when no sleep request is pending, ENODEV when the final
  * acpi_EnterSleepState() call fails, and 0 otherwise.
  */
 int
 acpi_AckSleepState(struct apm_clone_data *clone, int error)
 {
 #if defined(__amd64__) || defined(__i386__)
     struct acpi_softc *sc;
     struct apm_clone_data *iter;
     int all_acked;
 
     ACPI_LOCK(acpi);
     sc = clone->acpi_sc;
 
     /* If no pending sleep state, return an error. */
     if (sc->acpi_next_sstate == 0) {
 	ACPI_UNLOCK(acpi);
 	return (ENXIO);
     }
 
     /* Caller wants to abort suspend process. */
     if (error != 0) {
 	sc->acpi_next_sstate = 0;
 	callout_stop(&sc->susp_force_to);
 	device_printf(sc->acpi_dev,
 	    "listener on %s cancelled the pending suspend\n",
 	    devtoname(clone->cdev));
 	ACPI_UNLOCK(acpi);
 	return (0);
     }
 
     /*
      * Mark this device as acking the suspend request.  Then, walk through
      * all devices, seeing if they agree yet.  We only count devices that
      * are writable since read-only devices couldn't ack the request.
      */
     clone->notify_status = APM_EV_ACKED;
     all_acked = TRUE;
     STAILQ_FOREACH(iter, &sc->apm_cdevs, entries) {
 	if ((iter->flags & ACPI_EVF_WRITE) == 0)
 	    continue;
 	if (iter->notify_status != APM_EV_ACKED) {
 	    all_acked = FALSE;
 	    break;
 	}
     }
 
     /* If all devices have voted "yes", we will suspend now. */
     if (all_acked)
 	callout_stop(&sc->susp_force_to);
     ACPI_UNLOCK(acpi);
 
     if (!all_acked)
 	return (0);
     if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
 	return (ENODEV);
     return (0);
 #else
     /* This platform does not support acpi suspend/resume. */
     return (EOPNOTSUPP);
 #endif
 }
 
 /*
  * Callout handler (arg is the softc) that clears acpi_sleep_disabled.
  * Asserts that the acpi lock is held.  While the system is not yet awake
  * and running it re-arms acpi_sleep_timer instead.
  */
 static void
 acpi_sleep_enable(void *arg)
 {
     struct acpi_softc	*softc;
 
     ACPI_LOCK_ASSERT(acpi);
 
     softc = (struct acpi_softc *)arg;
     if (AcpiGbl_SystemAwakeAndRunning) {
 	softc->acpi_sleep_disabled = FALSE;
 	return;
     }
 
     /* Reschedule if the system is not fully up and running. */
     callout_schedule(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME);
 }
 
 /*
  * Mark sleep as disabled under the acpi lock.
  *
  * Returns AE_ERROR if the system is not awake and running, or if sleep
  * was already disabled; AE_OK if this call is the one that disabled it.
  * In the latter two cases the flag is left set to TRUE.
  */
 static ACPI_STATUS
 acpi_sleep_disable(struct acpi_softc *sc)
 {
     ACPI_STATUS		rv;
 
     /* Fail if the system is not fully up and running. */
     if (!AcpiGbl_SystemAwakeAndRunning)
 	return (AE_ERROR);
 
     ACPI_LOCK(acpi);
     if (sc->acpi_sleep_disabled)
 	rv = AE_ERROR;
     else
 	rv = AE_OK;
     sc->acpi_sleep_disabled = TRUE;
     ACPI_UNLOCK(acpi);
 
     return (rv);
 }
 
 /*
  * Progress markers recorded by acpi_EnterSleepState() so that its backout
  * code knows how much of the suspend sequence has to be undone.  The
  * values are ordered; the backout code compares them with >=.
  */
 enum acpi_sleep_state {
     ACPI_SS_NONE,		/* Nothing has been done yet. */
     ACPI_SS_GPE_SET,		/* acpi_wake_prep_walk() has run. */
     ACPI_SS_DEV_SUSPEND,	/* DEVICE_SUSPEND(root_bus) succeeded. */
     ACPI_SS_SLP_PREP,		/* AcpiEnterSleepStatePrep() succeeded. */
     ACPI_SS_SLEPT,		/* The sleep step itself completed. */
 };
 
 /*
  * Enter the desired system sleep state.
  *
  * Currently we support S1-S5 but S4 is only S4BIOS
  *
  * Returns AE_BAD_PARAMETER for a state outside S1..ACPI_S_STATES_MAX,
  * AE_SUPPORT for a state not marked in acpi_sleep_states[], and the
  * failure from acpi_sleep_disable() when a suspend is already under way
  * or the system is not ready.  S5 is handed to shutdown_nice() and
  * returns AE_OK at once.  For the other states the function walks through
  * the suspend sequence, recording progress in slp_state, and then runs
  * the backout section, which serves both the failure and the normal
  * resume path.
  */
 static ACPI_STATUS
 acpi_EnterSleepState(struct acpi_softc *sc, int state)
 {
     register_t intr;
     ACPI_STATUS status;
     ACPI_EVENT_STATUS power_button_status;
     enum acpi_sleep_state slp_state;
     int sleep_result;
 
     ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
 
     if (state < ACPI_STATE_S1 || state > ACPI_S_STATES_MAX)
 	return_ACPI_STATUS (AE_BAD_PARAMETER);
     if (!acpi_sleep_states[state]) {
 	device_printf(sc->acpi_dev, "Sleep state S%d not supported by BIOS\n",
 	    state);
 	return (AE_SUPPORT);
     }
 
     /* Re-entry once we're suspending is not allowed. */
     status = acpi_sleep_disable(sc);
     if (ACPI_FAILURE(status)) {
 	device_printf(sc->acpi_dev,
 	    "suspend request ignored (not ready yet)\n");
 	return (status);
     }
 
     if (state == ACPI_STATE_S5) {
 	/*
 	 * Shut down cleanly and power off.  This will call us back through the
 	 * shutdown handlers.
 	 */
 	shutdown_nice(RB_POWEROFF);
 	return_ACPI_STATUS (AE_OK);
     }
 
     EVENTHANDLER_INVOKE(power_suspend_early);
     stop_all_proc();
     EVENTHANDLER_INVOKE(power_suspend);
 
     /* Bind the current thread to CPU 0 for the rest of the sequence. */
 #ifdef EARLY_AP_STARTUP
     MPASS(mp_ncpus == 1 || smp_started);
     thread_lock(curthread);
     sched_bind(curthread, 0);
     thread_unlock(curthread);
 #else
     if (smp_started) {
 	thread_lock(curthread);
 	sched_bind(curthread, 0);
 	thread_unlock(curthread);
     }
 #endif
 
     /*
      * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
      * drivers need this.
      */
     mtx_lock(&Giant);
 
     slp_state = ACPI_SS_NONE;
 
     sc->acpi_sstate = state;
 
     /* Enable any GPEs as appropriate and requested by the user. */
     acpi_wake_prep_walk(state);
     slp_state = ACPI_SS_GPE_SET;
 
     /*
      * Inform all devices that we are going to sleep.  If at least one
      * device fails, DEVICE_SUSPEND() automatically resumes the tree.
      *
      * XXX Note that a better two-pass approach with a 'veto' pass
      * followed by a "real thing" pass would be better, but the current
      * bus interface does not provide for this.
      *
      * NOTE(review): on this failure path status still holds the
      * successful result of acpi_sleep_disable(), so the caller is told
      * the suspend succeeded -- confirm that this is intended.
      */
     if (DEVICE_SUSPEND(root_bus) != 0) {
 	device_printf(sc->acpi_dev, "device_suspend failed\n");
 	goto backout;
     }
     slp_state = ACPI_SS_DEV_SUSPEND;
 
     /* If testing device suspend only, back out of everything here. */
     if (acpi_susp_bounce)
 	goto backout;
 
     status = AcpiEnterSleepStatePrep(state);
     if (ACPI_FAILURE(status)) {
 	device_printf(sc->acpi_dev, "AcpiEnterSleepStatePrep failed - %s\n",
 		      AcpiFormatException(status));
 	goto backout;
     }
     slp_state = ACPI_SS_SLP_PREP;
 
     if (sc->acpi_sleep_delay > 0)
 	DELAY(sc->acpi_sleep_delay * 1000000);
 
     intr = intr_disable();
     if (state != ACPI_STATE_S1) {
 	/* S2 and deeper go through the machine-dependent sleep code. */
 	sleep_result = acpi_sleep_machdep(sc, state);
 	acpi_wakeup_machdep(sc, state, sleep_result, 0);
 
 	/*
 	 * XXX According to ACPI specification SCI_EN bit should be restored
 	 * by ACPI platform (BIOS, firmware) to its pre-sleep state.
 	 * Unfortunately some BIOSes fail to do that and that leads to
 	 * unexpected and serious consequences during wake up like a system
 	 * getting stuck in SMI handlers.
 	 * This hack is picked up from Linux, which claims that it follows
 	 * Windows behavior.
 	 */
 	if (sleep_result == 1 && state != ACPI_STATE_S4)
 	    AcpiWriteBitRegister(ACPI_BITREG_SCI_ENABLE, ACPI_ENABLE_EVENT);
 
 	AcpiLeaveSleepStatePrep(state);
 
 	if (sleep_result == 1 && state == ACPI_STATE_S3) {
 	    /*
 	     * Prevent mis-interpretation of the wakeup by power button
 	     * as a request for power off.
 	     * Ideally we should post an appropriate wakeup event,
 	     * perhaps using acpi_event_power_button_wake or alike.
 	     *
 	     * Clearing of power button status after wakeup is mandated
 	     * by ACPI specification in section "Fixed Power Button".
 	     *
 	     * XXX As of ACPICA 20121114 AcpiGetEventStatus provides
 	     * status as 0/1 corresponding to inactive/active despite
 	     * its type being ACPI_EVENT_STATUS.  In other words,
 	     * we should not test for ACPI_EVENT_FLAG_SET for time being.
 	     */
 	    if (ACPI_SUCCESS(AcpiGetEventStatus(ACPI_EVENT_POWER_BUTTON,
 		&power_button_status)) && power_button_status != 0) {
 		AcpiClearEvent(ACPI_EVENT_POWER_BUTTON);
 		device_printf(sc->acpi_dev,
 		    "cleared fixed power button status\n");
 	    }
 	}
 
 	intr_restore(intr);
 
 	/* call acpi_wakeup_machdep() again with interrupt enabled */
 	acpi_wakeup_machdep(sc, state, sleep_result, 1);
 
 	/*
 	 * NOTE(review): status holds the successful result of
 	 * AcpiEnterSleepStatePrep() here, so this failure is also
 	 * returned as success -- confirm that this is intended.
 	 */
 	if (sleep_result == -1)
 		goto backout;
 
 	/* Re-enable ACPI hardware on wakeup from sleep state 4. */
 	if (state == ACPI_STATE_S4)
 	    AcpiEnable();
     } else {
 	/* S1 is entered directly through ACPICA. */
 	status = AcpiEnterSleepState(state);
 	AcpiLeaveSleepStatePrep(state);
 	intr_restore(intr);
 	if (ACPI_FAILURE(status)) {
 	    device_printf(sc->acpi_dev, "AcpiEnterSleepState failed - %s\n",
 			  AcpiFormatException(status));
 	    goto backout;
 	}
     }
     slp_state = ACPI_SS_SLEPT;
 
     /*
      * Back out state according to how far along we got in the suspend
      * process.  This handles both the error and success cases.
      */
 backout:
     if (slp_state >= ACPI_SS_GPE_SET) {
 	acpi_wake_prep_walk(state);
 	sc->acpi_sstate = ACPI_STATE_S0;
     }
     if (slp_state >= ACPI_SS_DEV_SUSPEND)
 	DEVICE_RESUME(root_bus);
     if (slp_state >= ACPI_SS_SLP_PREP)
 	AcpiLeaveSleepState(state);
     if (slp_state >= ACPI_SS_SLEPT) {
 	acpi_resync_clock(sc);
 	acpi_enable_fixed_events(sc);
     }
     /* The pending request, if any, has now been consumed. */
     sc->acpi_next_sstate = 0;
 
     mtx_unlock(&Giant);
 
     /* Undo the CPU binding made before the suspend sequence. */
 #ifdef EARLY_AP_STARTUP
     thread_lock(curthread);
     sched_unbind(curthread);
     thread_unlock(curthread);
 #else
     if (smp_started) {
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
     }
 #endif
 
     resume_all_proc();
 
     EVENTHANDLER_INVOKE(power_resume);
 
     /* Allow another sleep request after a while. */
     callout_schedule(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME);
 
     /* Run /etc/rc.resume after we are back. */
     if (devctl_process_running())
 	acpi_UserNotify("Resume", ACPI_ROOT_OBJECT, state);
 
     return_ACPI_STATUS (status);
 }
 
 /*
  * Read the active timecounter twice, discarding the results, and then
  * call inittodr() with time_second plus the configured sleep delay.
  */
 static void
 acpi_resync_clock(struct acpi_softc *sc)
 {
     int pass;
 
     /*
      * Warm up timecounter again and reset system clock.
      */
     for (pass = 0; pass < 2; pass++)
 	(void)timecounter->tc_get_timecount(timecounter);
     inittodr(time_second + sc->acpi_sleep_delay);
 }
 
 /*
  * Enable or disable the device's wake GPE.
  *
  * Returns ENXIO if the device's _PRW cannot be parsed or the GPE wake
  * mask cannot be changed.  On success, ACPI_FLAG_WAKE_ENABLED in the
  * device flags is updated to match and 0 is returned.
  */
 int
 acpi_wake_set_enable(device_t dev, int enable)
 {
     struct acpi_prw_data prw;
     ACPI_STATUS rv;
     int flags;
 
     /* Make sure the device supports waking the system and get the GPE. */
     if (acpi_parse_prw(acpi_get_handle(dev), &prw) != 0)
 	return (ENXIO);
 
     flags = acpi_get_flags(dev);
     rv = AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit,
 	enable ? ACPI_GPE_ENABLE : ACPI_GPE_DISABLE);
     if (ACPI_FAILURE(rv)) {
 	if (enable)
 	    device_printf(dev, "enable wake failed\n");
 	else
 	    device_printf(dev, "disable wake failed\n");
 	return (ENXIO);
     }
 
     /* Record the new setting in the device flags. */
     if (enable)
 	flags |= ACPI_FLAG_WAKE_ENABLED;
     else
 	flags &= ~ACPI_FLAG_WAKE_ENABLED;
     acpi_set_flags(dev, flags);
 
     return (0);
 }
 
 /*
  * Prepare one wake-capable object for entering sleep state (sstate).
  * Returns ENXIO if the object's _PRW cannot be parsed, 0 otherwise.
  */
 static int
 acpi_wake_sleep_prep(ACPI_HANDLE handle, int sstate)
 {
     struct acpi_prw_data prw;
     device_t dev;
 
     /* Check that this is a wake-capable device and get its GPE. */
     if (acpi_parse_prw(handle, &prw) != 0)
 	return (ENXIO);
     dev = acpi_get_device(handle);
 
     /*
      * The destination sleep state must be less than (i.e., higher power)
      * or equal to the value specified by _PRW.  If this GPE cannot be
      * enabled for the next sleep state, then disable it.
      */
     if (sstate > prw.lowest_wake) {
 	AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit, ACPI_GPE_DISABLE);
 	if (bootverbose)
 	    device_printf(dev, "wake_prep disabled wake for %s (S%d)\n",
 		acpi_name(handle), sstate);
 	return (0);
     }
 
     /* Nothing more to do unless the user asked for wake on this device. */
     if (!dev || (acpi_get_flags(dev) & ACPI_FLAG_WAKE_ENABLED) == 0)
 	return (0);
 
     /* Turn on any required power resources and set _PSW. */
     acpi_pwr_wake_enable(handle, 1);
     acpi_SetInteger(handle, "_PSW", 1);
     if (bootverbose)
 	device_printf(dev, "wake_prep enabled for %s (S%d)\n",
 	    acpi_name(handle), sstate);
 
     return (0);
 }
 
 /*
  * Undo acpi_wake_sleep_prep() for one object after leaving sleep state
  * (sstate).  Returns ENXIO if the object's _PRW cannot be parsed, 0
  * otherwise.
  */
 static int
 acpi_wake_run_prep(ACPI_HANDLE handle, int sstate)
 {
     struct acpi_prw_data prw;
     device_t dev;
 
     /* Check that this is a wake-capable device and get its GPE. */
     if (acpi_parse_prw(handle, &prw) != 0)
 	return (ENXIO);
 
     /* Return now if the user didn't enable this device for wake. */
     dev = acpi_get_device(handle);
     if (dev == NULL)
 	return (0);
     if ((acpi_get_flags(dev) & ACPI_FLAG_WAKE_ENABLED) == 0)
 	return (0);
 
     /*
      * If this GPE couldn't be enabled for the previous sleep state, it was
      * disabled before going to sleep so re-enable it.  If it was enabled,
      * clear _PSW and turn off any power resources it used.
      */
     if (sstate <= prw.lowest_wake) {
 	acpi_SetInteger(handle, "_PSW", 0);
 	acpi_pwr_wake_enable(handle, 0);
 	if (bootverbose)
 	    device_printf(dev, "run_prep cleaned up for %s\n",
 		acpi_name(handle));
     } else {
 	AcpiSetGpeWakeMask(prw.gpe_handle, prw.gpe_bit, ACPI_GPE_ENABLE);
 	if (bootverbose)
 	    device_printf(dev, "run_prep re-enabled %s\n", acpi_name(handle));
     }
 
     return (0);
 }
 
 /*
  * Namespace-walk callback used by acpi_wake_prep_walk().  (context)
  * points at the int sleep state.  Always returns AE_OK; the result of
  * the per-object prep function is ignored.
  */
 static ACPI_STATUS
 acpi_wake_prep(ACPI_HANDLE handle, UINT32 level, void *context, void **status)
 {
     int target_state;
 
     target_state = *(int *)context;
 
     /* If suspending, run the sleep prep function, otherwise wake. */
     if (!AcpiGbl_SystemAwakeAndRunning)
 	acpi_wake_run_prep(handle, target_state);
     else
 	acpi_wake_sleep_prep(handle, target_state);
 
     return (AE_OK);
 }
 
 /*
  * Walk the device objects below \_SB_ (to a depth of 100), calling
  * acpi_wake_prep() on each to prep devices for suspend/resume.
  * Always returns 0; nothing is done if \_SB_ cannot be looked up.
  */
 static int
 acpi_wake_prep_walk(int sstate)
 {
     ACPI_HANDLE sb;
 
     if (!ACPI_SUCCESS(AcpiGetHandle(ACPI_ROOT_OBJECT, "\\_SB_", &sb)))
 	return (0);
 
     AcpiWalkNamespace(ACPI_TYPE_DEVICE, sb, 100,
 	acpi_wake_prep, NULL, &sstate, NULL);
     return (0);
 }
 
 /*
  * Walk the tree rooted at (dev) to attach per-device wake sysctls.
  *
  * Children are visited recursively first.  Every attached child whose
  * _PRW evaluates successfully gets a read/write "wake" sysctl handled by
  * acpi_wake_set_sysctl().  Returns the error from device_get_children()
  * if the child list cannot be obtained, 0 otherwise; errors from the
  * recursive calls are ignored.
  */
 static int
 acpi_wake_sysctl_walk(device_t dev)
 {
     int error, i, numdevs;
     device_t *devlist;
     device_t child;
     ACPI_STATUS status;
 
     /*
      * Check the error before looking at the outputs: numdevs and devlist
      * must not be read (or freed) after a failed call, since they may
      * never have been written.
      */
     error = device_get_children(dev, &devlist, &numdevs);
     if (error != 0)
 	return (error);
     if (numdevs == 0) {
 	free(devlist, M_TEMP);
 	return (0);
     }
     for (i = 0; i < numdevs; i++) {
 	child = devlist[i];
 	acpi_wake_sysctl_walk(child);
 	if (!device_is_attached(child))
 	    continue;
 	status = AcpiEvaluateObject(acpi_get_handle(child), "_PRW", NULL, NULL);
 	if (ACPI_SUCCESS(status)) {
 	    SYSCTL_ADD_PROC(device_get_sysctl_ctx(child),
 		SYSCTL_CHILDREN(device_get_sysctl_tree(child)), OID_AUTO,
 		"wake", CTLTYPE_INT | CTLFLAG_RW, child, 0,
 		acpi_wake_set_sysctl, "I", "Device set to wake the system");
 	}
     }
     free(devlist, M_TEMP);
 
     return (0);
 }
 
 /*
  * Sysctl handler behind the per-device "wake" knob.  Reports 1 when the
  * device has ACPI_FLAG_WAKE_ENABLED set, else 0; on a write, accepts only
  * 0 or 1 (EINVAL otherwise) and forwards it to acpi_wake_set_enable().
  */
 static int
 acpi_wake_set_sysctl(SYSCTL_HANDLER_ARGS)
 {
     device_t target;
     int rv, value;
 
     target = (device_t)arg1;
     value = 0;
     if ((acpi_get_flags(target) & ACPI_FLAG_WAKE_ENABLED) != 0)
 	value = 1;
 
     rv = sysctl_handle_int(oidp, &value, 0, req);
     if (rv != 0)
 	return (rv);
     /* Read-only request: nothing to apply. */
     if (req->newptr == NULL)
 	return (rv);
 
     switch (value) {
     case 0:
     case 1:
 	return (acpi_wake_set_enable(target, value));
     default:
 	return (EINVAL);
     }
 }
 
 /*
  * Parse a device's _PRW into a structure.
  *
  * h is the handle whose _PRW is evaluated; on success prw receives the
  * lowest sleep state that still supports wake, the GPE block handle (NULL
  * when the GPE is given as a plain integer), the GPE bit and a copy of the
  * power resource entries.
  *
  * Returns 0 on success, EINVAL for NULL arguments or a malformed package,
  * and ENOENT when _PRW cannot be evaluated or produces no object.
  */
 int
 acpi_parse_prw(ACPI_HANDLE h, struct acpi_prw_data *prw)
 {
     ACPI_STATUS			status;
     ACPI_BUFFER			prw_buffer;
     ACPI_OBJECT			*res, *res2;
     int				error, i, power_count;
 
     if (h == NULL || prw == NULL)
 	return (EINVAL);
 
     /*
      * The _PRW object (7.2.9) is only required for devices that have the
      * ability to wake the system from a sleeping state.
      */
     error = EINVAL;
     prw_buffer.Pointer = NULL;
     prw_buffer.Length = ACPI_ALLOCATE_BUFFER;
     status = AcpiEvaluateObject(h, "_PRW", NULL, &prw_buffer);
     if (ACPI_FAILURE(status))
 	return (ENOENT);
     res = (ACPI_OBJECT *)prw_buffer.Pointer;
     if (res == NULL)
 	return (ENOENT);
     if (!ACPI_PKG_VALID(res, 2))
 	goto out;
 
     /*
      * Element 1 of the _PRW object:
      * The lowest power system sleeping state that can be entered while still
      * providing wake functionality.  The sleeping state being entered must
      * be less than (i.e., higher power) or equal to this value.
      */
     if (acpi_PkgInt32(res, 1, &prw->lowest_wake) != 0)
 	goto out;
 
     /*
      * Element 0 of the _PRW object:
      */
     switch (res->Package.Elements[0].Type) {
     case ACPI_TYPE_INTEGER:
 	/*
 	 * If the data type of this package element is numeric, then this
 	 * _PRW package element is the bit index in the GPEx_EN, in the
 	 * GPE blocks described in the FADT, of the enable bit that is
 	 * enabled for the wake event.
 	 */
 	prw->gpe_handle = NULL;
 	prw->gpe_bit = res->Package.Elements[0].Integer.Value;
 	error = 0;
 	break;
     case ACPI_TYPE_PACKAGE:
 	/*
 	 * If the data type of this package element is a package, then this
 	 * _PRW package element is itself a package containing two
 	 * elements.  The first is an object reference to the GPE Block
 	 * device that contains the GPE that will be triggered by the wake
 	 * event.  The second element is numeric and it contains the bit
 	 * index in the GPEx_EN, in the GPE Block referenced by the
 	 * first element in the package, of the enable bit that is enabled for
 	 * the wake event.
 	 *
 	 * For example, if this field is a package then it is of the form:
 	 * Package() {\_SB.PCI0.ISA.GPE, 2}
 	 */
 	res2 = &res->Package.Elements[0];
 	if (!ACPI_PKG_VALID(res2, 2))
 	    goto out;
 	prw->gpe_handle = acpi_GetReference(NULL, &res2->Package.Elements[0]);
 	if (prw->gpe_handle == NULL)
 	    goto out;
 	if (acpi_PkgInt32(res2, 1, &prw->gpe_bit) != 0)
 	    goto out;
 	error = 0;
 	break;
     default:
 	goto out;
     }
 
     /*
      * Elements 2 to N of the _PRW object are power resources.  If there are
      * more than ACPI_PRW_MAX_POWERRES of them, none are recorded.
      */
     power_count = res->Package.Count - 2;
     if (power_count > ACPI_PRW_MAX_POWERRES) {
 	printf("ACPI device %s has too many power resources\n", acpi_name(h));
 	power_count = 0;
     }
     prw->power_res_count = power_count;
     /* Skip the two leading elements (GPE info and lowest wake state). */
     for (i = 0; i < power_count; i++)
 	prw->power_res[i] = res->Package.Elements[i + 2];
 
 out:
     /* The evaluation buffer was allocated by ACPICA; release it here. */
     if (prw_buffer.Pointer != NULL)
 	AcpiOsFree(prw_buffer.Pointer);
     return (error);
 }
 
 /*
  * ACPI Event Handlers
  */
 
 /* System Event Handlers (registered by EVENTHANDLER_REGISTER) */
 
 /*
  * Sleep event handler: arg is the acpi softc, state the requested sleep
  * state.  Asks acpi_ReqSleepState() to start entering that state and logs
  * a message on the ACPI device if the request fails.
  */
 static void
 acpi_system_eventhandler_sleep(void *arg, int state)
 {
     struct acpi_softc *sc = (struct acpi_softc *)arg;
     int ret;
 
     ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
 
     /*
      * Check if button action is disabled or unknown.
      * NOTE(review): this exit uses a bare return while the other exit uses
      * return_VOID; confirm whether the trace macros need a matching exit.
      */
     if (state == ACPI_STATE_UNKNOWN)
 	return;
 
     /* Request that the system prepare to enter the given suspend state. */
     ret = acpi_ReqSleepState(sc, state);
     if (ret != 0)
 	device_printf(sc->acpi_dev,
 	    "request to enter state S%d failed (err %d)\n", state, ret);
 
     return_VOID;
 }
 
 /*
  * Wakeup event handler.  Apart from the function-trace macros it performs
  * no work; both arguments are otherwise unused.
  */
 static void
 acpi_system_eventhandler_wakeup(void *arg, int state)
 {
 
     ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, state);
 
     /* Currently, nothing to do for wakeup. */
 
     return_VOID;
 }
 
 /*
  * ACPICA Event Handlers (FixedEvent, also called from button notify handler)
  */
 
 /*
  * Deferred callback: context points at an int sleep state, which is passed
  * to the handlers registered for acpi_sleep_event.
  */
 static void
 acpi_invoke_sleep_eventhandler(void *context)
 {
 
     EVENTHANDLER_INVOKE(acpi_sleep_event, *(int *)context);
 }
 
 /*
  * Deferred callback: context points at an int sleep state, which is passed
  * to the handlers registered for acpi_wakeup_event.
  */
 static void
 acpi_invoke_wake_eventhandler(void *context)
 {
 
     EVENTHANDLER_INVOKE(acpi_wakeup_event, *(int *)context);
 }
 
 UINT32
 acpi_event_power_button_sleep(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_sleep_eventhandler, &sc->acpi_power_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 UINT32
 acpi_event_power_button_wake(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_wake_eventhandler, &sc->acpi_power_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 UINT32
 acpi_event_sleep_button_sleep(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_sleep_eventhandler, &sc->acpi_sleep_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 UINT32
 acpi_event_sleep_button_wake(void *context)
 {
     struct acpi_softc	*sc = (struct acpi_softc *)context;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     if (ACPI_FAILURE(AcpiOsExecute(OSL_NOTIFY_HANDLER,
 	acpi_invoke_wake_eventhandler, &sc->acpi_sleep_button_sx)))
 	return_VALUE (ACPI_INTERRUPT_NOT_HANDLED);
     return_VALUE (ACPI_INTERRUPT_HANDLED);
 }
 
 /*
  * Return the full pathname of an ACPI handle, or "(unknown)" for a NULL
  * handle or a failed lookup.
  *
  * XXX The result lives in a single static buffer that is overwritten by
  * the next call.  There is no locking, so only use this from
  * single-threaded callers.
  */
 char *
 acpi_name(ACPI_HANDLE handle)
 {
     static char path[256];
     ACPI_BUFFER out;
 
     if (!handle)
 	return ("(unknown)");
 
     out.Pointer = path;
     out.Length = sizeof(path);
     if (ACPI_FAILURE(AcpiGetName(handle, ACPI_FULL_PATHNAME, &out)))
 	return ("(unknown)");
     return (path);
 }
 
 /*
  * Debugging/bug-avoidance.  Report whether the handle's pathname (with a
  * leading '\' dropped) starts with any whitespace-separated entry of the
  * "debug.acpi.avoid" kernel environment variable.  Returns 1 on a match,
  * 0 otherwise or when the variable is unset.
  */
 int
 acpi_avoid(ACPI_HANDLE handle)
 {
     char	*list, *name, *tok;
     int		matched, toklen;
 
     name = acpi_name(handle);
     if (name[0] == '\\')
 	name++;
 
     list = kern_getenv("debug.acpi.avoid");
     if (list == NULL)
 	return (0);
 
     /* Scan the avoid list one token at a time until something matches. */
     matched = 0;
     tok = list;
     while (!matched) {
 	for (; *tok != 0 && isspace(*tok); tok++)
 	    continue;
 	if (*tok == 0)
 	    break;
 	for (toklen = 0; tok[toklen] != 0 && !isspace(tok[toklen]); toklen++)
 	    continue;
 	if (strncmp(tok, name, toklen) == 0)
 	    matched = 1;
 	tok += toklen;
     }
     freeenv(list);
 
     return (matched);
 }
 
 /*
  * Debugging/bug-avoidance.  Disable ACPI subsystem components.
  *
  * Returns 1 when the "debug.acpi.disabled" kernel environment variable is
  * exactly "all" or contains subsys as one of its whitespace-separated
  * entries, otherwise 0 (including when the variable is unset).
  */
 int
 acpi_disabled(char *subsys)
 {
     char	*cp, *env;
     int		len;
 
     if ((env = kern_getenv("debug.acpi.disabled")) == NULL)
 	return (0);
     if (strcmp(env, "all") == 0) {
 	freeenv(env);
 	return (1);
     }
 
     /* Scan the disable list, checking for a match. */
     cp = env;
     for (;;) {
 	while (*cp != '\0' && isspace(*cp))
 	    cp++;
 	if (*cp == '\0')
 	    break;
 	len = 0;
 	while (cp[len] != '\0' && !isspace(cp[len]))
 	    len++;
 	/*
 	 * Require the entry to name the whole subsystem; a bare prefix
 	 * compare would let "pci" disable "pci_link" as well.  subsys[len]
 	 * is in bounds because the first len bytes just compared equal to
 	 * non-NUL characters.
 	 */
 	if (strncmp(cp, subsys, len) == 0 && subsys[len] == '\0') {
 	    freeenv(env);
 	    return (1);
 	}
 	cp += len;
     }
     freeenv(env);
 
     return (0);
 }
 
 /*
  * Resolve an absolute ACPI pathname to its device.  *dev is left alone if
  * it is already set, if name does not start with '\', or if the pathname
  * cannot be resolved to a handle.
  */
 static void
 acpi_lookup(void *arg, const char *name, device_t *dev)
 {
     ACPI_HANDLE node;
     ACPI_STATUS rv;
 
     /*
      * Allow any handle name that is specified as an absolute path and
      * starts with '\'.  We could restrict this to \_SB and friends,
      * but see acpi_probe_children() for notes on why we scan the entire
      * namespace for devices.
      */
     if (*dev != NULL || name[0] != '\\')
 	return;
 
     /*
      * XXX: The pathname argument to AcpiGetHandle() should be fixed to
      * be const.
      */
     rv = AcpiGetHandle(ACPI_ROOT_OBJECT, __DECONST(char *, name), &node);
     if (ACPI_SUCCESS(rv))
 	*dev = acpi_get_device(node);
 }
 
 /*
  * Control interface.
  *
  * We multiplex ioctls for all participating ACPI devices here.  Individual
  * drivers wanting to be accessible via /dev/acpi should use the
  * register/deregister interface to make their handlers visible.
  */
 
 /* One registered ioctl handler: command, callback and its argument. */
 struct acpi_ioctl_hook
 {
     TAILQ_ENTRY(acpi_ioctl_hook) link;
     u_long			 cmd;
     acpi_ioctl_fn		 fn;
     void			 *arg;
 };
 
 /*
  * List of registered hooks.  The list head is initialised lazily by the
  * first acpi_register_ioctl() call, which then sets the _initted flag.
  */
 static TAILQ_HEAD(,acpi_ioctl_hook)	acpi_ioctl_hooks;
 static int				acpi_ioctl_hooks_initted;
 
 /*
  * Register fn(cmd, addr, arg) as the handler for ioctl command cmd.
  * Returns ENOMEM if the hook cannot be allocated, otherwise 0.
  */
 int
 acpi_register_ioctl(u_long cmd, acpi_ioctl_fn fn, void *arg)
 {
     struct acpi_ioctl_hook	*hook;
 
     hook = malloc(sizeof(*hook), M_ACPIDEV, M_NOWAIT);
     if (hook == NULL)
 	return (ENOMEM);
     hook->arg = arg;
     hook->fn = fn;
     hook->cmd = cmd;
 
     ACPI_LOCK(acpi);
     /* First registration: set up the list head. */
     if (!acpi_ioctl_hooks_initted) {
 	TAILQ_INIT(&acpi_ioctl_hooks);
 	acpi_ioctl_hooks_initted = 1;
     }
     TAILQ_INSERT_TAIL(&acpi_ioctl_hooks, hook, link);
     ACPI_UNLOCK(acpi);
 
     return (0);
 }
 
 /*
  * Remove and free the first registered hook whose command and handler both
  * match.  Does nothing if no such hook exists.
  */
 void
 acpi_deregister_ioctl(u_long cmd, acpi_ioctl_fn fn)
 {
     struct acpi_ioctl_hook	*hook;
 
     ACPI_LOCK(acpi);
     TAILQ_FOREACH(hook, &acpi_ioctl_hooks, link) {
 	if (hook->cmd != cmd || hook->fn != fn)
 	    continue;
 	TAILQ_REMOVE(&acpi_ioctl_hooks, hook, link);
 	free(hook, M_ACPIDEV);
 	/* hook is gone; leave before the iterator touches it again. */
 	break;
     }
     ACPI_UNLOCK(acpi);
 }
 
 /* Open entry point: no per-open state is set up; always succeeds. */
 static int
 acpiopen(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
     return (0);
 }
 
 /* Close entry point: nothing to tear down; always succeeds. */
 static int
 acpiclose(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
     return (0);
 }
 
 /*
  * ioctl entry point.  A registered hook for cmd takes precedence and its
  * result is returned directly.  Otherwise the core sleep-state commands
  * are handled here; they require the descriptor to be open for writing
  * (EPERM if not), and unknown commands yield ENXIO.
  */
 static int
 acpiioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
 {
     struct acpi_ioctl_hook	*hook;
     struct acpi_softc		*sc;
     int				state;
 
     sc = dev->si_drv1;
 
     /*
      * Scan the list of registered ioctls, looking for handlers.
      */
     ACPI_LOCK(acpi);
     hook = NULL;
     if (acpi_ioctl_hooks_initted) {
 	TAILQ_FOREACH(hook, &acpi_ioctl_hooks, link)
 	    if (hook->cmd == cmd)
 		break;
     }
     ACPI_UNLOCK(acpi);
     if (hook != NULL)
 	return (hook->fn(cmd, addr, hook->arg));
 
     /*
      * Core ioctls are not permitted for non-writable user.
      * Currently, other ioctls just fetch information.
      * Not changing system behavior.
      */
     if (!(flag & FWRITE))
 	return (EPERM);
 
     /* Core system ioctls. */
     switch (cmd) {
     case ACPIIO_REQSLPSTATE:
 	state = *(int *)addr;
 	if (state == ACPI_STATE_S5) {
 	    device_printf(sc->acpi_dev,
 		"power off via acpi ioctl not supported\n");
 	    return (EOPNOTSUPP);
 	}
 	return (acpi_ReqSleepState(sc, state));
     case ACPIIO_ACKSLPSTATE:
 	return (acpi_AckSleepState(sc->acpi_clone, *(int *)addr));
     case ACPIIO_SETSLPSTATE:	/* DEPRECATED */
 	state = *(int *)addr;
 	if (state < ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
 	    return (EINVAL);
 	if (!acpi_sleep_states[state])
 	    return (EOPNOTSUPP);
 	if (ACPI_FAILURE(acpi_SetSleepState(sc, state)))
 	    return (ENXIO);
 	return (0);
     default:
 	return (ENXIO);
     }
 }
 
 /*
  * Convert a sleep state name to its number.  Accepts "S0".."S5" (the 'S'
  * in either case) and, case-insensitively, "NONE" which maps to
  * ACPI_STATE_UNKNOWN.  Anything else yields -1.
  */
 static int
 acpi_sname2sstate(const char *sname)
 {
     int digit;
 
     if (toupper(sname[0]) != 'S') {
 	if (strcasecmp(sname, "NONE") == 0)
 	    return (ACPI_STATE_UNKNOWN);
 	return (-1);
     }
 
     /* Exactly one digit in range must follow the 'S'. */
     digit = sname[1] - '0';
     if (digit < ACPI_STATE_S0 || digit > ACPI_STATE_S5)
 	return (-1);
     if (sname[2] != '\0')
 	return (-1);
     return (digit);
 }
 
 /*
  * Convert a sleep state number to its name: "S0".."S5" for the S-states,
  * "NONE" for ACPI_STATE_UNKNOWN, and NULL for any other value.
  */
 static const char *
 acpi_sstate2sname(int sstate)
 {
     static const char *snames[] = { "S0", "S1", "S2", "S3", "S4", "S5" };
     const char *name;
 
     name = NULL;
     if (sstate == ACPI_STATE_UNKNOWN)
 	name = "NONE";
     /* An in-range S-state takes precedence over the "NONE" mapping. */
     if (sstate >= ACPI_STATE_S0 && sstate <= ACPI_STATE_S5)
 	name = snames[sstate];
     return (name);
 }
 
 static int
 acpi_supported_sleep_state_sysctl(SYSCTL_HANDLER_ARGS)
 {
     int error;
     struct sbuf sb;
     UINT8 state;
 
     sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
     for (state = ACPI_STATE_S1; state < ACPI_S_STATE_COUNT; state++)
 	if (acpi_sleep_states[state])
 	    sbuf_printf(&sb, "%s ", acpi_sstate2sname(state));
     sbuf_trim(&sb);
     sbuf_finish(&sb);
     error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
     sbuf_delete(&sb);
     return (error);
 }
 
 static int
 acpi_sleep_state_sysctl(SYSCTL_HANDLER_ARGS)
 {
     char sleep_state[10];
     int error, new_state, old_state;
 
     old_state = *(int *)oidp->oid_arg1;
     strlcpy(sleep_state, acpi_sstate2sname(old_state), sizeof(sleep_state));
     error = sysctl_handle_string(oidp, sleep_state, sizeof(sleep_state), req);
     if (error == 0 && req->newptr != NULL) {
 	new_state = acpi_sname2sstate(sleep_state);
 	if (new_state < ACPI_STATE_S1)
 	    return (EINVAL);
 	if (new_state < ACPI_S_STATE_COUNT && !acpi_sleep_states[new_state])
 	    return (EOPNOTSUPP);
 	if (new_state != old_state)
 	    *(int *)oidp->oid_arg1 = new_state;
     }
     return (error);
 }
 
 /*
  * Inform devctl(4) when we receive a Notify.  Sends an "ACPI" event for
  * the given subsystem, typed with the handle's pathname and carrying
  * "notify=0x.." as data.  Silently does nothing if subsystem is NULL or
  * the pathname cannot be obtained.
  */
 void
 acpi_UserNotify(const char *subsystem, ACPI_HANDLE h, uint8_t notify)
 {
     ACPI_BUFFER		path;
     char		event[16];
 
     if (subsystem == NULL)
 	return;
 
     /* Let ACPICA allocate the pathname buffer; it is released below. */
     path.Length = ACPI_ALLOCATE_BUFFER;
     path.Pointer = NULL;
     if (ACPI_FAILURE(AcpiNsHandleToPathname(h, &path, FALSE)))
 	return;
 
     snprintf(event, sizeof(event), "notify=0x%02x", notify);
     devctl_notify("ACPI", subsystem, path.Pointer, event);
     AcpiOsFree(path.Pointer);
 }
 
 #ifdef ACPI_DEBUG
 /*
  * Support for parsing debug options from the kernel environment.
  *
  * Bits may be set in the AcpiDbgLayer and AcpiDbgLevel debug registers
  * by specifying the names of the bits in the debug.acpi.layer and
  * debug.acpi.level environment variables.  Bits may be unset by 
  * prefixing the bit name with !.
  */
 /* Maps the textual name of a debug bit (or bit group) to its mask value. */
 struct debugtag
 {
     char	*name;
     UINT32	value;
 };
 
 /*
  * Names accepted for debug.acpi.layer.  The first group carries ACPICA
  * component names, the second the driver names; the table ends with a
  * NULL-name entry.
  */
 static struct debugtag	dbg_layer[] = {
     {"ACPI_UTILITIES",		ACPI_UTILITIES},
     {"ACPI_HARDWARE",		ACPI_HARDWARE},
     {"ACPI_EVENTS",		ACPI_EVENTS},
     {"ACPI_TABLES",		ACPI_TABLES},
     {"ACPI_NAMESPACE",		ACPI_NAMESPACE},
     {"ACPI_PARSER",		ACPI_PARSER},
     {"ACPI_DISPATCHER",		ACPI_DISPATCHER},
     {"ACPI_EXECUTER",		ACPI_EXECUTER},
     {"ACPI_RESOURCES",		ACPI_RESOURCES},
     {"ACPI_CA_DEBUGGER",	ACPI_CA_DEBUGGER},
     {"ACPI_OS_SERVICES",	ACPI_OS_SERVICES},
     {"ACPI_CA_DISASSEMBLER",	ACPI_CA_DISASSEMBLER},
     {"ACPI_ALL_COMPONENTS",	ACPI_ALL_COMPONENTS},
 
     {"ACPI_AC_ADAPTER",		ACPI_AC_ADAPTER},
     {"ACPI_BATTERY",		ACPI_BATTERY},
     {"ACPI_BUS",		ACPI_BUS},
     {"ACPI_BUTTON",		ACPI_BUTTON},
     {"ACPI_EC", 		ACPI_EC},
     {"ACPI_FAN",		ACPI_FAN},
     {"ACPI_POWERRES",		ACPI_POWERRES},
     {"ACPI_PROCESSOR",		ACPI_PROCESSOR},
     {"ACPI_THERMAL",		ACPI_THERMAL},
     {"ACPI_TIMER",		ACPI_TIMER},
     {"ACPI_ALL_DRIVERS",	ACPI_ALL_DRIVERS},
     {NULL, 0}
 };
 
 /*
  * Names accepted for debug.acpi.level, grouped by verbosity; the table
  * ends with a NULL-name entry.
  */
 static struct debugtag dbg_level[] = {
     {"ACPI_LV_INIT",		ACPI_LV_INIT},
     {"ACPI_LV_DEBUG_OBJECT",	ACPI_LV_DEBUG_OBJECT},
     {"ACPI_LV_INFO",		ACPI_LV_INFO},
     {"ACPI_LV_REPAIR",		ACPI_LV_REPAIR},
     {"ACPI_LV_ALL_EXCEPTIONS",	ACPI_LV_ALL_EXCEPTIONS},
 
     /* Trace verbosity level 1 [Standard Trace Level] */
     {"ACPI_LV_INIT_NAMES",	ACPI_LV_INIT_NAMES},
     {"ACPI_LV_PARSE",		ACPI_LV_PARSE},
     {"ACPI_LV_LOAD",		ACPI_LV_LOAD},
     {"ACPI_LV_DISPATCH",	ACPI_LV_DISPATCH},
     {"ACPI_LV_EXEC",		ACPI_LV_EXEC},
     {"ACPI_LV_NAMES",		ACPI_LV_NAMES},
     {"ACPI_LV_OPREGION",	ACPI_LV_OPREGION},
     {"ACPI_LV_BFIELD",		ACPI_LV_BFIELD},
     {"ACPI_LV_TABLES",		ACPI_LV_TABLES},
     {"ACPI_LV_VALUES",		ACPI_LV_VALUES},
     {"ACPI_LV_OBJECTS",		ACPI_LV_OBJECTS},
     {"ACPI_LV_RESOURCES",	ACPI_LV_RESOURCES},
     {"ACPI_LV_USER_REQUESTS",	ACPI_LV_USER_REQUESTS},
     {"ACPI_LV_PACKAGE",		ACPI_LV_PACKAGE},
     {"ACPI_LV_VERBOSITY1",	ACPI_LV_VERBOSITY1},
 
     /* Trace verbosity level 2 [Function tracing and memory allocation] */
     {"ACPI_LV_ALLOCATIONS",	ACPI_LV_ALLOCATIONS},
     {"ACPI_LV_FUNCTIONS",	ACPI_LV_FUNCTIONS},
     {"ACPI_LV_OPTIMIZATIONS",	ACPI_LV_OPTIMIZATIONS},
     {"ACPI_LV_VERBOSITY2",	ACPI_LV_VERBOSITY2},
     {"ACPI_LV_ALL",		ACPI_LV_ALL},
 
     /* Trace verbosity level 3 [Threading, I/O, and Interrupts] */
     {"ACPI_LV_MUTEX",		ACPI_LV_MUTEX},
     {"ACPI_LV_THREADS",		ACPI_LV_THREADS},
     {"ACPI_LV_IO",		ACPI_LV_IO},
     {"ACPI_LV_INTERRUPTS",	ACPI_LV_INTERRUPTS},
     {"ACPI_LV_VERBOSITY3",	ACPI_LV_VERBOSITY3},
 
     /* Exceptionally verbose output -- also used in the global "DebugLevel" */
     {"ACPI_LV_AML_DISASSEMBLE",	ACPI_LV_AML_DISASSEMBLE},
     {"ACPI_LV_VERBOSE_INFO",	ACPI_LV_VERBOSE_INFO},
     {"ACPI_LV_FULL_TABLES",	ACPI_LV_FULL_TABLES},
     {"ACPI_LV_EVENTS",		ACPI_LV_EVENTS},
     {"ACPI_LV_VERBOSE",		ACPI_LV_VERBOSE},
     {NULL, 0}
 };    
 
 /*
  * Apply a whitespace-separated list of bit names to *flag.
  *
  * cp is the list, tag the NULL-name-terminated table to look names up in.
  * A name sets its bits; a name prefixed with '!' clears them.  Names must
  * match a table entry exactly; unknown names are ignored.
  */
 static void
 acpi_parse_debug(char *cp, struct debugtag *tag, UINT32 *flag)
 {
     char	*ep;
     int		i, l;
     int		set;
 
     while (*cp) {
 	if (isspace(*cp)) {
 	    cp++;
 	    continue;
 	}
 	/* Find the end of this token. */
 	ep = cp;
 	while (*ep && !isspace(*ep))
 	    ep++;
 	if (*cp == '!') {
 	    set = 0;
 	    cp++;
 	    /* A lone '!' names nothing. */
 	    if (cp == ep)
 		continue;
 	} else {
 	    set = 1;
 	}
 	l = ep - cp;
 	for (i = 0; tag[i].name != NULL; i++) {
 	    /*
 	     * Compare the whole name, not just a prefix: otherwise
 	     * "ACPI_LV_INIT" would also toggle "ACPI_LV_INIT_NAMES".
 	     * name[l] is in bounds because the first l bytes just
 	     * compared equal to non-NUL characters.
 	     */
 	    if (strncmp(cp, tag[i].name, l) == 0 && tag[i].name[l] == '\0') {
 		if (set)
 		    *flag |= tag[i].value;
 		else
 		    *flag &= ~tag[i].value;
 	    }
 	}
 	cp = ep;
     }
 }
 
 /*
  * Load AcpiDbgLayer/AcpiDbgLevel from the debug.acpi.layer and
  * debug.acpi.level kernel environment variables and print what was set.
  * Also run as a SYSINIT at SI_SUB_TUNABLES; the argument is unused.
  */
 static void
 acpi_set_debugging(void *junk)
 {
     char	*layer, *level;
 
     /* While cold, start from a clean slate. */
     if (cold) {
 	AcpiDbgLayer = 0;
 	AcpiDbgLevel = 0;
     }
 
     layer = kern_getenv("debug.acpi.layer");
     level = kern_getenv("debug.acpi.level");
     if (layer == NULL && level == NULL)
 	return;
 
     printf("ACPI set debug");
     if (layer != NULL) {
 	/* "NONE" is still parsed, just not echoed. */
 	if (strcmp("NONE", layer) != 0)
 	    printf(" layer '%s'", layer);
 	acpi_parse_debug(layer, &dbg_layer[0], &AcpiDbgLayer);
 	freeenv(layer);
     }
     if (level != NULL) {
 	if (strcmp("NONE", level) != 0)
 	    printf(" level '%s'", level);
 	acpi_parse_debug(level, &dbg_level[0], &AcpiDbgLevel);
 	freeenv(level);
     }
     printf("\n");
 }
 
 SYSINIT(acpi_debugging, SI_SUB_TUNABLES, SI_ORDER_ANY, acpi_set_debugging,
 	NULL);
 
 /*
  * Sysctl handler shared by debug.acpi.layer and debug.acpi.level; oid_arg1
  * holds the variable name and selects which table and flag word to use.
  * A read reports "NONE" or the names of all fully-set table entries.  A
  * write zeroes the flag word, stores the new string in the kernel
  * environment and re-runs acpi_set_debugging().
  */
 static int
 acpi_debug_sysctl(SYSCTL_HANDLER_ARGS)
 {
     int		 error, *dbg;
     struct	 debugtag *tag;
     struct	 sbuf sb;
     char	 temp[128];
 
     if (sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND) == NULL)
 	return (ENOMEM);
     if (strcmp(oidp->oid_arg1, "debug.acpi.layer") == 0) {
 	tag = &dbg_layer[0];
 	dbg = &AcpiDbgLayer;
     } else {
 	tag = &dbg_level[0];
 	dbg = &AcpiDbgLevel;
     }
 
     /* Get old values if this is a get request. */
     ACPI_SERIAL_BEGIN(acpi);
     if (*dbg == 0) {
 	sbuf_cpy(&sb, "NONE");
     } else if (req->newptr == NULL) {
 	for (; tag->name != NULL; tag++) {
 	    if ((*dbg & tag->value) == tag->value)
 		sbuf_printf(&sb, "%s ", tag->name);
 	}
     }
     sbuf_trim(&sb);
     sbuf_finish(&sb);
     /* temp is fixed-size, so a long list of names is truncated here. */
     strlcpy(temp, sbuf_data(&sb), sizeof(temp));
     sbuf_delete(&sb);
 
     error = sysctl_handle_string(oidp, temp, sizeof(temp), req);
 
     /* Check for error or no change */
     if (error == 0 && req->newptr != NULL) {
 	*dbg = 0;
 	kern_setenv((char *)oidp->oid_arg1, temp);
 	acpi_set_debugging(NULL);
     }
     ACPI_SERIAL_END(acpi);
 
     return (error);
 }
 
 SYSCTL_PROC(_debug_acpi, OID_AUTO, layer, CTLFLAG_RW | CTLTYPE_STRING,
 	    "debug.acpi.layer", 0, acpi_debug_sysctl, "A", "");
 SYSCTL_PROC(_debug_acpi, OID_AUTO, level, CTLFLAG_RW | CTLTYPE_STRING,
 	    "debug.acpi.level", 0, acpi_debug_sysctl, "A", "");
 #endif /* ACPI_DEBUG */
 
 /*
  * Sysctl handler for acpi_debug_objects.  After a write, the ACPICA
  * AcpiGbl_EnableAmlDebugObject flag is updated only when the value moved
  * between zero and non-zero.
  */
 static int
 acpi_debug_objects_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	int	prev, rv;
 
 	prev = acpi_debug_objects;
 	rv = sysctl_handle_int(oidp, &acpi_debug_objects, 0, req);
 	if (rv != 0)
 		return (rv);
 	if (req->newptr == NULL)
 		return (rv);
 
 	/* Same on/off state as before: nothing to propagate. */
 	if ((prev != 0) == (acpi_debug_objects != 0))
 		return (0);
 
 	ACPI_SERIAL_BEGIN(acpi);
 	AcpiGbl_EnableAmlDebugObject = acpi_debug_objects ? TRUE : FALSE;
 	ACPI_SERIAL_END(acpi);
 
 	return (0);
 }
 
 /*
  * Split a comma-separated interface list into iface->data[] / iface->num.
  *
  * The list is copied once and cut in place at each comma; every data[]
  * entry points into that copy, with data[0] at its start.  Returns the
  * number of entries, or 0 (leaving iface untouched) when there are none.
  */
 static int
 acpi_parse_interfaces(char *str, struct acpi_interface *iface)
 {
 	char *buf;
 	size_t len;
 	int count, pos;
 
 	/* Skip leading separators so the copy starts on the first entry. */
 	for (buf = str; isspace(*buf) || *buf == ','; buf++)
 		continue;
 	len = strlen(buf);
 	if (len == 0)
 		return (0);
 
 	buf = strdup(buf, M_TEMP);
 	for (pos = 0; pos < len; pos++) {
 		if (buf[pos] == ',')
 			buf[pos] = '\0';
 	}
 
 	/* First pass: count the entries. */
 	count = 0;
 	for (pos = 0; pos < len; ) {
 		if (isspace(buf[pos]) || buf[pos] == '\0') {
 			pos++;
 			continue;
 		}
 		pos += strlen(buf + pos) + 1;
 		count++;
 	}
 	if (count == 0) {
 		free(buf, M_TEMP);
 		return (0);
 	}
 
 	/* Second pass: record where each entry starts. */
 	iface->data = malloc(sizeof(*iface->data) * count, M_TEMP, M_WAITOK);
 	iface->num = count;
 	count = 0;
 	for (pos = 0; pos < len; ) {
 		if (isspace(buf[pos]) || buf[pos] == '\0') {
 			pos++;
 			continue;
 		}
 		iface->data[count] = buf + pos;
 		pos += strlen(buf + pos) + 1;
 		count++;
 	}
 
 	return (count);
 }
 
 /*
  * Release a list built by acpi_parse_interfaces().  data[0] is the start
  * of the single string copy that all entries point into, so freeing it
  * releases every name; it must be freed before the pointer array itself.
  */
 static void
 acpi_free_interfaces(struct acpi_interface *iface)
 {
 
 	free(iface->data[0], M_TEMP);
 	free(iface->data, M_TEMP);
 }
 
 /*
  * Install every _OSI interface named in acpi_install_interface, then remove
  * every one named in acpi_remove_interface.  Failures are reported on dev;
  * successes are reported only when booting verbosely.
  */
 static void
 acpi_reset_interfaces(device_t dev)
 {
 	struct acpi_interface list;
 	ACPI_STATUS status;
 	int i;
 
 	if (acpi_parse_interfaces(acpi_install_interface, &list) > 0) {
 		for (i = 0; i < list.num; i++) {
 			status = AcpiInstallInterface(list.data[i]);
 			if (ACPI_FAILURE(status)) {
 				device_printf(dev,
 				    "failed to install _OSI(\"%s\"): %s\n",
 				    list.data[i], AcpiFormatException(status));
 				continue;
 			}
 			if (bootverbose)
 				device_printf(dev, "installed _OSI(\"%s\")\n",
 				    list.data[i]);
 		}
 		acpi_free_interfaces(&list);
 	}
 
 	if (acpi_parse_interfaces(acpi_remove_interface, &list) > 0) {
 		for (i = 0; i < list.num; i++) {
 			status = AcpiRemoveInterface(list.data[i]);
 			if (ACPI_FAILURE(status)) {
 				device_printf(dev,
 				    "failed to remove _OSI(\"%s\"): %s\n",
 				    list.data[i], AcpiFormatException(status));
 				continue;
 			}
 			if (bootverbose)
 				device_printf(dev, "removed _OSI(\"%s\")\n",
 				    list.data[i]);
 		}
 		acpi_free_interfaces(&list);
 	}
 }
 
 /*
  * Power-management callback.  Only POWER_CMD_SUSPEND is handled: arg is
  * the acpi softc and the single variadic int is the POWER_SLEEP_STATE_*
  * to enter, which is mapped to an ACPI S-state.
  *
  * Returns EINVAL for an unknown command, a NULL softc or an unknown sleep
  * state; ENXIO if entering the sleep state fails; otherwise 0.
  */
 static int
 acpi_pm_func(u_long cmd, void *arg, ...)
 {
 	struct	acpi_softc *sc;
 	va_list	ap;
 	int	requested, sx;
 
 	if (cmd != POWER_CMD_SUSPEND)
 		return (EINVAL);
 
 	sc = (struct acpi_softc *)arg;
 	if (sc == NULL)
 		return (EINVAL);
 
 	va_start(ap, arg);
 	requested = va_arg(ap, int);
 	va_end(ap);
 
 	/* Standby and suspend map to the softc's configured S-states. */
 	if (requested == POWER_SLEEP_STATE_STANDBY)
 		sx = sc->acpi_standby_sx;
 	else if (requested == POWER_SLEEP_STATE_SUSPEND)
 		sx = sc->acpi_suspend_sx;
 	else if (requested == POWER_SLEEP_STATE_HIBERNATE)
 		sx = ACPI_STATE_S4;
 	else
 		return (EINVAL);
 
 	if (ACPI_FAILURE(acpi_EnterSleepState(sc, sx)))
 		return (ENXIO);
 	return (0);
 }
 
 /*
  * SYSINIT hook: register acpi_pm_func as the ACPI power-management handler,
  * but only while the system is still cold and acpi0 has not been disabled
  * through resource hints.
  */
 static void
 acpi_pm_register(void *arg)
 {
     if (cold && !resource_disabled("acpi", 0))
 	power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, NULL);
 }
 
 SYSINIT(power, SI_SUB_KLD, SI_ORDER_ANY, acpi_pm_register, 0);
Index: head/sys/vm/uma_core.c
===================================================================
--- head/sys/vm/uma_core.c	(revision 327953)
+++ head/sys/vm/uma_core.c	(revision 327954)
@@ -1,3907 +1,3907 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
  * Copyright (c) 2004-2006 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * uma_core.c  Implementation of the Universal Memory allocator
  *
  * This allocator is intended to replace the multitude of similar object caches
  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
  * efficient.  A primary design goal is to return unused memory to the rest of
  * the system.  This will make the system as a whole more flexible due to the
  * ability to move memory to subsystems which most need it instead of leaving
  * pools of reserved memory unused.
  *
  * The basic ideas stem from similar slab/zone based allocators whose algorithms
  * are well known.
  *
  */
 
 /*
  * TODO:
  *	- Improve memory usage for large allocations
  *	- Investigate cache size adjustments
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_param.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitset.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/types.h>
 #include <sys/limits.h>
 #include <sys/queue.h>
 #include <sys/malloc.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/sysctl.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/random.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/taskqueue.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_param.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 #include <vm/uma_dbg.h>
 
 #include <ddb/ddb.h>
 
 #ifdef DEBUG_MEMGUARD
 #include <vm/memguard.h>
 #endif
 
 /*
  * This is the zone and keg from which all zones are spawned.
  */
 static uma_zone_t kegs;
 static uma_zone_t zones;
 
 /* This is the zone from which all offpage uma_slab_ts are allocated. */
 static uma_zone_t slabzone;
 
 /*
  * The initial hash tables come out of this zone so they can be allocated
  * prior to malloc coming up.
  */
 static uma_zone_t hashzone;
 
 /*
  * The boot-time adjusted value for cache line alignment.  Stored as an
  * alignment mask (size - 1); the default corresponds to 64 bytes.
  */
 int uma_align_cache = 64 - 1;
 
 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
 
 /*
  * Are we allowed to allocate buckets?  Starts out disabled (1).
  */
 static int bucketdisable = 1;
 
 /* Linked list of all kegs in the system */
 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
 
 /* Linked list of all cache-only zones in the system */
 static LIST_HEAD(,uma_zone) uma_cachezones =
     LIST_HEAD_INITIALIZER(uma_cachezones);
 
 /* This RW lock protects the keg list */
 static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
 
 /*
  * Pointer and counter to pool of pages, that is preallocated at
  * startup to bootstrap UMA.  Early zones continue to use the pool
  * until it is depleted, so allocations may happen after boot, thus
  * we need a mutex to protect it.
  */
 static char *bootmem;
 static int boot_pages;
 static struct mtx uma_boot_pages_mtx;
 
 /* NOTE(review): presumably serializes zone draining -- confirm at users. */
 static struct sx uma_drain_lock;
 
 /* kmem soft limit. */
 static unsigned long uma_kmem_limit = LONG_MAX;
 static volatile unsigned long uma_kmem_total;
 
 /*
  * Is the VM done starting up?  0 initially; UMA_STARTUP and UMA_STARTUP2
  * are presumably the later values -- confirm where booted is assigned.
  */
 static int booted = 0;
 #define	UMA_STARTUP	1
 #define	UMA_STARTUP2	2
 
 /*
  * This is the handle used to schedule events that need to happen
  * outside of the allocation fast path.
  */
 static struct callout uma_callout;
 #define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
 
 /*
  * This structure is passed as the zone ctor arg so that I don't have to create
  * a special allocation function just for zones.
  *
  * It bundles the zone's name, item size, the ctor/dtor/init/fini and
  * import/release callbacks with their arg, an optional keg, and the
  * alignment and flags.
  */
 struct uma_zctor_args {
 	const char *name;
 	size_t size;
 	uma_ctor ctor;
 	uma_dtor dtor;
 	uma_init uminit;
 	uma_fini fini;
 	uma_import import;
 	uma_release release;
 	void *arg;
 	uma_keg_t keg;
 	int align;
 	uint32_t flags;
 };
 
 /*
  * Argument bundle for keg construction: the owning zone, item size, the
  * init/fini callbacks, alignment and flags.
  * NOTE(review): presumably the keg-ctor counterpart of uma_zctor_args --
  * confirm against keg_ctor().
  */
 struct uma_kctor_args {
 	uma_zone_t zone;
 	size_t size;
 	uma_init uminit;
 	uma_fini fini;
 	int align;
 	uint32_t flags;
 };
 
 /* Describes one bucket size class; see the bucket_zones[] table. */
 struct uma_bucket_zone {
 	uma_zone_t	ubz_zone;	/* NULL in the static table below. */
 	char		*ubz_name;	/* Name, e.g. "4 Bucket". */
 	int		ubz_entries;	/* Number of items it can hold. */
 	int		ubz_maxsize;	/* Maximum allocation size per-item. */
 };
 
 /*
  * Compute the actual number of bucket entries to pack them in power
  * of two sizes for more efficient space utilization.
  *
  * BUCKET_SIZE(n) is the number of pointer slots left in n pointer-sized
  * words once sizeof(struct uma_bucket) has been subtracted.
  */
 #define	BUCKET_SIZE(n)						\
     (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
 
 #define	BUCKET_MAX	BUCKET_SIZE(256)
 
 /*
  * Bucket size classes: larger buckets are paired with smaller per-item
  * maximum sizes.  The final entry has a NULL name and zero entries.
  */
 struct uma_bucket_zone bucket_zones[] = {
 	{ NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
 	{ NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
 	{ NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
 	{ NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
 	{ NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
 	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
 	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
 	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
 	{ NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
 	{ NULL, NULL, 0}
 };
 
 /*
  * Flags and enumerations to be passed to internal functions.
  *
  * zfreeskip is the last argument of zone_free_item().
  * NOTE(review): the names suggest it selects which teardown steps (dtor,
  * fini) are skipped -- confirm in zone_free_item().
  */
 enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
 
 #define	UMA_ANYDOMAIN	-1	/* Special value for domain search. */
 
 /* Prototypes.. */
 
 static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void page_free(void *, vm_size_t, uint8_t);
 static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int);
 static void cache_drain(uma_zone_t);
 static void bucket_drain(uma_zone_t, uma_bucket_t);
 static void bucket_cache_drain(uma_zone_t zone);
 static int keg_ctor(void *, int, void *, int);
 static void keg_dtor(void *, int, void *);
 static int zone_ctor(void *, int, void *, int);
 static void zone_dtor(void *, int, void *);
 static int zero_init(void *, int, int);
 static void keg_small_init(uma_keg_t keg);
 static void keg_large_init(uma_keg_t keg);
 static void zone_foreach(void (*zfunc)(uma_zone_t));
 static void zone_timeout(uma_zone_t zone);
 static int hash_alloc(struct uma_hash *);
 static int hash_expand(struct uma_hash *, struct uma_hash *);
 static void hash_free(struct uma_hash *hash);
 static void uma_timeout(void *);
 static void uma_startup3(void);
 static void *zone_alloc_item(uma_zone_t, void *, int, int);
 static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
 static void bucket_enable(void);
 static void bucket_init(void);
 static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
 static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
 static void bucket_zone_drain(void);
 static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
 static uma_slab_t zone_fetch_slab(uma_zone_t, uma_keg_t, int, int);
 static uma_slab_t zone_fetch_slab_multi(uma_zone_t, uma_keg_t, int, int);
 static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
 static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
 static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
     uma_fini fini, int align, uint32_t flags);
 static int zone_import(uma_zone_t, void **, int, int, int);
 static void zone_release(uma_zone_t, void **, int);
 static void uma_zero_item(void *, uma_zone_t);
 
 void uma_print_zone(uma_zone_t);
 void uma_print_stats(void);
 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
 
 #ifdef INVARIANTS
 static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
 static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
 #endif
 
 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
 
 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
 
 SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
     0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
 
 static int zone_warnings = 1;
 SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
     "Warn when UMA zones becomes full");
 
 /*
  * Adjust bytes under management by UMA: atomically subtract 'size' bytes
  * from uma_kmem_total.
  */
 static inline void
 uma_total_dec(unsigned long size)
 {
 
 	atomic_subtract_long(&uma_kmem_total, size);
 }
 
 /*
  * Atomically add 'size' bytes to uma_kmem_total and call
  * uma_reclaim_wakeup() when the value returned by atomic_fetchadd_long()
  * is above uma_kmem_limit.
  *
  * NOTE(review): per atomic(9), fetchadd returns the value from before the
  * addition, so the limit test presumably does not include 'size' itself --
  * confirm that this is intended.
  */
 static inline void
 uma_total_inc(unsigned long size)
 {
 
 	if (atomic_fetchadd_long(&uma_kmem_total, size) > uma_kmem_limit)
 		uma_reclaim_wakeup();
 }
 
 /*
  * This routine checks to see whether or not it's safe to enable buckets.
  *
  * The result of vm_page_count_min() is stored in bucketdisable; while that
  * value is non-zero, bucket_alloc() returns NULL instead of a bucket.
  */
 static void
 bucket_enable(void)
 {
 	bucketdisable = vm_page_count_min();
 }
 
 /*
  * Create one UMA zone per row of bucket_zones[].
  *
  * The item size of each zone is the bucket header, rounded up to pointer
  * alignment, plus one pointer for every entry the bucket can hold.
  */
 static void
 bucket_init(void)
 {
 	struct uma_bucket_zone *bz;
 	int bytes;
 
 	bz = bucket_zones;
 	while (bz->ubz_entries != 0) {
 		/* Header first, then the array of item pointers. */
 		bytes = roundup(sizeof(struct uma_bucket), sizeof(void *)) +
 		    sizeof(void *) * bz->ubz_entries;
 		bz->ubz_zone = uma_zcreate(bz->ubz_name, bytes,
 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_NUMA);
 		bz++;
 	}
 }
 
 /*
  * Map a desired bucket capacity to the bucket zone to allocate from.
  *
  * Returns the first row of bucket_zones[] whose capacity is at least
  * 'entries'; when no row is large enough, the last (largest) row is
  * returned instead.
  */
 static struct uma_bucket_zone *
 bucket_zone_lookup(int entries)
 {
 	struct uma_bucket_zone *bz;
 
 	bz = bucket_zones;
 	while (bz->ubz_entries != 0) {
 		if (bz->ubz_entries >= entries)
 			return (bz);
 		bz++;
 	}
 
 	/* Ran into the sentinel: step back onto the largest real row. */
 	return (bz - 1);
 }
 
 /*
  * Pick a bucket entry count for items of 'size' bytes.
  *
  * Items larger than the first row's ubz_maxsize get a count scaled down
  * from that row (never less than one).  Otherwise the entry count of the
  * last row whose ubz_maxsize is still >= size is returned.
  */
 static int
 bucket_select(int size)
 {
 	struct uma_bucket_zone *bz;
 
 	bz = bucket_zones;
 	if (size > bz->ubz_maxsize)
 		return (MAX((bz->ubz_maxsize * bz->ubz_entries) / size, 1));
 
 	/*
 	 * The first row is known to fit, so the walk advances at least once
 	 * and stepping back by one row below is always in bounds.
 	 */
 	while (bz->ubz_entries != 0 && bz->ubz_maxsize >= size)
 		bz++;
 
 	return ((bz - 1)->ubz_entries);
 }
 
 /*
  * Allocate an empty bucket sized for 'zone'.
  *
  * Arguments:
  *	zone   The zone the bucket will serve
  *	udata  Recursion cookie (only consulted when zone is a bucket zone)
  *	flags  Allocation flags handed on to uma_zalloc_arg()
  *
  * Returns:
  *	An initialized bucket, or NULL when buckets are disabled, when the
  *	recursion limit is hit, or when the allocation fails.
  */
 static uma_bucket_t
 bucket_alloc(uma_zone_t zone, void *udata, int flags)
 {
 	struct uma_bucket_zone *bz;
 	uma_bucket_t bucket;
 	uintptr_t cookie;
 
 	/*
 	 * Refuse to allocate per cpu buckets while we are running out of
 	 * vm.boot_pages, otherwise the boot pages would be exhausted.  The
 	 * same switch keeps us from allocating buckets in low memory
 	 * situations.
 	 */
 	if (bucketdisable)
 		return (NULL);
 
 	/*
 	 * Bucket recursion is bounded with a cookie that travels through
 	 * zalloc_arg/zfree_arg.  For an ordinary zone the cookie is simply
 	 * the zone's flags, which lets the NOVM restriction survive deep
 	 * recursion.  Once we are allocating a bucket on behalf of a bucket
 	 * zone, ZFLAG_BUCKET is added to the cookie; seeing it a second time
 	 * means we have already recursed and must stop.  The cookie also
 	 * persists to frees of unused buckets via the allocation path and to
 	 * bucket allocations in the free path.
 	 */
 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) != 0) {
 		cookie = (uintptr_t)udata;
 		if ((cookie & UMA_ZFLAG_BUCKET) != 0)
 			return (NULL);
 		cookie |= UMA_ZFLAG_BUCKET;
 	} else
 		cookie = (uintptr_t)zone->uz_flags;
 
 	if ((cookie & UMA_ZFLAG_CACHEONLY) != 0)
 		flags |= M_NOVM;
 
 	/* Never take a bucket for a bucket zone from that same zone. */
 	bz = bucket_zone_lookup(zone->uz_count);
 	if (bz->ubz_zone == zone && (bz + 1)->ubz_entries != 0)
 		bz++;
 
 	bucket = uma_zalloc_arg(bz->ubz_zone, (void *)cookie, flags);
 	if (bucket == NULL)
 		return (NULL);
 
 #ifdef INVARIANTS
 	bzero(bucket->ub_bucket, sizeof(void *) * bz->ubz_entries);
 #endif
 	bucket->ub_cnt = 0;
 	bucket->ub_entries = bz->ubz_entries;
 
 	return (bucket);
 }
 
 /*
  * Return an empty bucket to the bucket zone matching its capacity.
  *
  * Arguments:
  *	zone    The zone the bucket was serving
  *	bucket  The bucket to free; must hold no items
  *	udata   Recursion cookie; replaced by the zone's flags unless zone
  *		is itself a bucket zone
  */
 static void
 bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
 {
 	struct uma_bucket_zone *bz;
 
 	KASSERT(bucket->ub_cnt == 0,
 	    ("bucket_free: Freeing a non free bucket."));
 
 	bz = bucket_zone_lookup(bucket->ub_entries);
 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
 		udata = (void *)(uintptr_t)zone->uz_flags;
 
 	uma_zfree_arg(bz->ubz_zone, bucket, udata);
 }
 
 /*
  * Call zone_drain() on every zone listed in bucket_zones[].
  */
 static void
 bucket_zone_drain(void)
 {
 	struct uma_bucket_zone *bz;
 
 	bz = bucket_zones;
 	while (bz->ubz_entries != 0) {
 		zone_drain(bz->ubz_zone);
 		bz++;
 	}
 }
 
 /*
  * Print the zone's warning message, if it has one and zone warnings are
  * enabled.  Output is rate-limited through ratecheck() with a 300 second
  * interval tracked per zone.
  */
 static void
 zone_log_warning(uma_zone_t zone)
 {
 	static const struct timeval warninterval = { 300, 0 };
 
 	if (!zone_warnings)
 		return;
 	if (zone->uz_warning == NULL)
 		return;
 	if (!ratecheck(&zone->uz_ratecheck, &warninterval))
 		return;
 
 	printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
 }
 
 /*
  * Enqueue the zone's uz_maxaction task on taskqueue_thread, provided a
  * task function has been set for it.
  */
 static inline void
 zone_maxaction(uma_zone_t zone)
 {
 
 	if (zone->uz_maxaction.ta_func != NULL)
 		taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
 }
 
 /*
  * Call kegfn once for every keg linked to the zone through uz_kegs.
  */
 static void
 zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
 {
 	uma_klink_t klink;
 
 	LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
 		kegfn(klink->kl_keg);
 }
 
 /*
  * Routine called by timeout which is used to fire off some time interval
  * based calculations.  (stats, hash size, etc.)
  *
  * Arguments:
  *	unused   Unused
  *
  * Returns:
  *	Nothing
  */
 static void
 uma_timeout(void *unused)
 {
 	/* Re-evaluate whether buckets may be used, then visit every zone. */
 	bucket_enable();
 	zone_foreach(zone_timeout);
 
 	/* Reschedule this event */
 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 }
 
 /*
  * Per-keg part of the timeout driven calculations.  For kegs flagged
  * UMA_ZONE_HASH this grows the slab hash table once the number of slabs
  * (uk_pages / uk_ppera) has reached the current hash size.
  *
  *  Returns nothing.
  */
 static void
 keg_timeout(uma_keg_t keg)
 {
 
 	KEG_LOCK(keg);
 	/*
 	 * Expand the keg hash table.
 	 *
 	 * This is done if the number of slabs is larger than the hash size.
 	 * What I'm trying to do here is completely reduce collisions.  This
 	 * may be a little aggressive.  Should I allow for two collisions max?
 	 */
 	if (keg->uk_flags & UMA_ZONE_HASH &&
 	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
 		struct uma_hash newhash;
 		struct uma_hash oldhash;
 		int ret;
 
 		/*
 		 * This is so involved because allocating and freeing
 		 * while the keg lock is held will lead to deadlock.
 		 * I have to do everything in stages and check for
 		 * races.
 		 */
 		newhash = keg->uk_hash;
 		KEG_UNLOCK(keg);
 		ret = hash_alloc(&newhash);
 		KEG_LOCK(keg);
 		if (ret) {
 			/*
 			 * hash_expand() returns 0 when the keg's table is
 			 * no longer smaller than the new one; in that case
 			 * the freshly allocated table is the one discarded.
 			 */
 			if (hash_expand(&keg->uk_hash, &newhash)) {
 				oldhash = keg->uk_hash;
 				keg->uk_hash = newhash;
 			} else
 				oldhash = newhash;
 
 			KEG_UNLOCK(keg);
 			hash_free(&oldhash);
 			return;
 		}
 	}
 	KEG_UNLOCK(keg);
 }
 
 /*
  * Per-zone timeout handler: runs keg_timeout() on each of the zone's kegs.
  */
 static void
 zone_timeout(uma_zone_t zone)
 {
 
 	zone_foreach_keg(zone, &keg_timeout);
 }
 
 /*
  * Allocate and zero fill the next sized hash table from the appropriate
  * backing store.
  *
  * Arguments:
  *	hash  A new hash structure with the old hash size in uh_hashsize
  *
  * Returns:
  *	1 on success and 0 on failure.  On return uh_hashsize holds the new
  *	size in either case; uh_hashmask is only updated on success.
  */
 static int
 hash_alloc(struct uma_hash *hash)
 {
 	int bytes;
 	int prevsize;
 
 	prevsize = hash->uh_hashsize;
 
 	if (prevsize == 0) {
 		/* First table: taken from the dedicated hash zone. */
 		bytes = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
 		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
 		    UMA_ANYDOMAIN, M_WAITOK);
 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
 	} else {
 		/* Double the table; this malloc() is allowed to fail. */
 		hash->uh_hashsize = prevsize * 2;
 		bytes = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
 		hash->uh_slab_hash = (struct slabhead *)malloc(bytes,
 		    M_UMAHASH, M_NOWAIT);
 	}
 
 	if (hash->uh_slab_hash == NULL)
 		return (0);
 
 	bzero(hash->uh_slab_hash, bytes);
 	hash->uh_hashmask = hash->uh_hashsize - 1;
 
 	return (1);
 }
 
 /*
  * Expands the hash table for HASH zones.  This is done from zone_timeout
  * to reduce collisions.  This must not be done in the regular allocation
  * path, otherwise, we can recurse on the vm while allocating pages.
  *
  * Arguments:
  *	oldhash  The hash you want to expand
  *	newhash  The hash structure for the new table
  *
  * Returns:
  *	Nothing
  *
  * Discussion:
  */
 static int
 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
 {
 	uma_slab_t slab;
 	int hval;
 	int i;
 
 	if (!newhash->uh_slab_hash)
 		return (0);
 
 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
 		return (0);
 
 	/*
 	 * I need to investigate hash algorithms for resizing without a
 	 * full rehash.
 	 */
 
 	for (i = 0; i < oldhash->uh_hashsize; i++)
 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
 			hval = UMA_HASH(newhash, slab->us_data);
 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
 			    slab, us_hlink);
 		}
 
 	return (1);
 }
 
 /*
  * Free a hash table to the appropriate backing store.
  *
  * Arguments:
  *	hash  The hash whose uh_slab_hash table is released; uh_hashsize
  *	      selects the backing store (hashzone for the initial size,
  *	      M_UMAHASH malloc otherwise)
  *
  * Returns:
  *	Nothing
  */
 static void
 hash_free(struct uma_hash *hash)
 {
 
 	if (hash->uh_slab_hash == NULL)
 		return;
 
 	if (hash->uh_hashsize != UMA_HASH_SIZE_INIT) {
 		free(hash->uh_slab_hash, M_UMAHASH);
 		return;
 	}
 	zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
 }
 
 /*
  * Frees all outstanding items in a bucket.  Each item is first passed to
  * the zone's fini routine (if any); the whole array is then handed to the
  * zone's release routine and the bucket is marked empty.
  *
  * Arguments:
  *	zone   The zone to free to, must be unlocked.
  *	bucket The free/alloc bucket with items, cpu queue must be locked.
  *	       May be NULL, in which case nothing is done.
  *
  * Returns:
  *	Nothing
  */
 
 static void
 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
 {
 	int n;
 
 	if (bucket == NULL)
 		return;
 
 	if (zone->uz_fini != NULL) {
 		for (n = 0; n < bucket->ub_cnt; n++)
 			zone->uz_fini(bucket->ub_bucket[n], zone->uz_size);
 	}
 
 	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
 	bucket->ub_cnt = 0;
 }
 
 /*
  * Drains the per cpu caches for a zone.
  *
  * NOTE: This may only be called while the zone is being torn down, and not
  * during normal operation.  This is necessary in order that we do not have
  * to migrate CPUs to drain the per-CPU caches.
  *
  * Arguments:
  *	zone     The zone to drain, must be unlocked.
  *
  * Returns:
  *	Nothing
  */
 static void
 cache_drain(uma_zone_t zone)
 {
 	uma_cache_t cache;
 	uma_bucket_t allocb, freeb;
 	int cpu;
 
 	/*
 	 * XXX: It is safe to not lock the per-CPU caches, because we're
 	 * tearing down the zone anyway.  I.e., there will be no further use
 	 * of the caches at this point.
 	 *
 	 * XXX: It would be good to be able to assert that the zone is being
 	 * torn down to prevent improper use of cache_drain().
 	 *
 	 * XXX: We lock the zone before passing into bucket_cache_drain() as
 	 * it is used elsewhere.  Should the tear-down path be made special
 	 * there in some form?
 	 */
 	CPU_FOREACH(cpu) {
 		cache = &zone->uz_cpu[cpu];
 		allocb = cache->uc_allocbucket;
 		freeb = cache->uc_freebucket;
 
 		/* Empty both buckets before giving either of them back. */
 		bucket_drain(zone, allocb);
 		bucket_drain(zone, freeb);
 		if (allocb != NULL)
 			bucket_free(zone, allocb, NULL);
 		if (freeb != NULL)
 			bucket_free(zone, freeb, NULL);
 
 		cache->uc_freebucket = NULL;
 		cache->uc_allocbucket = NULL;
 	}
 
 	ZONE_LOCK(zone);
 	bucket_cache_drain(zone);
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * Move the zone's bucket size (uz_count) halfway towards its minimum
  * (uz_count_min).  Zones flagged UMA_ZFLAG_INTERNAL are left alone.
  */
 static void
 cache_shrink(uma_zone_t zone)
 {
 
 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
 		return;
 
 	ZONE_LOCK(zone);
 	zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * Detach the alloc and free buckets from the current CPU's cache of a zone.
  * Buckets that still hold items are put on the zone's bucket list for the
  * chosen domain; empty buckets are freed once the critical section and the
  * zone lock have been left.  Zones flagged UMA_ZFLAG_INTERNAL are skipped.
  */
 static void
 cache_drain_safe_cpu(uma_zone_t zone)
 {
 	uma_cache_t cache;
 	uma_bucket_t b1, b2;
 	int domain;
 
 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
 		return;
 
 	b1 = b2 = NULL;
 	ZONE_LOCK(zone);
 	critical_enter();
 	/* NUMA zones use the current CPU's domain, all others domain 0. */
 	if (zone->uz_flags & UMA_ZONE_NUMA)
 		domain = PCPU_GET(domain);
 	else
 		domain = 0;
 	cache = &zone->uz_cpu[curcpu];
 	if (cache->uc_allocbucket) {
 		if (cache->uc_allocbucket->ub_cnt != 0)
 			LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
 			    cache->uc_allocbucket, ub_link);
 		else
 			b1 = cache->uc_allocbucket;
 		cache->uc_allocbucket = NULL;
 	}
 	if (cache->uc_freebucket) {
 		if (cache->uc_freebucket->ub_cnt != 0)
 			LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
 			    cache->uc_freebucket, ub_link);
 		else
 			b2 = cache->uc_freebucket;
 		cache->uc_freebucket = NULL;
 	}
 	critical_exit();
 	ZONE_UNLOCK(zone);
 	/* Empty buckets are released only after the locks are dropped. */
 	if (b1)
 		bucket_free(zone, b1, NULL);
 	if (b2)
 		bucket_free(zone, b2, NULL);
 }
 
 /*
  * Safely drain the per-CPU caches of one zone, or of all zones when 'zone'
  * is NULL, into the zone bucket lists.
  * This is an expensive call because it needs to bind to all CPUs
  * one by one and enter a critical section on each of them in order
  * to safely access their cache buckets.
  * The zone lock must not be held when calling this function.
  */
 static void
 cache_drain_safe(uma_zone_t zone)
 {
 	int cpu;
 
 	/*
 	 * Polite bucket sizes shrinking was not enough, shrink aggressively.
 	 */
 	if (zone)
 		cache_shrink(zone);
 	else
 		zone_foreach(cache_shrink);
 
 	CPU_FOREACH(cpu) {
 		/* Run on the target CPU so that curcpu names its cache. */
 		thread_lock(curthread);
 		sched_bind(curthread, cpu);
 		thread_unlock(curthread);
 
 		if (zone)
 			cache_drain_safe_cpu(zone);
 		else
 			zone_foreach(cache_drain_safe_cpu);
 	}
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
 }
 
 /*
  * Drain the cached buckets from a zone.  Expects a locked zone on entry.
  * The zone lock is dropped while each bucket is drained and freed, and is
  * held again on return.
  */
 static void
 bucket_cache_drain(uma_zone_t zone)
 {
 	uma_zone_domain_t zdom;
 	uma_bucket_t bucket;
 	int i;
 
 	/*
 	 * Drain the bucket queues and free the buckets.
 	 */
 	for (i = 0; i < vm_ndomains; i++) {
 		zdom = &zone->uz_domain[i];
 		while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
 			LIST_REMOVE(bucket, ub_link);
 			ZONE_UNLOCK(zone);
 			bucket_drain(zone, bucket);
 			bucket_free(zone, bucket, NULL);
 			ZONE_LOCK(zone);
 		}
 	}
 
 	/*
 	 * Shrink further bucket sizes.  Price of single zone lock collision
 	 * is probably lower than price of global cache drain.
 	 */
 	if (zone->uz_count > zone->uz_count_min)
 		zone->uz_count--;
 }
 
 /*
  * Release a slab and the memory backing it.
  *
  * Arguments:
  *	keg    The keg the slab belongs to
  *	slab   The slab to release
  *	start  Number of leading items to run the keg's fini routine on;
  *	       they are visited from index start - 1 down to 0
  */
 static void
 keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
 {
 	uint8_t *base;
 	uint8_t sflags;
 	int idx;
 
 	CTR4(KTR_UMA, "keg_free_slab keg %s(%p) slab %p, returning %d bytes",
 	    keg->uk_name, keg, slab, PAGE_SIZE * keg->uk_ppera);
 
 	/*
 	 * Remember what the final free needs now: the slab header is given
 	 * back below, either to the slab zone or together with the memory.
 	 */
 	base = slab->us_data;
 	sflags = slab->us_flags;
 
 	if (keg->uk_fini != NULL) {
 		for (idx = start - 1; idx >= 0; idx--)
 			keg->uk_fini(slab->us_data + (keg->uk_rsize * idx),
 			    keg->uk_size);
 	}
 
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 
 	keg->uk_freef(base, PAGE_SIZE * keg->uk_ppera, sflags);
 	uma_total_dec(PAGE_SIZE * keg->uk_ppera);
 }
 
 /*
  * Frees pages from a keg back to the system.  This is done on demand from
  * the pageout daemon.
  *
  * Completely free slabs are unlinked under the keg lock and collected on a
  * private list; the actual freeing happens after the lock is dropped.
  *
  * Returns nothing.
  */
 static void
 keg_drain(uma_keg_t keg)
 {
 	struct slabhead freeslabs = { 0 };
 	uma_domain_t dom;
 	uma_slab_t slab, tmp;
 	int i;
 
 	/*
 	 * We don't want to take pages from statically allocated kegs at this
 	 * time
 	 */
 	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
 		return;
 
 	CTR3(KTR_UMA, "keg_drain %s(%p) free items: %u",
 	    keg->uk_name, keg, keg->uk_free);
 	KEG_LOCK(keg);
 	if (keg->uk_free == 0)
 		goto finished;
 
 	for (i = 0; i < vm_ndomains; i++) {
 		dom = &keg->uk_domain[i];
 		LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) {
 			/* We have nowhere to free these to. */
 			if (slab->us_flags & UMA_SLAB_BOOT)
 				continue;
 
 			/* Unlink the slab and take it out of the keg totals. */
 			LIST_REMOVE(slab, us_link);
 			keg->uk_pages -= keg->uk_ppera;
 			keg->uk_free -= keg->uk_ipers;
 
 			if (keg->uk_flags & UMA_ZONE_HASH)
 				UMA_HASH_REMOVE(&keg->uk_hash, slab,
 				    slab->us_data);
 
 			/* us_hlink is reused to chain the doomed slabs. */
 			SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
 		}
 	}
 
 finished:
 	KEG_UNLOCK(keg);
 
 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
 		keg_free_slab(keg, slab, keg->uk_ipers);
 	}
 }
 
 /*
  * Drain a zone: release its cached buckets, then drain each of its kegs.
  *
  * Arguments:
  *	zone    The zone to drain; must not be locked by the caller
  *	waitok  M_NOWAIT returns at once if another drain of this zone is
  *		in progress; any other value sleeps until that drain ends
  */
 static void
 zone_drain_wait(uma_zone_t zone, int waitok)
 {
 
 	/*
 	 * Set draining to interlock with zone_dtor() so we can release our
 	 * locks as we go.  Only dtor() should do a WAITOK call since it
 	 * is the only call that knows the structure will still be available
 	 * when it wakes up.
 	 */
 	ZONE_LOCK(zone);
 	while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
 		if (waitok == M_NOWAIT)
 			goto out;
 		msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
 	}
 	zone->uz_flags |= UMA_ZFLAG_DRAINING;
 	bucket_cache_drain(zone);
 	ZONE_UNLOCK(zone);
 	/*
 	 * The DRAINING flag protects us from being freed while
 	 * we're running.  Normally the uma_rwlock would protect us but we
 	 * must be able to release and acquire the right lock for each keg.
 	 */
 	zone_foreach_keg(zone, &keg_drain);
 	ZONE_LOCK(zone);
 	zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
 	/* Let any thread sleeping in the loop above retry. */
 	wakeup(zone);
 out:
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * Drain a zone without waiting: equivalent to zone_drain_wait() with
  * M_NOWAIT, so nothing is done if the zone is already being drained.
  */
 void
 zone_drain(uma_zone_t zone)
 {
 
 	zone_drain_wait(zone, M_NOWAIT);
 }
 
 /*
  * Allocate a new slab for a keg.  This does not insert the slab onto a list.
  *
  * The keg lock must be held on entry (asserted); it is dropped while the
  * memory is obtained and held again on return.
  *
  * Arguments:
  *	keg     The keg to allocate the slab for
  *	zone    Passed through to the keg's page allocator
  *	domain  Domain to allocate from; must be in [0, vm_ndomains)
  *	wait    Shall we wait?
  *
  * Returns:
  *	The slab that was allocated or NULL if there is no memory and the
  *	caller specified M_NOWAIT.
  */
 static uma_slab_t
 keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
 {
 	uma_alloc allocf;
 	uma_slab_t slab;
 	unsigned long size;
 	uint8_t *mem;
 	uint8_t flags;
 	int i;
 
 	KASSERT(domain >= 0 && domain < vm_ndomains,
 	    ("keg_alloc_slab: domain %d out of range", domain));
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	slab = NULL;
 	mem = NULL;
 
 	allocf = keg->uk_allocf;
 	KEG_UNLOCK(keg);
 	size = keg->uk_ppera * PAGE_SIZE;
 
 	/* An off-page slab header comes from the keg's slab zone. */
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 		slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, wait);
 		if (slab == NULL)
 			goto out;
 	}
 
 	/*
 	 * This reproduces the old vm_zone behavior of zero filling pages the
 	 * first time they are added to a zone.
 	 *
 	 * Malloced items are zeroed in uma_zalloc.
 	 */
 
 	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
 		wait |= M_ZERO;
 	else
 		wait &= ~M_ZERO;
 
 	if (keg->uk_flags & UMA_ZONE_NODUMP)
 		wait |= M_NODUMP;
 
 	/* zone is passed for legacy reasons. */
 	mem = allocf(zone, size, domain, &flags, wait);
 	if (mem == NULL) {
 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 		slab = NULL;
 		goto out;
 	}
 	uma_total_inc(size);
 
 	/* Point the slab into the allocated memory */
 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
 		slab = (uma_slab_t )(mem + keg->uk_pgoff);
 
 	/* Record the owning slab for every page of the allocation. */
 	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
 		for (i = 0; i < keg->uk_ppera; i++)
 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
 
 	slab->us_keg = keg;
 	slab->us_data = mem;
 	slab->us_freecount = keg->uk_ipers;
 	slab->us_flags = flags;
 	slab->us_domain = domain;
 	BIT_FILL(SLAB_SETSIZE, &slab->us_free);
 #ifdef INVARIANTS
 	BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
 #endif
 
 	if (keg->uk_init != NULL) {
 		for (i = 0; i < keg->uk_ipers; i++)
 			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
 			    keg->uk_size, wait) != 0)
 				break;
 		/* An init failed: undo the i items done and free the slab. */
 		if (i != keg->uk_ipers) {
 			keg_free_slab(keg, slab, i);
 			slab = NULL;
 			goto out;
 		}
 	}
 out:
 	KEG_LOCK(keg);
 
 	CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)",
 	    slab, keg->uk_name, keg);
 
 	if (slab != NULL) {
 		if (keg->uk_flags & UMA_ZONE_HASH)
 			UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
 
 		keg->uk_pages += keg->uk_ppera;
 		keg->uk_free += keg->uk_ipers;
 	}
 
 	return (slab);
 }
 
 /*
  * This function is intended to be used early on in place of page_alloc() so
  * that we may use the boot time page cache to satisfy allocations before
  * the VM is ready.
  *
  * Arguments:
  *	zone    Zone whose first keg is being served
  *	bytes   The number of bytes requested
  *	domain  Passed on to the real allocator once the boot cache is empty
  *	pflag   Set to UMA_SLAB_BOOT when boot pages are handed out
  *	wait    Passed on to the real allocator once the boot cache is empty
  *
  * Returns:
  *	Boot memory, or whatever the keg's real allocator returns.  Panics
  *	if the boot pages run out before UMA_STARTUP2 has been reached.
  */
 static void *
 startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
     int wait)
 {
 	uma_keg_t keg;
 	void *mem;
 	int pages;
 
 	keg = zone_first_keg(zone);
 	pages = howmany(bytes, PAGE_SIZE);
 	KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
 
 	/*
 	 * Check our small startup cache to see if it has pages remaining.
 	 */
 	mtx_lock(&uma_boot_pages_mtx);
 	if (pages <= boot_pages) {
 		mem = bootmem;
 		boot_pages -= pages;
 		bootmem += pages * PAGE_SIZE;
 		mtx_unlock(&uma_boot_pages_mtx);
 		*pflag = UMA_SLAB_BOOT;
 		return (mem);
 	}
 	mtx_unlock(&uma_boot_pages_mtx);
 	if (booted < UMA_STARTUP2)
 		panic("UMA: Increase vm.boot_pages");
 	/*
 	 * Now that we've booted reset these users to their real allocator.
 	 */
 #ifdef UMA_MD_SMALL_ALLOC
 	keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
 #else
 	keg->uk_allocf = page_alloc;
 #endif
 	return keg->uk_allocf(zone, bytes, domain, pflag, wait);
 }
 
 /*
  * Allocates a number of pages from the system
  *
  * Arguments:
  *	zone    Unused
  *	bytes   The number of bytes requested
  *	domain  Domain handed to kmem_malloc_domain()
  *	pflag   Set to UMA_SLAB_KERNEL
  *	wait    Shall we wait?
  *
  * Returns:
  *	A pointer to the alloced memory or possibly
  *	NULL if M_NOWAIT is set.
  */
 static void *
 page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
     int wait)
 {
 	void *p;	/* Returned page */
 
 	*pflag = UMA_SLAB_KERNEL;
 	p = (void *) kmem_malloc_domain(domain, bytes, wait);
 
 	return (p);
 }
 
 /*
  * Allocates a number of pages from within an object
  *
  * Pages are allocated one at a time without an object, then mapped at
  * consecutive addresses carved out of the keg's uk_kva range.
  *
  * Arguments:
  *	zone    Zone whose first keg supplies uk_kva and uk_offset
  *	bytes   The number of bytes requested
  *	domain  Domain to allocate the pages from
  *	flags   Set to UMA_SLAB_PRIV on success
  *	wait    Shall we wait?
  *
  * Returns:
  *	A pointer to the alloced memory or possibly
  *	NULL if M_NOWAIT is set.
  */
 static void *
 noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
     int wait)
 {
 	TAILQ_HEAD(, vm_page) alloctail;
 	u_long npages;
 	vm_offset_t retkva, zkva;
 	vm_page_t p, p_next;
 	uma_keg_t keg;
 
 	TAILQ_INIT(&alloctail);
 	keg = zone_first_keg(zone);
 
 	npages = howmany(bytes, PAGE_SIZE);
 	while (npages > 0) {
 		p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT |
 		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
 		    ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK :
 		    VM_ALLOC_NOWAIT));
 		if (p != NULL) {
 			/*
 			 * Since the page does not belong to an object, its
 			 * listq is unused.
 			 */
 			TAILQ_INSERT_TAIL(&alloctail, p, listq);
 			npages--;
 			continue;
 		}
 		/*
 		 * Page allocation failed, free intermediate pages and
 		 * exit.
 		 */
 		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
 			vm_page_unwire(p, PQ_NONE);
 			vm_page_free(p); 
 		}
 		return (NULL);
 	}
 	*flags = UMA_SLAB_PRIV;
 	/* Atomically reserve the next page-rounded KVA range of the keg. */
 	zkva = keg->uk_kva +
 	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
 	retkva = zkva;
 	/* Map the pages one after another into the reserved range. */
 	TAILQ_FOREACH(p, &alloctail, listq) {
 		pmap_qenter(zkva, &p, 1);
 		zkva += PAGE_SIZE;
 	}
 
 	return ((void *)retkva);
 }
 
 /*
  * Frees a number of pages to the system
  *
  * Arguments:
  *	mem   A pointer to the memory to be freed
  *	size  The size of the memory being freed
  *	flags The original p->us_flags field; must include UMA_SLAB_KERNEL,
  *	      anything else is a panic
  *
  * Returns:
  *	Nothing
  */
 static void
 page_free(void *mem, vm_size_t size, uint8_t flags)
 {
 
 	/* Only memory that came from the kernel arena can be returned. */
 	if ((flags & UMA_SLAB_KERNEL) == 0)
 		panic("UMA: page_free used with invalid flags %x", flags);
 
 	kmem_free(kernel_arena, (vm_offset_t)mem, size);
 }
 
 /*
  * Zero fill initializer
  *
  * Clears 'size' bytes at 'mem' and always reports success (0); 'flags'
  * is not used.  keg_ctor() installs it as uk_init for UMA_ZONE_ZINIT kegs.
  *
  * Arguments/Returns follow uma_init specifications
  */
 static int
 zero_init(void *mem, int size, int flags)
 {
 	bzero(mem, size);
 	return (0);
 }
 
 /*
  * Finish creating a small uma keg.  This calculates ipers, and the keg size.
  *
  * On return uk_ppera, uk_rsize and uk_ipers are set, and UMA_ZONE_OFFPAGE
  * and/or UMA_ZONE_HASH may have been added to uk_flags.
  *
  * Arguments
  *	keg  The keg we should initialize
  *
  * Returns
  *	Nothing
  */
 static void
 keg_small_init(uma_keg_t keg)
 {
 	u_int rsize;
 	u_int memused;
 	u_int wastedspace;
 	u_int shsize;
 	u_int slabsize;
 
 	/* Per-CPU kegs use one struct pcpu sized region per CPU. */
 	if (keg->uk_flags & UMA_ZONE_PCPU) {
 		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
 
 		slabsize = sizeof(struct pcpu);
 		keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
 		    PAGE_SIZE);
 	} else {
 		slabsize = UMA_SLAB_SIZE;
 		keg->uk_ppera = 1;
 	}
 
 	/*
 	 * Calculate the size of each allocation (rsize) according to
 	 * alignment.  If the requested size is smaller than we have
 	 * allocation bits for we round it up.
 	 */
 	rsize = keg->uk_size;
 	if (rsize < slabsize / SLAB_SETSIZE)
 		rsize = slabsize / SLAB_SETSIZE;
 	/* uk_align is a mask; round rsize up to the next multiple. */
 	if (rsize & keg->uk_align)
 		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
 	keg->uk_rsize = rsize;
 
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
 	    keg->uk_rsize < sizeof(struct pcpu),
 	    ("%s: size %u too large", __func__, keg->uk_rsize));
 
 	/* The in-slab header only takes space when it is not off-page. */
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 		shsize = 0;
 	else 
 		shsize = sizeof(struct uma_slab);
 
 	keg->uk_ipers = (slabsize - shsize) / rsize;
 	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
 	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 
 	memused = keg->uk_ipers * rsize + shsize;
 	wastedspace = slabsize - memused;
 
 	/*
 	 * We can't do OFFPAGE if we're internal or if we've been
 	 * asked to not go to the VM for buckets.  If we do this we
 	 * may end up going to the VM  for slabs which we do not
 	 * want to do if we're UMA_ZFLAG_CACHEONLY as a result
 	 * of UMA_ZONE_VM, which clearly forbids it.
 	 */
 	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
 	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
 		return;
 
 	/*
 	 * See if using an OFFPAGE slab will limit our waste.  Only do
 	 * this if it permits more items per-slab.
 	 *
 	 * XXX We could try growing slabsize to limit max waste as well.
 	 * Historically this was not done because the VM could not
 	 * efficiently handle contiguous allocations.
 	 */
 	if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
 	    (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
 		keg->uk_ipers = slabsize / keg->uk_rsize;
 		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
 		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 		CTR6(KTR_UMA, "UMA decided we need offpage slab headers for "
 		    "keg: %s(%p), calculated wastedspace = %d, "
 		    "maximum wasted space allowed = %d, "
 		    "calculated ipers = %d, "
 		    "new wasted space = %d\n", keg->uk_name, keg, wastedspace,
 		    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
 		    slabsize - keg->uk_ipers * keg->uk_rsize);
 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
 	}
 
 	/* Off-page slabs without VTOSLAB are located through the hash. */
 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
 		keg->uk_flags |= UMA_ZONE_HASH;
 }
 
 /*
  * Finish creating a large (> UMA_SLAB_SIZE) uma kegs.  Just give in and do
  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
  * more complicated.
  *
  * Each slab holds exactly one item (uk_ipers = 1) and spans as many pages
  * as the item needs.
  *
  * Arguments
  *	keg  The keg we should initialize
  *
  * Returns
  *	Nothing
  */
 static void
 keg_large_init(uma_keg_t keg)
 {
 	u_int shsize;
 
 	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
 	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
 	    ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
 	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
 
 	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
 	keg->uk_ipers = 1;
 	keg->uk_rsize = keg->uk_size;
 
 	/* Check whether we have enough space to not do OFFPAGE. */
 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
 		/* Slab header size rounded up to pointer alignment. */
 		shsize = sizeof(struct uma_slab);
 		if (shsize & UMA_ALIGN_PTR)
 			shsize = (shsize & ~UMA_ALIGN_PTR) +
 			    (UMA_ALIGN_PTR + 1);
 
 		if (PAGE_SIZE * keg->uk_ppera - keg->uk_rsize < shsize) {
 			/*
 			 * We can't do OFFPAGE if we're internal, in which case
 			 * we need an extra page per allocation to contain the
 			 * slab header.
 			 */
 			if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) == 0)
 				keg->uk_flags |= UMA_ZONE_OFFPAGE;
 			else
 				keg->uk_ppera++;
 		}
 	}
 
 	/* Off-page slabs without VTOSLAB are located through the hash. */
 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
 		keg->uk_flags |= UMA_ZONE_HASH;
 }
 
 /*
  * Finish creating a UMA_ZONE_CACHESPREAD keg.  Computes uk_rsize, uk_ppera
  * and uk_ipers and forces UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB.  Must not be
  * used for UMA_ZONE_PCPU kegs (asserted).
  *
  * Arguments
  *	keg  The keg we should initialize
  *
  * Returns
  *	Nothing
  */
 static void
 keg_cachespread_init(uma_keg_t keg)
 {
 	int alignsize;
 	int trailer;
 	int pages;
 	int rsize;
 
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
 	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
 
 	alignsize = keg->uk_align + 1;
 	rsize = keg->uk_size;
 	/*
 	 * We want one item to start on every align boundary in a page.  To
 	 * do this we will span pages.  We will also extend the item by the
 	 * size of align if it is an even multiple of align.  Otherwise, it
 	 * would fall on the same boundary every time.
 	 */
 	if (rsize & keg->uk_align)
 		rsize = (rsize & ~keg->uk_align) + alignsize;
 	if ((rsize & alignsize) == 0)
 		rsize += alignsize;
 	/* Padding added to each item beyond the requested size. */
 	trailer = rsize - keg->uk_size;
 	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
 	/* Cap the slab at 128KB worth of pages. */
 	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
 	keg->uk_rsize = rsize;
 	keg->uk_ppera = pages;
 	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
 	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
 	KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
 	    ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
 	    keg->uk_ipers));
 }
 
 /*
  * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
  * the keg onto the global keg list.
  *
  * Arguments/Returns follow uma_ctor specifications
  *	mem    The keg being constructed
  *	size   Number of bytes at mem that are zeroed before initialization
  *	udata  Actually uma_kctor_args
  *	flags  Not referenced by this constructor
  *
  * Every path through this function returns 0; an impossible slab layout
  * panics instead of returning an error.
  */
 static int
 keg_ctor(void *mem, int size, void *udata, int flags)
 {
 	struct uma_kctor_args *arg = udata;
 	uma_keg_t keg = mem;
 	uma_zone_t zone;
 
 	bzero(keg, size);
 	keg->uk_size = arg->size;
 	keg->uk_init = arg->uminit;
 	keg->uk_fini = arg->fini;
 	keg->uk_align = arg->align;
 	keg->uk_cursor = 0;
 	keg->uk_free = 0;
 	keg->uk_reserve = 0;
 	keg->uk_pages = 0;
 	keg->uk_flags = arg->flags;
 	keg->uk_slabzone = NULL;
 
 	/*
 	 * The master zone is passed to us at keg-creation time.
 	 */
 	zone = arg->zone;
 	keg->uk_name = zone->uz_name;
 
 	if (arg->flags & UMA_ZONE_VM)
 		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
 
 	/* UMA_ZONE_ZINIT overrides any caller-supplied init routine. */
 	if (arg->flags & UMA_ZONE_ZINIT)
 		keg->uk_init = zero_init;
 
 	if (arg->flags & UMA_ZONE_MALLOC)
 		keg->uk_flags |= UMA_ZONE_VTOSLAB;
 
 	/*
 	 * On SMP kernels a per-CPU keg is forced to UMA_ZONE_OFFPAGE; on
 	 * non-SMP kernels the PCPU flag is simply dropped from the keg.
 	 */
 	if (arg->flags & UMA_ZONE_PCPU)
 #ifdef SMP
 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
 #else
 		keg->uk_flags &= ~UMA_ZONE_PCPU;
 #endif
 
 	/*
 	 * Compute the slab layout.  Items that do not fit in a slab next to
 	 * an embedded slab header go through the large-item path.
 	 */
 	if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
 		keg_cachespread_init(keg);
 	} else {
 		if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
 			keg_large_init(keg);
 		else
 			keg_small_init(keg);
 	}
 
 	/*
 	 * This test must follow the layout routines above:
 	 * keg_cachespread_init() sets UMA_ZONE_OFFPAGE itself.
 	 */
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 		keg->uk_slabzone = slabzone;
 
 	/*
 	 * If we haven't booted yet we need allocations to go through the
 	 * startup cache until the vm is ready.
 	 */
 	if (booted < UMA_STARTUP2)
 		keg->uk_allocf = startup_alloc;
 #ifdef UMA_MD_SMALL_ALLOC
 	else if (keg->uk_ppera == 1)
 		keg->uk_allocf = uma_small_alloc;
 #endif
 	else
 		keg->uk_allocf = page_alloc;
 	/*
 	 * The free routine is chosen on uk_ppera alone; unlike the
 	 * allocation routine it does not depend on the boot stage.
 	 */
 #ifdef UMA_MD_SMALL_ALLOC
 	if (keg->uk_ppera == 1)
 		keg->uk_freef = uma_small_free;
 	else
 #endif
 		keg->uk_freef = page_free;
 
 	/*
 	 * Initialize keg's lock
 	 */
 	KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
 
 	/*
 	 * If we're putting the slab header in the actual page we need to
 	 * figure out where in each page it goes.  This calculates a right
 	 * justified offset into the memory on an ALIGN_PTR boundary.
 	 */
 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
 		u_int totsize;
 
 		/* Size of the slab struct and free list */
 		totsize = sizeof(struct uma_slab);
 
 		/* Round the header size up to the pointer alignment. */
 		if (totsize & UMA_ALIGN_PTR)
 			totsize = (totsize & ~UMA_ALIGN_PTR) +
 			    (UMA_ALIGN_PTR + 1);
 		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
 
 		/*
 		 * The only way the following is possible is if with our
 		 * UMA_ALIGN_PTR adjustments we are now bigger than
 		 * UMA_SLAB_SIZE.  I haven't checked whether this is
 		 * mathematically possible for all cases, so we make
 		 * sure here anyway.
 		 */
 		totsize = keg->uk_pgoff + sizeof(struct uma_slab);
 		if (totsize > PAGE_SIZE * keg->uk_ppera) {
 			printf("zone %s ipers %d rsize %d size %d\n",
 			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
 			    keg->uk_size);
 			panic("UMA slab won't fit.");
 		}
 	}
 
 	if (keg->uk_flags & UMA_ZONE_HASH)
 		hash_alloc(&keg->uk_hash);
 
 	CTR5(KTR_UMA, "keg_ctor %p zone %s(%p) out %d free %d\n",
 	    keg, zone->uz_name, zone,
 	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
 	    keg->uk_free);
 
 	/* The creating zone becomes the first entry on the keg's zone list. */
 	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
 
 	/* Publish the keg on the global list under the write lock. */
 	rw_wlock(&uma_rwlock);
 	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
 	rw_wunlock(&uma_rwlock);
 	return (0);
 }
 
 /*
  * Zone header ctor.  This initializes all fields, locks, etc.
  *
  * Three kinds of zone are handled: a pure cache zone (arg->import set),
  * a secondary zone layered on an existing keg (UMA_ZONE_SECONDARY), and a
  * regular zone that gets its own keg, either freshly allocated or, when
  * arg->keg is supplied, constructed in place.
  *
  * Arguments/Returns follow uma_ctor specifications
  *	mem    The zone being constructed
  *	size   Number of bytes at mem that are zeroed before initialization
  *	udata  Actually uma_zctor_args
  *	flags  Passed through to keg_ctor() when a supplied keg is built
  *
  * Returns 0 on success, ENOMEM if uma_kcreate() fails, or the error
  * returned by keg_ctor().
  */
 static int
 zone_ctor(void *mem, int size, void *udata, int flags)
 {
 	struct uma_zctor_args *arg = udata;
 	uma_zone_t zone = mem;
 	uma_zone_t z;
 	uma_keg_t keg;
 
 	bzero(zone, size);
 	zone->uz_name = arg->name;
 	zone->uz_ctor = arg->ctor;
 	zone->uz_dtor = arg->dtor;
 	zone->uz_slab = zone_fetch_slab;
 	zone->uz_init = NULL;
 	zone->uz_fini = NULL;
 	zone->uz_allocs = 0;
 	zone->uz_frees = 0;
 	zone->uz_fails = 0;
 	zone->uz_sleeps = 0;
 	zone->uz_count = 0;
 	zone->uz_count_min = 0;
 	zone->uz_flags = 0;
 	zone->uz_warning = NULL;
 	/* The domain structures follow the cpu structures. */
 	zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
 	timevalclear(&zone->uz_ratecheck);
 	keg = arg->keg;
 
 	ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
 
 	/*
 	 * This is a pure cache zone, no kegs.
 	 */
 	if (arg->import) {
 		if (arg->flags & UMA_ZONE_VM)
 			arg->flags |= UMA_ZFLAG_CACHEONLY;
 		zone->uz_flags = arg->flags;
 		zone->uz_size = arg->size;
 		zone->uz_import = arg->import;
 		zone->uz_release = arg->release;
 		zone->uz_arg = arg->arg;
 		/* With no keg to borrow a lock from, use the zone's own. */
 		zone->uz_lockptr = &zone->uz_lock;
 		rw_wlock(&uma_rwlock);
 		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
 		rw_wunlock(&uma_rwlock);
 		goto out;
 	}
 
 	/*
 	 * Use the regular zone/keg/slab allocator.
 	 */
 	zone->uz_import = (uma_import)zone_import;
 	zone->uz_release = (uma_release)zone_release;
 	zone->uz_arg = zone; 
 
 	if (arg->flags & UMA_ZONE_SECONDARY) {
 		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
 		zone->uz_init = arg->uminit;
 		zone->uz_fini = arg->fini;
 		zone->uz_lockptr = &keg->uk_lock;
 		zone->uz_flags |= UMA_ZONE_SECONDARY;
 		rw_wlock(&uma_rwlock);
 		ZONE_LOCK(zone);
 		/* Append this zone after the last zone on the keg's list. */
 		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
 			if (LIST_NEXT(z, uz_link) == NULL) {
 				LIST_INSERT_AFTER(z, zone, uz_link);
 				break;
 			}
 		}
 		ZONE_UNLOCK(zone);
 		rw_wunlock(&uma_rwlock);
 	} else if (keg == NULL) {
 		/* Ordinary zone: allocate and construct a new keg for it. */
 		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
 		    arg->align, arg->flags)) == NULL)
 			return (ENOMEM);
 	} else {
 		struct uma_kctor_args karg;
 		int error;
 
 		/* We should only be here from uma_startup() */
 		karg.size = arg->size;
 		karg.uminit = arg->uminit;
 		karg.fini = arg->fini;
 		karg.align = arg->align;
 		karg.flags = arg->flags;
 		karg.zone = zone;
 		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
 		    flags);
 		if (error)
 			return (error);
 	}
 
 	/*
 	 * Link in the first keg.
 	 */
 	zone->uz_klink.kl_keg = keg;
 	LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
 	zone->uz_lockptr = &keg->uk_lock;
 	zone->uz_size = keg->uk_size;
 	zone->uz_flags |= (keg->uk_flags &
 	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
 
 	/*
 	 * Some internal zones don't have room allocated for the per cpu
 	 * caches.  If we're internal, bail out here.
 	 */
 	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
 		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
 		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
 		return (0);
 	}
 
 out:
 	/*
 	 * Choose the bucket size: the maximum when UMA_ZONE_MAXBUCKET is
 	 * requested, otherwise whatever bucket_select() picks for the item
 	 * size.  The initial value is also recorded as the minimum.
 	 */
 	if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
 		zone->uz_count = bucket_select(zone->uz_size);
 	else
 		zone->uz_count = BUCKET_MAX;
 	zone->uz_count_min = zone->uz_count;
 
 	return (0);
 }
 
 /*
  * Keg header dtor.  This frees all data, destroys locks, frees the hash
  * table and removes the keg from the global list.
  *
  * Arguments/Returns follow uma_dtor specifications
  *	udata  unused
  */
 static void
 keg_dtor(void *arg, int size, void *udata)
 {
 	uma_keg_t keg;
 
 	keg = (uma_keg_t)arg;
 	KEG_LOCK(keg);
 	if (keg->uk_free != 0) {
 		printf("Freed UMA keg (%s) was not empty (%d items). "
 		    " Lost %d pages of memory.\n",
 		    keg->uk_name ? keg->uk_name : "",
 		    keg->uk_free, keg->uk_pages);
 	}
 	KEG_UNLOCK(keg);
 
 	hash_free(&keg->uk_hash);
 
 	KEG_LOCK_FINI(keg);
 }
 
 /*
  * Zone header dtor.
  *
  * Arguments/Returns follow uma_dtor specifications
  *	udata  unused
  */
 static void
 zone_dtor(void *arg, int size, void *udata)
 {
 	uma_klink_t klink;
 	uma_zone_t zone;
 	uma_keg_t keg;
 
 	zone = (uma_zone_t)arg;
 	keg = zone_first_keg(zone);
 
 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
 		cache_drain(zone);
 
 	rw_wlock(&uma_rwlock);
 	LIST_REMOVE(zone, uz_link);
 	rw_wunlock(&uma_rwlock);
 	/*
 	 * XXX there are some races here where
 	 * the zone can be drained but zone lock
 	 * released and then refilled before we
 	 * remove it... we dont care for now
 	 */
 	zone_drain_wait(zone, M_WAITOK);
 	/*
 	 * Unlink all of our kegs.
 	 */
 	while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
 		klink->kl_keg = NULL;
 		LIST_REMOVE(klink, kl_link);
 		if (klink == &zone->uz_klink)
 			continue;
 		free(klink, M_TEMP);
 	}
 	/*
 	 * We only destroy kegs from non secondary zones.
 	 */
 	if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
 		rw_wlock(&uma_rwlock);
 		LIST_REMOVE(keg, uk_link);
 		rw_wunlock(&uma_rwlock);
 		zone_free_item(kegs, keg, NULL, SKIP_NONE);
 	}
 	ZONE_LOCK_FINI(zone);
 }
 
 /*
  * Traverses every zone in the system and calls a callback
  *
  * Arguments:
  *	zfunc  A pointer to a function which accepts a zone
  *		as an argument.
  *
  * Returns:
  *	Nothing
  */
 static void
 zone_foreach(void (*zfunc)(uma_zone_t))
 {
 	uma_keg_t keg;
 	uma_zone_t zone;
 
 	rw_rlock(&uma_rwlock);
 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
 		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
 			zfunc(zone);
 	}
 	rw_runlock(&uma_rwlock);
 }
 
 /* Public functions */
 /*
  * See uma.h
  *
  * First-stage UMA bootstrap.  Carves the zone of zones, the zone of kegs
  * and the master keg out of the front of the supplied boot memory,
  * constructs them by hand, records the remaining pages as the boot page
  * pool, and then creates the slab-header and hash zones and the bucket
  * zones.
  *
  * Arguments:
  *	mem     Start of the boot memory
  *	npages  Number of pages available at mem
  */
 void
 uma_startup(void *mem, int npages)
 {
 	struct uma_zctor_args args;
 	uma_keg_t masterkeg;
 	uintptr_t m;
 	int zsize;
 	int ksize;
 
 	rw_init(&uma_rwlock, "UMA lock");
 
 	/* Kegs and zones carry variable-length per-domain/per-CPU tails. */
 	ksize = sizeof(struct uma_keg) +
 	    (sizeof(struct uma_domain) * vm_ndomains);
 	zsize = sizeof(struct uma_zone) +
 	    (sizeof(struct uma_cache) * mp_ncpus) +
 	    (sizeof(struct uma_zone_domain) * vm_ndomains);
 
 	/* Use bootpages memory for the zone of zones and zone of kegs. */
 	m = (uintptr_t)mem;
 	zones = (uma_zone_t)m;
 	m += roundup(zsize, CACHE_LINE_SIZE);
 	kegs = (uma_zone_t)m;
 	m += roundup(zsize, CACHE_LINE_SIZE);
 	masterkeg = (uma_keg_t)m;
 	m += roundup(ksize, CACHE_LINE_SIZE);
 	/* Whatever is left, rounded to a page boundary, feeds boot_pages. */
 	m = roundup(m, PAGE_SIZE);
 	npages -= (m - (uintptr_t)mem) / PAGE_SIZE;
 	mem = (void *)m;
 
 	/* "manually" create the initial zone */
 	memset(&args, 0, sizeof(args));
 	args.name = "UMA Kegs";
 	args.size = ksize;
 	args.ctor = keg_ctor;
 	args.dtor = keg_dtor;
 	args.uminit = zero_init;
 	args.fini = NULL;
 	args.keg = masterkeg;
 	args.align = 32 - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
 	zone_ctor(kegs, zsize, &args, M_WAITOK);
 
 	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
 	bootmem = mem;
 	boot_pages = npages;
 
 	/*
 	 * NOTE(review): zsize above is computed with mp_ncpus, while the
 	 * item size below uses (mp_maxid + 1) -- confirm which CPU count
 	 * is intended for sizing the per-CPU cache array.
 	 */
 	args.name = "UMA Zones";
 	args.size = sizeof(struct uma_zone) +
 	    (sizeof(struct uma_cache) * (mp_maxid + 1)) +
 	    (sizeof(struct uma_zone_domain) * vm_ndomains);
 	args.ctor = zone_ctor;
 	args.dtor = zone_dtor;
 	args.uminit = zero_init;
 	args.fini = NULL;
 	args.keg = NULL;
 	args.align = 32 - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
 	zone_ctor(zones, zsize, &args, M_WAITOK);
 
 	/* Now make a zone for slab headers */
 	slabzone = uma_zcreate("UMA Slabs",
 				sizeof(struct uma_slab),
 				NULL, NULL, NULL, NULL,
 				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 
 	hashzone = uma_zcreate("UMA Hash",
 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
 	    NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 
 	bucket_init();
 
 	booted = UMA_STARTUP;
 }
 
 /*
  * See uma.h
  *
  * Second-stage bootstrap: advances the boot state to UMA_STARTUP2,
  * enables buckets and initializes the drain lock.  Code in this file
  * that tests "booted < UMA_STARTUP2" (keg_ctor(), uma_zcreate(),
  * uma_zsecond_create()) changes behavior once this has run.
  */
 void
 uma_startup2(void)
 {
 	booted = UMA_STARTUP2;
 	bucket_enable();
 	sx_init(&uma_drain_lock, "umadrain");
 }
 
 /*
  * Initialize our callout handle and schedule the first uma_timeout()
  * call UMA_TIMEOUT * hz ticks from now.
  */
 
 static void
 uma_startup3(void)
 {
 
 	callout_init(&uma_callout, 1);
 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 }
 
 /*
  * Allocates and constructs a keg for a zone by packaging the arguments
  * into a uma_kctor_args and allocating an item from the keg zone.
  *
  * Arguments:
  *	zone    The zone the keg is created for
  *	size    Item size
  *	uminit  Keg-level item init routine
  *	fini    Keg-level item fini routine
  *	align   Alignment mask; UMA_ALIGN_CACHE is replaced by the current
  *		uma_align_cache value
  *	flags   Keg creation flags
  *
  * Returns:
  *	The result of zone_alloc_item() on the keg zone
  */
 static uma_keg_t
 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
 		int align, uint32_t flags)
 {
 	struct uma_kctor_args kargs;
 
 	kargs.zone = zone;
 	kargs.size = size;
 	kargs.uminit = uminit;
 	kargs.fini = fini;
 	kargs.flags = flags;
 	if (align == UMA_ALIGN_CACHE)
 		kargs.align = uma_align_cache;
 	else
 		kargs.align = align;
 
 	return (zone_alloc_item(kegs, &kargs, UMA_ANYDOMAIN, M_WAITOK));
 }
 
 /*
  * See uma.h
  *
  * Sets the alignment that uma_kcreate() substitutes for UMA_ALIGN_CACHE.
  * Passing UMA_ALIGN_CACHE itself is ignored.
  */
 void
 uma_set_align(int align)
 {
 
 	if (align == UMA_ALIGN_CACHE)
 		return;
 	uma_align_cache = align;
 }
 
 /*
  * See uma.h
  *
  * Creates a zone by filling in a uma_zctor_args (with no keg supplied)
  * and allocating an item from the zone of zones.  Once booting has
  * reached UMA_STARTUP2 the allocation is made with uma_drain_lock held
  * shared; before that the lock is not taken.
  */
 uma_zone_t
 uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
 		uma_init uminit, uma_fini fini, int align, uint32_t flags)
 
 {
 	struct uma_zctor_args zargs;
 	uma_zone_t newzone;
 	bool drain_locked;
 
 	KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"",
 	    align, name));
 
 	/* Everything the zone ctor needs travels in zargs. */
 	memset(&zargs, 0, sizeof(zargs));
 	zargs.name = name;
 	zargs.size = size;
 	zargs.align = align;
 	zargs.flags = flags;
 	zargs.keg = NULL;
 	zargs.ctor = ctor;
 	zargs.dtor = dtor;
 	zargs.uminit = uminit;
 	zargs.fini = fini;
 #ifdef INVARIANTS
 	/*
 	 * A zone created without any constructor, destructor, init or
 	 * fini (and without ZINIT/NOFREE) gets the UMA trash routines,
 	 * which check for memory use after free.
 	 */
 	if ((flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE)) == 0 &&
 	    ctor == NULL && dtor == NULL && uminit == NULL && fini == NULL) {
 		zargs.ctor = trash_ctor;
 		zargs.dtor = trash_dtor;
 		zargs.uminit = trash_init;
 		zargs.fini = trash_fini;
 	}
 #endif
 
 	drain_locked = (booted >= UMA_STARTUP2);
 	if (drain_locked)
 		sx_slock(&uma_drain_lock);
 	newzone = zone_alloc_item(zones, &zargs, UMA_ANYDOMAIN, M_WAITOK);
 	if (drain_locked)
 		sx_sunlock(&uma_drain_lock);
 	return (newzone);
 }
 
 /*
  * See uma.h
  *
  * Creates a secondary zone on the first keg of 'master'.  Size, alignment
  * and flags are taken from that keg, with UMA_ZONE_SECONDARY added.  Once
  * booting has reached UMA_STARTUP2 the allocation is made with
  * uma_drain_lock held shared.
  */
 uma_zone_t
 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_zone_t master)
 {
 	struct uma_zctor_args zargs;
 	uma_keg_t mkeg;
 	uma_zone_t newzone;
 	bool drain_locked;
 
 	mkeg = zone_first_keg(master);
 
 	memset(&zargs, 0, sizeof(zargs));
 	zargs.name = name;
 	zargs.ctor = ctor;
 	zargs.dtor = dtor;
 	zargs.uminit = zinit;
 	zargs.fini = zfini;
 	/* The item geometry is dictated by the master's keg. */
 	zargs.keg = mkeg;
 	zargs.size = mkeg->uk_size;
 	zargs.align = mkeg->uk_align;
 	zargs.flags = mkeg->uk_flags | UMA_ZONE_SECONDARY;
 
 	drain_locked = (booted >= UMA_STARTUP2);
 	if (drain_locked)
 		sx_slock(&uma_drain_lock);
 	/* XXX Attaches only one keg of potentially many. */
 	newzone = zone_alloc_item(zones, &zargs, UMA_ANYDOMAIN, M_WAITOK);
 	if (drain_locked)
 		sx_sunlock(&uma_drain_lock);
 	return (newzone);
 }
 
 /*
  * See uma.h
  *
  * Creates a cache-only zone: the supplied import/release callbacks and
  * their argument are handed to the zone ctor, which treats a zone with
  * an import routine as having no keg.  Alignment is fixed at 0.
  */
 uma_zone_t
 uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_import zimport,
 		    uma_release zrelease, void *arg, int flags)
 {
 	struct uma_zctor_args zargs;
 
 	memset(&zargs, 0, sizeof(zargs));
 
 	/* Identity and geometry. */
 	zargs.name = name;
 	zargs.size = size;
 	zargs.align = 0;
 	zargs.flags = flags;
 
 	/* Per-item hooks. */
 	zargs.ctor = ctor;
 	zargs.dtor = dtor;
 	zargs.uminit = zinit;
 	zargs.fini = zfini;
 
 	/* Backing-store callbacks and their private argument. */
 	zargs.import = zimport;
 	zargs.release = zrelease;
 	zargs.arg = arg;
 
 	return (zone_alloc_item(zones, &zargs, UMA_ANYDOMAIN, M_WAITOK));
 }
 
 /*
  * Locks two zones, always taking the lock of the zone at the lower
  * address first.  The second lock is acquired with MTX_DUPOK.
  */
 static void
 zone_lock_pair(uma_zone_t a, uma_zone_t b)
 {
 	uma_zone_t first, second;
 
 	if (a < b) {
 		first = a;
 		second = b;
 	} else {
 		first = b;
 		second = a;
 	}
 	ZONE_LOCK(first);
 	mtx_lock_flags(second->uz_lockptr, MTX_DUPOK);
 }
 
 /*
  * Releases the locks of both zones, 'a' first and then 'b'.  Counterpart
  * of zone_lock_pair().
  */
 static void
 zone_unlock_pair(uma_zone_t a, uma_zone_t b)
 {
 
 	ZONE_UNLOCK(a);
 	ZONE_UNLOCK(b);
 }
 
 /*
  * Attaches the first keg of 'master' to 'zone' as an additional keg and
  * switches the zone over to the multi-keg slab fetch routine.
  *
  * Arguments:
  *	zone    An existing secondary zone that resolves items via vtoslab()
  *	master  The zone whose first keg is to be added; it must use
  *		vtoslab() as well and have the same item size as 'zone'
  *
  * Returns:
  *	0       on success
  *	EINVAL  if either zone lacks UMA_ZONE_VTOSLAB or 'zone' is not a
  *		secondary zone
  *	E2BIG   if the item sizes differ
  */
 int
 uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
 {
 	uma_klink_t klink;
 	uma_klink_t kl;
 	int error;
 
 	error = 0;
 	/* Allocate the link up front, before any zone lock is taken. */
 	klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
 
 	zone_lock_pair(zone, master);
 	/*
 	 * zone must use vtoslab() to resolve objects and must already be
 	 * a secondary.
 	 */
 	if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
 	    != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * The new master must also use vtoslab().  This has to test the
 	 * master's flags; the zone's own flags were validated above.
 	 */
 	if ((master->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * The underlying object must be the same size.  rsize
 	 * may be different.
 	 */
 	if (master->uz_size != zone->uz_size) {
 		error = E2BIG;
 		goto out;
 	}
 	/*
 	 * Put it at the end of the list.
 	 */
 	klink->kl_keg = zone_first_keg(master);
 	LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
 		if (LIST_NEXT(kl, kl_link) == NULL) {
 			LIST_INSERT_AFTER(kl, klink, kl_link);
 			break;
 		}
 	}
 	/* The link is now owned by the zone; do not free it below. */
 	klink = NULL;
 	zone->uz_flags |= UMA_ZFLAG_MULTI;
 	zone->uz_slab = zone_fetch_slab_multi;
 
 out:
 	zone_unlock_pair(zone, master);
 	if (klink != NULL)
 		free(klink, M_TEMP);
 
 	return (error);
 }
 
 
 /*
  * See uma.h
  *
  * Destroys a zone by returning it to the zone of zones.  The free is
  * performed with uma_drain_lock held shared.
  */
 void
 uma_zdestroy(uma_zone_t zone)
 {
 
 	sx_slock(&uma_drain_lock);
 	zone_free_item(zones, zone, NULL, SKIP_NONE);
 	sx_sunlock(&uma_drain_lock);
 }
 
 /*
  * Performs one M_WAITOK allocation from the zone and immediately frees
  * the item again.
  */
 void
 uma_zwait(uma_zone_t zone)
 {
 	void *probe = uma_zalloc_arg(zone, NULL, M_WAITOK);
 
 	uma_zfree(zone, probe);
 }
 
 /*
  * See uma.h
  *
  * Allocates one item from a zone.  The per-CPU alloc bucket is tried
  * first, then the per-CPU free bucket, then the zone's bucket cache,
  * then a freshly filled bucket; if no bucket can be obtained a single
  * item is allocated directly with zone_alloc_item().
  *
  * Arguments:
  *	zone   The zone to allocate from
  *	udata  Passed to the zone's constructor
  *	flags  M_WAITOK, M_NOWAIT, M_ZERO, ...
  *
  * Returns:
  *	The item, or NULL if the allocation or the item's ctor/init failed
  */
 void *
 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 {
 	uma_zone_domain_t zdom;
 	uma_bucket_t bucket;
 	uma_cache_t cache;
 	void *item;
 	int cpu, domain, lockfail;
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
 
 	/* This is the fast path allocation */
 	CTR4(KTR_UMA, "uma_zalloc_arg thread %x zone %s(%p) flags %d",
 	    curthread, zone->uz_name, zone, flags);
 
 	if (flags & M_WAITOK) {
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
 	}
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zalloc_arg: called with spinlock or critical section held"));
 
 #ifdef DEBUG_MEMGUARD
 	if (memguard_cmp_zone(zone)) {
 		item = memguard_alloc(zone->uz_size, flags);
 		if (item != NULL) {
 			/*
 			 * On init or ctor failure give the memory back to
 			 * memguard rather than leaking it.
 			 */
 			if (zone->uz_init != NULL &&
 			    zone->uz_init(item, zone->uz_size, flags) != 0) {
 				memguard_free(item);
 				return (NULL);
 			}
 			if (zone->uz_ctor != NULL &&
 			    zone->uz_ctor(item, zone->uz_size, udata,
 			    flags) != 0) {
 				/* A zone may have a ctor but no fini. */
 				if (zone->uz_fini != NULL)
 					zone->uz_fini(item, zone->uz_size);
 				memguard_free(item);
 				return (NULL);
 			}
 			return (item);
 		}
 		/* This is unfortunate but should not be fatal. */
 	}
 #endif
 	/*
 	 * If possible, allocate from the per-CPU cache.  There are two
 	 * requirements for safe access to the per-CPU cache: (1) the thread
 	 * accessing the cache must not be preempted or yield during access,
 	 * and (2) the thread must not migrate CPUs without switching which
 	 * cache it accesses.  We rely on a critical section to prevent
 	 * preemption and migration.  We release the critical section in
 	 * order to acquire the zone mutex if we are unable to allocate from
 	 * the current cache; when we re-acquire the critical section, we
 	 * must detect and handle migration if it has occurred.
 	 */
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 zalloc_start:
 	bucket = cache->uc_allocbucket;
 	if (bucket != NULL && bucket->ub_cnt > 0) {
 		bucket->ub_cnt--;
 		item = bucket->ub_bucket[bucket->ub_cnt];
 #ifdef INVARIANTS
 		bucket->ub_bucket[bucket->ub_cnt] = NULL;
 #endif
 		KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
 		cache->uc_allocs++;
 		critical_exit();
 		/* The ctor runs outside the critical section. */
 		if (zone->uz_ctor != NULL &&
 		    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
 			atomic_add_long(&zone->uz_fails, 1);
 			zone_free_item(zone, item, udata, SKIP_DTOR);
 			return (NULL);
 		}
 #ifdef INVARIANTS
 		uma_dbg_alloc(zone, NULL, item);
 #endif
 		if (flags & M_ZERO)
 			uma_zero_item(item, zone);
 		return (item);
 	}
 
 	/*
 	 * We have run out of items in our alloc bucket.
 	 * See if we can switch with our free bucket.
 	 */
 	bucket = cache->uc_freebucket;
 	if (bucket != NULL && bucket->ub_cnt > 0) {
 		CTR2(KTR_UMA,
 		    "uma_zalloc: zone %s(%p) swapping empty with alloc",
 		    zone->uz_name, zone);
 		cache->uc_freebucket = cache->uc_allocbucket;
 		cache->uc_allocbucket = bucket;
 		goto zalloc_start;
 	}
 
 	/*
 	 * Discard any empty allocation bucket while we hold no locks.
 	 */
 	bucket = cache->uc_allocbucket;
 	cache->uc_allocbucket = NULL;
 	critical_exit();
 	if (bucket != NULL)
 		bucket_free(zone, bucket, udata);
 
 	/* NUMA zones allocate from the current CPU's domain. */
 	if (zone->uz_flags & UMA_ZONE_NUMA)
 		domain = PCPU_GET(domain);
 	else
 		domain = UMA_ANYDOMAIN;
 
 	/* Short-circuit for zones without buckets and low memory. */
 	if (zone->uz_count == 0 || bucketdisable)
 		goto zalloc_item;
 
 	/*
 	 * Attempt to retrieve the item from the per-CPU cache has failed, so
 	 * we must go back to the zone.  This requires the zone lock, so we
 	 * must drop the critical section, then re-acquire it when we go back
 	 * to the cache.  Since the critical section is released, we may be
 	 * preempted or migrate.  As such, make sure not to maintain any
 	 * thread-local state specific to the cache from prior to releasing
 	 * the critical section.
 	 */
 	lockfail = 0;
 	if (ZONE_TRYLOCK(zone) == 0) {
 		/* Record contention to size the buckets. */
 		ZONE_LOCK(zone);
 		lockfail = 1;
 	}
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 	/*
 	 * Since we have locked the zone we may as well send back our stats.
 	 */
 	atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
 	atomic_add_long(&zone->uz_frees, cache->uc_frees);
 	cache->uc_allocs = 0;
 	cache->uc_frees = 0;
 
 	/* See if we lost the race to fill the cache. */
 	if (cache->uc_allocbucket != NULL) {
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
 
 	/*
 	 * Check the zone's cache of buckets.  Requests for any domain use
 	 * the bucket list of domain 0.
 	 */
 	if (domain == UMA_ANYDOMAIN)
 		zdom = &zone->uz_domain[0];
 	else
 		zdom = &zone->uz_domain[domain];
 	if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
 		KASSERT(bucket->ub_cnt != 0,
 		    ("uma_zalloc_arg: Returning an empty bucket."));
 
 		LIST_REMOVE(bucket, ub_link);
 		cache->uc_allocbucket = bucket;
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
 	/* We are no longer associated with this CPU. */
 	critical_exit();
 
 	/*
 	 * We bump the uz count when the cache size is insufficient to
 	 * handle the working set.
 	 */
 	if (lockfail && zone->uz_count < BUCKET_MAX)
 		zone->uz_count++;
 	ZONE_UNLOCK(zone);
 
 	/*
 	 * Now lets just fill a bucket and put it on the free list.  If that
 	 * works we'll restart the allocation from the beginning and it
 	 * will use the just filled bucket.
 	 */
 	bucket = zone_alloc_bucket(zone, udata, domain, flags);
 	CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
 	    zone->uz_name, zone, bucket);
 	if (bucket != NULL) {
 		ZONE_LOCK(zone);
 		critical_enter();
 		cpu = curcpu;
 		cache = &zone->uz_cpu[cpu];
 		/*
 		 * See if we lost the race or were migrated.  Cache the
 		 * initialized bucket to make this less likely or claim
 		 * the memory directly.
 		 */
 		if (cache->uc_allocbucket != NULL ||
 		    (zone->uz_flags & UMA_ZONE_NUMA &&
 		    domain != PCPU_GET(domain)))
 			LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
 		else
 			cache->uc_allocbucket = bucket;
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
 
 	/*
 	 * We may not be able to get a bucket so return an actual item.
 	 */
 zalloc_item:
 	item = zone_alloc_item(zone, udata, domain, flags);
 
 	return (item);
 }
 
 /*
  * Allocates one item from a zone for an explicit domain.  Unlike
  * uma_zalloc_arg() this does not consult the per-CPU buckets; after the
  * usual tracing and sanity checks the request goes straight to
  * zone_alloc_item().
  *
  * Arguments:
  *	zone    The zone to allocate from
  *	udata   Passed to the zone's constructor
  *	domain  The domain to allocate from
  *	flags   M_WAITOK, M_NOWAIT, M_ZERO, ...
  *
  * Returns:
  *	Whatever zone_alloc_item() returns
  */
 void *
 uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags)
 {
 	void *item;
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
 
 	CTR5(KTR_UMA,
 	    "uma_zalloc_domain thread %x zone %s(%p) domain %d flags %d",
 	    curthread, zone->uz_name, zone, domain, flags);
 
 	/* A sleeping allocation must be made from a sleepable context. */
 	if ((flags & M_WAITOK) != 0) {
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "uma_zalloc_domain: zone \"%s\"", zone->uz_name);
 	}
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zalloc_domain: called with spinlock or critical section held"));
 
 	item = zone_alloc_item(zone, udata, domain, flags);
 	return (item);
 }
 
 /*
  * Locate a slab that still has room.  Within a domain a partially used
  * slab is preferred; failing that, a completely free slab is moved onto
  * the partial list and returned.  This helps to reduce fragmentation.
  *
  * With 'rr' non-zero every domain is examined, starting at 'domain' and
  * wrapping around.  With 'rr' zero only 'domain' itself is examined.
  *
  * Returns the slab, or NULL when no examined domain has one.
  */
 static uma_slab_t
 keg_first_slab(uma_keg_t keg, int domain, int rr)
 {
 	uma_domain_t kdom;
 	uma_slab_t found;
 	int cur;
 
 	KASSERT(domain >= 0 && domain < vm_ndomains,
 	    ("keg_first_slab: domain %d out of range", domain));
 
 	cur = domain;
 	do {
 		kdom = &keg->uk_domain[cur];
 
 		found = LIST_FIRST(&kdom->ud_part_slab);
 		if (found != NULL)
 			return (found);
 
 		found = LIST_FIRST(&kdom->ud_free_slab);
 		if (found != NULL) {
 			/* The caller will use it, so it becomes partial. */
 			LIST_REMOVE(found, us_link);
 			LIST_INSERT_HEAD(&kdom->ud_part_slab, found, us_link);
 			return (found);
 		}
 
 		if (rr)
 			cur = (cur + 1) % vm_ndomains;
 	} while (cur != domain);
 
 	return (NULL);
 }
 
 /*
  * Returns a slab of 'keg' that has at least one free item, allocating a
  * new slab if necessary.  Must be called with the keg lock held.
  *
  * Arguments:
  *	keg      The keg to take a slab from
  *	zone     The zone on whose behalf the slab is fetched; used for
  *		 limit accounting and warnings
  *	rdomain  The domain to allocate from, or UMA_ANYDOMAIN to rotate
  *		 over all domains
  *	flags    Allocation flags; M_USE_RESERVE permits dipping into
  *		 keg->uk_reserve, M_NOVM forbids allocating a new slab
  *
  * Returns:
  *	A slab on the keg's partial list, or NULL on failure
  */
 static uma_slab_t
 keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags)
 {
 	uma_domain_t dom;
 	uma_slab_t slab;
 	int allocflags, domain, reserve, rr, start;
 
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	slab = NULL;
 	reserve = 0;
 	allocflags = flags;
 	/* Without M_USE_RESERVE the reserved items are off limits. */
 	if ((flags & M_USE_RESERVE) == 0)
 		reserve = keg->uk_reserve;
 
 	/*
 	 * Round-robin for non first-touch zones when there is more than one
 	 * domain.
 	 */
 	if (vm_ndomains == 1)
 		rdomain = 0;
 	rr = rdomain == UMA_ANYDOMAIN;
 	if (rr) {
 		keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
 		domain = start = keg->uk_cursor;
 		/* Only block on the second pass. */
 		if ((flags & (M_WAITOK | M_NOVM)) == M_WAITOK)
 			allocflags = (allocflags & ~M_WAITOK) | M_NOWAIT;
 	} else
 		domain = start = rdomain;
 
 again:
 	do {
 		if (keg->uk_free > reserve &&
 		    (slab = keg_first_slab(keg, domain, rr)) != NULL) {
 			MPASS(slab->us_keg == keg);
 			return (slab);
 		}
 
 		/*
 		 * M_NOVM means don't ask at all!
 		 */
 		if (flags & M_NOVM)
 			break;
 
 		if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
 			keg->uk_flags |= UMA_ZFLAG_FULL;
 			/*
 			 * If this is not a multi-zone, set the FULL bit.
 			 * Otherwise slab_multi() takes care of it.
 			 */
 			if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
 				zone->uz_flags |= UMA_ZFLAG_FULL;
 				zone_log_warning(zone);
 				zone_maxaction(zone);
 			}
 			if (flags & M_NOWAIT)
 				return (NULL);
 			zone->uz_sleeps++;
 			msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
 			/*
 			 * In a do/while loop "continue" jumps to the loop
 			 * condition, so after waking up the domain test
 			 * below is evaluated before the body runs again.
 			 */
 			continue;
 		}
 		slab = keg_alloc_slab(keg, zone, domain, allocflags);
 		/*
 		 * If we got a slab here it's safe to mark it partially used
 		 * and return.  We assume that the caller is going to remove
 		 * at least one item.
 		 */
 		if (slab) {
 			MPASS(slab->us_keg == keg);
 			dom = &keg->uk_domain[slab->us_domain];
 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
 			return (slab);
 		}
 		/* Advance to the next domain when rotating. */
 		if (rr) {
 			keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
 			domain = keg->uk_cursor;
 		}
 	} while (domain != start);
 
 	/* Retry domain scan with blocking. */
 	if (allocflags != flags) {
 		allocflags = flags;
 		goto again;
 	}
 
 	/*
 	 * We might not have been able to get a slab but another cpu
 	 * could have while we were unlocked.  Check again before we
 	 * fail.
 	 */
 	if (keg->uk_free > reserve &&
 	    (slab = keg_first_slab(keg, domain, rr)) != NULL) {
 		MPASS(slab->us_keg == keg);
 		return (slab);
 	}
 	return (NULL);
 }
 
 /*
  * Fetches a slab for a single-keg zone.  When 'keg' is NULL the zone's
  * first keg is looked up and locked here; otherwise the caller passes a
  * keg that keg_fetch_slab() expects to be locked already.
  *
  * The fetch is retried until it succeeds unless M_NOWAIT or M_NOVM is
  * set.  On success the slab is returned with the keg still locked; on
  * failure the keg is unlocked and NULL is returned.
  */
 static uma_slab_t
 zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int domain, int flags)
 {
 	uma_slab_t found;
 
 	if (keg == NULL) {
 		keg = zone_first_keg(zone);
 		KEG_LOCK(keg);
 	}
 
 	do {
 		found = keg_fetch_slab(keg, zone, domain, flags);
 		if (found != NULL)
 			return (found);
 	} while ((flags & (M_NOWAIT | M_NOVM)) == 0);
 
 	KEG_UNLOCK(keg);
 	return (NULL);
 }
 
 /*
  * zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
  * with the keg locked.  On NULL no lock is held.
  *
  * The last pointer is used to seed the search.  It is not required.
  *
  * Arguments:
  *	zone    The multi-keg zone to fetch a slab for
  *	last    Keg the previous slab came from, or NULL; when non-NULL it
  *		is tried first and unlocked here if it yields nothing
  *	domain  Domain passed through to keg_fetch_slab()
  *	rflags  The caller's allocation flags
  */
 static uma_slab_t
 zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int domain, int rflags)
 {
 	uma_klink_t klink;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	int flags;
 	int empty;
 	int full;
 
 	/*
 	 * Don't wait on the first pass.  This will skip limit tests
 	 * as well.  We don't want to block if we can find a provider
 	 * without blocking.
 	 */
 	flags = (rflags & ~M_WAITOK) | M_NOWAIT;
 	/*
 	 * Use the last slab allocated as a hint for where to start
 	 * the search.
 	 */
 	if (last != NULL) {
 		slab = keg_fetch_slab(last, zone, domain, flags);
 		if (slab)
 			return (slab);
 		KEG_UNLOCK(last);
 	}
 	/*
 	 * Loop until we have a slab in case of transient failures
 	 * while M_WAITOK is specified.  I'm not sure this is 100%
 	 * required but we've done it for so long now.
 	 */
 	for (;;) {
 		/* Per-pass tallies of kegs that are full and not full. */
 		empty = 0;
 		full = 0;
 		/*
 		 * Search the available kegs for slabs.  Be careful to hold the
 		 * correct lock while calling into the keg layer.
 		 */
 		LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
 			keg = klink->kl_keg;
 			KEG_LOCK(keg);
 			if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
 				slab = keg_fetch_slab(keg, zone, domain, flags);
 				if (slab)
 					return (slab);
 			}
 			/*
 			 * Re-test the flag: keg_fetch_slab() may have just
 			 * marked this keg full.
 			 */
 			if (keg->uk_flags & UMA_ZFLAG_FULL)
 				full++;
 			else
 				empty++;
 			KEG_UNLOCK(keg);
 		}
 		if (rflags & (M_NOWAIT | M_NOVM))
 			break;
 		/* Later passes use the caller's flags and may block. */
 		flags = rflags;
 		/*
 		 * All kegs are full.  XXX We can't atomically check all kegs
 		 * and sleep so just sleep for a short period and retry.
 		 */
 		if (full && !empty) {
 			ZONE_LOCK(zone);
 			zone->uz_flags |= UMA_ZFLAG_FULL;
 			zone->uz_sleeps++;
 			zone_log_warning(zone);
 			zone_maxaction(zone);
 			msleep(zone, zone->uz_lockptr, PVM,
 			    "zonelimit", hz/100);
 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
 			ZONE_UNLOCK(zone);
 			continue;
 		}
 	}
 	return (NULL);
 }
 
 /*
  * Takes one free item out of a slab.  The first set bit in the slab's
  * free bitmap selects the item; the bit is cleared and the free counts
  * of both the slab and the keg are decremented.  A slab that has just
  * handed out its last item is moved to its domain's full list.
  *
  * Must be called with the keg lock held.  Returns the item's address.
  */
 static void *
 slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
 {
 	uint8_t idx;
 	void *obj;
 
 	MPASS(keg == slab->us_keg);
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 
 	/* BIT_FFS() is 1-based; convert to a 0-based item index. */
 	idx = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
 	BIT_CLR(SLAB_SETSIZE, idx, &slab->us_free);
 	obj = slab->us_data + (keg->uk_rsize * idx);
 
 	slab->us_freecount -= 1;
 	keg->uk_free -= 1;
 
 	if (slab->us_freecount != 0)
 		return (obj);
 
 	/* Nothing left in this slab: move it to the full list. */
 	LIST_REMOVE(slab, us_link);
 	LIST_INSERT_HEAD(&keg->uk_domain[slab->us_domain].ud_full_slab,
 	    slab, us_link);
 
 	return (obj);
 }
 
 /*
  * Import routine for keg-backed zones: fills 'bucket' with up to 'max'
  * items taken from slabs obtained through zone->uz_slab.
  *
  * Arguments:
  *	zone    The zone to import items for
  *	bucket  Array that receives the item pointers
  *	max     Capacity of the array
  *	domain  Domain passed through to the slab fetch routine
  *	flags   Allocation flags; after the first slab has been processed
  *		M_WAITOK is replaced with M_NOWAIT
  *
  * Returns:
  *	The number of items stored in 'bucket', possibly 0
  */
 static int
 zone_import(uma_zone_t zone, void **bucket, int max, int domain, int flags)
 {
 	uma_slab_t slab;
 	uma_keg_t keg;
 	int stripe;
 	int i;
 
 	slab = NULL;
 	keg = NULL;
 	/* Try to keep the buckets totally full */
 	for (i = 0; i < max; ) {
 		if ((slab = zone->uz_slab(zone, keg, domain, flags)) == NULL)
 			break;
 		keg = slab->us_keg;
 		/* Number of items taken from one slab when striping. */
 		stripe = howmany(max, vm_ndomains);
 		while (slab->us_freecount && i < max) { 
 			bucket[i++] = slab_alloc_item(keg, slab);
 			/* Stop taking from this slab once at the reserve. */
 			if (keg->uk_free <= keg->uk_reserve)
 				break;
-#if MAXMEMDOM > 1
+#ifdef NUMA
 			/*
 			 * If the zone is striped we pick a new slab for every
 			 * N allocations.  Eliminating this conditional will
 			 * instead pick a new domain for each bucket rather
 			 * than stripe within each bucket.  The current option
 			 * produces more fragmentation and requires more cpu
 			 * time but yields better distribution.
 			 */
 			if ((zone->uz_flags & UMA_ZONE_NUMA) == 0 &&
 			    vm_ndomains > 1 && --stripe == 0)
 				break;
 #endif
 		}
 		/* Don't block if we allocated any successfully. */
 		flags &= ~M_WAITOK;
 		flags |= M_NOWAIT;
 	}
 	/*
 	 * A non-NULL slab means the last fetch succeeded, and the keg is
 	 * then still locked (zone_fetch_slab() unlocks it before returning
 	 * NULL), so release it here.
 	 */
 	if (slab != NULL)
 		KEG_UNLOCK(keg);
 
 	return i;
 }
 
 /*
  * Allocates a bucket and fills it through the zone's import routine,
  * running the zone's init routine on every imported item.
  *
  * Arguments:
  *	zone    The zone to fill a bucket for
  *	udata   Passed to bucket_alloc() and bucket_free()
  *	domain  Domain passed through to the import routine
  *	flags   Allocation flags for the items; the bucket itself is always
  *		allocated M_NOWAIT, keeping only the caller's M_NOVM
  *
  * Returns:
  *	A bucket holding at least one item, or NULL.  An import that yields
  *	no usable items counts as a zone failure.
  */
 static uma_bucket_t
 zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
 {
 	uma_bucket_t bkt;
 	int limit;
 	int n;
 
 	/* Don't wait for buckets, preserve caller's NOVM setting. */
 	bkt = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
 	if (bkt == NULL)
 		return (NULL);
 
 	limit = MIN(bkt->ub_entries, zone->uz_count);
 	bkt->ub_cnt = zone->uz_import(zone->uz_arg, bkt->ub_bucket,
 	    limit, domain, flags);
 
 	/*
 	 * Run the zone init over the imported items, stopping at the
 	 * first one that fails.
 	 */
 	if (bkt->ub_cnt != 0 && zone->uz_init != NULL) {
 		n = 0;
 		while (n < bkt->ub_cnt &&
 		    zone->uz_init(bkt->ub_bucket[n], zone->uz_size,
 		    flags) == 0)
 			n++;
 		/*
 		 * Hand the failing item and everything after it back to
 		 * the zone and keep only the initialized prefix.
 		 */
 		if (n != bkt->ub_cnt) {
 			zone->uz_release(zone->uz_arg, &bkt->ub_bucket[n],
 			    bkt->ub_cnt - n);
 #ifdef INVARIANTS
 			bzero(&bkt->ub_bucket[n],
 			    sizeof(void *) * (bkt->ub_cnt - n));
 #endif
 			bkt->ub_cnt = n;
 		}
 	}
 
 	if (bkt->ub_cnt != 0)
 		return (bkt);
 
 	/* Nothing usable was imported. */
 	bucket_free(zone, bkt, udata);
 	atomic_add_long(&zone->uz_fails, 1);
 	return (NULL);
 }
 
 /*
  * Allocates a single item from a zone, bypassing the bucket caches.
  *
  * Arguments
  *	zone   The zone to alloc for.
  *	udata  The data to be passed to the constructor.
  *	domain The domain to allocate from or UMA_ANYDOMAIN.
  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
  *
  * Returns
  *	NULL if the import, the zone init or the zone ctor fails; each
  *	such failure is counted in uz_fails
  *	An item if successful
  */
 
 static void *
 zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
 {
 	void *obj = NULL;
 
 	if (zone->uz_import(zone->uz_arg, &obj, 1, domain, flags) != 1)
 		goto fail;
 	atomic_add_long(&zone->uz_allocs, 1);
 
 	/*
 	 * The item travels from a keg slab straight to the caller, so it
 	 * has to receive both the zone's init (not the keg's init) and
 	 * the zone's ctor here.
 	 */
 	if (zone->uz_init != NULL &&
 	    zone->uz_init(obj, zone->uz_size, flags) != 0) {
 		zone_free_item(zone, obj, udata, SKIP_FINI);
 		goto fail;
 	}
 	if (zone->uz_ctor != NULL &&
 	    zone->uz_ctor(obj, zone->uz_size, udata, flags) != 0) {
 		zone_free_item(zone, obj, udata, SKIP_DTOR);
 		goto fail;
 	}
 #ifdef INVARIANTS
 	uma_dbg_alloc(zone, NULL, obj);
 #endif
 	if ((flags & M_ZERO) != 0)
 		uma_zero_item(obj, zone);
 
 	CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", obj,
 	    zone->uz_name, zone);
 
 	return (obj);
 
 fail:
 	CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
 	    zone->uz_name, zone);
 	atomic_add_long(&zone->uz_fails, 1);
 	return (NULL);
 }
 
 /*
  * See uma.h
  *
  * Free an item back to a zone.  The destructor is run first, then the item
  * is placed in the current CPU's cache bucket if there is room.  When both
  * per-CPU buckets are unusable the full free bucket is moved to the zone's
  * bucket list under the zone lock and a fresh bucket is allocated; if that
  * is not possible the item is released through zone_free_item().
  *
  * Arguments
  *	zone   The zone the item is freed to.
  *	item   The item to free; NULL is accepted and ignored.
  *	udata  Passed to the destructor and to the bucket allocator.
  */
 void
 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
 {
 	uma_cache_t cache;
 	uma_bucket_t bucket;
 	uma_zone_domain_t zdom;
 	int cpu, domain, lockfail;
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
 
 	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
 	    zone->uz_name);
 
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zfree_arg: called with spinlock or critical section held"));
 
         /* uma_zfree(..., NULL) does nothing, to match free(9). */
         if (item == NULL)
                 return;
 #ifdef DEBUG_MEMGUARD
 	/* MemGuard-owned items get dtor and fini, then go back to MemGuard. */
 	if (is_memguard_addr(item)) {
 		if (zone->uz_dtor != NULL)
 			zone->uz_dtor(item, zone->uz_size, udata);
 		if (zone->uz_fini != NULL)
 			zone->uz_fini(item, zone->uz_size);
 		memguard_free(item);
 		return;
 	}
 #endif
 #ifdef INVARIANTS
 	/* For UMA_ZONE_MALLOC zones udata is handed over as the slab. */
 	if (zone->uz_flags & UMA_ZONE_MALLOC)
 		uma_dbg_free(zone, udata, item);
 	else
 		uma_dbg_free(zone, NULL, item);
 #endif
 	/* The dtor runs exactly once, here; later paths use SKIP_DTOR. */
 	if (zone->uz_dtor != NULL)
 		zone->uz_dtor(item, zone->uz_size, udata);
 
 	/*
 	 * The race here is acceptable.  If we miss it we'll just have to wait
 	 * a little longer for the limits to be reset.
 	 */
 	if (zone->uz_flags & UMA_ZFLAG_FULL)
 		goto zfree_item;
 
 	/*
 	 * If possible, free to the per-CPU cache.  There are two
 	 * requirements for safe access to the per-CPU cache: (1) the thread
 	 * accessing the cache must not be preempted or yield during access,
 	 * and (2) the thread must not migrate CPUs without switching which
 	 * cache it accesses.  We rely on a critical section to prevent
 	 * preemption and migration.  We release the critical section in
 	 * order to acquire the zone mutex if we are unable to free to the
 	 * current cache; when we re-acquire the critical section, we must
 	 * detect and handle migration if it has occurred.
 	 */
 zfree_restart:
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 zfree_start:
 	/*
 	 * Try to free into the allocbucket first to give LIFO ordering
 	 * for cache-hot datastructures.  Spill over into the freebucket
 	 * if necessary.  Alloc will swap them if one runs dry.
 	 */
 	bucket = cache->uc_allocbucket;
 	if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
 		bucket = cache->uc_freebucket;
 	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
 		KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
 		    ("uma_zfree: Freeing to non free bucket index."));
 		bucket->ub_bucket[bucket->ub_cnt] = item;
 		bucket->ub_cnt++;
 		cache->uc_frees++;
 		critical_exit();
 		return;
 	}
 
 	/*
 	 * We must go back the zone, which requires acquiring the zone lock,
 	 * which in turn means we must release and re-acquire the critical
 	 * section.  Since the critical section is released, we may be
 	 * preempted or migrate.  As such, make sure not to maintain any
 	 * thread-local state specific to the cache from prior to releasing
 	 * the critical section.
 	 */
 	critical_exit();
 	/* With bucket caching unavailable, free the item directly. */
 	if (zone->uz_count == 0 || bucketdisable)
 		goto zfree_item;
 
 	lockfail = 0;
 	if (ZONE_TRYLOCK(zone) == 0) {
 		/* Record contention to size the buckets. */
 		ZONE_LOCK(zone);
 		lockfail = 1;
 	}
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 	/*
 	 * Since we have locked the zone we may as well send back our stats.
 	 */
 	atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
 	atomic_add_long(&zone->uz_frees, cache->uc_frees);
 	cache->uc_allocs = 0;
 	cache->uc_frees = 0;
 
 	/*
 	 * Re-check the free bucket of the cache we are on now; if it has
 	 * room the zone lock is not needed after all.
 	 */
 	bucket = cache->uc_freebucket;
 	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
 		ZONE_UNLOCK(zone);
 		goto zfree_start;
 	}
 	cache->uc_freebucket = NULL;
 	/* We are no longer associated with this CPU. */
 	critical_exit();
 
 	/*
 	 * NOTE(review): 'domain' is only used further down to detect a
 	 * domain change after the new bucket is allocated; 'zdom' always
 	 * refers to uz_domain[0], even for UMA_ZONE_NUMA zones.  Confirm
 	 * against the allocation path that this is intended.
 	 */
 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
 		domain = PCPU_GET(domain);
 	else 
 		domain = 0;
 	zdom = &zone->uz_domain[0];
 
 	/* Can we throw this on the zone full list? */
 	if (bucket != NULL) {
 		CTR3(KTR_UMA,
 		    "uma_zfree: zone %s(%p) putting bucket %p on free list",
 		    zone->uz_name, zone, bucket);
 		/* ub_cnt is pointing to the last free item */
 		KASSERT(bucket->ub_cnt != 0,
 		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
 		LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
 	}
 
 	/*
 	 * We bump the uz count when the cache size is insufficient to
 	 * handle the working set.
 	 */
 	if (lockfail && zone->uz_count < BUCKET_MAX)
 		zone->uz_count++;
 	ZONE_UNLOCK(zone);
 
 	/* Allocate the replacement bucket without sleeping. */
 	bucket = bucket_alloc(zone, udata, M_NOWAIT);
 	CTR3(KTR_UMA, "uma_zfree: zone %s(%p) allocated bucket %p",
 	    zone->uz_name, zone, bucket);
 	if (bucket) {
 		critical_enter();
 		cpu = curcpu;
 		cache = &zone->uz_cpu[cpu];
 		/*
 		 * Install the bucket only if this CPU still has no free
 		 * bucket and, for NUMA zones, we are still on the domain
 		 * sampled above.
 		 */
 		if (cache->uc_freebucket == NULL &&
 		    ((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
 		    domain == PCPU_GET(domain))) {
 			cache->uc_freebucket = bucket;
 			goto zfree_start;
 		}
 		/*
 		 * We lost the race, start over.  We have to drop our
 		 * critical section to free the bucket.
 		 */
 		critical_exit();
 		bucket_free(zone, bucket, udata);
 		goto zfree_restart;
 	}
 
 	/*
 	 * If nothing else caught this, we'll just do an internal free.
 	 * The dtor already ran above, hence SKIP_DTOR.
 	 */
 zfree_item:
 	zone_free_item(zone, item, udata, SKIP_DTOR);
 
 	return;
 }
 
 /*
  * Free an item without going through the per-CPU caches: after the usual
  * tracing and context checks the item is handed to zone_free_item() with
  * no steps skipped.  A NULL item is ignored.
  */
 void
 uma_zfree_domain(uma_zone_t zone, void *item, void *udata)
 {
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
 
 	CTR2(KTR_UMA, "uma_zfree_domain thread %x zone %s", curthread,
 	    zone->uz_name);
 
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zfree_domain: called with spinlock or critical section held"));
 
 	/* Like free(9), freeing NULL is a no-op. */
 	if (item != NULL)
 		zone_free_item(zone, item, udata, SKIP_NONE);
 }
 
 /*
  * Return one item to its slab.  The keg lock must be held and the slab
  * must belong to the keg.  The slab is moved between the domain's slab
  * lists when this free makes it completely free or no longer full.
  */
 static void
 slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
 {
 	uma_domain_t kdom;
 	uint8_t idx;
 
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	MPASS(keg == slab->us_keg);
 
 	kdom = &keg->uk_domain[slab->us_domain];
 
 	/* Index of the item within the slab's data area. */
 	idx = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
 
 	/* Re-file the slab based on its free count before this free. */
 	if (slab->us_freecount + 1 == keg->uk_ipers) {
 		/* Every item will be free: move to the free list. */
 		LIST_REMOVE(slab, us_link);
 		LIST_INSERT_HEAD(&kdom->ud_free_slab, slab, us_link);
 	} else if (slab->us_freecount == 0) {
 		/* It had no free items: move to the partial list. */
 		LIST_REMOVE(slab, us_link);
 		LIST_INSERT_HEAD(&kdom->ud_part_slab, slab, us_link);
 	}
 
 	/* Mark the item free in the slab. */
 	BIT_SET(SLAB_SETSIZE, idx, &slab->us_free);
 	slab->us_freecount++;
 
 	/* Account for it in the keg. */
 	keg->uk_free++;
 }
 
 /*
  * Return a batch of items to the slabs they came from.
  *
  * Arguments
  *	zone    The zone the items belong to.
  *	bucket  Array of item pointers to release.
  *	cnt     Number of entries in the array.
  *
  * The keg lock is taken for the duration of the loop; for UMA_ZONE_VTOSLAB
  * zones the lock is switched whenever an item's slab belongs to a different
  * keg than the one currently locked.
  */
 static void
 zone_release(uma_zone_t zone, void **bucket, int cnt)
 {
 	void *item;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	uint8_t *mem;
 	int clearfull;
 	int i;
 
 	clearfull = 0;
 	keg = zone_first_keg(zone);
 	KEG_LOCK(keg);
 	for (i = 0; i < cnt; i++) {
 		item = bucket[i];
 		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
 			/*
 			 * Locate the slab header from the item address:
 			 * via the keg's hash, or at a fixed offset from
 			 * the masked address.
 			 */
 			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
 			if (zone->uz_flags & UMA_ZONE_HASH) {
 				slab = hash_sfind(&keg->uk_hash, mem);
 			} else {
 				mem += keg->uk_pgoff;
 				slab = (uma_slab_t)mem;
 			}
 		} else {
 			slab = vtoslab((vm_offset_t)item);
 			/* Follow the slab to its owning keg if it differs. */
 			if (slab->us_keg != keg) {
 				KEG_UNLOCK(keg);
 				keg = slab->us_keg;
 				KEG_LOCK(keg);
 			}
 		}
 		slab_free_item(keg, slab, item);
 		if (keg->uk_flags & UMA_ZFLAG_FULL) {
 			if (keg->uk_pages < keg->uk_maxpages) {
 				keg->uk_flags &= ~UMA_ZFLAG_FULL;
 				clearfull = 1;
 			}
 
 			/*
 			 * We can handle one more allocation. Since we're
 			 * clearing ZFLAG_FULL, wake up all procs blocked
 			 * on pages. This should be uncommon, so keeping this
 			 * simple for now (rather than adding count of blocked
 			 * threads etc).
 			 */
 			wakeup(keg);
 		}
 	}
 	KEG_UNLOCK(keg);
 	/*
 	 * If any keg left the full state, clear the flag on the zone as
 	 * well and wake threads sleeping on the zone.
 	 */
 	if (clearfull) {
 		ZONE_LOCK(zone);
 		zone->uz_flags &= ~UMA_ZFLAG_FULL;
 		wakeup(zone);
 		ZONE_UNLOCK(zone);
 	}
 
 }
 
 /*
  * Frees a single item to any zone, bypassing the per-CPU caches.
  *
  * Arguments:
  *	zone   The zone to free to
  *	item   The item we're freeing
  *	udata  User supplied data for the dtor
  *	skip   How much teardown to skip: the dtor runs only below
  *	       SKIP_DTOR and the fini only below SKIP_FINI
  */
 static void
 zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
 {
 
 #ifdef INVARIANTS
 	/* Debug bookkeeping is only done when nothing is being skipped. */
 	if (skip == SKIP_NONE) {
 		if ((zone->uz_flags & UMA_ZONE_MALLOC) != 0)
 			uma_dbg_free(zone, udata, item);
 		else
 			uma_dbg_free(zone, NULL, item);
 	}
 #endif
 	if (zone->uz_dtor != NULL && skip < SKIP_DTOR)
 		zone->uz_dtor(item, zone->uz_size, udata);
 
 	if (zone->uz_fini != NULL && skip < SKIP_FINI)
 		zone->uz_fini(item, zone->uz_size);
 
 	/* Count the free, then hand the item to the release routine. */
 	atomic_add_long(&zone->uz_frees, 1);
 	zone->uz_release(zone->uz_arg, &item, 1);
 }
 
 /*
  * See uma.h
  *
  * Set the item limit of the zone's first keg.  The request is rounded up
  * to a whole number of slabs and stored as a page limit.
  *
  * Arguments
  *	zone    The zone whose first keg is limited.
  *	nitems  The requested maximum number of items.
  *
  * Returns
  *	The effective item limit after rounding, or 0 if the zone has no
  *	keg.
  */
 int
 uma_zone_set_max(uma_zone_t zone, int nitems)
 {
 	uma_keg_t keg;
 	int slabs;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return (0);
 	KEG_LOCK(keg);
 	/*
 	 * Round up in units of slabs, not pages: comparing
 	 * pages * uk_ipers against nitems under-counts the need whenever a
 	 * slab spans more than one page, leaving the limit below the
 	 * request.
 	 */
 	slabs = nitems / keg->uk_ipers;
 	if (slabs * keg->uk_ipers < nitems)
 		slabs++;
 	keg->uk_maxpages = slabs * keg->uk_ppera;
 	nitems = slabs * keg->uk_ipers;
 	KEG_UNLOCK(keg);
 
 	return (nitems);
 }
 
 /*
  * See uma.h
  *
  * Report the item limit of the zone's first keg, derived from its page
  * limit.  Returns 0 if the zone has no keg.
  */
 int
 uma_zone_get_max(uma_zone_t zone)
 {
 	uma_keg_t keg;
 	int limit;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return (0);
 
 	/* Whole slabs permitted, times items per slab. */
 	KEG_LOCK(keg);
 	limit = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
 	KEG_UNLOCK(keg);
 
 	return (limit);
 }
 
 /*
  * See uma.h
  *
  * Store the warning string pointer in the zone, under the zone lock.
  * The string itself is not copied.
  */
 void
 uma_zone_set_warning(uma_zone_t zone, const char *warning)
 {
 
 	ZONE_LOCK(zone);
 	zone->uz_warning = warning;
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * See uma.h
  *
  * Initialize the zone's uz_maxaction task, under the zone lock, so that
  * it invokes the given callback with the zone as its context argument.
  */
 void
 uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
 {
 
 	ZONE_LOCK(zone);
 	TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * See uma.h
  *
  * Estimate the number of items currently allocated from the zone as
  * allocations minus frees, summed over the zone counters and every
  * per-CPU cache.  A negative sum is reported as 0.
  */
 int
 uma_zone_get_cur(uma_zone_t zone)
 {
 	int64_t nitems;
 	u_int cpu;
 
 	ZONE_LOCK(zone);
 	nitems = zone->uz_allocs - zone->uz_frees;
 	/*
 	 * See the comment in sysctl_vm_zone_stats() regarding the safety
 	 * of accessing the per-cpu caches.  With the zone lock held, it is
 	 * safe, but can potentially result in stale data.
 	 */
 	CPU_FOREACH(cpu) {
 		nitems += zone->uz_cpu[cpu].uc_allocs -
 		    zone->uz_cpu[cpu].uc_frees;
 	}
 	ZONE_UNLOCK(zone);
 
 	if (nitems < 0)
 		return (0);
 	return (nitems);
 }
 
 /*
  * See uma.h
  *
  * Install the init routine on the zone's first keg.  Asserts that the
  * zone has a keg and that the keg does not hold any pages yet.
  */
 void
 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
 {
 	uma_keg_t k;
 
 	k = zone_first_keg(zone);
 	KASSERT(k != NULL, ("uma_zone_set_init: Invalid zone type"));
 
 	KEG_LOCK(k);
 	KASSERT(k->uk_pages == 0,
 	    ("uma_zone_set_init on non-empty keg"));
 	k->uk_init = uminit;
 	KEG_UNLOCK(k);
 }
 
 /*
  * See uma.h
  *
  * Install the fini routine on the zone's first keg.  Asserts that the
  * zone has a keg and that the keg does not hold any pages yet.
  */
 void
 uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
 {
 	uma_keg_t k;
 
 	k = zone_first_keg(zone);
 	KASSERT(k != NULL, ("uma_zone_set_fini: Invalid zone type"));
 
 	KEG_LOCK(k);
 	KASSERT(k->uk_pages == 0,
 	    ("uma_zone_set_fini on non-empty keg"));
 	k->uk_fini = fini;
 	KEG_UNLOCK(k);
 }
 
 /*
  * See uma.h
  *
  * Install the zone-level init routine, under the zone lock.  Asserts
  * that the zone's first keg does not hold any pages yet.
  */
 void
 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
 {
 
 	ZONE_LOCK(zone);
 	KASSERT(zone_first_keg(zone)->uk_pages == 0,
 	    ("uma_zone_set_zinit on non-empty keg"));
 	zone->uz_init = zinit;
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * See uma.h
  *
  * Install the zone-level fini routine, under the zone lock.  Asserts
  * that the zone's first keg does not hold any pages yet.
  */
 void
 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
 {
 
 	ZONE_LOCK(zone);
 	KASSERT(zone_first_keg(zone)->uk_pages == 0,
 	    ("uma_zone_set_zfini on non-empty keg"));
 	zone->uz_fini = zfini;
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * See uma.h
  *
  * Replace the page free routine of the zone's first keg.
  *
  * XXX uk_freef is not actually used with the zone locked
  */
 void
 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
 {
 	uma_keg_t k;
 
 	k = zone_first_keg(zone);
 	KASSERT(k != NULL, ("uma_zone_set_freef: Invalid zone type"));
 
 	KEG_LOCK(k);
 	k->uk_freef = freef;
 	KEG_UNLOCK(k);
 }
 
 /*
  * See uma.h
  *
  * Replace the page allocation routine of the zone's first keg.
  *
  * XXX uk_allocf is not actually used with the zone locked
  */
 void
 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
 {
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	/*
 	 * Match the other keg setters: diagnose a zone without a keg
 	 * instead of dereferencing a NULL keg below.
 	 */
 	KASSERT(keg != NULL, ("uma_zone_set_allocf: Invalid zone type"));
 	KEG_LOCK(keg);
 	keg->uk_allocf = allocf;
 	KEG_UNLOCK(keg);
 }
 
 /*
  * See uma.h
  *
  * Record the number of reserved items on the zone's first keg.  Does
  * nothing if the zone has no keg.
  */
 void
 uma_zone_reserve(uma_zone_t zone, int items)
 {
 	uma_keg_t k;
 
 	k = zone_first_keg(zone);
 	if (k == NULL)
 		return;
 
 	KEG_LOCK(k);
 	k->uk_reserve = items;
 	KEG_UNLOCK(k);
 }
 
 /*
  * See uma.h
  *
  * Size the zone's first keg for 'count' items: compute the number of pages
  * needed, reserve kernel virtual address space for them where required,
  * cap the keg at that many pages and mark it UMA_ZONE_NOFREE.
  *
  * Returns
  *	1 on success, 0 if the zone has no keg or the KVA allocation fails.
  */
 int
 uma_zone_reserve_kva(uma_zone_t zone, int count)
 {
 	uma_keg_t keg;
 	vm_offset_t kva;
 	u_int pages;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return (0);
 	/* Number of slabs needed, rounded up ... */
 	pages = count / keg->uk_ipers;
 
 	if (pages * keg->uk_ipers < count)
 		pages++;
 	/* ... converted to pages. */
 	pages *= keg->uk_ppera;
 
 	/*
 	 * With UMA_MD_SMALL_ALLOC, KVA is only reserved for multi-page
 	 * slabs; otherwise it is always reserved.
 	 */
 #ifdef UMA_MD_SMALL_ALLOC
 	if (keg->uk_ppera > 1) {
 #else
 	if (1) {
 #endif
 		kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
 		if (kva == 0)
 			return (0);
 	} else
 		kva = 0;
 	KEG_LOCK(keg);
 	keg->uk_kva = kva;
 	keg->uk_offset = 0;
 	keg->uk_maxpages = pages;
 	/* Select the page allocator matching the decision made above. */
 #ifdef UMA_MD_SMALL_ALLOC
 	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
 #else
 	keg->uk_allocf = noobj_alloc;
 #endif
 	keg->uk_flags |= UMA_ZONE_NOFREE;
 	KEG_UNLOCK(keg);
 
 	return (1);
 }
 
 /*
  * See uma.h
  *
  * Allocate enough slabs up front to hold 'items' items and place them on
  * the free-slab lists of the zone's first keg.  Slab allocations are
  * requested from the domains in round-robin order, starting at domain 0.
  * Stops early if a slab allocation fails.
  */
 void
 uma_prealloc(uma_zone_t zone, int items)
 {
 	uma_domain_t kdom;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	int domain, nslabs;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return;
 
 	KEG_LOCK(keg);
 	/* Slabs needed, rounded up. */
 	nslabs = items / keg->uk_ipers;
 	if (nslabs * keg->uk_ipers < items)
 		nslabs++;
 
 	for (domain = 0; nslabs > 0;
 	    nslabs--, domain = (domain + 1) % vm_ndomains) {
 		slab = keg_alloc_slab(keg, zone, domain, M_WAITOK);
 		if (slab == NULL)
 			break;
 		MPASS(slab->us_keg == keg);
 		/* File the slab under the domain it actually landed in. */
 		kdom = &keg->uk_domain[slab->us_domain];
 		LIST_INSERT_HEAD(&kdom->ud_free_slab, slab, us_link);
 	}
 	KEG_UNLOCK(keg);
 }
 
 /*
  * See uma.h
  *
  * Drain every zone with uma_drain_lock held exclusively.  When free pages
  * are below the minimum, or the caller signals kmem danger, the per-CPU
  * caches are drained as well and the zones are drained a second time.
  */
 static void
 uma_reclaim_locked(bool kmem_danger)
 {
 
 	CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
 	sx_assert(&uma_drain_lock, SA_XLOCKED);
 
 	bucket_enable();
 	zone_foreach(zone_drain);
 	if (vm_page_count_min() || kmem_danger) {
 		cache_drain_safe(NULL);
 		zone_foreach(zone_drain);
 	}
 
 	/*
 	 * Draining the other zones may have freed slab headers, but the
 	 * slab zone is visited early in the pass above.  Drain it once
 	 * more so that pages which became empty in the meantime are
 	 * released.  The same applies to the bucket zones.
 	 */
 	zone_drain(slabzone);
 	bucket_zone_drain();
 }
 
 /*
  * Run a reclaim pass without the kmem-danger hint, taking and releasing
  * uma_drain_lock around it.
  */
 void
 uma_reclaim(void)
 {
 
 	sx_xlock(&uma_drain_lock);
 	uma_reclaim_locked(false);
 	sx_xunlock(&uma_drain_lock);
 }
 
 /* Non-zero while a reclaim request is pending for the worker thread. */
 static volatile int uma_reclaim_needed;
 
 /*
  * Post a reclaim request.  Only the request that raises the counter from
  * zero wakes the worker sleeping on the uma_reclaim channel.
  */
 void
 uma_reclaim_wakeup(void)
 {
 	int pending;
 
 	pending = atomic_fetchadd_int(&uma_reclaim_needed, 1);
 	if (pending == 0)
 		wakeup(uma_reclaim);
 }
 
 /*
  * Body of the reclaim worker thread; never returns.  It sleeps until a
  * reclaim request is pending, invokes the vm_lowmem event handlers, runs
  * a reclaim pass with the kmem-danger hint and clears the request.
  */
 void
 uma_reclaim_worker(void *arg __unused)
 {
 
 	for (;;) {
 		/* Wait for a request, re-checking at least every hz ticks. */
 		sx_xlock(&uma_drain_lock);
 		while (atomic_load_int(&uma_reclaim_needed) == 0) {
 			sx_sleep(uma_reclaim, &uma_drain_lock, PVM, "umarcl",
 			    hz);
 		}
 		sx_xunlock(&uma_drain_lock);
 
 		/* The event handlers run without the drain lock held. */
 		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
 
 		sx_xlock(&uma_drain_lock);
 		uma_reclaim_locked(true);
 		atomic_store_int(&uma_reclaim_needed, 0);
 		sx_xunlock(&uma_drain_lock);
 
 		/* Don't fire more than once per-second. */
 		pause("umarclslp", hz);
 	}
 }
 
 /*
  * See uma.h
  *
  * Sample the zone's UMA_ZFLAG_FULL flag under the zone lock.  Returns
  * non-zero if the flag is set.
  */
 int
 uma_zone_exhausted(uma_zone_t zone)
 {
 	int exhausted;
 
 	ZONE_LOCK(zone);
 	exhausted = (zone->uz_flags & UMA_ZFLAG_FULL);
 	ZONE_UNLOCK(zone);
 
 	return (exhausted);
 }
 
 /*
  * Same test as uma_zone_exhausted(), but reads the flag without taking
  * the zone lock.
  */
 int
 uma_zone_exhausted_nolock(uma_zone_t zone)
 {
 
 	return (zone->uz_flags & UMA_ZFLAG_FULL);
 }
 
 /*
  * Allocate 'size' bytes with kmem_malloc() (or kmem_malloc_domain() for a
  * specific domain) and describe the memory with a slab header taken from
  * slabzone.
  *
  * Returns
  *	The address of the memory, or NULL if either the slab header or
  *	the memory itself could not be allocated.
  */
 void *
 uma_large_malloc_domain(vm_size_t size, int domain, int wait)
 {
 	vm_offset_t addr;
 	uma_slab_t slab;
 
 	slab = zone_alloc_item(slabzone, NULL, domain, wait);
 	if (slab == NULL)
 		return (NULL);
 
 	if (domain != UMA_ANYDOMAIN)
 		addr = kmem_malloc_domain(domain, size, wait);
 	else
 		addr = kmem_malloc(kernel_arena, size, wait);
 
 	if (addr == 0) {
 		/* No memory: the slab header is not needed after all. */
 		zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 		return ((void *)addr);
 	}
 
 	/* Tie the header to the memory and account for the allocation. */
 	vsetslab(addr, slab);
 	slab->us_data = (void *)addr;
 	slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC;
 	slab->us_size = size;
 	slab->us_domain = vm_phys_domidx(PHYS_TO_VM_PAGE(
 	    pmap_kextract(addr)));
 	uma_total_inc(size);
 
 	return ((void *)addr);
 }
 
 /*
  * Domain-agnostic wrapper around uma_large_malloc_domain().
  */
 void *
 uma_large_malloc(vm_size_t size, int wait)
 {
 
 	return (uma_large_malloc_domain(size, UMA_ANYDOMAIN, wait));
 }
 
 /*
  * Release the memory described by the slab header with kmem_free(),
  * subtract its size from the UMA total and return the header to
  * slabzone.
  */
 void
 uma_large_free(uma_slab_t slab)
 {
 
 	KASSERT((slab->us_flags & UMA_SLAB_KERNEL) != 0,
 	    ("uma_large_free:  Memory not allocated with uma_large_malloc."));
 
 	kmem_free(kernel_arena, (vm_offset_t)slab->us_data, slab->us_size);
 	uma_total_dec(slab->us_size);
 	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 }
 
 /*
  * Zero an item of the zone.  For UMA_ZONE_PCPU zones the copy belonging
  * to each CPU is zeroed; otherwise the single item is zeroed.
  */
 static void
 uma_zero_item(void *item, uma_zone_t zone)
 {
 	int cpu;
 
 	if ((zone->uz_flags & UMA_ZONE_PCPU) == 0) {
 		bzero(item, zone->uz_size);
 		return;
 	}
 
 	CPU_FOREACH(cpu)
 		bzero(zpcpu_get_cpu(item, cpu), zone->uz_size);
 }
 
 /* Return the current value of uma_kmem_limit. */
 unsigned long
 uma_limit(void)
 {
 
 	return (uma_kmem_limit);
 }
 
 /* Overwrite uma_kmem_limit with the given value. */
 void
 uma_set_limit(unsigned long limit)
 {
 
 	uma_kmem_limit = limit;
 }
 
 /* Return the current value of uma_kmem_total. */
 unsigned long
 uma_size(void)
 {
 
 	return (uma_kmem_total);
 }
 
 /*
  * Return the difference between uma_kmem_limit and uma_kmem_total as a
  * signed value.
  */
 long
 uma_avail(void)
 {
 
 	return (uma_kmem_limit - uma_kmem_total);
 }
 
 /* Print every zone by applying uma_print_zone() through zone_foreach(). */
 void
 uma_print_stats(void)
 {
 
 	zone_foreach(uma_print_zone);
 }
 
 /* Print one line with the slab's keg, data pointer and free count. */
 static void
 slab_print(uma_slab_t slab)
 {
 
 	printf("slab: keg %p, data %p, freecount %d\n",
 	    slab->us_keg, slab->us_data, slab->us_freecount);
 }
 
 /*
  * Print the alloc and free bucket pointers of a per-CPU cache together
  * with their item counts; a missing bucket is reported with a count of 0.
  */
 static void
 cache_print(uma_cache_t cache)
 {
 	int allocs, frees;
 
 	allocs = 0;
 	if (cache->uc_allocbucket != NULL)
 		allocs = cache->uc_allocbucket->ub_cnt;
 	frees = 0;
 	if (cache->uc_freebucket != NULL)
 		frees = cache->uc_freebucket->ub_cnt;
 
 	printf("alloc: %p(%d), free: %p(%d)\n",
 	    cache->uc_allocbucket, allocs, cache->uc_freebucket, frees);
 }
 
 /*
  * Print a summary line for the keg, followed by the partial, free and
  * full slab lists of every domain.
  */
 static void
 uma_print_keg(uma_keg_t keg)
 {
 	uma_domain_t kdom;
 	uma_slab_t slab;
 	int d;
 
 	printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
 	    "out %d free %d limit %d\n",
 	    keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
 	    keg->uk_ipers, keg->uk_ppera,
 	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
 	    keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
 
 	for (d = 0; d < vm_ndomains; d++) {
 		kdom = &keg->uk_domain[d];
 
 		printf("Part slabs:\n");
 		LIST_FOREACH(slab, &kdom->ud_part_slab, us_link) {
 			slab_print(slab);
 		}
 
 		printf("Free slabs:\n");
 		LIST_FOREACH(slab, &kdom->ud_free_slab, us_link) {
 			slab_print(slab);
 		}
 
 		printf("Full slabs:\n");
 		LIST_FOREACH(slab, &kdom->ud_full_slab, us_link) {
 			slab_print(slab);
 		}
 	}
 }
 
 /*
  * Print a zone: its header line, each keg linked to it and the per-CPU
  * cache of every CPU.
  */
 void
 uma_print_zone(uma_zone_t zone)
 {
 	uma_klink_t link;
 	int cpu;
 
 	printf("zone: %s(%p) size %d flags %#x\n",
 	    zone->uz_name, zone, zone->uz_size, zone->uz_flags);
 
 	LIST_FOREACH(link, &zone->uz_kegs, kl_link) {
 		uma_print_keg(link->kl_keg);
 	}
 
 	CPU_FOREACH(cpu) {
 		printf("CPU %d Cache:\n", cpu);
 		cache_print(&zone->uz_cpu[cpu]);
 	}
 }
 
 #ifdef DDB
 /*
  * Sum statistics over a zone and its per-CPU caches.  Each result is
  * stored only if the corresponding output pointer is non-NULL.
  *
  * Arguments
  *	z           The zone to summarize.
  *	cachefreep  Receives the number of items held in per-CPU buckets.
  *	allocsp     Receives the total number of allocations.
  *	freesp      Receives the total number of frees.
  *	sleepsp     Receives the zone's sleep count.
  *
  * Note: does not update the zone statistics, as it can't safely clear the
  * per-CPU cache statistic.
  *
  * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
  * safe from off-CPU; we should modify the caches to track this information
  * directly so that we don't have to.
  */
 static void
 uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
     uint64_t *freesp, uint64_t *sleepsp)
 {
 	uma_cache_t pcpu;
 	uint64_t nallocs, nfrees, nsleeps;
 	int cached, cpu;
 
 	nallocs = 0;
 	nfrees = 0;
 	nsleeps = 0;
 	cached = 0;
 
 	/* Per-CPU contribution. */
 	CPU_FOREACH(cpu) {
 		pcpu = &z->uz_cpu[cpu];
 		if (pcpu->uc_allocbucket != NULL)
 			cached += pcpu->uc_allocbucket->ub_cnt;
 		if (pcpu->uc_freebucket != NULL)
 			cached += pcpu->uc_freebucket->ub_cnt;
 		nallocs += pcpu->uc_allocs;
 		nfrees += pcpu->uc_frees;
 	}
 
 	/* Zone-wide contribution. */
 	nallocs += z->uz_allocs;
 	nfrees += z->uz_frees;
 	nsleeps += z->uz_sleeps;
 
 	if (cachefreep != NULL)
 		*cachefreep = cached;
 	if (allocsp != NULL)
 		*allocsp = nallocs;
 	if (freesp != NULL)
 		*freesp = nfrees;
 	if (sleepsp != NULL)
 		*sleepsp = nsleeps;
 }
 #endif /* DDB */
 
 /*
  * Sysctl handler reporting the number of zones, counted by walking every
  * keg's zone list under a read lock on uma_rwlock.
  */
 static int
 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
 {
 	uma_keg_t keg;
 	uma_zone_t zone;
 	int nzones;
 
 	nzones = 0;
 	rw_rlock(&uma_rwlock);
 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
 		LIST_FOREACH(zone, &keg->uk_zones, uz_link) {
 			nzones++;
 		}
 	}
 	rw_runlock(&uma_rwlock);
 
 	return (sysctl_handle_int(oidp, &nzones, 0, req));
 }
 
 /*
  * Sysctl handler that exports zone statistics as a binary stream: one
  * uma_stream_header, then for every zone a uma_type_header followed by
  * (mp_maxid + 1) uma_percpu_stat records.  The keg and zone lists are
  * walked under a read lock on uma_rwlock, and each zone is locked while
  * its records are produced.
  *
  * Returns the error from wiring the user buffer or from finishing the
  * sbuf.
  */
 static int
 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct uma_stream_header ush;
 	struct uma_type_header uth;
 	struct uma_percpu_stat ups;
 	uma_bucket_t bucket;
 	uma_zone_domain_t zdom;
 	struct sbuf sbuf;
 	uma_cache_t cache;
 	uma_klink_t kl;
 	uma_keg_t kz;
 	uma_zone_t z;
 	uma_keg_t k;
 	int count, error, i;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
 
 	/* First pass: count the zones for the stream header. */
 	count = 0;
 	rw_rlock(&uma_rwlock);
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
 			count++;
 	}
 
 	/*
 	 * Insert stream header.
 	 */
 	bzero(&ush, sizeof(ush));
 	ush.ush_version = UMA_STREAM_VERSION;
 	ush.ush_maxcpus = (mp_maxid + 1);
 	ush.ush_count = count;
 	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
 
 	/* Second pass: emit one record group per zone. */
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 			bzero(&uth, sizeof(uth));
 			ZONE_LOCK(z);
 			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
 			uth.uth_align = kz->uk_align;
 			uth.uth_size = kz->uk_size;
 			uth.uth_rsize = kz->uk_rsize;
 			/*
 			 * NOTE(review): the page and free counts are summed
 			 * over all kegs of the zone, but uth_limit is
 			 * assigned rather than accumulated, so it reflects
 			 * the last keg visited -- confirm this is intended.
 			 */
 			LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
 				k = kl->kl_keg;
 				uth.uth_maxpages += k->uk_maxpages;
 				uth.uth_pages += k->uk_pages;
 				uth.uth_keg_free += k->uk_free;
 				uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
 				    * k->uk_ipers;
 			}
 
 			/*
 			 * A zone is secondary if it is not the first entry
 			 * on the keg's zone list.
 			 */
 			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
 			    (LIST_FIRST(&kz->uk_zones) != z))
 				uth.uth_zone_flags = UTH_ZONE_SECONDARY;
 
 			/* Items sitting in the zone's per-domain bucket lists. */
 			for (i = 0; i < vm_ndomains; i++) {
 				zdom = &z->uz_domain[i];
 				LIST_FOREACH(bucket, &zdom->uzd_buckets,
 				    ub_link)
 					uth.uth_zone_free += bucket->ub_cnt;
 			}
 			uth.uth_allocs = z->uz_allocs;
 			uth.uth_frees = z->uz_frees;
 			uth.uth_fails = z->uz_fails;
 			uth.uth_sleeps = z->uz_sleeps;
 			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
 			/*
 			 * While it is not normally safe to access the cache
 			 * bucket pointers while not on the CPU that owns the
 			 * cache, we only allow the pointers to be exchanged
 			 * without the zone lock held, not invalidated, so
 			 * accept the possible race associated with bucket
 			 * exchange during monitoring.
 			 */
 			for (i = 0; i < (mp_maxid + 1); i++) {
 				bzero(&ups, sizeof(ups));
 				/*
 				 * Internal kegs and absent CPUs still get a
 				 * record, but it stays zeroed.
 				 */
 				if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
 					goto skip;
 				if (CPU_ABSENT(i))
 					goto skip;
 				cache = &z->uz_cpu[i];
 				if (cache->uc_allocbucket != NULL)
 					ups.ups_cache_free +=
 					    cache->uc_allocbucket->ub_cnt;
 				if (cache->uc_freebucket != NULL)
 					ups.ups_cache_free +=
 					    cache->uc_freebucket->ub_cnt;
 				ups.ups_allocs = cache->uc_allocs;
 				ups.ups_frees = cache->uc_frees;
 skip:
 				(void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
 			}
 			ZONE_UNLOCK(z);
 		}
 	}
 	rw_runlock(&uma_rwlock);
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 int
 sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
 {
 	uma_zone_t zone = *(uma_zone_t *)arg1;
 	int error, max;
 
 	max = uma_zone_get_max(zone);
 	error = sysctl_handle_int(oidp, &max, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	uma_zone_set_max(zone, max);
 
 	return (0);
 }
 
 /*
  * Sysctl handler reporting a zone's current item count as computed by
  * uma_zone_get_cur().  arg1 points at the zone pointer.
  */
 int
 sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
 {
 	uma_zone_t zone;
 	int inuse;
 
 	zone = *(uma_zone_t *)arg1;
 	inuse = uma_zone_get_cur(zone);
 
 	return (sysctl_handle_int(oidp, &inuse, 0, req));
 }
 
 #ifdef INVARIANTS
 /*
  * Find the slab header for an item of the zone: through vtoslab() for
  * UMA_ZONE_VTOSLAB zones, otherwise through the first keg's hash or its
  * fixed offset from the item's masked address.
  */
 static uma_slab_t
 uma_dbg_getslab(uma_zone_t zone, void *item)
 {
 	uma_slab_t slab;
 	uma_keg_t keg;
 	uint8_t *base;
 
 	base = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
 	if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != 0)
 		return (vtoslab((vm_offset_t)base));
 
 	/*
 	 * It is safe to return the slab here even though the zone is
 	 * unlocked because the item's allocation state essentially holds
 	 * a reference.
 	 */
 	ZONE_LOCK(zone);
 	keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
 	if ((keg->uk_flags & UMA_ZONE_HASH) != 0)
 		slab = hash_sfind(&keg->uk_hash, base);
 	else
 		slab = (uma_slab_t)(base + keg->uk_pgoff);
 	ZONE_UNLOCK(zone);
 
 	return (slab);
 }
 
 /*
  * Record an allocation in the slab's debug bitmap so that uma_dbg_free()
  * can later validate the free.  Panics if the item's slab cannot be found
  * or the item is already marked as allocated.  Zones without a keg are
  * ignored.
  */
 static void
 uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
 {
 	uma_keg_t keg;
 	int idx;
 
 	if (zone_first_keg(zone) == NULL)
 		return;
 
 	/* Look the slab up when the caller did not supply it. */
 	if (slab == NULL) {
 		slab = uma_dbg_getslab(zone, item);
 		if (slab == NULL)
 			panic("uma: item %p did not belong to zone %s\n",
 			    item, zone->uz_name);
 	}
 
 	keg = slab->us_keg;
 	idx = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
 
 	if (BIT_ISSET(SLAB_SETSIZE, idx, &slab->us_debugfree))
 		panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, idx);
 	BIT_SET_ATOMIC(SLAB_SETSIZE, idx, &slab->us_debugfree);
 }
 
 /*
  * Validate a free against the slab's debug bitmap.  Panics if the item's
  * slab cannot be found, if the item index is out of range, if the address
  * is not on an item boundary, or if the item is not currently marked as
  * allocated.  Zones without a keg are ignored.
  */
 static void
 uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
 {
 	uma_keg_t keg;
 	int idx;
 
 	if (zone_first_keg(zone) == NULL)
 		return;
 
 	/* Look the slab up when the caller did not supply it. */
 	if (slab == NULL) {
 		slab = uma_dbg_getslab(zone, item);
 		if (slab == NULL)
 			panic("uma: Freed item %p did not belong to zone %s\n",
 			    item, zone->uz_name);
 	}
 
 	keg = slab->us_keg;
 	idx = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
 
 	/* The index must name an item of this slab ... */
 	if (idx >= keg->uk_ipers)
 		panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, idx);
 
 	/* ... the address must be the start of that item ... */
 	if (((idx * keg->uk_rsize) + slab->us_data) != item)
 		panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, idx);
 
 	/* ... and the item must currently be allocated. */
 	if (!BIT_ISSET(SLAB_SETSIZE, idx, &slab->us_debugfree))
 		panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, idx);
 
 	BIT_CLR_ATOMIC(SLAB_SETSIZE, idx, &slab->us_debugfree);
 }
 #endif /* INVARIANTS */
 
 #ifdef DDB
 /*
  * DDB "show uma" command: print one line per zone with its item size, the
  * number of items in use, the number of free items (per-CPU caches, keg
  * free count for non-secondary zones, and zone bucket lists), the request
  * and sleep counts and the bucket size.
  */
 DB_SHOW_COMMAND(uma, db_show_uma)
 {
 	uma_bucket_t bucket;
 	uma_keg_t kz;
 	uma_zone_t z;
 	uma_zone_domain_t zdom;
 	uint64_t allocs, frees, sleeps;
 	int cachefree, i;
 
 	db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
 	    "Free", "Requests", "Sleeps", "Bucket");
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 			/* Internal kegs are reported from zone counters only. */
 			if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
 				allocs = z->uz_allocs;
 				frees = z->uz_frees;
 				sleeps = z->uz_sleeps;
 				cachefree = 0;
 			} else
 				uma_zone_sumstat(z, &cachefree, &allocs,
 				    &frees, &sleeps);
 			/* Count the keg's free items once, not per secondary zone. */
 			if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
 			    (LIST_FIRST(&kz->uk_zones) != z)))
 				cachefree += kz->uk_free;
 			for (i = 0; i < vm_ndomains; i++) {
 				zdom = &z->uz_domain[i];
 				LIST_FOREACH(bucket, &zdom->uzd_buckets,
 				    ub_link)
 					cachefree += bucket->ub_cnt;
 			}
 			/* Every %ju argument is cast so its type matches. */
 			db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
 			    z->uz_name, (uintmax_t)kz->uk_size,
 			    (intmax_t)(allocs - frees), cachefree,
 			    (uintmax_t)allocs, (uintmax_t)sleeps,
 			    z->uz_count);
 			if (db_pager_quit)
 				return;
 		}
 	}
 }
 
 /*
  * DDB "show umacache" command: print one line per zone on the
  * uma_cachezones list with its item size, items in use, free items held
  * in per-CPU caches and zone bucket lists, request count and bucket size.
  */
 DB_SHOW_COMMAND(umacache, db_show_umacache)
 {
 	uma_bucket_t bkt;
 	uma_zone_t z;
 	uma_zone_domain_t zdom;
 	uint64_t allocs, frees;
 	int cachefree, dom;
 
 	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
 	    "Requests", "Bucket");
 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
 		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
 
 		/* Add the items parked on the zone's bucket lists. */
 		for (dom = 0; dom < vm_ndomains; dom++) {
 			zdom = &z->uz_domain[dom];
 			LIST_FOREACH(bkt, &zdom->uzd_buckets, ub_link) {
 				cachefree += bkt->ub_cnt;
 			}
 		}
 
 		db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
 		    z->uz_name, (uintmax_t)z->uz_size,
 		    (intmax_t)(allocs - frees), cachefree,
 		    (uintmax_t)allocs, z->uz_count);
 		if (db_pager_quit)
 			return;
 	}
 }
 #endif	/* DDB */
Index: head/sys/vm/vm_domainset.c
===================================================================
--- head/sys/vm/vm_domainset.c	(revision 327953)
+++ head/sys/vm/vm_domainset.c	(revision 327954)
@@ -1,243 +1,277 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2017,	Jeffrey Roberson <jeff@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitset.h>
 #include <sys/domainset.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_domainset.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_phys.h>
 
+#ifdef NUMA
 /*
  * Iterators are written such that the first nowait pass has as short a
  * codepath as possible to eliminate bloat from the allocator.  It is
  * assumed that most allocations are successful.
  */
 
 /*
  * Select the domainset policy and the iteration cursor that this
  * allocation will use and record both in the iterator.
  */
 static void
 vm_domainset_iter_domain(struct vm_domainset_iter *di, struct vm_object *obj)
 {
 	struct domainset *policy;
 
 	/*
 	 * An object policy, when one is set, overrides the thread policy.
 	 * The policies are immutable and unsynchronized.  Updates can race
 	 * but pointer loads are assumed to be atomic, so the object's
 	 * policy pointer is loaded exactly once.
 	 */
 	policy = (obj != NULL) ? obj->domain.dr_policy : NULL;
 	if (policy == NULL) {
 		/* No object, or no object policy: use the current thread's. */
 		di->di_domain = curthread->td_domain.dr_policy;
 		di->di_iter = &curthread->td_domain.dr_iterator;
 		return;
 	}
 	di->di_domain = policy;
 	di->di_iter = &obj->domain.dr_iterator;
 }
 
 /*
  * Round-robin step: starting after the cursor (*di->di_iter), find the next
  * domain number, wrapping at ds_max, that is a member of the policy's mask.
  * The result is stored both in the cursor and in *domain.
  */
 static void
 vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
 {
 	int next;
 
 	next = *di->di_iter;
 	for (;;) {
 		next = (next + 1) % di->di_domain->ds_max;
 		if (DOMAINSET_ISSET(next, &di->di_domain->ds_mask))
 			break;
 	}
 	*domain = next;
 	*di->di_iter = next;
 }
 
 /*
  * Like the round-robin step, but never yields the policy's preferred
  * domain: vm_domainset_iter_first() already hands out ds_prefer as the
  * first candidate for DOMAINSET_POLICY_PREFER.
  */
 static void
 vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain)
 {
 	int next;
 
 	next = *di->di_iter;
 	for (;;) {
 		next = (next + 1) % di->di_domain->ds_max;
 		/* The preferred domain is skipped on every lap. */
 		if (next == di->di_domain->ds_prefer)
 			continue;
 		if (DOMAINSET_ISSET(next, &di->di_domain->ds_mask))
 			break;
 	}
 	*domain = next;
 	*di->di_iter = next;
 }
 
 /*
  * Select the next domain to try within the current pass.
  *
  * di->di_n must still be positive.  First-touch and round-robin policies
  * both advance with vm_domainset_iter_rr(); the prefer policy advances with
  * vm_domainset_iter_prefer().  The chosen domain is written to *domain.
  *
  * The assertion and panic messages name this function so that a failure
  * here is not mistaken for one in vm_domainset_iter_first().
  */
 static void
 vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
 {
 
 	KASSERT(di->di_n > 0,
 	    ("vm_domainset_iter_next: Invalid n %d", di->di_n));
 	switch (di->di_domain->ds_policy) {
 	case DOMAINSET_POLICY_FIRSTTOUCH:
 		/*
 		 * To prevent impossible allocations we convert an invalid
 		 * first-touch to round-robin.
 		 */
 		/* FALLTHROUGH */
 	case DOMAINSET_POLICY_ROUNDROBIN:
 		vm_domainset_iter_rr(di, domain);
 		break;
 	case DOMAINSET_POLICY_PREFER:
 		vm_domainset_iter_prefer(di, domain);
 		break;
 	default:
 		panic("vm_domainset_iter_next: Unknown policy %d",
 		    di->di_domain->ds_policy);
 	}
 	KASSERT(*domain < vm_ndomains,
 	    ("vm_domainset_iter_next: Invalid domain %d", *domain));
 }
 
 /*
  * Pick the first domain to try for the iterator's policy and set di_n, the
  * number of domains that may be visited before the pass is exhausted
  * (the callers decrement di_n once per step).  The chosen domain is
  * written to *domain.
  */
 static void
 vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
 {
 
 	switch (di->di_domain->ds_policy) {
 	case DOMAINSET_POLICY_FIRSTTOUCH:
 		*domain = PCPU_GET(domain);
 		if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) {
 			/* Only this single domain is tried in the pass. */
 			di->di_n = 1;
 			break;
 		}
 		/*
 		 * To prevent impossible allocations we convert an invalid
 		 * first-touch to round-robin.
 		 */
 		/* FALLTHROUGH */
 	case DOMAINSET_POLICY_ROUNDROBIN:
 		di->di_n = di->di_domain->ds_cnt;
 		vm_domainset_iter_rr(di, domain);
 		break;
 	case DOMAINSET_POLICY_PREFER:
 		/*
 		 * Start with the preferred domain; later steps go through
 		 * vm_domainset_iter_prefer(), which skips it.
 		 */
 		*domain = di->di_domain->ds_prefer;
 		di->di_n = di->di_domain->ds_cnt;
 		break;
 	default:
 		panic("vm_domainset_iter_first: Unknown policy %d",
 		    di->di_domain->ds_policy);
 	}
 	KASSERT(di->di_n > 0,
 	    ("vm_domainset_iter_first: Invalid n %d", di->di_n));
 	KASSERT(*domain < vm_ndomains,
 	    ("vm_domainset_iter_first: Invalid domain %d", *domain));
 }
 
 /*
  * Start a page-allocation iteration.  The caller's request flags are saved
  * in di->di_flags, *req is rewritten to a non-sleeping request
  * (VM_ALLOC_WAITOK and VM_ALLOC_WAITFAIL cleared, VM_ALLOC_NOWAIT set), and
  * the first domain to try is returned in *domain.
  */
 void
 vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
     int *domain, int *req)
 {
 	int nowait_req;
 
 	vm_domainset_iter_domain(di, obj);
 
 	/* Keep the original request so a later pass can restore it. */
 	di->di_flags = *req;
 	nowait_req = di->di_flags;
 	nowait_req &= ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
 	nowait_req |= VM_ALLOC_NOWAIT;
 	*req = nowait_req;
 
 	vm_domainset_iter_first(di, domain);
 }
 
 /*
  * Advance a page-allocation iterator to its next candidate.
  *
  * Returns 0 when the caller should try again with the updated *domain
  * (and, when the blocking pass starts, the updated *req); returns ENOMEM
  * when there is nothing left to try.
  */
 int
 vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
 {
 
 	/*
 	 * If we exhausted all options with NOWAIT and did a WAITFAIL it
 	 * is time to return an error to the caller.
 	 */
 	if ((*req & VM_ALLOC_WAITFAIL) != 0)
 		return (ENOMEM);
 
 	/* If there are more domains to visit we run the iterator. */
 	if (--di->di_n != 0) {
 		vm_domainset_iter_next(di, domain);
 		return (0);
 	}
 
 	/* If we visited all domains and this was a NOWAIT we return error. */
 	if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
 		return (ENOMEM);
 
 	/*
 	 * We have visited all domains with non-blocking allocations, try
 	 * from the beginning with a blocking allocation.
 	 */
 	vm_domainset_iter_first(di, domain);
 	/* Restore the request flags saved by vm_domainset_iter_page_init(). */
 	*req = di->di_flags;
 
 	return (0);
 }
 
 
 /*
  * Start a malloc-style iteration.  The caller's flags are saved in
  * di->di_flags, *flags is rewritten to a non-sleeping request (M_WAITOK
  * cleared, M_NOWAIT set), and the first domain to try is returned in
  * *domain.
  */
 void
 vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
     struct vm_object *obj, int *domain, int *flags)
 {
 	int nowait_flags;
 
 	vm_domainset_iter_domain(di, obj);
 
 	/* Keep the original flags so a later pass can restore them. */
 	di->di_flags = *flags;
 	nowait_flags = di->di_flags;
 	nowait_flags &= ~M_WAITOK;
 	nowait_flags |= M_NOWAIT;
 	*flags = nowait_flags;
 
 	vm_domainset_iter_first(di, domain);
 }
 
 /*
  * Advance a malloc-style iterator to its next candidate.
  *
  * Returns 0 when the caller should try again with the updated *domain
  * (and, when the blocking pass starts, the updated *flags); returns ENOMEM
  * when every domain was visited and the original request lacked M_WAITOK.
  */
 int
 vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
 {
 
 	/* If there are more domains to visit we run the iterator. */
 	if (--di->di_n != 0) {
 		vm_domainset_iter_next(di, domain);
 		return (0);
 	}
 
 	/* If we visited all domains and this was a NOWAIT we return error. */
 	if ((di->di_flags & M_WAITOK) == 0)
 		return (ENOMEM);
 
 	/*
 	 * We have visited all domains with non-blocking allocations, try
 	 * from the beginning with a blocking allocation.
 	 */
 	vm_domainset_iter_first(di, domain);
 	/* Restore the flags saved by vm_domainset_iter_malloc_init(). */
 	*flags = di->di_flags;
 
 	return (0);
 }
+
+#else /* !NUMA */
+int
+vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
+{
+	/* !NUMA: never returns 0; di, *domain and *req are left untouched. */
+	return (EJUSTRETURN);
+}
+
+void
+vm_domainset_iter_page_init(struct vm_domainset_iter *di,
+    struct vm_object *obj, int *domain, int *req)
+{
+	/* !NUMA: always domain 0; di, obj and *req are not used. */
+	*domain = 0;
+}
+
+int
+vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
+{
+	/* !NUMA: never returns 0; di, *domain and *flags are left untouched. */
+	return (EJUSTRETURN);
+}
+
+void
+vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
+            struct vm_object *obj, int *domain, int *flags)
+{
+	/* !NUMA: always domain 0; di, obj and *flags are not used. */
+	*domain = 0;
+}
+
+#endif
Index: head/sys/vm/vm_pageout.c
===================================================================
--- head/sys/vm/vm_pageout.c	(revision 327953)
+++ head/sys/vm/vm_pageout.c	(revision 327954)
@@ -1,1987 +1,1983 @@
 /*-
  * SPDX-License-Identifier: (BSD-4-Clause AND MIT-CMU)
  *
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  * Copyright (c) 2005 Yahoo! Technologies Norway AS
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /*
  *	The proverbial page-out daemon.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/kthread.h>
 #include <sys/ktr.h>
 #include <sys/mount.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/smp.h>
 #include <sys/time.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_phys.h>
 #include <vm/swap_pager.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 /*
  * System initialization
  */
 
 /* the kernel process "vm_pageout"*/
 static void vm_pageout(void);
 static void vm_pageout_init(void);
 static int vm_pageout_clean(vm_page_t m, int *numpagedout);
 static int vm_pageout_cluster(vm_page_t m);
 static bool vm_pageout_scan(struct vm_domain *vmd, int pass);
 static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
     int starting_page_shortage);
 
 SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init,
     NULL);
 
 struct proc *pageproc;
 
 static struct kproc_desc page_kp = {
 	"pagedaemon",
 	vm_pageout,
 	&pageproc
 };
 SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start,
     &page_kp);
 
 SDT_PROVIDER_DEFINE(vm);
 SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
 
 /* Pagedaemon activity rates, in subdivisions of one second. */
 #define	VM_LAUNDER_RATE		10
 #define	VM_INACT_SCAN_RATE	2
 
 int vm_pageout_deficit;		/* Estimated number of pages deficit */
 /* Free page threshold for waking up the pageout daemon (sysctl). */
 u_int vm_pageout_wakeup_thresh;
 /* Back-to-back calls to the OOM detector needed to start OOM (sysctl). */
 static int vm_pageout_oom_seq = 12;
 static bool vm_pageout_wanted;	/* Event on which pageout daemon sleeps */
 bool vm_pages_needed;		/* Are threads waiting for free pages? */
 
 /* Pending request for dirty page laundering. */
 static enum {
 	VM_LAUNDRY_IDLE,
 	VM_LAUNDRY_BACKGROUND,
 	VM_LAUNDRY_SHORTFALL
 } vm_laundry_request = VM_LAUNDRY_IDLE;
 static int vm_inactq_scans;
 
 /* Maximum active LRU update period (sysctl vm.pageout_update_period). */
 static int vm_pageout_update_period;
 /* Nonzero disallows swapout of dirty pages (sysctl). */
 static int disable_swap_pageouts;
 /* Low memory callback period (sysctl vm.lowmem_period). */
 static int lowmem_period = 10;
 static time_t lowmem_uptime;
 /*
  * Set to 1 by vm_pageout_swapon() and cleared by vm_pageout_swapoff();
  * accessed with acquire/release atomics.
  */
 static int swapdev_enabled;
 
 /* Nonzero: panic on OOM instead of killing the largest process (sysctl). */
 static int vm_panic_on_oom = 0;
 
 SYSCTL_INT(_vm, OID_AUTO, panic_on_oom,
 	CTLFLAG_RWTUN, &vm_panic_on_oom, 0,
 	"panic on out of memory instead of killing the largest process");
 
 SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh,
 	CTLFLAG_RWTUN, &vm_pageout_wakeup_thresh, 0,
 	"free page threshold for waking up the pageout daemon");
 
 SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
 	CTLFLAG_RWTUN, &vm_pageout_update_period, 0,
 	"Maximum active LRU update period");
   
 SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RWTUN, &lowmem_period, 0,
 	"Low memory callback period");
 
 SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
 	CTLFLAG_RWTUN, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
 
 static int pageout_lock_miss;
 SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
 	CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout");
 
 SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq,
 	CTLFLAG_RWTUN, &vm_pageout_oom_seq, 0,
 	"back-to-back calls to oom detector to start OOM");
 
 static int act_scan_laundry_weight = 3;
 SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RWTUN,
     &act_scan_laundry_weight, 0,
     "weight given to clean vs. dirty pages in active queue scans");
 
 static u_int vm_background_launder_target;
 SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RWTUN,
     &vm_background_launder_target, 0,
     "background laundering target, in pages");
 
 static u_int vm_background_launder_rate = 4096;
 SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN,
     &vm_background_launder_rate, 0,
     "background laundering rate, in kilobytes per second");
 
 static u_int vm_background_launder_max = 20 * 1024;
 SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RWTUN,
     &vm_background_launder_max, 0, "background laundering cap, in kilobytes");
 
 int vm_pageout_page_count = 32;
 
 int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
 SYSCTL_INT(_vm, OID_AUTO, max_wired,
 	CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
 
 static u_int isqrt(u_int num);
 static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
 static int vm_pageout_launder(struct vm_domain *vmd, int launder,
     bool in_shortfall);
 static void vm_pageout_laundry_worker(void *arg);
 static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
 
 /*
  * Prepare a dummy page that marks the caller's position in the given
  * paging queue.  Strictly speaking only PG_MARKER has to be set; as a
  * safety precaution the marker is also write busied and given a hold
  * count of one.
  */
 static void
 vm_pageout_init_marker(vm_page_t marker, u_short queue)
 {
 
 	/* Start from an all-zero page structure. */
 	bzero(marker, sizeof(struct vm_page));
 
 	marker->queue = queue;
 	marker->flags = PG_MARKER;
 	marker->hold_count = 1;
 	marker->busy_lock = VPB_SINGLE_EXCLUSIVER;
 }
 
 /*
  * vm_pageout_fallback_object_lock:
  *
  * Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is
  * known to have failed and page queue must be either PQ_ACTIVE or
  * PQ_INACTIVE.  To avoid lock order violation, unlock the page queue
  * while locking the vm object.  Use marker page to detect page queue
  * changes and maintain notion of next page on page queue.  Return
  * TRUE if no changes were detected, FALSE otherwise.  vm object is
  * locked on return.
  *
  * NOTE(review): vm_pageout_launder() also calls this function while
  * scanning a laundry queue, so the PQ_ACTIVE/PQ_INACTIVE restriction
  * stated above looks outdated -- confirm.
  *
  * The page lock and the page queue lock are dropped and reacquired here,
  * so both are held again on return together with the object lock.  *next
  * is set to the page that follows the marker once the locks are retaken.
  *
  * This function depends on both the lock portion of struct vm_object
  * and normal struct vm_page being type stable.
  */
 static boolean_t
 vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
 {
 	struct vm_page marker;
 	struct vm_pagequeue *pq;
 	boolean_t unchanged;
 	u_short queue;
 	vm_object_t object;
 
 	/* Snapshot the queue and object before any lock is released. */
 	queue = m->queue;
 	vm_pageout_init_marker(&marker, queue);
 	pq = vm_page_pagequeue(m);
 	object = m->object;
 	
 	/*
 	 * Leave the marker right behind the page, then retake the locks
 	 * with the object lock acquired first.
 	 */
 	TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
 	vm_pagequeue_unlock(pq);
 	vm_page_unlock(m);
 	VM_OBJECT_WLOCK(object);
 	vm_page_lock(m);
 	vm_pagequeue_lock(pq);
 
 	/*
 	 * The page's object might have changed, and/or the page might
 	 * have moved from its original position in the queue.  If the
 	 * page's object has changed, then the caller should abandon
 	 * processing the page because the wrong object lock was
 	 * acquired.  Use the marker's plinks.q, not the page's, to
 	 * determine if the page has been moved.  The state of the
 	 * page's plinks.q can be indeterminate; whereas, the marker's
 	 * plinks.q must be valid.
 	 */
 	*next = TAILQ_NEXT(&marker, plinks.q);
 	unchanged = m->object == object &&
 	    m == TAILQ_PREV(&marker, pglist, plinks.q);
 	KASSERT(!unchanged || m->queue == queue,
 	    ("page %p queue %d %d", m, queue, m->queue));
 	/* The stack-allocated marker must not outlive this call. */
 	TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
 	return (unchanged);
 }
 
 /*
  * Lock the page while holding the page queue lock.  Use marker page
  * to detect page queue changes and maintain notion of next page on
  * page queue.  Return TRUE if no changes were detected, FALSE
  * otherwise.  The page is locked on return. The page queue lock might
  * be dropped and reacquired.
  *
  * When the trylock succeeds, TRUE is returned immediately and *next is
  * left untouched; otherwise *next is set to the page that follows the
  * marker after the locks have been reacquired.
  *
  * This function depends on normal struct vm_page being type stable.
  */
 static boolean_t
 vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
 {
 	struct vm_page marker;
 	struct vm_pagequeue *pq;
 	boolean_t unchanged;
 	u_short queue;
 
 	vm_page_lock_assert(m, MA_NOTOWNED);
 	/* Fast path: the page lock was free, nothing was dropped. */
 	if (vm_page_trylock(m))
 		return (TRUE);
 
 	queue = m->queue;
 	vm_pageout_init_marker(&marker, queue);
 	pq = vm_page_pagequeue(m);
 
 	/*
 	 * Slow path: mark our place, drop the queue lock, and take the
 	 * page lock before retaking the queue lock.
 	 */
 	TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
 	vm_pagequeue_unlock(pq);
 	vm_page_lock(m);
 	vm_pagequeue_lock(pq);
 
 	/* Page queue might have changed. */
 	*next = TAILQ_NEXT(&marker, plinks.q);
 	unchanged = m == TAILQ_PREV(&marker, pglist, plinks.q);
 	KASSERT(!unchanged || m->queue == queue,
 	    ("page %p queue %d %d", m, queue, m->queue));
 	/* The stack-allocated marker must not outlive this call. */
 	TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
 	return (unchanged);
 }
 
 /*
  * Scan for pages at adjacent offsets within the given page's object that are
  * eligible for laundering, form a cluster of these pages and the given page,
  * and launder that cluster.
  *
  * The page must be locked and its object write locked on entry; the page
  * lock is released here.  Returns the value of vm_pageout_flush() for the
  * assembled cluster.
  */
 static int
 vm_pageout_cluster(vm_page_t m)
 {
 	vm_object_t object;
 	vm_page_t mc[2 * vm_pageout_page_count], p, pb, ps;
 	vm_pindex_t pindex;
 	int ib, is, page_base, pageout_count;
 
 	vm_page_assert_locked(m);
 	object = m->object;
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	pindex = m->pindex;
 
 	/*
 	 * We can't clean the page if it is busy or held.
 	 */
 	vm_page_assert_unbusied(m);
 	KASSERT(m->hold_count == 0, ("page %p is held", m));
 
 	pmap_remove_write(m);
 	vm_page_unlock(m);
 
 	/*
 	 * The initial page goes in the middle of mc[] so that the cluster
 	 * can grow downward (page_base decreases) and upward from it.
 	 * ib and is are the distances of the next backward and forward
 	 * candidates from pindex; ib == 0 means the backward scan is over.
 	 */
 	mc[vm_pageout_page_count] = pb = ps = m;
 	pageout_count = 1;
 	page_base = vm_pageout_page_count;
 	ib = 1;
 	is = 1;
 
 	/*
 	 * We can cluster only if the page is not clean, busy, or held, and
 	 * the page is in the laundry queue.
 	 *
 	 * During heavy mmap/modification loads the pageout
 	 * daemon can really fragment the underlying file
 	 * due to flushing pages out of order and not trying to
 	 * align the clusters (which leaves sporadic out-of-order
 	 * holes).  To solve this problem we do the reverse scan
 	 * first and attempt to align our cluster, then do a
 	 * forward scan if room remains.
 	 */
 more:
 	while (ib != 0 && pageout_count < vm_pageout_page_count) {
 		/* Do not scan below page index zero. */
 		if (ib > pindex) {
 			ib = 0;
 			break;
 		}
 		if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) {
 			ib = 0;
 			break;
 		}
 		vm_page_test_dirty(p);
 		if (p->dirty == 0) {
 			ib = 0;
 			break;
 		}
 		vm_page_lock(p);
 		if (!vm_page_in_laundry(p) ||
 		    p->hold_count != 0) {	/* may be undergoing I/O */
 			vm_page_unlock(p);
 			ib = 0;
 			break;
 		}
 		pmap_remove_write(p);
 		vm_page_unlock(p);
 		mc[--page_base] = pb = p;
 		++pageout_count;
 		++ib;
 
 		/*
 		 * We are at an alignment boundary.  Stop here, and switch
 		 * directions.  Do not clear ib.
 		 */
 		if ((pindex - (ib - 1)) % vm_pageout_page_count == 0)
 			break;
 	}
 	/* Forward scan: stop at the end of the object or the first misfit. */
 	while (pageout_count < vm_pageout_page_count && 
 	    pindex + is < object->size) {
 		if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
 			break;
 		vm_page_test_dirty(p);
 		if (p->dirty == 0)
 			break;
 		vm_page_lock(p);
 		if (!vm_page_in_laundry(p) ||
 		    p->hold_count != 0) {	/* may be undergoing I/O */
 			vm_page_unlock(p);
 			break;
 		}
 		pmap_remove_write(p);
 		vm_page_unlock(p);
 		mc[page_base + pageout_count] = ps = p;
 		++pageout_count;
 		++is;
 	}
 
 	/*
 	 * If we exhausted our forward scan, continue with the reverse scan
 	 * when possible, even past an alignment boundary.  This catches
 	 * boundary conditions.
 	 */
 	if (ib != 0 && pageout_count < vm_pageout_page_count)
 		goto more;
 
 	return (vm_pageout_flush(&mc[page_base], pageout_count,
 	    VM_PAGER_PUT_NOREUSE, 0, NULL, NULL));
 }
 
 /*
  * vm_pageout_flush() - launder the given pages
  *
  *	The given pages are laundered.  Note that we set up for the start of
  *	I/O ( i.e. busy the page ), mark it read-only, and bump the object
  *	reference count all in here rather than in the parent.  If we want
  *	the parent to do more sophisticated things we may have to change
  *	the ordering.
  *
  *	The object of mc[0] must be write locked by the caller.
  *
  *	Returned runlen is the count of pages between mreq and first
  *	page after mreq with status VM_PAGER_AGAIN.
  *	*eio is set to TRUE if pager returned VM_PAGER_ERROR or VM_PAGER_FAIL
  *	for any page in runlen set.
  *
  *	The return value counts the pages whose status was VM_PAGER_OK or
  *	VM_PAGER_PEND, plus pages of swap objects that were moved to the
  *	unswappable queue after VM_PAGER_FAIL.  prunlen and eio may be NULL.
  */
 int
 vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
     boolean_t *eio)
 {
 	vm_object_t object = mc[0]->object;
 	int pageout_status[count];
 	int numpagedout = 0;
 	int i, runlen;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	/*
 	 * Initiate I/O.  Mark the pages busy and verify that they're valid
 	 * and read-only.
 	 *
 	 * We do not have to fixup the clean/dirty bits here... we can
 	 * allow the pager to do it after the I/O completes.
 	 *
 	 * NOTE! mc[i]->dirty may be partial or fragmented due to an
 	 * edge case with file fragments.
 	 */
 	for (i = 0; i < count; i++) {
 		KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL,
 		    ("vm_pageout_flush: partially invalid page %p index %d/%d",
 			mc[i], i, count));
 		KASSERT((mc[i]->aflags & PGA_WRITEABLE) == 0,
 		    ("vm_pageout_flush: writeable page %p", mc[i]));
 		vm_page_sbusy(mc[i]);
 	}
 	/* One paging-in-progress reference per page. */
 	vm_object_pip_add(object, count);
 
 	vm_pager_put_pages(object, mc, count, flags, pageout_status);
 
 	/* Assume the whole run after mreq succeeded until AGAIN is seen. */
 	runlen = count - mreq;
 	if (eio != NULL)
 		*eio = FALSE;
 	for (i = 0; i < count; i++) {
 		vm_page_t mt = mc[i];
 
 		KASSERT(pageout_status[i] == VM_PAGER_PEND ||
 		    !pmap_page_is_write_mapped(mt),
 		    ("vm_pageout_flush: page %p is not write protected", mt));
 		switch (pageout_status[i]) {
 		case VM_PAGER_OK:
 			vm_page_lock(mt);
 			if (vm_page_in_laundry(mt))
 				vm_page_deactivate_noreuse(mt);
 			vm_page_unlock(mt);
 			/* FALLTHROUGH */
 		case VM_PAGER_PEND:
 			numpagedout++;
 			break;
 		case VM_PAGER_BAD:
 			/*
 			 * The page is outside the object's range.  We pretend
 			 * that the page out worked and clean the page, so the
 			 * changes will be lost if the page is reclaimed by
 			 * the page daemon.
 			 */
 			vm_page_undirty(mt);
 			vm_page_lock(mt);
 			if (vm_page_in_laundry(mt))
 				vm_page_deactivate_noreuse(mt);
 			vm_page_unlock(mt);
 			break;
 		case VM_PAGER_ERROR:
 		case VM_PAGER_FAIL:
 			/*
 			 * If the page couldn't be paged out to swap because the
 			 * pager wasn't able to find space, place the page in
 			 * the PQ_UNSWAPPABLE holding queue.  This is an
 			 * optimization that prevents the page daemon from
 			 * wasting CPU cycles on pages that cannot be reclaimed
 			 * because no swap device is configured.
 			 *
 			 * Otherwise, reactivate the page so that it doesn't
 			 * clog the laundry and inactive queues.  (We will try
 			 * paging it out again later.)
 			 */
 			vm_page_lock(mt);
 			if (object->type == OBJT_SWAP &&
 			    pageout_status[i] == VM_PAGER_FAIL) {
 				vm_page_unswappable(mt);
 				numpagedout++;
 			} else
 				vm_page_activate(mt);
 			vm_page_unlock(mt);
 			/* Only errors inside the current run are reported. */
 			if (eio != NULL && i >= mreq && i - mreq < runlen)
 				*eio = TRUE;
 			break;
 		case VM_PAGER_AGAIN:
 			/* Truncate the run at the first page to be retried. */
 			if (i >= mreq && i - mreq < runlen)
 				runlen = i - mreq;
 			break;
 		}
 
 		/*
 		 * If the operation is still going, leave the page busy to
 		 * block all other accesses. Also, leave the paging in
 		 * progress indicator set so that we don't attempt an object
 		 * collapse.
 		 */
 		if (pageout_status[i] != VM_PAGER_PEND) {
 			vm_object_pip_wakeup(object);
 			vm_page_sunbusy(mt);
 		}
 	}
 	if (prunlen != NULL)
 		*prunlen = runlen;
 	return (numpagedout);
 }
 
 /*
  * Record that a swap device is enabled by storing 1 to swapdev_enabled with
  * release semantics.  Both arguments are unused.
  * NOTE(review): presumably registered as a swapon event handler; the
  * registration is not visible here -- confirm.
  */
 static void
 vm_pageout_swapon(void *arg __unused, struct swdevt *sp __unused)
 {
 
 	atomic_store_rel_int(&swapdev_enabled, 1);
 }
 
 /*
  * Clear swapdev_enabled, with release semantics, when
  * swap_pager_nswapdev() reports exactly one device.  Both arguments are
  * unused.
  * NOTE(review): presumably the device being removed is still counted when
  * this runs, so a count of one means the last device is going away --
  * confirm against the swapoff path.
  */
 static void
 vm_pageout_swapoff(void *arg __unused, struct swdevt *sp __unused)
 {
 
 	if (swap_pager_nswapdev() == 1)
 		atomic_store_rel_int(&swapdev_enabled, 0);
 }
 
 /*
  * Attempt to acquire all of the necessary locks to launder a page and
  * then call through the clustering layer to PUTPAGES.  Wait a short
  * time for a vnode lock.
  *
  * Requires the page and object lock on entry, releases both before return.
  * Returns 0 on success and an errno otherwise:
  *	EDEADLK	vn_start_write() or vget() did not succeed
  *	ENOENT	the vnode no longer refers to this object
  *	ENXIO	the page changed queue, object, or offset, or became clean
  *	EBUSY	the page was busied or held while the locks were dropped
  *	EIO	vm_pageout_cluster() reported zero pages paged out
  * *numpagedout is written only when vm_pageout_cluster() is reached.
  */
 static int
 vm_pageout_clean(vm_page_t m, int *numpagedout)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	vm_object_t object;
 	vm_pindex_t pindex;
 	int error, lockmode;
 
 	vm_page_assert_locked(m);
 	object = m->object;
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	error = 0;
 	vp = NULL;
 	mp = NULL;
 
 	/*
 	 * The object is already known NOT to be dead.   It
 	 * is possible for the vget() to block the whole
 	 * pageout daemon, but the new low-memory handling
 	 * code should prevent it.
 	 *
 	 * We can't wait forever for the vnode lock, we might
 	 * deadlock due to a vn_read() getting stuck in
 	 * vm_wait while holding this vnode.  We skip the
 	 * vnode if we can't get it in a reasonable amount
 	 * of time.
 	 */
 	if (object->type == OBJT_VNODE) {
 		vm_page_unlock(m);
 		vp = object->handle;
 		if (vp->v_type == VREG &&
 		    vn_start_write(vp, &mp, V_NOWAIT) != 0) {
 			/* Nothing was acquired; skip the cleanup below. */
 			mp = NULL;
 			error = EDEADLK;
 			goto unlock_all;
 		}
 		KASSERT(mp != NULL,
 		    ("vp %p with NULL v_mount", vp));
 		/*
 		 * Take an object reference and remember the page's offset
 		 * before the object lock is dropped for vget().
 		 */
 		vm_object_reference_locked(object);
 		pindex = m->pindex;
 		VM_OBJECT_WUNLOCK(object);
 		lockmode = MNT_SHARED_WRITES(vp->v_mount) ?
 		    LK_SHARED : LK_EXCLUSIVE;
 		if (vget(vp, lockmode | LK_TIMELOCK, curthread)) {
 			/* No vnode to vput(); the object is already unlocked. */
 			vp = NULL;
 			error = EDEADLK;
 			goto unlock_mp;
 		}
 		VM_OBJECT_WLOCK(object);
 
 		/*
 		 * Ensure that the object and vnode were not disassociated
 		 * while locks were dropped.
 		 */
 		if (vp->v_object != object) {
 			error = ENOENT;
 			goto unlock_all;
 		}
 		vm_page_lock(m);
 
 		/*
 		 * While the object and page were unlocked, the page
 		 * may have been:
 		 * (1) moved to a different queue,
 		 * (2) reallocated to a different object,
 		 * (3) reallocated to a different offset, or
 		 * (4) cleaned.
 		 */
 		if (!vm_page_in_laundry(m) || m->object != object ||
 		    m->pindex != pindex || m->dirty == 0) {
 			vm_page_unlock(m);
 			error = ENXIO;
 			goto unlock_all;
 		}
 
 		/*
 		 * The page may have been busied or held while the object
 		 * and page locks were released.
 		 */
 		if (vm_page_busied(m) || m->hold_count != 0) {
 			vm_page_unlock(m);
 			error = EBUSY;
 			goto unlock_all;
 		}
 	}
 
 	/*
 	 * If a page is dirty, then it is either being washed
 	 * (but not yet cleaned) or it is still in the
 	 * laundry.  If it is still in the laundry, then we
 	 * start the cleaning operation.
 	 */
 	if ((*numpagedout = vm_pageout_cluster(m)) == 0)
 		error = EIO;
 
 unlock_all:
 	VM_OBJECT_WUNLOCK(object);
 
 unlock_mp:
 	vm_page_lock_assert(m, MA_NOTOWNED);
 	/*
 	 * A non-NULL mp means vn_start_write() succeeded and the object
 	 * reference was taken; undo both, and release the vnode if vget()
 	 * succeeded as well.
 	 */
 	if (mp != NULL) {
 		if (vp != NULL)
 			vput(vp);
 		vm_object_deallocate(object);
 		vn_finished_write(mp);
 	}
 
 	return (error);
 }
 
 /*
  * Attempt to launder the specified number of pages.
  *
  * Returns the number of pages successfully laundered.
  */
 static int
 vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
 {
 	struct vm_pagequeue *pq;
 	vm_object_t object;
 	vm_page_t m, next;
 	int act_delta, error, maxscan, numpagedout, starting_target;
 	int vnodes_skipped;
 	bool pageout_ok, queue_locked;
 
 	starting_target = launder;
 	vnodes_skipped = 0;
 
 	/*
 	 * Scan the laundry queues for pages eligible to be laundered.  We stop
 	 * once the target number of dirty pages have been laundered, or once
 	 * we've reached the end of the queue.  A single iteration of this loop
 	 * may cause more than one page to be laundered because of clustering.
 	 *
 	 * maxscan ensures that we don't re-examine requeued pages.  Any
 	 * additional pages written as part of a cluster are subtracted from
 	 * maxscan since they must be taken from the laundry queue.
 	 *
 	 * As an optimization, we avoid laundering from PQ_UNSWAPPABLE when no
 	 * swap devices are configured.
 	 */
 	if (atomic_load_acq_int(&swapdev_enabled))
 		pq = &vmd->vmd_pagequeues[PQ_UNSWAPPABLE];
 	else
 		pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
 
 scan:
 	vm_pagequeue_lock(pq);
 	maxscan = pq->pq_cnt;
 	queue_locked = true;
 	for (m = TAILQ_FIRST(&pq->pq_pl);
 	    m != NULL && maxscan-- > 0 && launder > 0;
 	    m = next) {
 		vm_pagequeue_assert_locked(pq);
 		KASSERT(queue_locked, ("unlocked laundry queue"));
 		KASSERT(vm_page_in_laundry(m),
 		    ("page %p has an inconsistent queue", m));
 		next = TAILQ_NEXT(m, plinks.q);
 		if ((m->flags & PG_MARKER) != 0)
 			continue;
 		KASSERT((m->flags & PG_FICTITIOUS) == 0,
 		    ("PG_FICTITIOUS page %p cannot be in laundry queue", m));
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 		    ("VPO_UNMANAGED page %p cannot be in laundry queue", m));
 		if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
 			vm_page_unlock(m);
 			continue;
 		}
 		object = m->object;
 		if ((!VM_OBJECT_TRYWLOCK(object) &&
 		    (!vm_pageout_fallback_object_lock(m, &next) ||
 		    m->hold_count != 0)) || vm_page_busied(m)) {
 			VM_OBJECT_WUNLOCK(object);
 			vm_page_unlock(m);
 			continue;
 		}
 
 		/*
 		 * Unlock the laundry queue, invalidating the 'next' pointer.
 		 * Use a marker to remember our place in the laundry queue.
 		 */
 		TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_laundry_marker,
 		    plinks.q);
 		vm_pagequeue_unlock(pq);
 		queue_locked = false;
 
 		/*
 		 * Invalid pages can be easily freed.  They cannot be
 		 * mapped; vm_page_free() asserts this.
 		 */
 		if (m->valid == 0)
 			goto free_page;
 
 		/*
 		 * If the page has been referenced and the object is not dead,
 		 * reactivate or requeue the page depending on whether the
 		 * object is mapped.
 		 */
 		if ((m->aflags & PGA_REFERENCED) != 0) {
 			vm_page_aflag_clear(m, PGA_REFERENCED);
 			act_delta = 1;
 		} else
 			act_delta = 0;
 		if (object->ref_count != 0)
 			act_delta += pmap_ts_referenced(m);
 		else {
 			KASSERT(!pmap_page_is_mapped(m),
 			    ("page %p is mapped", m));
 		}
 		if (act_delta != 0) {
 			if (object->ref_count != 0) {
 				VM_CNT_INC(v_reactivated);
 				vm_page_activate(m);
 
 				/*
 				 * Increase the activation count if the page
 				 * was referenced while in the laundry queue.
 				 * This makes it less likely that the page will
 				 * be returned prematurely to the inactive
 				 * queue.
  				 */
 				m->act_count += act_delta + ACT_ADVANCE;
 
 				/*
 				 * If this was a background laundering, count
 				 * activated pages towards our target.  The
 				 * purpose of background laundering is to ensure
 				 * that pages are eventually cycled through the
 				 * laundry queue, and an activation is a valid
 				 * way out.
 				 */
 				if (!in_shortfall)
 					launder--;
 				goto drop_page;
 			} else if ((object->flags & OBJ_DEAD) == 0)
 				goto requeue_page;
 		}
 
 		/*
 		 * If the page appears to be clean at the machine-independent
 		 * layer, then remove all of its mappings from the pmap in
 		 * anticipation of freeing it.  If, however, any of the page's
 		 * mappings allow write access, then the page may still be
 		 * modified until the last of those mappings are removed.
 		 */
 		if (object->ref_count != 0) {
 			vm_page_test_dirty(m);
 			if (m->dirty == 0)
 				pmap_remove_all(m);
 		}
 
 		/*
 		 * Clean pages are freed, and dirty pages are paged out unless
 		 * they belong to a dead object.  Requeueing dirty pages from
 		 * dead objects is pointless, as they are being paged out and
 		 * freed by the thread that destroyed the object.
 		 */
 		if (m->dirty == 0) {
 free_page:
 			vm_page_free(m);
 			VM_CNT_INC(v_dfree);
 		} else if ((object->flags & OBJ_DEAD) == 0) {
 			if (object->type != OBJT_SWAP &&
 			    object->type != OBJT_DEFAULT)
 				pageout_ok = true;
 			else if (disable_swap_pageouts)
 				pageout_ok = false;
 			else
 				pageout_ok = true;
 			if (!pageout_ok) {
 requeue_page:
 				vm_pagequeue_lock(pq);
 				queue_locked = true;
 				vm_page_requeue_locked(m);
 				goto drop_page;
 			}
 
 			/*
 			 * Form a cluster with adjacent, dirty pages from the
 			 * same object, and page out that entire cluster.
 			 *
 			 * The adjacent, dirty pages must also be in the
 			 * laundry.  However, their mappings are not checked
 			 * for new references.  Consequently, a recently
 			 * referenced page may be paged out.  However, that
 			 * page will not be prematurely reclaimed.  After page
 			 * out, the page will be placed in the inactive queue,
 			 * where any new references will be detected and the
 			 * page reactivated.
 			 */
 			error = vm_pageout_clean(m, &numpagedout);
 			if (error == 0) {
 				launder -= numpagedout;
 				maxscan -= numpagedout - 1;
 			} else if (error == EDEADLK) {
 				pageout_lock_miss++;
 				vnodes_skipped++;
 			}
 			goto relock_queue;
 		}
 drop_page:
 		vm_page_unlock(m);
 		VM_OBJECT_WUNLOCK(object);
 relock_queue:
 		if (!queue_locked) {
 			vm_pagequeue_lock(pq);
 			queue_locked = true;
 		}
 		next = TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q);
 		TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
 	}
 	vm_pagequeue_unlock(pq);
 
 	if (launder > 0 && pq == &vmd->vmd_pagequeues[PQ_UNSWAPPABLE]) {
 		pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
 		goto scan;
 	}
 
 	/*
 	 * Wakeup the sync daemon if we skipped a vnode in a writeable object
 	 * and we didn't launder enough pages.
 	 */
 	if (vnodes_skipped > 0 && launder > 0)
 		(void)speedup_syncer();
 
 	return (starting_target - launder);
 }
 
 /*
  * Return the integer square root of "num", i.e., the largest value whose
  * square does not exceed "num".  The result is built up one binary digit
  * at a time using only shifts, additions and subtractions.
  */
 static u_int
 isqrt(u_int num)
 {
 	u_int mask, result, trial;
 
 	/*
 	 * Begin with the largest power of four that fits in a u_int and
 	 * shrink it until it no longer exceeds the operand.
 	 */
 	mask = 1u << ((NBBY * sizeof(u_int)) - 2);
 	while (mask > num)
 		mask >>= 2;
 
 	for (result = 0; mask != 0; mask >>= 2) {
 		trial = result + mask;
 		if (num >= trial) {
 			/* This digit of the root is set. */
 			num -= trial;
 			result = (result >> 1) + mask;
 		} else
 			result >>= 1;
 	}
 	return (result);
 }
 
 /*
  * Perform the work of the laundry thread: periodically wake up and determine
  * whether any pages need to be laundered.  If so, determine the number of pages
  * that need to be laundered, and launder them.
  *
  * "arg" carries the index into vm_dom[] of the domain served by this thread,
  * passed as a pointer-sized integer.  This function never returns.
  */
 static void
 vm_pageout_laundry_worker(void *arg)
 {
 	struct vm_domain *domain;
 	struct vm_pagequeue *pq;
 	uint64_t nclean, ndirty;
 	u_int inactq_scans, last_launder;
 	int domidx, last_target, launder, shortfall, shortfall_cycle, target;
 	bool in_shortfall;
 
 	domidx = (uintptr_t)arg;
 	domain = &vm_dom[domidx];
 	pq = &domain->vmd_pagequeues[PQ_LAUNDRY];
 	KASSERT(domain->vmd_segs != 0, ("domain without segments"));
 	vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY);
 
 	shortfall = 0;
 	in_shortfall = false;
 	shortfall_cycle = 0;
 
 	/*
 	 * last_target is only consulted while a background laundering run is
 	 * in progress, and such a run records it when it starts.  Initialize
 	 * it anyway so that its value is never indeterminate, independent of
 	 * the reasoning about which branch below executes first.
 	 */
 	last_target = target = 0;
 	inactq_scans = 0;
 	last_launder = 0;
 
 	/*
 	 * Calls to these handlers are serialized by the swap syscall lock.
 	 */
 	(void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, domain,
 	    EVENTHANDLER_PRI_ANY);
 	(void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, domain,
 	    EVENTHANDLER_PRI_ANY);
 
 	/*
 	 * The pageout laundry worker is never done, so loop forever.
 	 */
 	for (;;) {
 		KASSERT(target >= 0, ("negative target %d", target));
 		KASSERT(shortfall_cycle >= 0,
 		    ("negative cycle %d", shortfall_cycle));
 		launder = 0;
 
 		/*
 		 * First determine whether we need to launder pages to meet a
 		 * shortage of free pages.
 		 */
 		if (shortfall > 0) {
 			/*
 			 * A new shortfall was posted at the end of the
 			 * previous iteration: start a fresh run that spreads
 			 * the target over shortfall_cycle iterations.
 			 */
 			in_shortfall = true;
 			shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE;
 			target = shortfall;
 		} else if (!in_shortfall)
 			goto trybackground;
 		else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) {
 			/*
 			 * We recently entered shortfall and began laundering
 			 * pages.  If we have completed that laundering run
 			 * (and we are no longer in shortfall) or we have met
 			 * our laundry target through other activity, then we
 			 * can stop laundering pages.
 			 */
 			in_shortfall = false;
 			target = 0;
 			goto trybackground;
 		}
 		last_launder = inactq_scans;
 
 		/*
 		 * Launder an equal share of the remaining target in each of
 		 * the remaining cycles.  shortfall_cycle is known to be
 		 * non-zero here, so the division is safe.
 		 */
 		launder = target / shortfall_cycle--;
 		goto dolaundry;
 
 		/*
 		 * There's no immediate need to launder any pages; see if we
 		 * meet the conditions to perform background laundering:
 		 *
 		 * 1. The ratio of dirty to clean inactive pages exceeds the
 		 *    background laundering threshold and the pagedaemon has
 		 *    been woken up to reclaim pages since our last
 		 *    laundering, or
 		 * 2. we haven't yet reached the target of the current
 		 *    background laundering run.
 		 *
 		 * The background laundering threshold is not a constant.
 		 * Instead, it is a slowly growing function of the number of
 		 * page daemon scans since the last laundering.  Thus, as the
 		 * ratio of dirty to clean inactive pages grows, the amount of
 		 * memory pressure required to trigger laundering decreases.
 		 */
 trybackground:
 		nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count;
 		ndirty = vm_cnt.v_laundry_count;
 		if (target == 0 && inactq_scans != last_launder &&
 		    ndirty * isqrt(inactq_scans - last_launder) >= nclean) {
 			target = vm_background_launder_target;
 		}
 
 		/*
 		 * We have a non-zero background laundering target.  If we've
 		 * laundered up to our maximum without observing a page daemon
 		 * request, just stop.  This is a safety belt that ensures we
 		 * don't launder an excessive amount if memory pressure is low
 		 * and the ratio of dirty to clean pages is large.  Otherwise,
 		 * proceed at the background laundering rate.
 		 *
 		 * NOTE(review): the "* PAGE_SIZE / 1024" scaling below implies
 		 * that the vm_background_launder_* tunables are not expressed
 		 * in pages; confirm the intended units against their
 		 * definitions.
 		 */
 		if (target > 0) {
 			if (inactq_scans != last_launder) {
 				last_launder = inactq_scans;
 				last_target = target;
 			} else if (last_target - target >=
 			    vm_background_launder_max * PAGE_SIZE / 1024) {
 				target = 0;
 			}
 			launder = vm_background_launder_rate * PAGE_SIZE / 1024;
 			launder /= VM_LAUNDER_RATE;
 			if (launder > target)
 				launder = target;
 		}
 
 dolaundry:
 		if (launder > 0) {
 			/*
 			 * Because of I/O clustering, the number of laundered
 			 * pages could exceed "target" by the maximum size of
 			 * a cluster minus one.  Clamp the reduction so that
 			 * "target" never becomes negative.
 			 */
 			target -= min(vm_pageout_launder(domain, launder,
 			    in_shortfall), target);
 			pause("laundp", hz / VM_LAUNDER_RATE);
 		}
 
 		/*
 		 * If we're not currently laundering pages and the page daemon
 		 * hasn't posted a new request, sleep until the page daemon
 		 * kicks us.
 		 */
 		vm_pagequeue_lock(pq);
 		if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE)
 			(void)mtx_sleep(&vm_laundry_request,
 			    vm_pagequeue_lockptr(pq), PVM, "launds", 0);
 
 		/*
 		 * If the pagedaemon has indicated that it's in shortfall, start
 		 * a shortfall laundering unless we're already in the middle of
 		 * one.  This may preempt a background laundering.
 		 */
 		if (vm_laundry_request == VM_LAUNDRY_SHORTFALL &&
 		    (!in_shortfall || shortfall_cycle == 0)) {
 			shortfall = vm_laundry_target() + vm_pageout_deficit;
 			target = 0;
 		} else
 			shortfall = 0;
 
 		/*
 		 * With no outstanding target the request has been consumed.
 		 * Also snapshot the page daemon's scan counter while the
 		 * page queue lock is still held.
 		 */
 		if (target == 0)
 			vm_laundry_request = VM_LAUNDRY_IDLE;
 		inactq_scans = vm_inactq_scans;
 		vm_pagequeue_unlock(pq);
 	}
 }
 
 /*
  *	vm_pageout_scan does the dirty work for the pageout daemon.
  *
  *	pass == 0: Update active LRU/deactivate pages
  *	pass >= 1: Free inactive pages
  *
  * "vmd" is the domain whose inactive and active queues are scanned.
  *
  * Returns true if pass was zero or enough pages were freed by the inactive
  * queue scan to meet the target.
  */
 static bool
 vm_pageout_scan(struct vm_domain *vmd, int pass)
 {
 	vm_page_t m, next;
 	struct vm_pagequeue *pq;
 	vm_object_t object;
 	long min_scan;
 	int act_delta, addl_page_shortage, deficit, inactq_shortage, maxscan;
 	int page_shortage, scan_tick, scanned, starting_page_shortage;
 	boolean_t queue_locked;
 
 	/*
 	 * If we need to reclaim memory ask kernel caches to return
 	 * some.  We rate limit to avoid thrashing.
 	 */
 	if (vmd == &vm_dom[0] && pass > 0 &&
 	    (time_uptime - lowmem_uptime) >= lowmem_period) {
 		/*
 		 * Decrease registered cache sizes.
 		 */
 		SDT_PROBE0(vm, , , vm__lowmem_scan);
 		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_PAGES);
 		/*
 		 * We do this explicitly after the caches have been
 		 * drained above.
 		 */
 		uma_reclaim();
 		lowmem_uptime = time_uptime;
 	}
 
 	/*
 	 * The addl_page_shortage is the number of temporarily
 	 * stuck pages in the inactive queue.  In other words, the
 	 * number of pages from the inactive count that should be
 	 * discounted in setting the target for the active queue scan.
 	 */
 	addl_page_shortage = 0;
 
 	/*
 	 * Calculate the number of pages that we want to free.  This number
 	 * can be negative if many pages are freed between the wakeup call to
 	 * the page daemon and this calculation.
 	 */
 	if (pass > 0) {
 		deficit = atomic_readandclear_int(&vm_pageout_deficit);
 		page_shortage = vm_paging_target() + deficit;
 	} else
 		page_shortage = deficit = 0;
 	starting_page_shortage = page_shortage;
 
 	/*
 	 * Start scanning the inactive queue for pages that we can free.  The
 	 * scan will stop when we reach the target or we have scanned the
 	 * entire queue.  (Note that m->act_count is not used to make
 	 * decisions for the inactive queue, only for the active queue.)
 	 *
 	 * The queue length is sampled only after the page queue lock has been
 	 * acquired, as is done for the active queue scan below, so that
 	 * maxscan reflects the queue that is actually about to be walked.
 	 */
 	pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
 	vm_pagequeue_lock(pq);
 	maxscan = pq->pq_cnt;
 	queue_locked = TRUE;
 	for (m = TAILQ_FIRST(&pq->pq_pl);
 	     m != NULL && maxscan-- > 0 && page_shortage > 0;
 	     m = next) {
 		vm_pagequeue_assert_locked(pq);
 		KASSERT(queue_locked, ("unlocked inactive queue"));
 		KASSERT(vm_page_inactive(m), ("Inactive queue %p", m));
 
 		VM_CNT_INC(v_pdpages);
 		next = TAILQ_NEXT(m, plinks.q);
 
 		/*
 		 * skip marker pages
 		 */
 		if (m->flags & PG_MARKER)
 			continue;
 
 		KASSERT((m->flags & PG_FICTITIOUS) == 0,
 		    ("Fictitious page %p cannot be in inactive queue", m));
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 		    ("Unmanaged page %p cannot be in inactive queue", m));
 
 		/*
 		 * The page or object lock acquisitions fail if the
 		 * page was removed from the queue or moved to a
 		 * different position within the queue.  In either
 		 * case, addl_page_shortage should not be incremented.
 		 */
 		if (!vm_pageout_page_lock(m, &next))
 			goto unlock_page;
 		else if (m->hold_count != 0) {
 			/*
 			 * Held pages are essentially stuck in the
 			 * queue.  So, they ought to be discounted
 			 * from the inactive count.  See the
 			 * calculation of inactq_shortage before the
 			 * loop over the active queue below.
 			 */
 			addl_page_shortage++;
 			goto unlock_page;
 		}
 		object = m->object;
 		if (!VM_OBJECT_TRYWLOCK(object)) {
 			if (!vm_pageout_fallback_object_lock(m, &next))
 				goto unlock_object;
 			else if (m->hold_count != 0) {
 				addl_page_shortage++;
 				goto unlock_object;
 			}
 		}
 		if (vm_page_busied(m)) {
 			/*
 			 * Don't mess with busy pages.  Leave them at
 			 * the front of the queue.  Most likely, they
 			 * are being paged out and will leave the
 			 * queue shortly after the scan finishes.  So,
 			 * they ought to be discounted from the
 			 * inactive count.
 			 */
 			addl_page_shortage++;
 unlock_object:
 			VM_OBJECT_WUNLOCK(object);
 unlock_page:
 			vm_page_unlock(m);
 			continue;
 		}
 		KASSERT(m->hold_count == 0, ("Held page %p", m));
 
 		/*
 		 * Dequeue the inactive page and unlock the inactive page
 		 * queue, invalidating the 'next' pointer.  Dequeueing the
 		 * page here avoids a later reacquisition (and release) of
 		 * the inactive page queue lock when vm_page_activate(),
 		 * vm_page_free(), or vm_page_launder() is called.  Use a
 		 * marker to remember our place in the inactive queue.
 		 */
 		TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q);
 		vm_page_dequeue_locked(m);
 		vm_pagequeue_unlock(pq);
 		queue_locked = FALSE;
 
 		/*
 		 * Invalid pages can be easily freed. They cannot be
 		 * mapped, vm_page_free() asserts this.
 		 */
 		if (m->valid == 0)
 			goto free_page;
 
 		/*
 		 * If the page has been referenced and the object is not dead,
 		 * reactivate or requeue the page depending on whether the
 		 * object is mapped.
 		 */
 		if ((m->aflags & PGA_REFERENCED) != 0) {
 			vm_page_aflag_clear(m, PGA_REFERENCED);
 			act_delta = 1;
 		} else
 			act_delta = 0;
 		if (object->ref_count != 0) {
 			act_delta += pmap_ts_referenced(m);
 		} else {
 			KASSERT(!pmap_page_is_mapped(m),
 			    ("vm_pageout_scan: page %p is mapped", m));
 		}
 		if (act_delta != 0) {
 			if (object->ref_count != 0) {
 				VM_CNT_INC(v_reactivated);
 				vm_page_activate(m);
 
 				/*
 				 * Increase the activation count if the page
 				 * was referenced while in the inactive queue.
 				 * This makes it less likely that the page will
 				 * be returned prematurely to the inactive
 				 * queue.
 				 */
 				m->act_count += act_delta + ACT_ADVANCE;
 				goto drop_page;
 			} else if ((object->flags & OBJ_DEAD) == 0) {
 				/*
 				 * The page was dequeued above, so put it back
 				 * at the tail of the inactive queue by hand.
 				 */
 				vm_pagequeue_lock(pq);
 				queue_locked = TRUE;
 				m->queue = PQ_INACTIVE;
 				TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
 				vm_pagequeue_cnt_inc(pq);
 				goto drop_page;
 			}
 		}
 
 		/*
 		 * If the page appears to be clean at the machine-independent
 		 * layer, then remove all of its mappings from the pmap in
 		 * anticipation of freeing it.  If, however, any of the page's
 		 * mappings allow write access, then the page may still be
 		 * modified until the last of those mappings are removed.
 		 */
 		if (object->ref_count != 0) {
 			vm_page_test_dirty(m);
 			if (m->dirty == 0)
 				pmap_remove_all(m);
 		}
 
 		/*
 		 * Clean pages can be freed, but dirty pages must be sent back
 		 * to the laundry, unless they belong to a dead object.
 		 * Requeueing dirty pages from dead objects is pointless, as
 		 * they are being paged out and freed by the thread that
 		 * destroyed the object.
 		 */
 		if (m->dirty == 0) {
 free_page:
 			vm_page_free(m);
 			VM_CNT_INC(v_dfree);
 			--page_shortage;
 		} else if ((object->flags & OBJ_DEAD) == 0)
 			vm_page_launder(m);
 drop_page:
 		vm_page_unlock(m);
 		VM_OBJECT_WUNLOCK(object);
 		if (!queue_locked) {
 			vm_pagequeue_lock(pq);
 			queue_locked = TRUE;
 		}
 
 		/*
 		 * Resume the scan from the marker, which must be removed
 		 * before the next iteration may insert it again.
 		 */
 		next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q);
 		TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
 	}
 	vm_pagequeue_unlock(pq);
 
 	/*
 	 * Wake up the laundry thread so that it can perform any needed
 	 * laundering.  If we didn't meet our target, we're in shortfall and
 	 * need to launder more aggressively.  If PQ_LAUNDRY is empty and no
 	 * swap devices are configured, the laundry thread has no work to do, so
 	 * don't bother waking it up.
 	 *
 	 * The laundry thread uses the number of inactive queue scans elapsed
 	 * since the last laundering to determine whether to launder again, so
 	 * keep count.
 	 */
 	if (starting_page_shortage > 0) {
 		pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY];
 		vm_pagequeue_lock(pq);
 		if (vm_laundry_request == VM_LAUNDRY_IDLE &&
 		    (pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) {
 			if (page_shortage > 0) {
 				vm_laundry_request = VM_LAUNDRY_SHORTFALL;
 				VM_CNT_INC(v_pdshortfalls);
 			} else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL)
 				vm_laundry_request = VM_LAUNDRY_BACKGROUND;
 			wakeup(&vm_laundry_request);
 		}
 		vm_inactq_scans++;
 		vm_pagequeue_unlock(pq);
 	}
 
 	/*
 	 * Wakeup the swapout daemon if we didn't free the targeted number of
 	 * pages.
 	 */
 	if (page_shortage > 0)
 		vm_swapout_run();
 
 	/*
 	 * If the inactive queue scan fails repeatedly to meet its
 	 * target, kill the largest process.
 	 */
 	vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage);
 
 	/*
 	 * Compute the number of pages we want to try to move from the
 	 * active queue to either the inactive or laundry queue.
 	 *
 	 * When scanning active pages, we make clean pages count more heavily
 	 * towards the page shortage than dirty pages.  This is because dirty
 	 * pages must be laundered before they can be reused and thus have less
 	 * utility when attempting to quickly alleviate a shortage.  However,
 	 * this weighting also causes the scan to deactivate dirty pages
 	 * more aggressively, improving the effectiveness of clustering and
 	 * ensuring that they can eventually be reused.
 	 */
 	inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count +
 	    vm_cnt.v_laundry_count / act_scan_laundry_weight) +
 	    vm_paging_target() + deficit + addl_page_shortage;
 	inactq_shortage *= act_scan_laundry_weight;
 
 	pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
 	vm_pagequeue_lock(pq);
 	maxscan = pq->pq_cnt;
 
 	/*
 	 * If we're just idle polling attempt to visit every
 	 * active page within 'update_period' seconds.
 	 */
 	scan_tick = ticks;
 	if (vm_pageout_update_period != 0) {
 		min_scan = pq->pq_cnt;
 		min_scan *= scan_tick - vmd->vmd_last_active_scan;
 		min_scan /= hz * vm_pageout_update_period;
 	} else
 		min_scan = 0;
 	if (min_scan > 0 || (inactq_shortage > 0 && maxscan > 0))
 		vmd->vmd_last_active_scan = scan_tick;
 
 	/*
 	 * Scan the active queue for pages that can be deactivated.  Update
 	 * the per-page activity counter and use it to identify deactivation
 	 * candidates.  Held pages may be deactivated.
 	 */
 	for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <
 	    min_scan || (inactq_shortage > 0 && scanned < maxscan)); m = next,
 	    scanned++) {
 		KASSERT(m->queue == PQ_ACTIVE,
 		    ("vm_pageout_scan: page %p isn't active", m));
 		next = TAILQ_NEXT(m, plinks.q);
 		if ((m->flags & PG_MARKER) != 0)
 			continue;
 		KASSERT((m->flags & PG_FICTITIOUS) == 0,
 		    ("Fictitious page %p cannot be in active queue", m));
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 		    ("Unmanaged page %p cannot be in active queue", m));
 		if (!vm_pageout_page_lock(m, &next)) {
 			vm_page_unlock(m);
 			continue;
 		}
 
 		/*
 		 * The count for page daemon pages is updated after checking
 		 * the page for eligibility.
 		 */
 		VM_CNT_INC(v_pdpages);
 
 		/*
 		 * Check to see "how much" the page has been used.
 		 */
 		if ((m->aflags & PGA_REFERENCED) != 0) {
 			vm_page_aflag_clear(m, PGA_REFERENCED);
 			act_delta = 1;
 		} else
 			act_delta = 0;
 
 		/*
 		 * Perform an unsynchronized object ref count check.  While
 		 * the page lock ensures that the page is not reallocated to
 		 * another object, in particular, one with unmanaged mappings
 		 * that cannot support pmap_ts_referenced(), two races are,
 		 * nonetheless, possible:
 		 * 1) The count was transitioning to zero, but we saw a non-
 		 *    zero value.  pmap_ts_referenced() will return zero
 		 *    because the page is not mapped.
 		 * 2) The count was transitioning to one, but we saw zero.
 		 *    This race delays the detection of a new reference.  At
 		 *    worst, we will deactivate and reactivate the page.
 		 */
 		if (m->object->ref_count != 0)
 			act_delta += pmap_ts_referenced(m);
 
 		/*
 		 * Advance or decay the act_count based on recent usage.
 		 */
 		if (act_delta != 0) {
 			m->act_count += ACT_ADVANCE + act_delta;
 			if (m->act_count > ACT_MAX)
 				m->act_count = ACT_MAX;
 		} else
 			m->act_count -= min(m->act_count, ACT_DECLINE);
 
 		/*
 		 * Move this page to the tail of the active, inactive or laundry
 		 * queue depending on usage.
 		 */
 		if (m->act_count == 0) {
 			/* Dequeue to avoid later lock recursion. */
 			vm_page_dequeue_locked(m);
 
 			/*
 			 * When not short for inactive pages, let dirty pages go
 			 * through the inactive queue before moving to the
 			 * laundry queues.  This gives them some extra time to
 			 * be reactivated, potentially avoiding an expensive
 			 * pageout.  During a page shortage, the inactive queue
 			 * is necessarily small, so we may move dirty pages
 			 * directly to the laundry queue.
 			 */
 			if (inactq_shortage <= 0)
 				vm_page_deactivate(m);
 			else {
 				/*
 				 * Calling vm_page_test_dirty() here would
 				 * require acquisition of the object's write
 				 * lock.  However, during a page shortage,
 				 * directing dirty pages into the laundry
 				 * queue is only an optimization and not a
 				 * requirement.  Therefore, we simply rely on
 				 * the opportunistic updates to the page's
 				 * dirty field by the pmap.
 				 */
 				if (m->dirty == 0) {
 					vm_page_deactivate(m);
 					inactq_shortage -=
 					    act_scan_laundry_weight;
 				} else {
 					vm_page_launder(m);
 					inactq_shortage--;
 				}
 			}
 		} else
 			vm_page_requeue_locked(m);
 		vm_page_unlock(m);
 	}
 	vm_pagequeue_unlock(pq);
 	if (pass > 0)
 		vm_swapout_run_idle();
 	return (page_shortage <= 0);
 }
 
 /* Number of page daemon threads that currently vote for an OOM kill. */
 static int vm_pageout_oom_vote;
 
 /*
  * Decide whether the page daemon for "vmd" should take part in, and
  * possibly trigger, an OOM kill.  A domain casts its vote only after
  * vm_pageout_oom_seq consecutive scans that started with a positive
  * shortage and made no progress against it.  The kill itself is performed
  * by whichever thread casts the last missing vote, since killing processes
  * before every page daemon has failed to reach its free target would be
  * premature.
  */
 static void
 vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
     int starting_page_shortage)
 {
 	int prior_votes;
 
 	/*
 	 * Count this scan as a failure only if there was a shortage to
 	 * begin with and the scan did not reduce it at all.
 	 */
 	if (starting_page_shortage > 0 &&
 	    starting_page_shortage == page_shortage)
 		vmd->vmd_oom_seq++;
 	else
 		vmd->vmd_oom_seq = 0;
 
 	if (vmd->vmd_oom_seq < vm_pageout_oom_seq) {
 		/* Not enough consecutive failures: withdraw any vote. */
 		if (vmd->vmd_oom) {
 			vmd->vmd_oom = FALSE;
 			atomic_subtract_int(&vm_pageout_oom_vote, 1);
 		}
 		return;
 	}
 
 	/*
 	 * The failure sequence restarts from zero, so the steps below are
 	 * not repeated until the threshold has been reached once more.
 	 */
 	vmd->vmd_oom_seq = 0;
 
 	if (!vmd->vmd_oom) {
 		vmd->vmd_oom = TRUE;
 		prior_votes = atomic_fetchadd_int(&vm_pageout_oom_vote, 1);
 		if (prior_votes == vm_ndomains - 1) {
 			/*
 			 * This thread completed the quorum, so it selects
 			 * and signals the victim.
 			 */
 			vm_pageout_oom(VM_OOM_MEM);
 
 			/*
 			 * Recall our vote after one round.  With vmd_oom
 			 * cleared, this page daemon votes again on a later
 			 * pass if the low memory condition persists.
 			 */
 			vmd->vmd_oom = FALSE;
 			atomic_subtract_int(&vm_pageout_oom_vote, 1);
 		}
 	}
 }
 
 /*
  * Estimate how many physical pages might be reclaimed by destroying the
  * given address space.  The OOM killer, which is the page daemon's last
  * resort after allocations have been stalled for a prolonged period, uses
  * this figure when sizing up candidate processes.
  *
  * The estimate is the sum of the resident page counts of the objects
  * mapped by the address space, subject to the following policy:
  *
  * - Private, anonymous memory is the main resource expected to be
  *   recovered by an OOM kill, so default and swap objects are counted.
  *
  * - Pages behind COW entries are typically shared and thus unlikely to be
  *   released; such an entry is counted only when the object has exactly
  *   one reference.
  *
  * - Vnode-backed objects are counted.  Write-back of their pages may have
  *   stalled, e.g., because of a memory allocation deadlock in the write
  *   path, and the pages might be freed without being written if the
  *   address space holds the last reference to an unlinked vnode.
  *
  * - OBJT_PHYS objects are counted because the address space might hold
  *   the last reference to the object.
  *
  * The caller must hold the map lock; this is asserted.
  */
 static long
 vm_pageout_oom_pagecount(struct vmspace *vmspace)
 {
 	vm_map_t map;
 	vm_map_entry_t cur;
 	vm_object_t object;
 	long npages;
 
 	map = &vmspace->vm_map;
 	KASSERT(!map->system_map, ("system map"));
 	sx_assert(&map->lock, SA_LOCKED);
 
 	npages = 0;
 	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
 		/* Submap entries are not accounted for. */
 		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
 			continue;
 
 		/*
 		 * Skip entries without a backing object, as well as COW
 		 * entries whose object is referenced from elsewhere.
 		 */
 		object = cur->object.vm_object;
 		if (object == NULL ||
 		    ((cur->eflags & MAP_ENTRY_NEEDS_COPY) != 0 &&
 		    object->ref_count != 1))
 			continue;
 
 		switch (object->type) {
 		case OBJT_DEFAULT:
 		case OBJT_SWAP:
 		case OBJT_PHYS:
 		case OBJT_VNODE:
 			npages += object->resident_page_count;
 			break;
 		default:
 			/* Other object types contribute nothing. */
 			break;
 		}
 	}
 	return (npages);
 }
 
 /*
  * Pick the eligible process with the largest estimated memory footprint
  * and kill it, or panic instead if vm_panic_on_oom is set.
  *
  * The size of a candidate is its swap usage as reported by
  * vmspace_swap_count(); when "shortage" is VM_OOM_MEM, the result of
  * vm_pageout_oom_pagecount() is added to it.  If no candidate with a
  * non-zero size is found, nothing is killed.
  */
 void
 vm_pageout_oom(int shortage)
 {
 	struct proc *p, *bigproc;
 	vm_offset_t size, bigsize;
 	struct thread *td;
 	struct vmspace *vm;
 	bool breakout;
 
 	/*
 	 * We keep the process bigproc locked once we find it to keep anyone
 	 * from messing with it; however, there is a possibility of
 	 * deadlock if process B is bigproc and one of its child processes
 	 * attempts to propagate a signal to B while we are waiting for A's
 	 * lock while walking this list.  To avoid this, we don't block on
 	 * the process lock but just skip a process if it is already locked.
 	 */
 	bigproc = NULL;
 	bigsize = 0;
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
 
 		/*
 		 * If this is a system, protected or killed process, skip it.
 		 *
 		 * NOTE(review): the "p_pid < 48" test presumably spares
 		 * low-numbered daemons while swap space remains available;
 		 * confirm the origin of the constant.
 		 */
 		if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC |
 		    P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 ||
 		    p->p_pid == 1 || P_KILLED(p) ||
 		    (p->p_pid < 48 && swap_pager_avail != 0)) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		/*
 		 * If the process is in a non-running type state,
 		 * don't touch it.  Check all the threads individually.
 		 */
 		breakout = false;
 		FOREACH_THREAD_IN_PROC(p, td) {
 			thread_lock(td);
 			if (!TD_ON_RUNQ(td) &&
 			    !TD_IS_RUNNING(td) &&
 			    !TD_IS_SLEEPING(td) &&
 			    !TD_IS_SUSPENDED(td) &&
 			    !TD_IS_SWAPPED(td)) {
 				thread_unlock(td);
 				breakout = true;
 				break;
 			}
 			thread_unlock(td);
 		}
 		if (breakout) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		/*
 		 * get the process size
 		 */
 		vm = vmspace_acquire_ref(p);
 		if (vm == NULL) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 
 		/*
 		 * Take a hold on the process before dropping both the process
 		 * lock and allproc_lock; every path below either releases the
 		 * hold or hands it over to bigproc.  (Presumably the hold
 		 * keeps "p" usable as the list iterator while allproc_lock is
 		 * not held -- verify against the PHOLD documentation.)
 		 */
 		_PHOLD_LITE(p);
 		PROC_UNLOCK(p);
 		sx_sunlock(&allproc_lock);
 
 		/* Do not block on the map lock; skip the process instead. */
 		if (!vm_map_trylock_read(&vm->vm_map)) {
 			vmspace_free(vm);
 			sx_slock(&allproc_lock);
 			PRELE(p);
 			continue;
 		}
 		size = vmspace_swap_count(vm);
 		if (shortage == VM_OOM_MEM)
 			size += vm_pageout_oom_pagecount(vm);
 		vm_map_unlock_read(&vm->vm_map);
 		vmspace_free(vm);
 		sx_slock(&allproc_lock);
 
 		/*
 		 * If this process is bigger than the biggest one,
 		 * remember it.
 		 */
 		if (size > bigsize) {
 			/* Release the hold on the previous front-runner. */
 			if (bigproc != NULL)
 				PRELE(bigproc);
 			bigproc = p;
 			bigsize = size;
 		} else {
 			PRELE(p);
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	if (bigproc != NULL) {
 		if (vm_panic_on_oom != 0)
 			panic("out of swap space");
 		PROC_LOCK(bigproc);
 		killproc(bigproc, "out of swap space");
 		sched_nice(bigproc, PRIO_MIN);
 		/* Drop the hold that was kept on the victim during the scan. */
 		_PRELE(bigproc);
 		PROC_UNLOCK(bigproc);
 		/* Wake up any threads sleeping on the free page count. */
 		wakeup(&vm_cnt.v_free_count);
 	}
 }
 
 static void
 vm_pageout_worker(void *arg)
 {
 	struct vm_domain *domain;
 	int domidx, pass;
 	bool target_met;
 
 	domidx = (uintptr_t)arg;
 	domain = &vm_dom[domidx];
 	pass = 0;
 	target_met = true;
 
 	/*
 	 * XXXKIB It could be useful to bind pageout daemon threads to
 	 * the cores belonging to the domain, from which vm_page_array
 	 * is allocated.
 	 */
 
 	KASSERT(domain->vmd_segs != 0, ("domain without segments"));
 	domain->vmd_last_active_scan = ticks;
 	vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE);
 	vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE);
 	TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl,
 	    &domain->vmd_inacthead, plinks.q);
 
 	/*
 	 * The pageout daemon worker is never done, so loop forever.
 	 */
 	while (TRUE) {
 		mtx_lock(&vm_page_queue_free_mtx);
 
 		/*
 		 * Generally, after a level >= 1 scan, if there are enough
 		 * free pages to wakeup the waiters, then they are already
 		 * awake.  A call to vm_page_free() during the scan awakened
 		 * them.  However, in the following case, this wakeup serves
 		 * to bound the amount of time that a thread might wait.
 		 * Suppose a thread's call to vm_page_alloc() fails, but
 		 * before that thread calls VM_WAIT, enough pages are freed by
 		 * other threads to alleviate the free page shortage.  The
 		 * thread will, nonetheless, wait until another page is freed
 		 * or this wakeup is performed.
 		 */
 		if (vm_pages_needed && !vm_page_count_min()) {
 			vm_pages_needed = false;
 			wakeup(&vm_cnt.v_free_count);
 		}
 
 		/*
 		 * Do not clear vm_pageout_wanted until we reach our free page
 		 * target.  Otherwise, we may be awakened over and over again,
 		 * wasting CPU time.
 		 */
 		if (vm_pageout_wanted && target_met)
 			vm_pageout_wanted = false;
 
 		/*
 		 * Might the page daemon receive a wakeup call?
 		 */
 		if (vm_pageout_wanted) {
 			/*
 			 * No.  Either vm_pageout_wanted was set by another
 			 * thread during the previous scan, which must have
 			 * been a level 0 scan, or vm_pageout_wanted was
 			 * already set and the scan failed to free enough
 			 * pages.  If we haven't yet performed a level >= 1
 			 * (page reclamation) scan, then increase the level
 			 * and scan again now.  Otherwise, sleep a bit and
 			 * try again later.
 			 */
 			mtx_unlock(&vm_page_queue_free_mtx);
 			if (pass >= 1)
 				pause("pwait", hz / VM_INACT_SCAN_RATE);
 			pass++;
 		} else {
 			/*
 			 * Yes.  If threads are still sleeping in VM_WAIT
 			 * then we immediately start a new scan.  Otherwise,
 			 * sleep until the next wakeup or until pages need to
 			 * have their reference stats updated.
 			 */
 			if (vm_pages_needed) {
 				mtx_unlock(&vm_page_queue_free_mtx);
 				if (pass == 0)
 					pass++;
 			} else if (mtx_sleep(&vm_pageout_wanted,
 			    &vm_page_queue_free_mtx, PDROP | PVM, "psleep",
 			    hz) == 0) {
 				VM_CNT_INC(v_pdwakeups);
 				pass = 1;
 			} else
 				pass = 0;
 		}
 
 		target_met = vm_pageout_scan(domain, pass);
 	}
 }
 
 /*
  *	vm_pageout_init initialises basic pageout daemon settings.
  *
  *	The free-page thresholds kept in vm_cnt are derived from the page
  *	counts already stored in vm_cnt, and tunables that are still zero
  *	receive a default.  The function takes no arguments and returns
  *	nothing; every result is written to a global.
  */
 static void
 vm_pageout_init(void)
 {
 	/*
 	 * Initialize some paging parameters.
 	 */
 	vm_cnt.v_interrupt_free_min = 2;
 	/* Use a pageout page count of 8 when there are fewer than 2000 pages. */
 	if (vm_cnt.v_page_count < 2000)
 		vm_pageout_page_count = 8;
 
 	/*
 	 * v_free_reserved needs to include enough for the largest
 	 * swap pager structures plus enough for any pv_entry structs
 	 * when paging.
 	 *
 	 * Statement order matters below: v_free_severe and v_free_target
 	 * are computed from the base v_free_min, and only afterwards is
 	 * v_free_reserved added to v_free_min and v_free_severe.
 	 */
 	if (vm_cnt.v_page_count > 1024)
 		vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200;
 	else
 		vm_cnt.v_free_min = 4;
 	vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
 	    vm_cnt.v_interrupt_free_min;
 	vm_cnt.v_free_reserved = vm_pageout_page_count +
 	    vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768);
 	vm_cnt.v_free_severe = vm_cnt.v_free_min / 2;
 	vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved;
 	vm_cnt.v_free_min += vm_cnt.v_free_reserved;
 	vm_cnt.v_free_severe += vm_cnt.v_free_reserved;
 	/* The inactive target is 1.5x the free target, at most v_free_count / 3. */
 	vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2;
 	if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3)
 		vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3;
 
 	/*
 	 * Set the default wakeup threshold to be 10% above the minimum
 	 * page limit.  This keeps the steady state out of shortfall.
 	 * The division is done first, so the result is rounded down to a
 	 * multiple of 11.
 	 */
 	vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11;
 
 	/*
 	 * Set interval in seconds for active scan.  We want to visit each
 	 * page at least once every ten minutes.  This is to prevent worst
 	 * case paging behaviors with stale active LRU.
 	 * A non-zero value that was set earlier is left alone.
 	 */
 	if (vm_pageout_update_period == 0)
 		vm_pageout_update_period = 600;
 
 	/* XXX does not really belong here */
 	if (vm_page_max_wired == 0)
 		vm_page_max_wired = vm_cnt.v_free_count / 3;
 
 	/*
 	 * Target amount of memory to move out of the laundry queue during a
 	 * background laundering.  This is proportional to the amount of system
 	 * memory.
 	 */
 	vm_background_launder_target = (vm_cnt.v_free_target -
 	    vm_cnt.v_free_min) / 10;
 }
 
 /*
  *	vm_pageout is the high level pageout daemon.
  *
  *	It initialises the swap pager, starts the helper kernel threads
  *	(the laundry worker for domain 0, one pageout worker for every
  *	domain above 0, and the UMA reclaim worker) and then runs the
  *	domain 0 pageout worker in the calling thread.  Failure to create
  *	any helper thread is fatal.
  */
 static void
 vm_pageout(void)
 {
 	int error;
-#ifdef VM_NUMA_ALLOC
 	int i;
-#endif
 
 	swap_pager_swap_init();
 	error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
 	    0, 0, "laundry: dom0");
 	if (error != 0)
 		panic("starting laundry for domain 0, error %d", error);
-#ifdef VM_NUMA_ALLOC
 	/*
 	 * Domain 0 is served by this thread (see the final call below), so
 	 * worker threads are only created for domains 1 .. vm_ndomains - 1.
 	 * The domain number is passed to the worker through its void *
 	 * argument.
 	 */
 	for (i = 1; i < vm_ndomains; i++) {
 		error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
 		    curproc, NULL, 0, 0, "dom%d", i);
 		if (error != 0) {
 			panic("starting pageout for domain %d, error %d\n",
 			    i, error);
 		}
 	}
-#endif
 	error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL,
 	    0, 0, "uma");
 	if (error != 0)
 		panic("starting uma_reclaim helper, error %d\n", error);
 	/* Become the pageout worker for domain 0. */
 	vm_pageout_worker((void *)(uintptr_t)0);
 }
 
 /*
  * Advisory wakeup of the page daemon.
  *
  * The caller must not hold the free queues mutex.  Nothing happens when a
  * wakeup is already pending (vm_pageout_wanted is set) or when the caller
  * is itself a thread of the page daemon process.
  */
 void
 pagedaemon_wakeup(void)
 {
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_NOTOWNED);
 
 	/* Already requested, or we are the page daemon: nothing to do. */
 	if (vm_pageout_wanted || curthread->td_proc == pageproc)
 		return;
 
 	vm_pageout_wanted = true;
 	wakeup(&vm_pageout_wanted);
 }
 
 /*
  * Kick the page daemon and sleep until it signals that pages were freed.
  *
  * The free queues mutex must be held on entry.  It is handed to msleep()
  * with PDROP, so this function returns with the mutex unlocked.  "pri" and
  * "wmesg" are passed through to msleep().
  */
 void
 pagedaemon_wait(int pri, const char *wmesg)
 {
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 
 	/*
 	 * An earlier advisory wakeup may have set vm_pageout_wanted while the
 	 * page daemon was running on a CPU, in which case that wakeup was
 	 * lost.  Unless both flags are already set, post a (possibly
 	 * spurious) wakeup so that the daemon is certain to learn of the
 	 * shortage.
 	 */
 	if (!(vm_pageout_wanted && vm_pages_needed)) {
 		vm_pageout_wanted = true;
 		wakeup(&vm_pageout_wanted);
 	}
 
 	/* Record that a thread is waiting, then sleep on the free count. */
 	vm_pages_needed = true;
 	msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, pri | PDROP,
 	    wmesg, 0);
 }
Index: head/sys/vm/vm_phys.c
===================================================================
--- head/sys/vm/vm_phys.c	(revision 327953)
+++ head/sys/vm/vm_phys.c	(revision 327954)
@@ -1,1276 +1,1276 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2006 Rice University
  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Alan L. Cox,
  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  *	Physical memory system implementation
  *
  * Any external functions defined by this module are only to be used by the
  * virtual memory system.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/tree.h>
 #include <sys/vmmeter.h>
 #include <sys/seq.h>
 
 #include <ddb/ddb.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_phys.h>
 
 /*
  * NOTE(review): presumably some consumer tracks segments in a bitmask the
  * width of a long, which is why the segment count is bounded by the number
  * of bits in a long -- confirm against the users of VM_PHYSSEG_MAX.
  */
 _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
     "Too many physsegs.");
 
-#ifdef VM_NUMA_ALLOC
+#ifdef NUMA
 /*
  * mem_affinity: table of physical ranges and their domains; the code in
  * this file scans it until an entry whose "end" is 0.  NULL when no
  * affinity information is available.
  * mem_locality: vm_ndomains x vm_ndomains table, indexed as
  * [from * vm_ndomains + to]; NULL when no locality information exists.
  */
 struct mem_affinity *mem_affinity;
 int *mem_locality;
 #endif
 
 /* Number of memory domains; defaults to a single domain. */
 int vm_ndomains = 1;
 
 /*
  * Table of physical memory segments and the number of valid entries.
  * _vm_phys_create_seg() inserts entries so that they stay ordered by
  * address.
  */
 struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
 int vm_phys_nsegs;
 
 /*
  * Fictitious page ranges that are not backed by vm_page_array are kept in
  * a red-black tree ordered by vm_phys_fictitious_cmp().
  */
 struct vm_phys_fictitious_seg;
 static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
     struct vm_phys_fictitious_seg *);
 
 RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
     RB_INITIALIZER(_vm_phys_fictitious_tree);
 
 /*
  * One registered fictitious range, [start, end).  A lookup key is built
  * by setting "start" to the address of interest and "end" to 0.
  */
 struct vm_phys_fictitious_seg {
 	RB_ENTRY(vm_phys_fictitious_seg) node;
 	/* Memory region data */
 	vm_paddr_t	start;
 	vm_paddr_t	end;
 	/* Array of vm_page structures describing the range. */
 	vm_page_t	first_page;
 };
 
 RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
     vm_phys_fictitious_cmp);
 
 /* Protects vm_phys_fictitious_tree. */
 static struct rwlock vm_phys_fictitious_reg_lock;
 /* Malloc type used for both the tree nodes and their page arrays. */
 MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
 
 /*
  * The free page queues, indexed by domain, free list index (flind),
  * pool and order.
  */
 static struct vm_freelist
     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
 
 /* Number of free lists actually in use; computed by vm_phys_init(). */
 static int vm_nfreelists;
 
 /*
  * Provides the mapping from VM_FREELIST_* to free list indices (flind).
  */
 static int vm_freelist_to_flind[VM_NFREELIST];
 
 CTASSERT(VM_FREELIST_DEFAULT == 0);
 
 /* Physical address limits of the ISA DMA (16MB) and DMA32 (4GB) lists. */
 #ifdef VM_FREELIST_ISADMA
 #define	VM_ISADMA_BOUNDARY	16777216
 #endif
 #ifdef VM_FREELIST_DMA32
 #define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
 #endif
 
 /*
  * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
  * the ordering of the free list boundaries.
  */
 #if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY)
 CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY);
 #endif
 #if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
 CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
 #endif
 
 /* vm.phys_free: read-only text dump of the free list occupancy. */
 static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
 SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
     NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
 
 /* vm.phys_segs: read-only text dump of the physical segment table. */
 static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
 
-#ifdef VM_NUMA_ALLOC
+#ifdef NUMA
 /* vm.phys_locality: read-only text dump of the domain locality table. */
 static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
 SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
     NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
 #endif
 
 SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
     &vm_ndomains, 0, "Number of physical memory domains available.");
 
 /* Forward declarations of file-local helpers. */
 static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary);
 static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
     int order);
 
 /*
  * Red-black tree helpers for vm fictitious range management.
  */
 static inline int
 vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
     struct vm_phys_fictitious_seg *range)
 {
 
 	KASSERT(range->start != 0 && range->end != 0,
 	    ("Invalid range passed on search for vm_fictitious page"));
 	if (p->start >= range->end)
 		return (1);
 	if (p->start < range->start)
 		return (-1);
 
 	return (0);
 }
 
 /*
  * Ordering function for the fictitious range tree.
  *
  * When "p1" is a lookup key (end == 0) the result says where the key's
  * address lies relative to the range "p2".  Otherwise both arguments are
  * full ranges being ordered for insertion, and an overlap is fatal.
  */
 static int
 vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
     struct vm_phys_fictitious_seg *p2)
 {
 
 	if (p1->end != 0) {
 		KASSERT(p2->end != 0,
     ("Invalid range passed as second parameter to vm fictitious comparison"));
 
 		/* Two real ranges: order them, they must not overlap. */
 		if (p1->end <= p2->start)
 			return (-1);
 		if (p1->start >= p2->end)
 			return (1);
 
 		panic("Trying to add overlapping vm fictitious ranges:\n"
 		    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
 		    (uintmax_t)p1->end, (uintmax_t)p2->start,
 		    (uintmax_t)p2->end);
 	}
 
 	/* "p1" is a search key for a single page address. */
 	return (vm_phys_fictitious_in_range(p1, p2));
 }
 
 /*
  * Choose a memory domain that has memory overlapping the physical address
  * range bounded by "low" and "high".
  *
  * Returns 0 when there is a single domain, when no affinity table exists,
  * or when the kernel is built without NUMA support.  Otherwise returns
  * "prefer" if it is not -1 and that domain overlaps the range, else a
  * domain taken from the set of overlapping domains via DOMAINSET_FFS()
  * (presumably the lowest-numbered one -- confirm against the macro).
  * Panics when no domain overlaps the range.
  */
 int
 vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
 {
-#ifdef VM_NUMA_ALLOC
+#ifdef NUMA
 	domainset_t mask;
 	int i;
 
 	if (vm_ndomains == 1 || mem_affinity == NULL)
 		return (0);
 
 	DOMAINSET_ZERO(&mask);
 	/*
 	 * Check for any memory that overlaps low, high.
 	 * The affinity table is terminated by an entry with end == 0.
 	 */
 	for (i = 0; mem_affinity[i].end != 0; i++)
 		if (mem_affinity[i].start <= high &&
 		    mem_affinity[i].end >= low)
 			DOMAINSET_SET(mem_affinity[i].domain, &mask);
 	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
 		return (prefer);
 	if (DOMAINSET_EMPTY(&mask))
 		panic("vm_phys_domain_match:  Impossible constraint");
 	return (DOMAINSET_FFS(&mask) - 1);
 #else
 	return (0);
 #endif
 }
 
 /*
  * Outputs the state of the physical memory allocator, specifically,
  * the amount of physical memory in each free list.
  */
 static int
 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
 	struct vm_freelist *fl;
 	int dom, error, flind, oind, pind;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
 	for (dom = 0; dom < vm_ndomains; dom++) {
 		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
 		for (flind = 0; flind < vm_nfreelists; flind++) {
 			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
 			    "\n  ORDER (SIZE)  |  NUMBER"
 			    "\n              ", flind);
 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
 				sbuf_printf(&sbuf, "  |  POOL %d", pind);
 			sbuf_printf(&sbuf, "\n--            ");
 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
 				sbuf_printf(&sbuf, "-- --      ");
 			sbuf_printf(&sbuf, "--\n");
 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
 				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
 				    1 << (PAGE_SHIFT - 10 + oind));
 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 				fl = vm_phys_free_queues[dom][flind][pind];
 					sbuf_printf(&sbuf, "  |  %6d",
 					    fl[oind].lcnt);
 				}
 				sbuf_printf(&sbuf, "\n");
 			}
 		}
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 /*
  * Outputs the set of physical memory segments.
  */
 static int
 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
 	struct vm_phys_seg *seg;
 	int error, segind;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
 		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
 		seg = &vm_phys_segs[segind];
 		sbuf_printf(&sbuf, "start:     %#jx\n",
 		    (uintmax_t)seg->start);
 		sbuf_printf(&sbuf, "end:       %#jx\n",
 		    (uintmax_t)seg->end);
 		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
 		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 /*
  * Return affinity, or -1 if there's no affinity information.
  *
  * "f" and "t" are the source and destination domain numbers.  -1 is
  * returned when the kernel has no NUMA support, when no locality table
  * was provided, or when either domain number lies outside
  * [0, vm_ndomains).
  */
 int
 vm_phys_mem_affinity(int f, int t)
 {
 
-#ifdef VM_NUMA_ALLOC
+#ifdef NUMA
 	if (mem_locality == NULL)
 		return (-1);
 	/*
 	 * Reject negative domain numbers as well as too-large ones; a
 	 * negative value would index in front of the locality table.
 	 */
 	if (f < 0 || t < 0 || f >= vm_ndomains || t >= vm_ndomains)
 		return (-1);
 	return (mem_locality[f * vm_ndomains + t]);
 #else
 	return (-1);
 #endif
 }
 
-#ifdef VM_NUMA_ALLOC
+#ifdef NUMA
 /*
  * Outputs the VM locality table.
  */
 static int
 sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
 	int error, i, j;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 
 	sbuf_printf(&sbuf, "\n");
 
 	for (i = 0; i < vm_ndomains; i++) {
 		sbuf_printf(&sbuf, "%d: ", i);
 		for (j = 0; j < vm_ndomains; j++) {
 			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
 		}
 		sbuf_printf(&sbuf, "\n");
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 #endif
 
 /*
  * Put the block headed by page "m" on the queue of the given order in
  * free list "fl", at the tail if "tail" is non-zero and at the head
  * otherwise, and record the order in the page.
  */
 static void
 vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
 {
 	struct vm_freelist *queue;
 
 	queue = &fl[order];
 	m->order = order;
 	if (tail != 0)
 		TAILQ_INSERT_TAIL(&queue->pl, m, plinks.q);
 	else
 		TAILQ_INSERT_HEAD(&queue->pl, m, plinks.q);
 	queue->lcnt++;
 }
 
 /*
  * Take the block headed by page "m" off the queue of the given order in
  * free list "fl" and mark the page as not being on any free queue
  * (order == VM_NFREEORDER).
  */
 static void
 vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
 {
 	struct vm_freelist *queue;
 
 	queue = &fl[order];
 	TAILQ_REMOVE(&queue->pl, m, plinks.q);
 	queue->lcnt--;
 	m->order = VM_NFREEORDER;
 }
 
 /*
  * Create a physical memory segment [start, end) belonging to "domain".
  *
  * The new entry is inserted into vm_phys_segs[] in front of every
  * existing segment that starts at or above "end", which keeps the table
  * ordered by address.
  */
 static void
 _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
 {
 	struct vm_phys_seg *seg;
 	int slot;
 
 	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
 	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
 	KASSERT(domain >= 0 && domain < vm_ndomains,
 	    ("vm_phys_create_seg: invalid domain provided"));
 
 	/* Open a hole by shifting higher segments up by one slot. */
 	slot = vm_phys_nsegs++;
 	for (; slot > 0 && vm_phys_segs[slot - 1].start >= end; slot--)
 		vm_phys_segs[slot] = vm_phys_segs[slot - 1];
 
 	seg = &vm_phys_segs[slot];
 	seg->start = start;
 	seg->end = end;
 	seg->domain = domain;
 }
 
 /*
  * Create the segment(s) covering [start, end), splitting the range
  * wherever it crosses from one affinity table entry to the next so that
  * each resulting segment lies within a single entry and takes its domain.
  * Without NUMA support, or without an affinity table, the whole range
  * becomes one segment in domain 0.
  *
  * Panics if the affinity table does not cover the range.
  */
 static void
 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
 {
-#ifdef VM_NUMA_ALLOC
+#ifdef NUMA
 	int i;
 
 	if (mem_affinity == NULL) {
 		_vm_phys_create_seg(start, end, 0);
 		return;
 	}
 
 	for (i = 0;; i++) {
 		/* An entry with end == 0 terminates the affinity table. */
 		if (mem_affinity[i].end == 0)
 			panic("Reached end of affinity info");
 		/* Skip entries that lie entirely below the remaining range. */
 		if (mem_affinity[i].end <= start)
 			continue;
 		if (mem_affinity[i].start > start)
 			panic("No affinity info for start %jx",
 			    (uintmax_t)start);
 		/* This entry covers the rest of the range: final segment. */
 		if (mem_affinity[i].end >= end) {
 			_vm_phys_create_seg(start, end,
 			    mem_affinity[i].domain);
 			break;
 		}
 		/* Emit the part inside this entry and continue after it. */
 		_vm_phys_create_seg(start, mem_affinity[i].end,
 		    mem_affinity[i].domain);
 		start = mem_affinity[i].end;
 	}
 #else
 	_vm_phys_create_seg(start, end, 0);
 #endif
 }
 
 /*
  * Add a physical memory segment.
  *
  * "start" and "end" must be page aligned.  The range is cut at each free
  * list boundary (ISA DMA, low memory, DMA32, where configured) that falls
  * strictly inside it, so that no created segment spans two free lists.
  */
 void
 vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
 {
 	vm_paddr_t paddr;
 
 	KASSERT((start & PAGE_MASK) == 0,
 	    ("vm_phys_add_seg: start is not page aligned"));
 	KASSERT((end & PAGE_MASK) == 0,
 	    ("vm_phys_add_seg: end is not page aligned"));
 
 	/*
 	 * Split the physical memory segment if it spans two or more free
 	 * list boundaries.  The boundaries are tested in ascending order,
 	 * with "paddr" tracking the start of the part not yet created.
 	 */
 	paddr = start;
 #ifdef	VM_FREELIST_ISADMA
 	if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) {
 		vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY);
 		paddr = VM_ISADMA_BOUNDARY;
 	}
 #endif
 #ifdef	VM_FREELIST_LOWMEM
 	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
 		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
 		paddr = VM_LOWMEM_BOUNDARY;
 	}
 #endif
 #ifdef	VM_FREELIST_DMA32
 	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
 		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
 		paddr = VM_DMA32_BOUNDARY;
 	}
 #endif
 	/* Whatever remains lies within a single free list. */
 	vm_phys_create_seg(paddr, end);
 }
 
 /*
  * Initialize the physical memory allocator.
  *
  * Requires that vm_page_array is initialized!
  *
  * Three things are set up from the segments already recorded in
  * vm_phys_segs[]: the VM_FREELIST_* to flind mapping (and vm_nfreelists),
  * the first_page and free_queues fields of every segment, and the heads
  * of all free queues.  The lock for the fictitious range tree is
  * initialized as well.
  */
 void
 vm_phys_init(void)
 {
 	struct vm_freelist *fl;
 	struct vm_phys_seg *seg;
 	u_long npages;
 	int dom, flind, freelist, oind, pind, segind;
 
 	/*
 	 * Compute the number of free lists, and generate the mapping from the
 	 * manifest constants VM_FREELIST_* to the free list indices.
 	 *
 	 * Initially, the entries of vm_freelist_to_flind[] are set to either
 	 * 0 or 1 to indicate which free lists should be created.
 	 *
 	 * The segments are visited from the highest address down, so that by
 	 * the time a segment below the DMA32 boundary is examined "npages"
 	 * already holds the number of pages counted for the default free
 	 * list.
 	 */
 	npages = 0;
 	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
 		seg = &vm_phys_segs[segind];
 #ifdef	VM_FREELIST_ISADMA
 		if (seg->end <= VM_ISADMA_BOUNDARY)
 			vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1;
 		else
 #endif
 #ifdef	VM_FREELIST_LOWMEM
 		if (seg->end <= VM_LOWMEM_BOUNDARY)
 			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
 		else
 #endif
 #ifdef	VM_FREELIST_DMA32
 		if (
 #ifdef	VM_DMA32_NPAGES_THRESHOLD
 		    /*
 		     * Create the DMA32 free list only if the amount of
 		     * physical memory above physical address 4G exceeds the
 		     * given threshold.
 		     */
 		    npages > VM_DMA32_NPAGES_THRESHOLD &&
 #endif
 		    seg->end <= VM_DMA32_BOUNDARY)
 			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
 		else
 #endif
 		{
 			npages += atop(seg->end - seg->start);
 			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
 		}
 	}
 	/* Change each entry into a running total of the free lists. */
 	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
 		vm_freelist_to_flind[freelist] +=
 		    vm_freelist_to_flind[freelist - 1];
 	}
 	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
 	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
 	/* Change each entry into a free list index. */
 	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
 		vm_freelist_to_flind[freelist]--;
 
 	/*
 	 * Initialize the first_page and free_queues fields of each physical
 	 * memory segment.
 	 *
 	 * With VM_PHYSSEG_SPARSE the segments' pages are laid out back to
 	 * back in vm_page_array, so "npages" is reused as the running index
 	 * of the next segment's first page.
 	 */
 #ifdef VM_PHYSSEG_SPARSE
 	npages = 0;
 #endif
 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
 		seg = &vm_phys_segs[segind];
 #ifdef VM_PHYSSEG_SPARSE
 		seg->first_page = &vm_page_array[npages];
 		npages += atop(seg->end - seg->start);
 #else
 		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
 #endif
 		/* Pick the free list by the same boundary tests as above. */
 #ifdef	VM_FREELIST_ISADMA
 		if (seg->end <= VM_ISADMA_BOUNDARY) {
 			flind = vm_freelist_to_flind[VM_FREELIST_ISADMA];
 			KASSERT(flind >= 0,
 			    ("vm_phys_init: ISADMA flind < 0"));
 		} else
 #endif
 #ifdef	VM_FREELIST_LOWMEM
 		if (seg->end <= VM_LOWMEM_BOUNDARY) {
 			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
 			KASSERT(flind >= 0,
 			    ("vm_phys_init: LOWMEM flind < 0"));
 		} else
 #endif
 #ifdef	VM_FREELIST_DMA32
 		if (seg->end <= VM_DMA32_BOUNDARY) {
 			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
 			KASSERT(flind >= 0,
 			    ("vm_phys_init: DMA32 flind < 0"));
 		} else
 #endif
 		{
 			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
 			KASSERT(flind >= 0,
 			    ("vm_phys_init: DEFAULT flind < 0"));
 		}
 		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
 	}
 
 	/*
 	 * Initialize the free queues.
 	 */
 	for (dom = 0; dom < vm_ndomains; dom++) {
 		for (flind = 0; flind < vm_nfreelists; flind++) {
 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 				fl = vm_phys_free_queues[dom][flind][pind];
 				for (oind = 0; oind < VM_NFREEORDER; oind++)
 					TAILQ_INIT(&fl[oind].pl);
 			}
 		}
 	}
 
 	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
 }
 
 /*
  * Split a contiguous, power of two-sized set of physical pages.
  *
  * The block of order "oind" starting at "m" is reduced to order "order";
  * at every intermediate order the upper half is put at the head of the
  * matching queue in "fl".  The lower half, still starting at "m", is what
  * remains for the caller.
  */
 static __inline void
 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
 {
 	vm_page_t upper;
 	int level;
 
 	for (level = oind - 1; level >= order; level--) {
 		upper = &m[1 << level];
 		KASSERT(upper->order == VM_NFREEORDER,
 		    ("vm_phys_split_pages: page %p has unexpected order %d",
 		    upper, upper->order));
 		vm_freelist_add(fl, upper, level, 0);
 	}
 }
 
 /*
  * Allocate a contiguous, power of two-sized set of physical pages
  * from the free lists.
  *
  * Each free list is tried in turn, starting with list 0, and the first
  * successful allocation is returned.  NULL is returned when every list
  * fails.
  *
  * The free page queues must be locked.
  */
 vm_page_t
 vm_phys_alloc_pages(int domain, int pool, int order)
 {
 	vm_page_t m;
 	int freelist;
 
 	m = NULL;
 	for (freelist = 0; m == NULL && freelist < VM_NFREELIST; freelist++)
 		m = vm_phys_alloc_freelist_pages(domain, freelist, pool,
 		    order);
 	return (m);
 }
 
 /*
  * Allocate a contiguous, power of two-sized set of physical pages from the
  * specified free list.  The free list must be specified using one of the
  * manifest constants VM_FREELIST_*.
  *
  * Returns the first page of a block of 2^order pages, or NULL if the free
  * list maps to a negative index or no sufficiently large block is free in
  * any pool of that list within "domain".
  *
  * The free page queues must be locked.
  */
 vm_page_t
 vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
 {
 	struct vm_freelist *alt, *fl;
 	vm_page_t m;
 	int oind, pind, flind;
 
 	KASSERT(domain >= 0 && domain < vm_ndomains,
 	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
 	    domain));
 	KASSERT(freelist < VM_NFREELIST,
 	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
 	    freelist));
 	KASSERT(pool < VM_NFREEPOOL,
 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
 	KASSERT(order < VM_NFREEORDER,
 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
 
 	flind = vm_freelist_to_flind[freelist];
 	/* Check if freelist is present */
 	if (flind < 0)
 		return (NULL);
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	fl = &vm_phys_free_queues[domain][flind][pool][0];
 	/*
 	 * First choice: the smallest block of at least the requested order
 	 * in the requested pool.  Any excess is split off and returned to
 	 * the same pool.
 	 */
 	for (oind = order; oind < VM_NFREEORDER; oind++) {
 		m = TAILQ_FIRST(&fl[oind].pl);
 		if (m != NULL) {
 			vm_freelist_rem(fl, m, oind);
 			vm_phys_split_pages(m, oind, fl, order);
 			return (m);
 		}
 	}
 
 	/*
 	 * The given pool was empty.  Find the largest
 	 * contiguous, power-of-two-sized set of pages in any
 	 * pool.  Transfer these pages to the given pool, and
 	 * use them to satisfy the allocation.
 	 */
 	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
 		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 			alt = &vm_phys_free_queues[domain][flind][pind][0];
 			m = TAILQ_FIRST(&alt[oind].pl);
 			if (m != NULL) {
 				vm_freelist_rem(alt, m, oind);
 				/* Re-tag the whole block before splitting. */
 				vm_phys_set_pool(pool, m, oind);
 				vm_phys_split_pages(m, oind, fl, order);
 				return (m);
 			}
 		}
 	}
 	return (NULL);
 }
 
 /*
  * Find the vm_page corresponding to the given physical address.
  *
  * The segment table is searched linearly; NULL is returned when no
  * segment contains "pa".
  */
 vm_page_t
 vm_phys_paddr_to_vm_page(vm_paddr_t pa)
 {
 	struct vm_phys_seg *seg, *limit;
 
 	limit = &vm_phys_segs[vm_phys_nsegs];
 	for (seg = vm_phys_segs; seg < limit; seg++) {
 		if (pa < seg->start || pa >= seg->end)
 			continue;
 		return (&seg->first_page[atop(pa - seg->start)]);
 	}
 	return (NULL);
 }
 
 /*
  * Find the fictitious vm_page for physical address "pa" by looking up the
  * registered range that contains it.  Returns NULL when no registered
  * range contains the address.
  */
 vm_page_t
 vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
 {
 	struct vm_phys_fictitious_seg key, *found;
 	vm_page_t page;
 
 	/* A key with end == 0 asks the comparator for a point lookup. */
 	key.start = pa;
 	key.end = 0;
 
 	rw_rlock(&vm_phys_fictitious_reg_lock);
 	found = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &key);
 	rw_runlock(&vm_phys_fictitious_reg_lock);
 
 	if (found != NULL) {
 		page = &found->first_page[atop(pa - found->start)];
 		KASSERT((page->flags & PG_FICTITIOUS) != 0,
 		    ("%p not fictitious", page));
 		return (page);
 	}
 	return (NULL);
 }
 
 /*
  * Initialize "page_count" vm_page structures starting at "range" as fake
  * pages for consecutive physical pages beginning at "start", using memory
  * attribute "memattr".  Each page then has VPO_UNMANAGED cleared and its
  * busy lock set to VPB_UNBUSIED.
  */
 static inline void
 vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
     long page_count, vm_memattr_t memattr)
 {
 	vm_page_t pg;
 	long idx;
 
 	bzero(range, page_count * sizeof(*range));
 	idx = 0;
 	while (idx < page_count) {
 		pg = &range[idx];
 		vm_page_initfake(pg, start + PAGE_SIZE * idx, memattr);
 		pg->oflags &= ~VPO_UNMANAGED;
 		pg->busy_lock = VPB_UNBUSIED;
 		idx++;
 	}
 }
 
 /*
  * Register the physical range [start, end) as fictitious pages with
  * memory attribute "memattr".
  *
  * With VM_PHYSSEG_DENSE, the part of the range that falls inside
  * vm_page_array reuses the existing vm_page structures there; only the
  * part outside gets a freshly allocated page array and a node in the
  * fictitious range tree.  A range extending past both ends of
  * vm_page_array is rejected.
  *
  * Returns 0 on success or EINVAL for the rejected case.
  */
 int
 vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
     vm_memattr_t memattr)
 {
 	struct vm_phys_fictitious_seg *seg;
 	vm_page_t fp;
 	long page_count;
 #ifdef VM_PHYSSEG_DENSE
 	long pi, pe;
 	long dpage_count;
 #endif
 
 	KASSERT(start < end,
 	    ("Start of segment isn't less than end (start: %jx end: %jx)",
 	    (uintmax_t)start, (uintmax_t)end));
 
 	page_count = (end - start) / PAGE_SIZE;
 
 #ifdef VM_PHYSSEG_DENSE
 	/* Page indices of the first page and of one past the last page. */
 	pi = atop(start);
 	pe = atop(end);
 	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
 		fp = &vm_page_array[pi - first_page];
 		if ((pe - first_page) > vm_page_array_size) {
 			/*
 			 * We have a segment that starts inside
 			 * of vm_page_array, but ends outside of it.
 			 *
 			 * Use vm_page_array pages for those that are
 			 * inside of the vm_page_array range, and
 			 * allocate the remaining ones.
 			 */
 			dpage_count = vm_page_array_size - (pi - first_page);
 			vm_phys_fictitious_init_range(fp, start, dpage_count,
 			    memattr);
 			/* Trim the front; the rest is registered below. */
 			page_count -= dpage_count;
 			start += ptoa(dpage_count);
 			goto alloc;
 		}
 		/*
 		 * We can allocate the full range from vm_page_array,
 		 * so there's no need to register the range in the tree.
 		 */
 		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
 		return (0);
 	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
 		/*
 		 * We have a segment that ends inside of vm_page_array,
 		 * but starts outside of it.
 		 */
 		fp = &vm_page_array[0];
 		dpage_count = pe - first_page;
 		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
 		    memattr);
 		/* Trim the tail; the rest is registered below. */
 		end -= ptoa(dpage_count);
 		page_count -= dpage_count;
 		goto alloc;
 	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
 		/*
 		 * Trying to register a fictitious range that expands before
 		 * and after vm_page_array.
 		 */
 		return (EINVAL);
 	} else {
 alloc:
 #endif
 		/* Page array for the part not backed by vm_page_array. */
 		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
 		    M_WAITOK);
 #ifdef VM_PHYSSEG_DENSE
 	}
 #endif
 	vm_phys_fictitious_init_range(fp, start, page_count, memattr);
 
 	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
 	seg->start = start;
 	seg->end = end;
 	seg->first_page = fp;
 
 	rw_wlock(&vm_phys_fictitious_reg_lock);
 	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
 	rw_wunlock(&vm_phys_fictitious_reg_lock);
 
 	return (0);
 }
 
 /*
  * Unregister the fictitious range [start, end) that was registered with
  * vm_phys_fictitious_reg_range().
  *
  * With VM_PHYSSEG_DENSE, the part of the range that was served from
  * vm_page_array needs no cleanup; the bounds are trimmed so that they
  * match the part that was entered into the tree.  The matching tree node
  * and its page array are then removed and freed.
  *
  * Panics if the (trimmed) range does not exactly match a registered one.
  */
 void
 vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
 {
 	struct vm_phys_fictitious_seg *seg, tmp;
 #ifdef VM_PHYSSEG_DENSE
 	long pi, pe;
 #endif
 
 	KASSERT(start < end,
 	    ("Start of segment isn't less than end (start: %jx end: %jx)",
 	    (uintmax_t)start, (uintmax_t)end));
 
 #ifdef VM_PHYSSEG_DENSE
 	pi = atop(start);
 	pe = atop(end);
 	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
 		if ((pe - first_page) <= vm_page_array_size) {
 			/*
 			 * This segment was allocated using vm_page_array
 			 * only, there's nothing to do since those pages
 			 * were never added to the tree.
 			 */
 			return;
 		}
 		/*
 		 * We have a segment that starts inside
 		 * of vm_page_array, but ends outside of it.
 		 *
 		 * Calculate how many pages were added to the
 		 * tree and free them.
 		 */
 		start = ptoa(first_page + vm_page_array_size);
 	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
 		/*
 		 * We have a segment that ends inside of vm_page_array,
 		 * but starts outside of it.
 		 */
 		end = ptoa(first_page);
 	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
 		/* Since it's not possible to register such a range, panic. */
 		panic(
 		    "Unregistering not registered fictitious range [%#jx:%#jx]",
 		    (uintmax_t)start, (uintmax_t)end);
 	}
 #endif
 	/* A key with end == 0 asks the comparator for a point lookup. */
 	tmp.start = start;
 	tmp.end = 0;
 
 	rw_wlock(&vm_phys_fictitious_reg_lock);
 	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
 	/*
 	 * RB_FIND() yields NULL when no registered range contains "start";
 	 * treat that like a mismatch so that the caller gets the
 	 * descriptive panic rather than a NULL dereference.
 	 */
 	if (seg == NULL || seg->start != start || seg->end != end) {
 		rw_wunlock(&vm_phys_fictitious_reg_lock);
 		panic(
 		    "Unregistering not registered fictitious range [%#jx:%#jx]",
 		    (uintmax_t)start, (uintmax_t)end);
 	}
 	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
 	rw_wunlock(&vm_phys_fictitious_reg_lock);
 	free(seg->first_page, M_FICT_PAGES);
 	free(seg, M_FICT_PAGES);
 }
 
 /*
  * Free a contiguous, power of two-sized set of physical pages.
  *
  * "m" is the first page of a block of 2^order pages.  The block is merged
  * with its buddy for as long as the buddy lies in the same segment and is
  * itself free at the same order; the resulting block is then appended to
  * the tail of the matching queue in "m"'s pool.
  *
  * The free page queues must be locked.
  */
 void
 vm_phys_free_pages(vm_page_t m, int order)
 {
 	struct vm_freelist *fl;
 	struct vm_phys_seg *seg;
 	vm_paddr_t pa;
 	vm_page_t m_buddy;
 
 	KASSERT(m->order == VM_NFREEORDER,
 	    ("vm_phys_free_pages: page %p has unexpected order %d",
 	    m, m->order));
 	KASSERT(m->pool < VM_NFREEPOOL,
 	    ("vm_phys_free_pages: page %p has unexpected pool %d",
 	    m, m->pool));
 	KASSERT(order < VM_NFREEORDER,
 	    ("vm_phys_free_pages: order %d is out of range", order));
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	seg = &vm_phys_segs[m->segind];
 	if (order < VM_NFREEORDER - 1) {
 		pa = VM_PAGE_TO_PHYS(m);
 		do {
 			/* Flipping the order's address bit yields the buddy. */
 			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
 			if (pa < seg->start || pa >= seg->end)
 				break;
 			m_buddy = &seg->first_page[atop(pa - seg->start)];
 			/* The buddy must head a free block of this order. */
 			if (m_buddy->order != order)
 				break;
 			fl = (*seg->free_queues)[m_buddy->pool];
 			vm_freelist_rem(fl, m_buddy, order);
 			/* The merged block takes on the pool of "m". */
 			if (m_buddy->pool != m->pool)
 				vm_phys_set_pool(m->pool, m_buddy, order);
 			order++;
 			/* Align down to the start of the merged block. */
 			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
 			m = &seg->first_page[atop(pa - seg->start)];
 		} while (order < VM_NFREEORDER - 1);
 	}
 	fl = (*seg->free_queues)[m->pool];
 	vm_freelist_add(fl, m, order, 1);
 }
 
 /*
  * Free a contiguous, arbitrarily sized set of physical pages.
  *
  * The run of "npages" pages starting at "m" is handed to
  * vm_phys_free_pages() in power-of-two-sized pieces: first the largest
  * pieces permitted by the alignment of the current page, then, once the
  * remainder is smaller than such a piece, pieces taken from the highest
  * set bit of the remaining count downwards.
  *
  * The free page queues must be locked.
  */
 void
 vm_phys_free_contig(vm_page_t m, u_long npages)
 {
 	u_int n;
 	int order;
 
 	/*
 	 * Avoid unnecessary coalescing by freeing the pages in the largest
 	 * possible power-of-two-sized subsets.
 	 */
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	for (;; npages -= n) {
 		/*
 		 * Unsigned "min" is used here so that "order" is assigned
 		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
 		 * or the low-order bits of its physical address are zero
 		 * because the size of a physical address exceeds the size of
 		 * a long.
 		 */
 		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
 		    VM_NFREEORDER - 1);
 		n = 1 << order;
 		/* Stop once the aligned piece no longer fits in the run. */
 		if (npages < n)
 			break;
 		vm_phys_free_pages(m, order);
 		m += n;
 	}
 	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
 	for (; npages > 0; npages -= n) {
 		/* Largest power of two not exceeding the remainder. */
 		order = flsl(npages) - 1;
 		n = 1 << order;
 		vm_phys_free_pages(m, order);
 		m += n;
 	}
 }
 
 /*
  * Scan physical memory between the specified addresses "low" and "high" for a
  * run of contiguous physical pages that satisfy the specified conditions, and
  * return the lowest page in the run.  The specified "alignment" determines
  * the alignment of the lowest physical page in the run.  If the specified
  * "boundary" is non-zero, then the run of physical pages cannot span a
  * physical address that is a multiple of "boundary".
  *
  * Only segments belonging to "domain" are considered.  NULL is returned
  * when "low" is not below "high" or when no segment yields a run.
  *
  * "npages" must be greater than zero.  Both "alignment" and "boundary" must
  * be a power of two.
  */
 vm_page_t
 vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary, int options)
 {
 	struct vm_phys_seg *seg;
 	vm_page_t first, last, run;
 	vm_paddr_t limit;
 	int idx;
 
 	KASSERT(npages > 0, ("npages is 0"));
 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
 	if (low >= high)
 		return (NULL);
 
 	for (idx = 0; idx < vm_phys_nsegs; idx++) {
 		seg = &vm_phys_segs[idx];
 		if (seg->domain != domain)
 			continue;
 		/* Segments are ordered; nothing further can be below "high". */
 		if (seg->start >= high)
 			break;
 		if (low >= seg->end)
 			continue;
 
 		/* Clip [low, high) to this segment. */
 		first = (low <= seg->start) ? seg->first_page :
 		    &seg->first_page[atop(low - seg->start)];
 		limit = (high < seg->end) ? high : seg->end;
 
 		/* Only scan when the clipped window can hold the run. */
 		if (limit - VM_PAGE_TO_PHYS(first) >= ptoa(npages)) {
 			last = &seg->first_page[atop(limit - seg->start)];
 			run = vm_page_scan_contig(npages, first, last,
 			    alignment, boundary, options);
 			if (run != NULL)
 				return (run);
 		}
 	}
 	return (NULL);
 }
 
 /*
  * Store "pool" in the pool field of each of the "1 << order" contiguous
  * pages that begin at "m".
  */
 void
 vm_phys_set_pool(int pool, vm_page_t m, int order)
 {
 	vm_page_t m_end;
 
 	m_end = m + (1 << order);
 	while (m < m_end) {
 		m->pool = pool;
 		m++;
 	}
 }
 
 /*
  * Search for the given physical page "m" in the free lists.  If the search
  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
  * FALSE, indicating that "m" is not in the free lists.
  *
  * On success the rest of the free block that contained "m" is put back on
  * the free lists as progressively smaller power-of-two blocks.
  *
  * The free page queues must be locked.
  */
 boolean_t
 vm_phys_unfree_page(vm_page_t m)
 {
 	struct vm_freelist *fl;
 	struct vm_phys_seg *seg;
 	vm_paddr_t pa, pa_half;
 	vm_page_t m_set, m_tmp;
 	int order;
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 
 	/*
 	 * First, find the contiguous, power of two-sized set of free
 	 * physical pages containing the given physical page "m" and
 	 * assign it to "m_set".
 	 *
 	 * An "order" field equal to VM_NFREEORDER is treated here as "this
 	 * page does not head a free block", so the candidate is moved to
 	 * the start of the next larger aligned block containing "m".
 	 */
 	seg = &vm_phys_segs[m->segind];
 	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
 	    order < VM_NFREEORDER - 1; ) {
 		order++;
 		/* Round "m"'s address down to a 2^order-page boundary. */
 		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
 		if (pa >= seg->start)
 			m_set = &seg->first_page[atop(pa - seg->start)];
 		else
 			return (FALSE);
 	}
 	/* The block headed by "m_set" is too small to reach "m". */
 	if (m_set->order < order)
 		return (FALSE);
 	/* No enclosing block head was found at any order. */
 	if (m_set->order == VM_NFREEORDER)
 		return (FALSE);
 	KASSERT(m_set->order < VM_NFREEORDER,
 	    ("vm_phys_unfree_page: page %p has unexpected order %d",
 	    m_set, m_set->order));
 
 	/*
 	 * Next, remove "m_set" from the free lists.  Finally, extract
 	 * "m" from "m_set" using an iterative algorithm: While "m_set"
 	 * is larger than a page, shrink "m_set" by returning the half
 	 * of "m_set" that does not contain "m" to the free lists.
 	 */
 	fl = (*seg->free_queues)[m_set->pool];
 	order = m_set->order;
 	vm_freelist_rem(fl, m_set, order);
 	while (order > 0) {
 		order--;
 		/* Address of the upper half of the current block. */
 		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
 		if (m->phys_addr < pa_half)
 			/* "m" is in the lower half; free the upper half. */
 			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
 		else {
 			/* "m" is in the upper half; free the lower half. */
 			m_tmp = m_set;
 			m_set = &seg->first_page[atop(pa_half - seg->start)];
 		}
 		vm_freelist_add(fl, m_tmp, order, 0);
 	}
 	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
 	return (TRUE);
 }
 
 /*
  * Allocate "npages" physically contiguous pages from the free lists of
  * "domain".  Every page of the run must lie at or above "low" and below
  * "high".  "alignment" constrains the physical address of the first page,
  * and a non-zero "boundary" forbids the run from crossing any physical
  * address that is a multiple of it.  Both "alignment" and "boundary" must
  * be a power of two.
  *
  * Segments are tried from the highest index downward.  Returns the first
  * page of the run, or NULL if no segment can satisfy the request.
  *
  * The free page queues must be locked.
  */
 vm_page_t
 vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary)
 {
 	struct vm_phys_seg *seg;
 	vm_paddr_t clip_end, clip_start;
 	vm_page_t run;
 	int i;
 
 	KASSERT(npages > 0, ("npages is 0"));
 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	if (low >= high)
 		return (NULL);
 	for (i = vm_phys_nsegs - 1; i >= 0; i--) {
 		seg = &vm_phys_segs[i];
 		if (seg->start >= high || seg->domain != domain)
 			continue;
 		/* This segment ends at or below "low": give up the search. */
 		if (low >= seg->end)
 			return (NULL);
 
 		/* Skip segments whose overlap with the window is too small. */
 		clip_start = (low <= seg->start) ? seg->start : low;
 		clip_end = (high < seg->end) ? high : seg->end;
 		if (clip_end - clip_start < ptoa(npages))
 			continue;
 
 		run = vm_phys_alloc_seg_contig(seg, npages, low, high,
 		    alignment, boundary);
 		if (run != NULL)
 			return (run);
 	}
 	return (NULL);
 }
 
 /*
  * Allocate a run of contiguous physical pages from the free list for the
  * specified segment.
  *
  * The run of "npages" pages must lie within ["low", "high"), start at a
  * physical address that is a multiple of "alignment", and, when "boundary"
  * is non-zero, not cross a multiple of "boundary".  Returns the first page
  * of the run, or NULL if the segment's free lists hold no suitable run.
  *
  * The free page queues must be locked.
  */
 static vm_page_t
 vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
 	struct vm_freelist *fl;
 	vm_paddr_t pa, pa_end, size;
 	vm_page_t m, m_ret;
 	u_long npages_end;
 	int oind, order, pind;
 
 	KASSERT(npages > 0, ("npages is 0"));
 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	/*
 	 * Compute the queue that is the best fit for npages, i.e. the
 	 * smallest order with "2^order >= npages".  flsl() is used instead
 	 * of shifting an int in a loop, which is undefined once the shift
 	 * count reaches the width of int for very large "npages".
 	 */
 	order = flsl(npages - 1);
 	/* Search for a run satisfying the specified conditions. */
 	size = npages << PAGE_SHIFT;
 	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
 	    oind++) {
 		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 			fl = (*seg->free_queues)[pind];
 			TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
 				/*
 				 * Is the size of this allocation request
 				 * larger than the largest block size?
 				 */
 				if (order >= VM_NFREEORDER) {
 					/*
 					 * Determine if a sufficient number of
 					 * subsequent blocks to satisfy the
 					 * allocation request are free.
 					 */
 					pa = VM_PAGE_TO_PHYS(m_ret);
 					pa_end = pa + size;
 					for (;;) {
 						pa += 1 << (PAGE_SHIFT +
 						    VM_NFREEORDER - 1);
 						if (pa >= pa_end ||
 						    pa < seg->start ||
 						    pa >= seg->end)
 							break;
 						m = &seg->first_page[atop(pa -
 						    seg->start)];
 						if (m->order != VM_NFREEORDER -
 						    1)
 							break;
 					}
 					/* If not, go to the next block. */
 					if (pa < pa_end)
 						continue;
 				}
 
 				/*
 				 * Determine if the blocks are within the
 				 * given range, satisfy the given alignment,
 				 * and do not cross the given boundary.
 				 */
 				pa = VM_PAGE_TO_PHYS(m_ret);
 				pa_end = pa + size;
 				if (pa >= low && pa_end <= high &&
 				    (pa & (alignment - 1)) == 0 &&
 				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
 					goto done;
 			}
 		}
 	}
 	return (NULL);
 done:
 	/* Pull every free block that makes up the run off its free list. */
 	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
 		fl = (*seg->free_queues)[m->pool];
 		vm_freelist_rem(fl, m, m->order);
 	}
 	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
 		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
 	fl = (*seg->free_queues)[m_ret->pool];
 	vm_phys_split_pages(m_ret, oind, fl, order);
 	/* Return excess pages to the free lists. */
 	npages_end = roundup2(npages, 1 << imin(oind, order));
 	if (npages < npages_end)
 		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
 	return (m_ret);
 }
 
 #ifdef DDB
 /*
  * Show the number of physical pages in each of the free lists.
  *
  * DDB "show freepages" command: for every domain and every free list,
  * print one row per order (largest first) with the block size in
  * kilobytes and, for each pool, that queue's "lcnt" counter.
  */
 DB_SHOW_COMMAND(freepages, db_show_freepages)
 {
 	struct vm_freelist *fl;
 	int flind, oind, pind, dom;
 
 	for (dom = 0; dom < vm_ndomains; dom++) {
 		db_printf("DOMAIN: %d\n", dom);
 		for (flind = 0; flind < vm_nfreelists; flind++) {
 			/* Table header: one column per pool. */
 			db_printf("FREE LIST %d:\n"
 			    "\n  ORDER (SIZE)  |  NUMBER"
 			    "\n              ", flind);
 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
 				db_printf("  |  POOL %d", pind);
 			db_printf("\n--            ");
 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
 				db_printf("-- --      ");
 			db_printf("--\n");
 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
 				/* Block size in KB: 2^(PAGE_SHIFT + oind) / 1024. */
 				db_printf("  %2.2d (%6.6dK)", oind,
 				    1 << (PAGE_SHIFT - 10 + oind));
 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 				fl = vm_phys_free_queues[dom][flind][pind];
 					db_printf("  |  %6.6d", fl[oind].lcnt);
 				}
 				db_printf("\n");
 			}
 			db_printf("\n");
 		}
 		db_printf("\n");
 	}
 }
 #endif
Index: head/sys/vm/vm_phys.h
===================================================================
--- head/sys/vm/vm_phys.h	(revision 327953)
+++ head/sys/vm/vm_phys.h	(revision 327954)
@@ -1,136 +1,142 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2006 Rice University
  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Alan L. Cox,
  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  *	Physical memory system definitions
  */
 
 #ifndef	_VM_PHYS_H_
 #define	_VM_PHYS_H_
 
 #ifdef _KERNEL
 
 /*
  * One physical address range and the memory domain it belongs to.
  *
  * Domains must be dense (non-sparse) and zero-based.
  */
 struct mem_affinity {
 	vm_paddr_t start;	/* first physical address of the range */
 	vm_paddr_t end;		/* SRAT code stores base + length here */
 	int domain;		/* memory domain of the range */
 };
+#ifdef NUMA
+extern struct mem_affinity *mem_affinity;
+extern int *mem_locality;
+#endif
+extern int vm_ndomains;
 
 /*
  * One free queue: a list of pages plus a counter for that list.
  */
 struct vm_freelist {
 	struct pglist pl;	/* queue of pages */
 	int lcnt;		/* counter reported by DDB "show freepages" */
 };
 
 /*
  * A physical memory segment: the address range [start, end), the vm_page
  * array that backs it, and the free queues it allocates from.
  */
 struct vm_phys_seg {
 	vm_paddr_t	start;		/* first physical address */
 	vm_paddr_t	end;		/* first address past the segment */
 	vm_page_t	first_page;	/* vm_page for "start"; indexed by
 					   atop(pa - start) */
 	int		domain;		/* memory domain of the segment */
 	/* Free queues, indexed by [pool][order]. */
 	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
 };
 
-extern struct mem_affinity *mem_affinity;
-extern int *mem_locality;
-extern int vm_ndomains;
 extern struct vm_phys_seg vm_phys_segs[];
 extern int vm_phys_nsegs;
 
 /*
  * The following functions are only to be used by the virtual memory system.
  */
 void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end);
 vm_page_t vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
 vm_page_t vm_phys_alloc_freelist_pages(int domain, int freelist, int pool,
     int order);
 vm_page_t vm_phys_alloc_pages(int domain, int pool, int order);
 int vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high);
 int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
     vm_memattr_t memattr);
 void vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end);
 vm_page_t vm_phys_fictitious_to_vm_page(vm_paddr_t pa);
 void vm_phys_free_contig(vm_page_t m, u_long npages);
 void vm_phys_free_pages(vm_page_t m, int order);
 void vm_phys_init(void);
 vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa);
 vm_page_t vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary, int options);
 void vm_phys_set_pool(int pool, vm_page_t m, int order);
 boolean_t vm_phys_unfree_page(vm_page_t m);
 int vm_phys_mem_affinity(int f, int t);
 
 /*
  *
  *	vm_phys_domidx:
  *
  *	Return the index of the domain the page belongs to.
  *
  *	With NUMA the index is taken from the segment recorded in the
  *	page's "segind"; without NUMA it is always 0.
  */
 static inline int
 vm_phys_domidx(vm_page_t m)
 {
+#ifdef NUMA
 	int domn, segind;
 
 	/* XXXKIB try to assert that the page is managed */
 	segind = m->segind;
 	KASSERT(segind < vm_phys_nsegs, ("segind %d m %p", segind, m));
 	domn = vm_phys_segs[segind].domain;
 	KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
 	return (domn);
+#else
+	return (0);
+#endif
 }
 
 /*
  *	vm_phys_domain:
  *
  *	Return a pointer to the vm_dom[] entry selected by the page's
  *	domain index.
  */
 static inline struct vm_domain *
 vm_phys_domain(vm_page_t m)
 {
 	int domidx;
 
 	domidx = vm_phys_domidx(m);
 	return (&vm_dom[domidx]);
 }
 
 /*
  * Add "adj" to the free page count of the page's domain and to the global
  * free page count, and return the updated global count.
  *
  * The free page queues must be locked.
  */
 static inline u_int
 vm_phys_freecnt_adj(vm_page_t m, int adj)
 {
 	struct vm_domain *vmd;
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	vmd = vm_phys_domain(m);
 	vmd->vmd_free_count += adj;
 	return (vm_cnt.v_free_count += adj);
 }
 
 #endif	/* _KERNEL */
 #endif	/* !_VM_PHYS_H_ */
Index: head/sys/x86/acpica/srat.c
===================================================================
--- head/sys/x86/acpica/srat.c	(revision 327953)
+++ head/sys/x86/acpica/srat.c	(revision 327954)
@@ -1,575 +1,575 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2010 Hudson River Trading LLC
  * Written by: John H. Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/smp.h>
 #include <sys/vmmeter.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 #include <vm/vm_page.h>
 #include <vm/vm_phys.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 #include <contrib/dev/acpica/include/aclocal.h>
 #include <contrib/dev/acpica/include/actables.h>
 
 #include <machine/intr_machdep.h>
 #include <machine/md_var.h>
 #include <x86/apicvar.h>
 
 #include <dev/acpica/acpivar.h>
 
 #if MAXMEMDOM > 1
 /*
  * Per-APIC-ID state gathered from the SRAT, indexed by local APIC ID.
  * NOTE(review): "enabled" and "has_memory" are signed one-bit bit-fields;
  * the code only tests them for zero/non-zero.
  */
 static struct cpu_info {
 	int enabled:1;		/* an enabled SRAT CPU entry was seen */
 	int has_memory:1;	/* set by check_domains() */
 	int domain;		/* proximity domain, later renumbered */
 } *cpus;
 
 /* Memory regions from the SRAT, and the number of valid entries. */
 struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1];
 int num_mem;
 
 /* Mapped SRAT (only while it is being parsed) and its physical address. */
 static ACPI_TABLE_SRAT *srat;
 static vm_paddr_t srat_physaddr;
 
 /* domain_pxm[i] is the proximity domain renumbered to VM domain "i". */
 static int domain_pxm[MAXMEMDOM];
 static int ndomain;
 
 /* Mapped SLIT, its physical address, and the packed locality table. */
 static ACPI_TABLE_SLIT *slit;
 static vm_paddr_t slit_physaddr;
 static int vm_locality_table[MAXMEMDOM * MAXMEMDOM];
 
 static void	srat_walk_table(acpi_subtable_handler *handler, void *arg);
 
 /*
  * SLIT parsing.
  */
 
 /*
  * Copy the SLIT distance matrix into vm_locality_table[], keeping only
  * the rows and columns whose proximity domain maps to a VM domain.
  */
 static void
 slit_parse_table(ACPI_TABLE_SLIT *s)
 {
 	int col, next, row;
 	uint8_t dist;
 
 	/*
 	 * The PXM namespace may be sparse.  Localities with no VM domain
 	 * are skipped, and the surviving entries are stored back to back,
 	 * which yields a packed 2d array of locality information.
 	 */
 	next = 0;
 	if (bootverbose)
 		printf("SLIT.Localities: %d\n", (int) s->LocalityCount);
 	for (row = 0; row < s->LocalityCount; row++) {
 		if (acpi_map_pxm_to_vm_domainid(row) < 0)
 			continue;
 
 		if (bootverbose)
 			printf("%d: ", row);
 		for (col = 0; col < s->LocalityCount; col++) {
 			if (acpi_map_pxm_to_vm_domainid(col) < 0)
 				continue;
 			dist = s->Entry[row * s->LocalityCount + col];
 			if (bootverbose)
 				printf("%d ", (int) dist);
 			/* 255 == "no locality information" */
 			vm_locality_table[next] = (dist == 255) ? -1 : dist;
 			next++;
 		}
 		if (bootverbose)
 			printf("\n");
 	}
 }
 
 /*
  * Look for an ACPI System Locality Distance Information Table ("SLIT")
  *
  * Returns 0 once the table has been parsed, or -1 if the "slit" resource
  * is disabled or no SLIT is found.
  */
 static int
 parse_slit(void)
 {
 
 	if (resource_disabled("slit", 0)) {
 		return (-1);
 	}
 
 	slit_physaddr = acpi_find_table(ACPI_SIG_SLIT);
 	if (slit_physaddr == 0) {
 		return (-1);
 	}
 
 	/*
 	 * Make a pass over the table to populate vm_locality_table[].
 	 * The table is mapped only for the duration of the pass.
 	 */
 	slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT);
 	slit_parse_table(slit);
 	acpi_unmap_table(slit);
 	slit = NULL;
 
-#ifdef VM_NUMA_ALLOC
+#ifdef NUMA
 	/* Tell the VM about it! */
 	mem_locality = vm_locality_table;
 #endif
 	return (0);
 }
 
 /*
  * SRAT parsing.
  */
 
 /*
  * Report whether the range ["start", "end") intersects an entry of
  * phys_avail[].  Returns 1 if it does and 0 otherwise.
  */
 static int
 overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end)
 {
 	int idx;
 
 	idx = 0;
 	while (phys_avail[idx] != 0 && phys_avail[idx + 1] != 0) {
 		/*
 		 * The first entry that ends above "start" settles the
 		 * question: it overlaps only if it also begins below "end".
 		 */
 		if (phys_avail[idx + 1] > start)
 			return (phys_avail[idx] < end ? 1 : 0);
 		idx += 2;
 	}
 	return (0);
 }
 
 /*
  * Handle one SRAT subtable.
  *
  * Enabled CPU and x2APIC entries record their proximity domain in cpus[].
  * Enabled memory entries are inserted into mem_info[], which is kept
  * sorted by address and free of overlaps.  "arg" points to an int that is
  * set to ENXIO when the table is found to be inconsistent.
  */
 static void
 srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg)
 {
 	ACPI_SRAT_CPU_AFFINITY *cpu;
 	ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic;
 	ACPI_SRAT_MEM_AFFINITY *mem;
 	int domain, i, slot;
 
 	switch (entry->Type) {
 	case ACPI_SRAT_TYPE_CPU_AFFINITY:
 		cpu = (ACPI_SRAT_CPU_AFFINITY *)entry;
 		/* The 32-bit proximity domain is split over four bytes. */
 		domain = cpu->ProximityDomainLo |
 		    cpu->ProximityDomainHi[0] << 8 |
 		    cpu->ProximityDomainHi[1] << 16 |
 		    cpu->ProximityDomainHi[2] << 24;
 		if (bootverbose)
 			printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
 			    cpu->ApicId, domain,
 			    (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ?
 			    "enabled" : "disabled");
 		if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED))
 			break;
 		if (cpu->ApicId > max_apic_id) {
 			printf("SRAT: Ignoring local APIC ID %u (too high)\n",
 			    cpu->ApicId);
 			break;
 		}
 
 		if (cpus[cpu->ApicId].enabled) {
 			printf("SRAT: Duplicate local APIC ID %u\n",
 			    cpu->ApicId);
 			*(int *)arg = ENXIO;
 			break;
 		}
 		cpus[cpu->ApicId].domain = domain;
 		cpus[cpu->ApicId].enabled = 1;
 		break;
 	case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
 		x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry;
 		if (bootverbose)
 			printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
 			    x2apic->ApicId, x2apic->ProximityDomain,
 			    (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ?
 			    "enabled" : "disabled");
 		if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED))
 			break;
 		if (x2apic->ApicId > max_apic_id) {
 			printf("SRAT: Ignoring local APIC ID %u (too high)\n",
 			    x2apic->ApicId);
 			break;
 		}
 
 		/*
 		 * NOTE(review): a duplicate ID is only asserted here,
 		 * whereas the CPU affinity case above reports ENXIO.
 		 */
 		KASSERT(!cpus[x2apic->ApicId].enabled,
 		    ("Duplicate local APIC ID %u", x2apic->ApicId));
 		cpus[x2apic->ApicId].domain = x2apic->ProximityDomain;
 		cpus[x2apic->ApicId].enabled = 1;
 		break;
 	case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
 		mem = (ACPI_SRAT_MEM_AFFINITY *)entry;
 		if (bootverbose)
 			printf(
 		    "SRAT: Found memory domain %d addr 0x%jx len 0x%jx: %s\n",
 			    mem->ProximityDomain, (uintmax_t)mem->BaseAddress,
 			    (uintmax_t)mem->Length,
 			    (mem->Flags & ACPI_SRAT_MEM_ENABLED) ?
 			    "enabled" : "disabled");
 		if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED))
 			break;
 		if (mem->BaseAddress >= cpu_getmaxphyaddr() ||
 		    !overlaps_phys_avail(mem->BaseAddress,
 		    mem->BaseAddress + mem->Length)) {
 			printf("SRAT: Ignoring memory at addr 0x%jx\n",
 			    (uintmax_t)mem->BaseAddress);
 			break;
 		}
 		if (num_mem == VM_PHYSSEG_MAX) {
 			printf("SRAT: Too many memory regions\n");
 			*(int *)arg = ENXIO;
 			break;
 		}
 		/*
 		 * Find the insertion point that keeps mem_info[] sorted:
 		 * the first existing region that ends above the new base.
 		 * Only the first such index is recorded; later regions
 		 * are still checked for overlap but must not move the
 		 * slot, or the new region would land out of order.
 		 */
 		slot = num_mem;
 		for (i = 0; i < num_mem; i++) {
 			if (mem_info[i].end <= mem->BaseAddress)
 				continue;
 			if (mem_info[i].start <
 			    (mem->BaseAddress + mem->Length)) {
 				printf("SRAT: Overlapping memory entries\n");
 				*(int *)arg = ENXIO;
 				return;
 			}
 			if (slot == num_mem)
 				slot = i;
 		}
 		/* Shift the tail up by one and store the new region. */
 		for (i = num_mem; i > slot; i--)
 			mem_info[i] = mem_info[i - 1];
 		mem_info[slot].start = mem->BaseAddress;
 		mem_info[slot].end = mem->BaseAddress + mem->Length;
 		mem_info[slot].domain = mem->ProximityDomain;
 		num_mem++;
 		break;
 	}
 }
 
 /*
  * Verify that every memory region's domain contains at least one enabled
  * CPU and that every enabled CPU belongs to a domain that has memory.
  * Returns 0 on success and ENXIO otherwise.
  */
 static int
 check_domains(void)
 {
 	int apic, matched, region;
 
 	for (region = 0; region < num_mem; region++) {
 		matched = 0;
 		for (apic = 0; apic <= max_apic_id; apic++) {
 			if (!cpus[apic].enabled)
 				continue;
 			if (cpus[apic].domain != mem_info[region].domain)
 				continue;
 			/* Remember that this CPU's domain has memory. */
 			cpus[apic].has_memory = 1;
 			matched = 1;
 		}
 		if (matched == 0) {
 			printf("SRAT: No CPU found for memory domain %d\n",
 			    mem_info[region].domain);
 			return (ENXIO);
 		}
 	}
 	for (apic = 0; apic <= max_apic_id; apic++) {
 		if (!cpus[apic].enabled || cpus[apic].has_memory)
 			continue;
 		printf("SRAT: No memory found for CPU %d\n", apic);
 		return (ENXIO);
 	}
 	return (0);
 }
 
 /*
  * Check that the SRAT memory regions cover all of the regions in
  * phys_avail[].
  *
  * Returns 0 if every phys_avail[] entry is covered, otherwise prints the
  * first uncovered entry and returns ENXIO.  The walk advances through
  * mem_info[] once, so it relies on mem_info[] being in address order.
  */
 static int
 check_phys_avail(void)
 {
 	vm_paddr_t address;
 	int i, j;
 
 	/* j is the current offset into phys_avail[]. */
 	address = phys_avail[0];
 	j = 0;
 	for (i = 0; i < num_mem; i++) {
 		/*
 		 * Consume as many phys_avail[] entries as fit in this
 		 * region.
 		 *
 		 * NOTE(review): "end" is compared inclusively here, while
 		 * srat_parse_entry() stores base + length -- confirm.
 		 */
 		while (address >= mem_info[i].start &&
 		    address <= mem_info[i].end) {
 			/*
 			 * If we cover the rest of this phys_avail[] entry,
 			 * advance to the next entry.
 			 */
 			if (phys_avail[j + 1] <= mem_info[i].end) {
 				j += 2;
 				/* A 0/0 pair terminates phys_avail[]. */
 				if (phys_avail[j] == 0 &&
 				    phys_avail[j + 1] == 0) {
 					return (0);
 				}
 				address = phys_avail[j];
 			} else
 				/* Step past this region to end the loop. */
 				address = mem_info[i].end + 1;
 		}
 	}
 	printf("SRAT: No memory region found for 0x%jx - 0x%jx\n",
 	    (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]);
 	return (ENXIO);
 }
 
 /*
  * Renumber the memory domains to be compact and zero-based if not
  * already.  Returns an error if there are too many domains.
  *
  * On return domain_pxm[] holds the distinct proximity domains in ascending
  * order, "ndomain" is their count, and the "domain" fields of mem_info[]
  * and of enabled cpus[] entries hold indices into domain_pxm[].
  */
 static int
 renumber_domains(void)
 {
 	int i, j, slot;
 
 	/* Enumerate all the domains. */
 	ndomain = 0;
 	for (i = 0; i < num_mem; i++) {
 		/* See if this domain is already known. */
 		for (j = 0; j < ndomain; j++) {
 			if (domain_pxm[j] >= mem_info[i].domain)
 				break;
 		}
 		if (j < ndomain && domain_pxm[j] == mem_info[i].domain)
 			continue;
 
 		if (ndomain >= MAXMEMDOM) {
 			/* Fall back to a single domain before failing. */
 			ndomain = 1;
 			printf("SRAT: Too many memory domains\n");
 			return (EFBIG);
 		}
 
 		/* Insert the new domain at slot 'j'. */
 		slot = j;
 		for (j = ndomain; j > slot; j--)
 			domain_pxm[j] = domain_pxm[j - 1];
 		domain_pxm[slot] = mem_info[i].domain;
 		ndomain++;
 	}
 
 	/* Renumber each domain to its index in the sorted 'domain_pxm' list. */
 	for (i = 0; i < ndomain; i++) {
 		/*
 		 * If the domain is already the right value, no need
 		 * to renumber.
 		 */
 		if (domain_pxm[i] == i)
 			continue;
 
 		/* Walk the cpu[] and mem_info[] arrays to renumber. */
 		for (j = 0; j < num_mem; j++)
 			if (mem_info[j].domain == domain_pxm[i])
 				mem_info[j].domain = i;
 		for (j = 0; j <= max_apic_id; j++)
 			if (cpus[j].enabled && cpus[j].domain == domain_pxm[i])
 				cpus[j].domain = i;
 	}
 
 	return (0);
 }
 
 /*
  * Look for an ACPI System Resource Affinity Table ("SRAT")
  *
  * Returns 0 when the table was found, parsed and validated, and -1 when
  * the "srat" resource is disabled, no SRAT exists, or validation fails
  * (in which case srat_physaddr is reset to 0).
  */
 static int
 parse_srat(void)
 {
 	unsigned int idx, size;
 	vm_paddr_t addr;
 	int error;
 
 	if (resource_disabled("srat", 0))
 		return (-1);
 
 	srat_physaddr = acpi_find_table(ACPI_SIG_SRAT);
 	if (srat_physaddr == 0)
 		return (-1);
 
 	/*
 	 * Allocate data structure:
 	 *
 	 * Find the last physical memory region and steal some memory from
 	 * it. This is done because at this point in the boot process
 	 * malloc is still not usable.
 	 */
 	for (idx = 0; phys_avail[idx + 1] != 0; idx += 2);
 	KASSERT(idx != 0, ("phys_avail is empty!"));
 	idx -= 2;
 
 	/* One cpu_info slot for every APIC ID from 0 to max_apic_id. */
 	size =  sizeof(*cpus) * (max_apic_id + 1);
 	addr = trunc_page(phys_avail[idx + 1] - size);
 	KASSERT(addr >= phys_avail[idx],
 	    ("Not enough memory for SRAT table items"));
 	/*
 	 * NOTE(review): overlaps_phys_avail() treats phys_avail[i + 1] as
 	 * an exclusive end, yet "addr - 1" is stored here -- confirm.
 	 */
 	phys_avail[idx + 1] = addr - 1;
 
 	/*
 	 * We cannot rely on PHYS_TO_DMAP because this code is also used in
 	 * i386, so use pmap_mapbios to map the memory, this will end up using
 	 * the default memory attribute (WB), and the DMAP when available.
 	 */
 	cpus = (struct cpu_info *)pmap_mapbios(addr, size);
 	bzero(cpus, size);
 
 	/*
 	 * Make a pass over the table to populate the cpus[] and
 	 * mem_info[] tables.
 	 */
 	srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT);
 	error = 0;
 	srat_walk_table(srat_parse_entry, &error);
 	acpi_unmap_table(srat);
 	srat = NULL;
 	/* The checks run in this order and stop at the first failure. */
 	if (error || check_domains() != 0 || check_phys_avail() != 0 ||
 	    renumber_domains() != 0) {
 		srat_physaddr = 0;
 		return (-1);
 	}
 
-#ifdef VM_NUMA_ALLOC
+#ifdef NUMA
 	/* Point vm_phys at our memory affinity table. */
 	vm_ndomains = ndomain;
 	mem_affinity = mem_info;
 #endif
 
 	return (0);
 }
 
 /*
  * Reset every entry of vm_locality_table[] to -1.
  */
 static void
 init_mem_locality(void)
 {
 	int *entry;
 
 	/*
 	 * For now, assume -1 == "no locality information for
 	 * this pairing".
 	 */
 	for (entry = vm_locality_table;
 	    entry < &vm_locality_table[MAXMEMDOM * MAXMEMDOM]; entry++)
 		*entry = -1;
 }
 
 /*
  * SYSINIT hook: parse the SRAT and, only if that succeeds, reset the
  * locality table and parse the SLIT.  A SLIT failure is ignored.
  */
 static void
 parse_acpi_tables(void *dummy)
 {
 
 	if (parse_srat() >= 0) {
 		init_mem_locality();
 		(void) parse_slit();
 	}
 }
 SYSINIT(parse_acpi_tables, SI_SUB_VM - 1, SI_ORDER_FIRST, parse_acpi_tables,
     NULL);
 
 /*
  * Invoke "handler" with "arg" on each subtable of the mapped SRAT, from
  * just past the fixed table structure to the end given by its header
  * length.
  */
 static void
 srat_walk_table(acpi_subtable_handler *handler, void *arg)
 {
 	char *base;
 
 	base = (char *)srat;
 	acpi_walk_subtables(srat + 1, base + srat->Header.Length, handler,
 	    arg);
 }
 
 /*
  * Setup per-CPU domain IDs.
  */
 static void
 srat_set_cpus(void *dummy)
 {
 	struct cpu_info *cpu;
 	struct pcpu *pc;
 	u_int i;
 
 	if (srat_physaddr == 0)
 		return;
 	for (i = 0; i < MAXCPU; i++) {
 		if (CPU_ABSENT(i))
 			continue;
 		pc = pcpu_find(i);
 		KASSERT(pc != NULL, ("no pcpu data for CPU %u", i));
 		cpu = &cpus[pc->pc_apic_id];
 		if (!cpu->enabled)
 			panic("SRAT: CPU with APIC ID %u is not known",
 			    pc->pc_apic_id);
 		pc->pc_domain = cpu->domain;
 		CPU_SET(i, &cpuset_domain[cpu->domain]);
 		if (bootverbose)
 			printf("SRAT: CPU %u has memory domain %d\n", i,
 			    cpu->domain);
 	}
 
 	/* Last usage of the cpus array, unmap it. */
 	pmap_unmapbios((vm_offset_t)cpus, sizeof(*cpus) * (max_apic_id + 1));
 	cpus = NULL;
 }
 SYSINIT(srat_set_cpus, SI_SUB_CPU, SI_ORDER_ANY, srat_set_cpus, NULL);
 
 /*
  * Map a _PXM value to a VM domain ID.
  *
  * Returns the index of "pxm" in domain_pxm[], or -1 if it is not present.
  */
 int
 acpi_map_pxm_to_vm_domainid(int pxm)
 {
 	int dom;
 
 	dom = 0;
 	while (dom < ndomain) {
 		if (domain_pxm[dom] == pxm)
 			return (dom);
 		dom++;
 	}
 	return (-1);
 }
 
 #else /* MAXMEMDOM == 1 */
 
 /*
  * Single-domain build: no _PXM value maps to a VM domain, so always
  * return -1.
  */
 int
 acpi_map_pxm_to_vm_domainid(int pxm)
 {
 
 	(void)pxm;
 	return (-1);
 }
 
 #endif /* MAXMEMDOM > 1 */