Index: head/lib/libc/gen/sysctl.3
===================================================================
--- head/lib/libc/gen/sysctl.3	(revision 287639)
+++ head/lib/libc/gen/sysctl.3	(revision 287640)
@@ -1,870 +1,862 @@
 .\" Copyright (c) 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 4. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"	@(#)sysctl.3	8.4 (Berkeley) 5/9/95
 .\" $FreeBSD$
 .\"
-.Dd May 17, 2013
+.Dd September 10, 2015
 .Dt SYSCTL 3
 .Os
 .Sh NAME
 .Nm sysctl ,
 .Nm sysctlbyname ,
 .Nm sysctlnametomib
 .Nd get or set system information
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
 .In sys/types.h
 .In sys/sysctl.h
 .Ft int
 .Fn sysctl "const int *name" "u_int namelen" "void *oldp" "size_t *oldlenp" "const void *newp" "size_t newlen"
 .Ft int
 .Fn sysctlbyname "const char *name" "void *oldp" "size_t *oldlenp" "const void *newp" "size_t newlen"
 .Ft int
 .Fn sysctlnametomib "const char *name" "int *mibp" "size_t *sizep"
 .Sh DESCRIPTION
 The
 .Fn sysctl
 function retrieves system information and allows processes with
 appropriate privileges to set system information.
 The information available from
 .Fn sysctl
 consists of integers, strings, and tables.
 Information may be retrieved and set from the command interface
 using the
 .Xr sysctl 8
 utility.
 .Pp
 Unless explicitly noted below,
 .Fn sysctl
 returns a consistent snapshot of the data requested.
 Consistency is obtained by locking the destination
 buffer into memory so that the data may be copied out without blocking.
 Calls to
 .Fn sysctl
 are serialized to avoid deadlock.
 .Pp
 The state is described using a ``Management Information Base'' (MIB)
 style name, listed in
 .Fa name ,
 which is a
 .Fa namelen
 length array of integers.
 .Pp
 The
 .Fn sysctlbyname
 function accepts an ASCII representation of the name and internally
 looks up the integer name vector.
 Apart from that, it behaves the same
 as the standard
 .Fn sysctl
 function.
 .Pp
 The information is copied into the buffer specified by
 .Fa oldp .
 The size of the buffer is given by the location specified by
 .Fa oldlenp
 before the call,
 and that location gives the amount of data copied after a successful call
 and after a call that returns with the error code
 .Er ENOMEM .
 If the amount of data available is greater
 than the size of the buffer supplied,
 the call supplies as much data as fits in the buffer provided
 and returns with the error code
 .Er ENOMEM .
 If the old value is not desired,
 .Fa oldp
 and
 .Fa oldlenp
 should be set to NULL.
 .Pp
 The size of the available data can be determined by calling
 .Fn sysctl
 with the
 .Dv NULL
 argument for
 .Fa oldp .
 The size of the available data will be returned in the location pointed to by
 .Fa oldlenp .
 For some operations, the amount of space may change often.
 For these operations,
 the system attempts to round up so that the returned size is
 large enough for a call to return the data shortly thereafter.
 .Pp
 To set a new value,
 .Fa newp
 is set to point to a buffer of length
 .Fa newlen
 from which the requested value is to be taken.
 If a new value is not to be set,
 .Fa newp
 should be set to NULL and
 .Fa newlen
 set to 0.
 .Pp
 The
 .Fn sysctlnametomib
 function accepts an ASCII representation of the name,
 looks up the integer name vector,
 and returns the numeric representation in the mib array pointed to by
 .Fa mibp .
 The number of elements in the mib array is given by the location specified by
 .Fa sizep
 before the call,
 and that location gives the number of entries copied after a successful call.
 The resulting
 .Fa mib
 and
 .Fa size
 may be used in subsequent
 .Fn sysctl
 calls to get the data associated with the requested ASCII name.
 This interface is intended for use by applications that want to
 repeatedly request the same variable (the
 .Fn sysctl
 function runs in about a third of the time of the same request made via the
 .Fn sysctlbyname
 function).
 The
 .Fn sysctlnametomib
 function is also useful for fetching mib prefixes and then adding
 a final component.
 For example, to fetch process information
 for processes with pids less than 100:
 .Pp
 .Bd -literal -offset indent -compact
 int i, mib[4];
 size_t len;
 struct kinfo_proc kp;
 
 /* Fill out the first three components of the mib */
 len = 4;
 sysctlnametomib("kern.proc.pid", mib, &len);
 
 /* Fetch and print entries for pids < 100 */
 for (i = 0; i < 100; i++) {
 	mib[3] = i;
 	len = sizeof(kp);
 	if (sysctl(mib, 4, &kp, &len, NULL, 0) == -1)
 		perror("sysctl");
 	else if (len > 0)
 		printkproc(&kp);
 }
 .Ed
 .Pp
 The top level names are defined with a CTL_ prefix in
 .In sys/sysctl.h ,
 and are as follows.
 The next and subsequent levels down are found in the include files
 listed here, and described in separate sections below.
 .Bl -column CTLXMACHDEPXXX "Next level namesXXXXXX" -offset indent
 .It Sy "Name	Next level names	Description"
 .It "CTL_DEBUG	sys/sysctl.h	Debugging"
 .It "CTL_VFS	sys/mount.h	File system"
 .It "CTL_HW	sys/sysctl.h	Generic CPU, I/O"
 .It "CTL_KERN	sys/sysctl.h	High kernel limits"
 .It "CTL_MACHDEP	sys/sysctl.h	Machine dependent"
 .It "CTL_NET	sys/socket.h	Networking"
 .It "CTL_USER	sys/sysctl.h	User-level"
 .It "CTL_VM	vm/vm_param.h	Virtual memory"
 .El
 .Pp
 For example, the following retrieves the maximum number of processes allowed
 in the system:
 .Pp
 .Bd -literal -offset indent -compact
 int mib[2], maxproc;
 size_t len;
 
 mib[0] = CTL_KERN;
 mib[1] = KERN_MAXPROC;
 len = sizeof(maxproc);
 sysctl(mib, 2, &maxproc, &len, NULL, 0);
 .Ed
 .Pp
 To retrieve the standard search path for the system utilities:
 .Pp
 .Bd -literal -offset indent -compact
 int mib[2];
 size_t len;
 char *p;
 
 mib[0] = CTL_USER;
 mib[1] = USER_CS_PATH;
 sysctl(mib, 2, NULL, &len, NULL, 0);
 p = malloc(len);
 sysctl(mib, 2, p, &len, NULL, 0);
 .Ed
 .Ss CTL_DEBUG
 The debugging variables vary from system to system.
 A debugging variable may be added or deleted without need to recompile
 .Fn sysctl
 to know about it.
 Each time it runs,
 .Fn sysctl
 gets the list of debugging variables from the kernel and
 displays their current values.
 The system defines twenty
 .Pq Vt "struct ctldebug"
 variables named
 .Va debug0
 through
 .Va debug19 .
 They are declared as separate variables so that they can be
 individually initialized at the location of their associated variable.
 The loader prevents multiple use of the same variable by issuing errors
 if a variable is initialized in more than one place.
 For example, to export the variable
 .Va dospecialcheck
 as a debugging variable, the following declaration would be used:
 .Pp
 .Bd -literal -offset indent -compact
 int dospecialcheck = 1;
 struct ctldebug debug5 = { "dospecialcheck", &dospecialcheck };
 .Ed
 .Ss CTL_VFS
 A distinguished second level name, VFS_GENERIC,
 is used to get general information about all file systems.
 One of its third level identifiers is VFS_MAXTYPENUM
 that gives the highest valid file system type number.
 Its other third level identifier is VFS_CONF that
 returns configuration information about the file system
 type given as a fourth level identifier (see
 .Xr getvfsbyname 3
 as an example of its use).
 The remaining second level identifiers are the
 file system type number returned by a
 .Xr statfs 2
 call or from VFS_CONF.
 The third level identifiers available for each file system
 are given in the header file that defines the mount
 argument structure for that file system.
 .Ss CTL_HW
 The string and integer information available for the CTL_HW level
 is detailed below.
 The changeable column shows whether a process with appropriate
 privilege may change the value.
 .Bl -column "Second level nameXXXXXX" integerXXX -offset indent
 .It Sy "Second level name	Type	Changeable"
 .It "HW_MACHINE	string	no"
 .It "HW_MODEL	string	no"
 .It "HW_NCPU	integer	no"
 .It "HW_BYTEORDER	integer	no"
 .It "HW_PHYSMEM	integer	no"
 .It "HW_USERMEM	integer	no"
 .It "HW_PAGESIZE	integer	no"
 .\".It "HW_DISKNAMES	integer	no"
 .\".It "HW_DISKSTATS	integer	no"
 .It "HW_FLOATINGPT	integer	no"
 .It "HW_MACHINE_ARCH	string	no"
 .It "HW_REALMEM	integer	no"
 .El
 .Bl -tag -width 6n
 .It Li HW_MACHINE
 The machine class.
 .It Li HW_MODEL
 The machine model.
 .It Li HW_NCPU
 The number of cpus.
 .It Li HW_BYTEORDER
 The byteorder (4321 or 1234).
 .It Li HW_PHYSMEM
 The bytes of physical memory.
 .It Li HW_USERMEM
 The bytes of non-kernel memory.
 .It Li HW_PAGESIZE
 The software page size.
 .\".It Fa HW_DISKNAMES
 .\".It Fa HW_DISKSTATS
 .It Li HW_FLOATINGPT
 Nonzero if the floating point support is in hardware.
 .It Li HW_MACHINE_ARCH
 The machine dependent architecture type.
 .It Li HW_REALMEM
 The bytes of real memory.
 .El
 .Ss CTL_KERN
 The string and integer information available for the CTL_KERN level
 is detailed below.
 The changeable column shows whether a process with appropriate
 privilege may change the value.
 The types of data currently available are process information,
 system vnodes, the open file entries, routing table entries,
 virtual memory statistics, load average history, and clock rate
 information.
 .Bl -column "KERNXMAXFILESPERPROCXXX" "struct clockrateXXX" -offset indent
 .It Sy "Second level name	Type	Changeable"
 .It "KERN_ARGMAX	integer	no"
 .It "KERN_BOOTFILE	string	yes"
 .It "KERN_BOOTTIME	struct timeval	no"
 .It "KERN_CLOCKRATE	struct clockinfo	no"
 .It "KERN_FILE	struct xfile	no"
 .It "KERN_HOSTID	integer	yes"
 .It "KERN_HOSTUUID	string	yes"
 .It "KERN_HOSTNAME	string	yes"
 .It "KERN_JOB_CONTROL	integer	no"
 .It "KERN_MAXFILES	integer	yes"
 .It "KERN_MAXFILESPERPROC	integer	yes"
 .It "KERN_MAXPROC	integer	no"
 .It "KERN_MAXPROCPERUID	integer	yes"
 .It "KERN_MAXVNODES	integer	yes"
 .It "KERN_NGROUPS	integer	no"
 .It "KERN_NISDOMAINNAME	string	yes"
 .It "KERN_OSRELDATE	integer	no"
 .It "KERN_OSRELEASE	string	no"
 .It "KERN_OSREV	integer	no"
 .It "KERN_OSTYPE	string	no"
 .It "KERN_POSIX1	integer	no"
 .It "KERN_PROC	node	not applicable"
 .It "KERN_PROF	node	not applicable"
 .It "KERN_QUANTUM	integer	yes"
 .It "KERN_SAVED_IDS	integer	no"
 .It "KERN_SECURELVL	integer	raise only"
 .It "KERN_UPDATEINTERVAL	integer	no"
 .It "KERN_VERSION	string	no"
 .It "KERN_VNODE	struct xvnode	no"
 .El
 .Bl -tag -width 6n
 .It Li KERN_ARGMAX
 The maximum bytes of argument to
 .Xr execve 2 .
 .It Li KERN_BOOTFILE
 The full pathname of the file from which the kernel was loaded.
 .It Li KERN_BOOTTIME
 A
 .Va struct timeval
 structure is returned.
 This structure contains the time that the system was booted.
 .It Li KERN_CLOCKRATE
 A
 .Va struct clockinfo
 structure is returned.
 This structure contains the clock, statistics clock and profiling clock
 frequencies, the number of microseconds per hz tick and the skew rate.
 .It Li KERN_FILE
 Return the entire file table.
 The returned data consists of an array of
 .Va struct xfile ,
 whose size depends on the current number of such objects in the system.
 .It Li KERN_HOSTID
 Get or set the host ID.
 .It Li KERN_HOSTUUID
 Get or set the host's universally unique identifier (UUID).
 .It Li KERN_HOSTNAME
 Get or set the hostname.
 .It Li KERN_JOB_CONTROL
 Return 1 if job control is available on this system, otherwise 0.
 .It Li KERN_MAXFILES
 The maximum number of files that may be open in the system.
 .It Li KERN_MAXFILESPERPROC
 The maximum number of files that may be open for a single process.
 This limit only applies to processes with a nonzero effective uid
 at the time of the open request.
 Files that have already been opened are not affected if the limit
 or the effective uid is changed.
 .It Li KERN_MAXPROC
 The maximum number of concurrent processes the system will allow.
 .It Li KERN_MAXPROCPERUID
 The maximum number of concurrent processes the system will allow
 for a single effective uid.
 This limit only applies to processes with a nonzero effective uid
 at the time of a fork request.
 Processes that have already been started are not affected if the limit
 is changed.
 .It Li KERN_MAXVNODES
 The maximum number of vnodes available on the system.
 .It Li KERN_NGROUPS
 The maximum number of supplemental groups.
 .It Li KERN_NISDOMAINNAME
 The name of the current YP/NIS domain.
 .It Li KERN_OSRELDATE
 The kernel release version in the format
 .Ar M Ns Ar mm Ns Ar R Ns Ar xx ,
 where
 .Ar M
 is the major version,
 .Ar mm
 is the two digit minor version,
 .Ar R
 is 0 if release branch, otherwise 1,
 and
 .Ar xx
 is updated when the available APIs change.
 .Pp
 The userland release version is available from
 .In osreldate.h ;
 parse this file if you need to get the release version of
 the currently installed userland.
 .It Li KERN_OSRELEASE
 The system release string.
 .It Li KERN_OSREV
 The system revision string.
 .It Li KERN_OSTYPE
 The system type string.
 .It Li KERN_POSIX1
 The version of
 .St -p1003.1
 with which the system
 attempts to comply.
 .It Li KERN_PROC
 Return selected information about specific running processes.
 .Pp
 For the following names, an array of
 .Va struct kinfo_proc
 structures is returned,
 whose size depends on the current number of such objects in the system.
 .Bl -column "Third level nameXXXXXX" "Fourth level is:XXXXXX" -offset indent
 .It Sy "Third level name	Fourth level is:"
 .It "KERN_PROC_ALL	None"
 .It "KERN_PROC_PID	A process ID"
 .It "KERN_PROC_PGRP	A process group"
 .It "KERN_PROC_TTY	A tty device"
 .It "KERN_PROC_UID	A user ID"
 .It "KERN_PROC_RUID	A real user ID"
 .El
 .Pp
 If the third level name is
 .Dv KERN_PROC_ARGS
 then the command line argument
 array is returned in a flattened form, i.e., zero-terminated arguments
 follow each other.
 The total size of the array is returned.
 It is also possible for a process to set its own process title this way.
 If the third level name is
 .Dv KERN_PROC_PATHNAME ,
 the path of the
 process' text file is stored.
 For
 .Dv KERN_PROC_PATHNAME ,
 a process ID of
 .Li \-1
 implies the current process.
 .Bl -column "Third level nameXXXXXX" "Fourth level is:XXXXXX" -offset indent
 .It Sy "Third level name	Fourth level is:"
 .It Dv KERN_PROC_ARGS Ta "A process ID"
 .It Dv KERN_PROC_PATHNAME Ta "A process ID"
 .El
 .It Li KERN_PROF
 Return profiling information about the kernel.
 If the kernel is not compiled for profiling,
 attempts to retrieve any of the KERN_PROF values will
 fail with
 .Er ENOENT .
 The third level names for the string and integer profiling information
 are detailed below.
 The changeable column shows whether a process with appropriate
 privilege may change the value.
 .Bl -column "GPROFXGMONPARAMXXX" "struct gmonparamXXX" -offset indent
 .It Sy "Third level name	Type	Changeable"
 .It "GPROF_STATE	integer	yes"
 .It "GPROF_COUNT	u_short[\|]	yes"
 .It "GPROF_FROMS	u_short[\|]	yes"
 .It "GPROF_TOS	struct tostruct	yes"
 .It "GPROF_GMONPARAM	struct gmonparam	no"
 .El
 .Pp
 The variables are as follows:
 .Bl -tag -width 6n
 .It Li GPROF_STATE
 Returns GMON_PROF_ON or GMON_PROF_OFF to show that profiling
 is running or stopped.
 .It Li GPROF_COUNT
 Array of statistical program counter counts.
 .It Li GPROF_FROMS
 Array indexed by program counter of call-from points.
 .It Li GPROF_TOS
 Array of
 .Va struct tostruct
 describing destination of calls and their counts.
 .It Li GPROF_GMONPARAM
 Structure giving the sizes of the above arrays.
 .El
 .It Li KERN_QUANTUM
 The maximum period of time, in microseconds, for which a process is allowed
 to run without being preempted if other processes are in the run queue.
 .It Li KERN_SAVED_IDS
 Returns 1 if saved set-group and saved set-user IDs are available.
 .It Li KERN_SECURELVL
 The system security level.
 This level may be raised by processes with appropriate privilege.
 It may not be lowered.
 .It Li KERN_VERSION
 The system version string.
 .It Li KERN_VNODE
 Return the entire vnode table.
 Note, the vnode table is not necessarily a consistent snapshot of
 the system.
 The returned data consists of an array whose size depends on the
 current number of such objects in the system.
 Each element of the array consists of a
 .Va struct xvnode .
 .El
 .Ss CTL_NET
 The string and integer information available for the CTL_NET level
 is detailed below.
 The changeable column shows whether a process with appropriate
 privilege may change the value.
 .Bl -column "Second level nameXXXXXX" "routing messagesXXX" -offset indent
 .It Sy "Second level name	Type	Changeable"
 .It "PF_ROUTE	routing messages	no"
 .It "PF_INET	IPv4 values	yes"
 .It "PF_INET6	IPv6 values	yes"
 .El
 .Bl -tag -width 6n
 .It Li PF_ROUTE
 Return the entire routing table or a subset of it.
 The data is returned as a sequence of routing messages (see
 .Xr route 4
 for the header file, format and meaning).
 The length of each message is contained in the message header.
 .Pp
 The third level name is a protocol number, which is currently always 0.
 The fourth level name is an address family, which may be set to 0 to
 select all address families.
 The fifth, sixth, and seventh level names are as follows:
 .Bl -column -offset indent "Fifth level      Sixth level" "Seventh level"
 .It Sy "Fifth level      Sixth level" Ta Sy "Seventh level"
 .It "NET_RT_FLAGS     rtflags" Ta "None"
 .It "NET_RT_DUMP      None" Ta "None or fib number"
 .It "NET_RT_IFLIST    0 or if_index" Ta None
 .It "NET_RT_IFMALIST  0 or if_index" Ta None
 .It "NET_RT_IFLISTL   0 or if_index" Ta None
 .El
 .Pp
 The
 .Dv NET_RT_IFMALIST
 name returns information about multicast group memberships on all interfaces
 if 0 is specified, or for the interface specified by
 .Va if_index .
 .Pp
 The
 .Dv NET_RT_IFLISTL
 is like
 .Dv NET_RT_IFLIST ,
 just returning message header structs with additional fields allowing the
 interface to be extended without breaking binary compatibility.
 The
 .Dv NET_RT_IFLISTL
 uses 'l' versions of the message header structures:
 .Va struct if_msghdrl
 and
 .Va struct ifa_msghdrl .
 .It Li PF_INET
 Get or set various global information about the IPv4
 (Internet Protocol version 4).
 The third level name is the protocol.
 The fourth level name is the variable name.
 The currently defined protocols and names are:
 .Bl -column ProtocolXX VariableXX TypeXX ChangeableXX
 .It Sy "Protocol	Variable	Type	Changeable"
 .It "icmp	bmcastecho	integer	yes"
 .It "icmp	maskrepl	integer	yes"
 .It "ip	forwarding	integer	yes"
 .It "ip	redirect	integer	yes"
 .It "ip	ttl	integer	yes"
 .It "udp	checksum	integer	yes"
 .El
 .Pp
 The variables are as follows:
 .Bl -tag -width 6n
 .It Li icmp.bmcastecho
 Returns 1 if an ICMP echo request to a broadcast or multicast address is
 to be answered.
 .It Li icmp.maskrepl
 Returns 1 if ICMP network mask requests are to be answered.
 .It Li ip.forwarding
 Returns 1 when IP forwarding is enabled for the host,
 meaning that the host is acting as a router.
 .It Li ip.redirect
 Returns 1 when ICMP redirects may be sent by the host.
 This option is ignored unless the host is routing IP packets,
 and should normally be enabled on all systems.
 .It Li ip.ttl
 The maximum time-to-live (hop count) value for an IP packet sourced by
 the system.
 This value applies to normal transport protocols, not to ICMP.
 .It Li udp.checksum
 Returns 1 when UDP checksums are being computed and checked.
 Disabling UDP checksums is strongly discouraged.
 .Pp
 For variables net.inet.*.ipsec, please refer to
 .Xr ipsec 4 .
 .El
 .It Li PF_INET6
 Get or set various global information about the IPv6
 (Internet Protocol version 6).
 The third level name is the protocol.
 The fourth level name is the variable name.
 .Pp
 For variables net.inet6.* please refer to
 .Xr inet6 4 .
 For variables net.inet6.*.ipsec6, please refer to
 .Xr ipsec 4 .
 .El
 .Ss CTL_USER
 The string and integer information available for the CTL_USER level
 is detailed below.
 The changeable column shows whether a process with appropriate
 privilege may change the value.
 .Bl -column "USER_COLL_WEIGHTS_MAXXXX" "integerXXX" -offset indent
 .It Sy "Second level name	Type	Changeable"
 .It "USER_BC_BASE_MAX	integer	no"
 .It "USER_BC_DIM_MAX	integer	no"
 .It "USER_BC_SCALE_MAX	integer	no"
 .It "USER_BC_STRING_MAX	integer	no"
 .It "USER_COLL_WEIGHTS_MAX	integer	no"
 .It "USER_CS_PATH	string	no"
 .It "USER_EXPR_NEST_MAX	integer	no"
 .It "USER_LINE_MAX	integer	no"
 .It "USER_POSIX2_CHAR_TERM	integer	no"
 .It "USER_POSIX2_C_BIND	integer	no"
 .It "USER_POSIX2_C_DEV	integer	no"
 .It "USER_POSIX2_FORT_DEV	integer	no"
 .It "USER_POSIX2_FORT_RUN	integer	no"
 .It "USER_POSIX2_LOCALEDEF	integer	no"
 .It "USER_POSIX2_SW_DEV	integer	no"
 .It "USER_POSIX2_UPE	integer	no"
 .It "USER_POSIX2_VERSION	integer	no"
 .It "USER_RE_DUP_MAX	integer	no"
 .It "USER_STREAM_MAX	integer	no"
 .It "USER_TZNAME_MAX	integer	no"
 .El
 .Bl -tag -width 6n
 .It Li USER_BC_BASE_MAX
 The maximum ibase/obase values in the
 .Xr bc 1
 utility.
 .It Li USER_BC_DIM_MAX
 The maximum array size in the
 .Xr bc 1
 utility.
 .It Li USER_BC_SCALE_MAX
 The maximum scale value in the
 .Xr bc 1
 utility.
 .It Li USER_BC_STRING_MAX
 The maximum string length in the
 .Xr bc 1
 utility.
 .It Li USER_COLL_WEIGHTS_MAX
 The maximum number of weights that can be assigned to any entry of
 the LC_COLLATE order keyword in the locale definition file.
 .It Li USER_CS_PATH
 Return a value for the
 .Ev PATH
 environment variable that finds all the standard utilities.
 .It Li USER_EXPR_NEST_MAX
 The maximum number of expressions that can be nested within
 parentheses by the
 .Xr expr 1
 utility.
 .It Li USER_LINE_MAX
 The maximum length in bytes of a text-processing utility's input
 line.
 .It Li USER_POSIX2_CHAR_TERM
 Return 1 if the system supports at least one terminal type capable of
 all operations described in
 .St -p1003.2 ,
 otherwise 0.
 .It Li USER_POSIX2_C_BIND
 Return 1 if the system's C-language development facilities support the
 C-Language Bindings Option, otherwise 0.
 .It Li USER_POSIX2_C_DEV
 Return 1 if the system supports the C-Language Development Utilities Option,
 otherwise 0.
 .It Li USER_POSIX2_FORT_DEV
 Return 1 if the system supports the FORTRAN Development Utilities Option,
 otherwise 0.
 .It Li USER_POSIX2_FORT_RUN
 Return 1 if the system supports the FORTRAN Runtime Utilities Option,
 otherwise 0.
 .It Li USER_POSIX2_LOCALEDEF
 Return 1 if the system supports the creation of locales, otherwise 0.
 .It Li USER_POSIX2_SW_DEV
 Return 1 if the system supports the Software Development Utilities Option,
 otherwise 0.
 .It Li USER_POSIX2_UPE
 Return 1 if the system supports the User Portability Utilities Option,
 otherwise 0.
 .It Li USER_POSIX2_VERSION
 The version of
 .St -p1003.2
 with which the system attempts to comply.
 .It Li USER_RE_DUP_MAX
 The maximum number of repeated occurrences of a regular expression
 permitted when using interval notation.
 .It Li USER_STREAM_MAX
 The minimum maximum number of streams that a process may have open
 at any one time.
 .It Li USER_TZNAME_MAX
 The minimum maximum number of types supported for the name of a
 timezone.
 .El
 .Ss CTL_VM
 The string and integer information available for the CTL_VM level
 is detailed below.
 The changeable column shows whether a process with appropriate
 privilege may change the value.
 .Bl -column "Second level nameXXXXXX" "struct loadavgXXX" -offset indent
 .It Sy "Second level name	Type	Changeable"
 .It "VM_LOADAVG	struct loadavg	no"
 .It "VM_TOTAL	struct vmtotal	no"
 .It "VM_SWAPPING_ENABLED	integer	maybe"
-.It "VM_V_CACHE_MAX	integer	yes"
-.It "VM_V_CACHE_MIN	integer	yes"
 .It "VM_V_FREE_MIN	integer	yes"
 .It "VM_V_FREE_RESERVED	integer	yes"
 .It "VM_V_FREE_TARGET	integer	yes"
 .It "VM_V_INACTIVE_TARGET	integer	yes"
 .It "VM_V_PAGEOUT_FREE_MIN	integer	yes"
 .El
 .Bl -tag -width 6n
 .It Li VM_LOADAVG
 Return the load average history.
 The returned data consists of a
 .Va struct loadavg .
 .It Li VM_TOTAL
 Return the system wide virtual memory statistics.
 The returned data consists of a
 .Va struct vmtotal .
 .It Li VM_SWAPPING_ENABLED
 1 if process swapping is enabled or 0 if disabled.
 This variable is
 permanently set to 0 if the kernel was built with swapping disabled.
-.It Li VM_V_CACHE_MAX
-Maximum desired size of the cache queue.
-.It Li VM_V_CACHE_MIN
-Minimum desired size of the cache queue.
-If the cache queue size
-falls very far below this value, the pageout daemon is awakened.
 .It Li VM_V_FREE_MIN
 Minimum amount of memory (cache memory plus free memory)
 required to be available before a process waiting on memory will be
 awakened.
 .It Li VM_V_FREE_RESERVED
 Processes will awaken the pageout daemon and wait for memory if the
 number of free and cached pages drops below this value.
 .It Li VM_V_FREE_TARGET
 The total amount of free memory (including cache memory) that the
 pageout daemon tries to maintain.
 .It Li VM_V_INACTIVE_TARGET
 The desired number of inactive pages that the pageout daemon should
 achieve when it runs.
 Inactive pages can be quickly inserted into
 process address space when needed.
 .It Li VM_V_PAGEOUT_FREE_MIN
 If the amount of free and cache memory falls below this value, the
 pageout daemon will enter "memory conserving mode" to avoid deadlock.
 .El
 .Sh RETURN VALUES
 .Rv -std
 .Sh FILES
 .Bl -tag -width <netinet/icmpXvar.h> -compact
 .It In sys/sysctl.h
 definitions for top level identifiers, second level kernel and hardware
 identifiers, and user level identifiers
 .It In sys/socket.h
 definitions for second level network identifiers
 .It In sys/gmon.h
 definitions for third level profiling identifiers
 .It In vm/vm_param.h
 definitions for second level virtual memory identifiers
 .It In netinet/in.h
 definitions for third level IPv4/IPv6 identifiers and
 fourth level IPv4/v6 identifiers
 .It In netinet/icmp_var.h
 definitions for fourth level ICMP identifiers
 .It In netinet/icmp6.h
 definitions for fourth level ICMPv6 identifiers
 .It In netinet/udp_var.h
 definitions for fourth level UDP identifiers
 .El
 .Sh ERRORS
 The following errors may be reported:
 .Bl -tag -width Er
 .It Bq Er EFAULT
 The buffer
 .Fa name ,
 .Fa oldp ,
 .Fa newp ,
 or length pointer
 .Fa oldlenp
 contains an invalid address.
 .It Bq Er EINVAL
 The length of the
 .Fa name
 array is less than two or greater than CTL_MAXNAME.
 .It Bq Er EINVAL
 A non-null
 .Fa newp
 is given and its specified length in
 .Fa newlen
 is too large or too small.
 .It Bq Er ENOMEM
 The length pointed to by
 .Fa oldlenp
 is too short to hold the requested value.
 .It Bq Er ENOMEM
 The smaller of either the length pointed to by
 .Fa oldlenp
 or the estimated size of the returned data exceeds the
 system limit on locked memory.
 .It Bq Er ENOMEM
 Locking the buffer
 .Fa oldp ,
 or a portion of the buffer if the estimated size of the data
 to be returned is smaller,
 would cause the process to exceed its per-process locked memory limit.
 .It Bq Er ENOTDIR
 The
 .Fa name
 array specifies an intermediate rather than terminal name.
 .It Bq Er EISDIR
 The
 .Fa name
 array specifies a terminal name, but the actual name is not terminal.
 .It Bq Er ENOENT
 The
 .Fa name
 array specifies a value that is unknown.
 .It Bq Er EPERM
 An attempt is made to set a read-only value.
 .It Bq Er EPERM
 A process without appropriate privilege attempts to set a value.
 .El
 .Sh SEE ALSO
 .Xr confstr 3 ,
 .Xr kvm 3 ,
 .Xr sysconf 3 ,
 .Xr sysctl 8
 .Sh HISTORY
 The
 .Fn sysctl
 function first appeared in
 .Bx 4.4 .
Index: head/sys/sys/vmmeter.h
===================================================================
--- head/sys/sys/vmmeter.h	(revision 287639)
+++ head/sys/sys/vmmeter.h	(revision 287640)
@@ -1,213 +1,212 @@
 /*-
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vmmeter.h	8.2 (Berkeley) 7/10/94
  * $FreeBSD$
  */
 
 #ifndef _SYS_VMMETER_H_
 #define _SYS_VMMETER_H_
 
 /*
  * This value is used by ps(1) to change sleep state flag from 'S' to
  * 'I' and by the sched process to set the alarm clock.
  */
 #define	MAXSLP			20
 
 /*
  * System wide statistics counters.
  * Locking:
  *      a - locked by atomic operations
  *      c - constant after initialization
  *      f - locked by vm_page_queue_free_mtx
  *      p - locked by being in the PCPU and atomicity with respect to interrupts
  *      q - changes are synchronized by the corresponding vm_pagequeue lock
  */
 struct vmmeter {
 	/*
 	 * General system activity.
 	 */
 	u_int v_swtch;		/* (p) context switches */
 	u_int v_trap;		/* (p) calls to trap */
 	u_int v_syscall;	/* (p) calls to syscall() */
 	u_int v_intr;		/* (p) device interrupts */
 	u_int v_soft;		/* (p) software interrupts */
 	/*
 	 * Virtual memory activity.
 	 */
 	u_int v_vm_faults;	/* (p) address memory faults */
 	u_int v_io_faults;	/* (p) page faults requiring I/O */
 	u_int v_cow_faults;	/* (p) copy-on-write faults */
 	u_int v_cow_optim;	/* (p) optimized copy-on-write faults */
 	u_int v_zfod;		/* (p) pages zero filled on demand */
 	u_int v_ozfod;		/* (p) optimized zero fill pages */
 	u_int v_swapin;		/* (p) swap pager pageins */
 	u_int v_swapout;	/* (p) swap pager pageouts */
 	u_int v_swappgsin;	/* (p) swap pager pages paged in */
 	u_int v_swappgsout;	/* (p) swap pager pages paged out */
 	u_int v_vnodein;	/* (p) vnode pager pageins */
 	u_int v_vnodeout;	/* (p) vnode pager pageouts */
 	u_int v_vnodepgsin;	/* (p) vnode_pager pages paged in */
 	u_int v_vnodepgsout;	/* (p) vnode pager pages paged out */
 	u_int v_intrans;	/* (p) intransit blocking page faults */
 	u_int v_reactivated;	/* (f) pages reactivated from free list */
 	u_int v_pdwakeups;	/* (f) times daemon has awakened from sleep */
 	u_int v_pdpages;	/* (p) pages analyzed by daemon */
 
 	u_int v_tcached;	/* (p) total pages cached */
 	u_int v_dfree;		/* (p) pages freed by daemon */
 	u_int v_pfree;		/* (p) pages freed by exiting processes */
 	u_int v_tfree;		/* (p) total pages freed */
 	/*
 	 * Distribution of page usages.
 	 */
 	u_int v_page_size;	/* (c) page size in bytes */
 	u_int v_page_count;	/* (c) total number of pages in system */
 	u_int v_free_reserved;	/* (c) pages reserved for deadlock */
 	u_int v_free_target;	/* (c) pages desired free */
 	u_int v_free_min;	/* (c) minimum low-free-pages threshold */
 	u_int v_free_count;	/* (f) pages free */
 	u_int v_wire_count;	/* (a) pages wired down */
 	u_int v_active_count;	/* (q) pages active */
 	u_int v_inactive_target; /* (c) pages desired inactive */
 	u_int v_inactive_count;	/* (q) pages inactive */
 	u_int v_cache_count;	/* (f) pages on cache queue */
-	u_int v_cache_min;	/* (c) min pages desired on cache queue */
-	u_int v_cache_max;	/* (c) max pages in cached obj (unused) */
 	u_int v_pageout_free_min;   /* (c) min pages reserved for kernel */
 	u_int v_interrupt_free_min; /* (c) reserved pages for int code */
 	u_int v_free_severe;	/* (c) severe page depletion point */
 	/*
 	 * Fork/vfork/rfork activity.
 	 */
 	u_int v_forks;		/* (p) fork() calls */
 	u_int v_vforks;		/* (p) vfork() calls */
 	u_int v_rforks;		/* (p) rfork() calls */
 	u_int v_kthreads;	/* (p) fork() calls by kernel */
 	u_int v_forkpages;	/* (p) VM pages affected by fork() */
 	u_int v_vforkpages;	/* (p) VM pages affected by vfork() */
 	u_int v_rforkpages;	/* (p) VM pages affected by rfork() */
 	u_int v_kthreadpages;	/* (p) VM pages affected by fork() by kernel */
+	u_int v_spare[2];
 };
 #ifdef _KERNEL
 
 extern struct vmmeter vm_cnt;
 
 extern int vm_pageout_wakeup_thresh;
 
 /*
  * Report whether the system is below its severe low-free-pages threshold:
  * TRUE when v_free_severe exceeds the sum of the free and cache page counts.
  *
  * This routine is typically used at the user<->system interface to determine
  * whether we need to block in order to avoid a low memory deadlock.
  */
 
 static __inline
 int
 vm_page_count_severe(void)
 {
 	unsigned int avail;
 
 	avail = vm_cnt.v_free_count + vm_cnt.v_cache_count;
 	return (vm_cnt.v_free_severe > avail);
 }
 
 /*
  * Report whether the system is below its minimum low-free-pages threshold:
  * TRUE when v_free_min exceeds the sum of the free and cache page counts.
  *
  * This routine is typically used within the system to determine whether
  * we can execute potentially very expensive code in terms of memory.  It
  * is also used by the pageout daemon to calculate when to sleep, when
  * to wake waiters up, and when (after making a pass) to become more
  * desperate.
  */
 
 static __inline
 int
 vm_page_count_min(void)
 {
 	unsigned int avail;
 
 	avail = vm_cnt.v_free_count + vm_cnt.v_cache_count;
 	return (vm_cnt.v_free_min > avail);
 }
 
 /*
  * Report whether the free page target is still unmet during free page
  * recovery: TRUE when v_free_target exceeds the sum of the free and cache
  * page counts.
  */
 
 static __inline
 int
 vm_page_count_target(void)
 {
 	unsigned int avail;
 
 	avail = vm_cnt.v_free_count + vm_cnt.v_cache_count;
 	return (vm_cnt.v_free_target > avail);
 }
 
 /*
  * Return how many pages still have to be freed or cached to reach
  * v_free_target, i.e. the target minus the free and cache page counts.
  * A positive result means there are not enough free pages.
  */
 
 static __inline
 int
 vm_paging_target(void)
 {
 	unsigned int avail;
 
 	avail = vm_cnt.v_free_count + vm_cnt.v_cache_count;
 	return (vm_cnt.v_free_target - avail);
 }
 
 /*
  * Returns TRUE if the pagedaemon needs to be woken up, that is, when the
  * free and cache page counts together fall below vm_pageout_wakeup_thresh.
  */
 
 static __inline
 int
 vm_paging_needed(void)
 {
 	unsigned int avail;
 
 	avail = vm_cnt.v_free_count + vm_cnt.v_cache_count;
 	return (avail < vm_pageout_wakeup_thresh);
 }
 
 #endif
 
 /*
  * systemwide totals computed every five seconds
  *
  * NOTE(review): the vm.vmtotal sysctl handler (vmtotal() in vm_meter.c)
  * fills this structure in on each request; the "every five seconds" wording
  * may be historical -- confirm.
  */
 struct vmtotal {
 	int16_t	t_rq;		/* length of the run queue */
 	int16_t	t_dw;		/* jobs in ``disk wait'' (neg priority) */
 	int16_t	t_pw;		/* jobs in page wait */
 	int16_t	t_sl;		/* jobs sleeping in core */
 	int16_t	t_sw;		/* swapped out runnable/short block jobs */
 	int32_t	t_vm;		/* total virtual memory */
 	int32_t	t_avm;		/* active virtual memory */
 	int32_t	t_rm;		/* total real memory in use */
 	int32_t	t_arm;		/* active real memory */
 	int32_t	t_vmshr;	/* shared virtual memory */
 	int32_t	t_avmshr;	/* active shared virtual memory */
 	int32_t	t_rmshr;	/* shared real memory */
 	int32_t	t_armshr;	/* active shared real memory */
 	int32_t	t_free;		/* free memory pages (free + cache counts) */
 };
 
 #endif
Index: head/sys/vm/vm_meter.c
===================================================================
--- head/sys/vm/vm_meter.c	(revision 287639)
+++ head/sys/vm/vm_meter.c	(revision 287640)
@@ -1,325 +1,319 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vm_meter.c	8.4 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resource.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
 #include <sys/vmmeter.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <sys/sysctl.h>
 
 struct vmmeter vm_cnt;
 
 SYSCTL_UINT(_vm, VM_V_FREE_MIN, v_free_min,
 	CTLFLAG_RW, &vm_cnt.v_free_min, 0, "Minimum low-free-pages threshold");
 SYSCTL_UINT(_vm, VM_V_FREE_TARGET, v_free_target,
 	CTLFLAG_RW, &vm_cnt.v_free_target, 0, "Desired free pages");
 SYSCTL_UINT(_vm, VM_V_FREE_RESERVED, v_free_reserved,
 	CTLFLAG_RW, &vm_cnt.v_free_reserved, 0, "Pages reserved for deadlock");
 SYSCTL_UINT(_vm, VM_V_INACTIVE_TARGET, v_inactive_target,
 	CTLFLAG_RW, &vm_cnt.v_inactive_target, 0, "Pages desired inactive");
-SYSCTL_UINT(_vm, VM_V_CACHE_MIN, v_cache_min,
-	CTLFLAG_RW, &vm_cnt.v_cache_min, 0, "Min pages on cache queue");
-SYSCTL_UINT(_vm, VM_V_CACHE_MAX, v_cache_max,
-	CTLFLAG_RW, &vm_cnt.v_cache_max, 0, "Max pages on cache queue");
 SYSCTL_UINT(_vm, VM_V_PAGEOUT_FREE_MIN, v_pageout_free_min,
 	CTLFLAG_RW, &vm_cnt.v_pageout_free_min, 0, "Min pages reserved for kernel");
 SYSCTL_UINT(_vm, OID_AUTO, v_free_severe,
 	CTLFLAG_RW, &vm_cnt.v_free_severe, 0, "Severe page depletion point");
 
 /*
  * Sysctl handler that copies out the load average history.  When
  * SCTL_MASK32 is available and set in the request flags, the three ldavg
  * values and fscale are copied out as four 32-bit words; otherwise the
  * averunnable structure is copied out as is.
  */
 static int
 sysctl_vm_loadavg(SYSCTL_HANDLER_ARGS)
 {
 #ifdef SCTL_MASK32
 	if ((req->flags & SCTL_MASK32) != 0) {
 		u_int32_t la32[4];
 		int idx;
 
 		for (idx = 0; idx < 3; idx++)
 			la32[idx] = averunnable.ldavg[idx];
 		la32[3] = averunnable.fscale;
 		return (SYSCTL_OUT(req, la32, sizeof(la32)));
 	}
 #endif
 	return (SYSCTL_OUT(req, &averunnable, sizeof(averunnable)));
 }
 SYSCTL_PROC(_vm, VM_LOADAVG, loadavg, CTLTYPE_STRUCT | CTLFLAG_RD |
     CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_loadavg, "S,loadavg",
     "Machine loadaverage history");
 
 /*
  * Sysctl handler for vm.vmtotal: build a struct vmtotal snapshot and copy it
  * out with sysctl_handle_opaque().
  *
  * The snapshot is computed in three passes:
  *   1. clear OBJ_ACTIVE on every object on vm_object_list;
  *   2. walk all non-system processes, classifying each thread by state and
  *      setting OBJ_ACTIVE on every object mapped by the process;
  *   3. walk vm_object_list again, summing object sizes and resident page
  *      counts into the total, active and shared buckets.
  */
 static int
 vmtotal(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	struct vmtotal total;
 	vm_map_entry_t entry;
 	vm_object_t object;
 	vm_map_t map;
 	int paging;
 	struct thread *td;
 	struct vmspace *vm;
 
 	bzero(&total, sizeof(total));
 	/*
 	 * Mark all objects as inactive.
 	 */
 	mtx_lock(&vm_object_list_mtx);
 	TAILQ_FOREACH(object, &vm_object_list, object_list) {
 		VM_OBJECT_WLOCK(object);
 		vm_object_clear_flag(object, OBJ_ACTIVE);
 		VM_OBJECT_WUNLOCK(object);
 	}
 	mtx_unlock(&vm_object_list_mtx);
 	/*
 	 * Calculate process statistics.
 	 */
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if (p->p_flag & P_SYSTEM)
 			continue;
 		PROC_LOCK(p);
 		switch (p->p_state) {
 		case PRS_NEW:
 			/* PRS_NEW processes are not counted at all. */
 			PROC_UNLOCK(p);
 			continue;
 			break;
 		default:
 			FOREACH_THREAD_IN_PROC(p, td) {
 				thread_lock(td);
 				switch (td->td_state) {
 				case TDS_INHIBITED:
 					if (TD_IS_SWAPPED(td))
 						total.t_sw++;
 					else if (TD_IS_SLEEPING(td) &&
 					    td->td_priority <= PZERO)
 						total.t_dw++;
 					else
 						total.t_sl++;
 					break;
 
 				case TDS_CAN_RUN:
 					total.t_sw++;
 					break;
 				case TDS_RUNQ:
 				case TDS_RUNNING:
 					total.t_rq++;
 					/*
 					 * Unlock here: the continue skips the
 					 * common thread_unlock() below.
 					 */
 					thread_unlock(td);
 					continue;
 				default:
 					break;
 				}
 				thread_unlock(td);
 			}
 		}
 		PROC_UNLOCK(p);
 		/*
 		 * Note active objects.
 		 */
 		paging = 0;
 		vm = vmspace_acquire_ref(p);
 		if (vm == NULL)
 			continue;
 		map = &vm->vm_map;
 		vm_map_lock_read(map);
 		for (entry = map->header.next;
 		    entry != &map->header; entry = entry->next) {
 			/* Skip submap entries and entries without an object. */
 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
 			    (object = entry->object.vm_object) == NULL)
 				continue;
 			VM_OBJECT_WLOCK(object);
 			vm_object_set_flag(object, OBJ_ACTIVE);
 			/*
 			 * Any mapped object with paging in progress makes the
 			 * process count as being in page wait (t_pw) below.
 			 */
 			paging |= object->paging_in_progress;
 			VM_OBJECT_WUNLOCK(object);
 		}
 		vm_map_unlock_read(map);
 		vmspace_free(vm);
 		if (paging)
 			total.t_pw++;
 	}
 	sx_sunlock(&allproc_lock);
 	/*
 	 * Calculate object memory usage statistics.
 	 */
 	mtx_lock(&vm_object_list_mtx);
 	TAILQ_FOREACH(object, &vm_object_list, object_list) {
 		/*
 		 * Perform unsynchronized reads on the object.  In
 		 * this case, the lack of synchronization should not
 		 * impair the accuracy of the reported statistics.
 		 */
 		if ((object->flags & OBJ_FICTITIOUS) != 0) {
 			/*
 			 * Devices, like /dev/mem, will badly skew our totals.
 			 */
 			continue;
 		}
 		if (object->ref_count == 0) {
 			/*
 			 * Also skip unreferenced objects, including
 			 * vnodes representing mounted file systems.
 			 */
 			continue;
 		}
 		total.t_vm += object->size;
 		total.t_rm += object->resident_page_count;
 		if (object->flags & OBJ_ACTIVE) {
 			total.t_avm += object->size;
 			total.t_arm += object->resident_page_count;
 		}
 		if (object->shadow_count > 1) {
 			/* shared object */
 			total.t_vmshr += object->size;
 			total.t_rmshr += object->resident_page_count;
 			if (object->flags & OBJ_ACTIVE) {
 				total.t_avmshr += object->size;
 				total.t_armshr += object->resident_page_count;
 			}
 		}
 	}
 	mtx_unlock(&vm_object_list_mtx);
 	/* Free memory is reported as the free plus cache page counts. */
 	total.t_free = vm_cnt.v_free_count + vm_cnt.v_cache_count;
 	return (sysctl_handle_opaque(oidp, &total, sizeof(total), req));
 }
 
 /*
  * vcnt() -	accumulate statistics from all cpus and the global vm_cnt
  *		structure.
  *
  *	The vmmeter structure is now per-cpu as well as global.  Those
  *	statistics which can be kept on a per-cpu basis (to avoid cache
  *	stalls between cpus) can be moved to the per-cpu vmmeter.  Remaining
  *	statistics, such as v_free_reserved, are left in the global
  *	structure.
  *
  *	arg1 points at one counter inside vm_cnt; the same byte offset is
  *	applied to each cpu's pc_cnt to find the matching per-cpu counter.
  *	The counters are u_int, so they are read and summed as unsigned
  *	values: wrap-around is then well defined, whereas a signed
  *	accumulator would overflow.  The sum is copied out with SYSCTL_OUT()
  *	and that call's result is returned.
  *
  * (sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
  */
 static int
 vcnt(SYSCTL_HANDLER_ARGS)
 {
 	unsigned int count = *(unsigned int *)arg1;
 	int offset = (char *)arg1 - (char *)&vm_cnt;
 	int i;
 
 	CPU_FOREACH(i) {
 		struct pcpu *pcpu = pcpu_find(i);
 		count += *(unsigned int *)((char *)&pcpu->pc_cnt + offset);
 	}
 	return (SYSCTL_OUT(req, &count, sizeof(count)));
 }
 
 SYSCTL_PROC(_vm, VM_TOTAL, vmtotal, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE,
     0, sizeof(struct vmtotal), vmtotal, "S,vmtotal", 
     "System virtual memory statistics");
 SYSCTL_NODE(_vm, OID_AUTO, stats, CTLFLAG_RW, 0, "VM meter stats");
 static SYSCTL_NODE(_vm_stats, OID_AUTO, sys, CTLFLAG_RW, 0,
 	"VM meter sys stats");
 static SYSCTL_NODE(_vm_stats, OID_AUTO, vm, CTLFLAG_RW, 0,
 	"VM meter vm stats");
 SYSCTL_NODE(_vm_stats, OID_AUTO, misc, CTLFLAG_RW, 0, "VM meter misc stats");
 
 /*
  * VM_STATS(parent, var, descr) declares a read-only, MP-safe unsigned sysctl
  * named "var" under "parent".  Its handler is vcnt(), which receives the
  * address of vm_cnt.var as arg1.  VM_STATS_VM and VM_STATS_SYS fix the
  * parent to the _vm_stats_vm and _vm_stats_sys nodes respectively.
  */
 #define	VM_STATS(parent, var, descr) \
 	SYSCTL_PROC(parent, OID_AUTO, var, \
 	    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, &vm_cnt.var, 0, vcnt, \
 	    "IU", descr)
 #define	VM_STATS_VM(var, descr)		VM_STATS(_vm_stats_vm, var, descr)
 #define	VM_STATS_SYS(var, descr)	VM_STATS(_vm_stats_sys, var, descr)
 
 VM_STATS_SYS(v_swtch, "Context switches");
 VM_STATS_SYS(v_trap, "Traps");
 VM_STATS_SYS(v_syscall, "System calls");
 VM_STATS_SYS(v_intr, "Device interrupts");
 VM_STATS_SYS(v_soft, "Software interrupts");
 VM_STATS_VM(v_vm_faults, "Address memory faults");
 VM_STATS_VM(v_io_faults, "Page faults requiring I/O");
 VM_STATS_VM(v_cow_faults, "Copy-on-write faults");
 VM_STATS_VM(v_cow_optim, "Optimized COW faults");
 VM_STATS_VM(v_zfod, "Pages zero-filled on demand");
 VM_STATS_VM(v_ozfod, "Optimized zero fill pages");
 VM_STATS_VM(v_swapin, "Swap pager pageins");
 VM_STATS_VM(v_swapout, "Swap pager pageouts");
 VM_STATS_VM(v_swappgsin, "Swap pages swapped in");
 VM_STATS_VM(v_swappgsout, "Swap pages swapped out");
 VM_STATS_VM(v_vnodein, "Vnode pager pageins");
 VM_STATS_VM(v_vnodeout, "Vnode pager pageouts");
 VM_STATS_VM(v_vnodepgsin, "Vnode pages paged in");
 VM_STATS_VM(v_vnodepgsout, "Vnode pages paged out");
 VM_STATS_VM(v_intrans, "In transit page faults");
 VM_STATS_VM(v_reactivated, "Pages reactivated from free list");
 VM_STATS_VM(v_pdwakeups, "Pagedaemon wakeups");
 VM_STATS_VM(v_pdpages, "Pages analyzed by pagedaemon");
 VM_STATS_VM(v_tcached, "Total pages cached");
 VM_STATS_VM(v_dfree, "Pages freed by pagedaemon");
 VM_STATS_VM(v_pfree, "Pages freed by exiting processes");
 VM_STATS_VM(v_tfree, "Total pages freed");
 VM_STATS_VM(v_page_size, "Page size in bytes");
 VM_STATS_VM(v_page_count, "Total number of pages in system");
 VM_STATS_VM(v_free_reserved, "Pages reserved for deadlock");
 VM_STATS_VM(v_free_target, "Pages desired free");
 VM_STATS_VM(v_free_min, "Minimum low-free-pages threshold");
 VM_STATS_VM(v_free_count, "Free pages");
 VM_STATS_VM(v_wire_count, "Wired pages");
 VM_STATS_VM(v_active_count, "Active pages");
 VM_STATS_VM(v_inactive_target, "Desired inactive pages");
 VM_STATS_VM(v_inactive_count, "Inactive pages");
 VM_STATS_VM(v_cache_count, "Pages on cache queue");
-VM_STATS_VM(v_cache_min, "Min pages on cache queue");
-VM_STATS_VM(v_cache_max, "Max pages on cached queue");
 VM_STATS_VM(v_pageout_free_min, "Min pages reserved for kernel");
 VM_STATS_VM(v_interrupt_free_min, "Reserved pages for interrupt code");
 VM_STATS_VM(v_forks, "Number of fork() calls");
 VM_STATS_VM(v_vforks, "Number of vfork() calls");
 VM_STATS_VM(v_rforks, "Number of rfork() calls");
 VM_STATS_VM(v_kthreads, "Number of fork() calls by kernel");
 VM_STATS_VM(v_forkpages, "VM pages affected by fork()");
 VM_STATS_VM(v_vforkpages, "VM pages affected by vfork()");
 VM_STATS_VM(v_rforkpages, "VM pages affected by rfork()");
 VM_STATS_VM(v_kthreadpages, "VM pages affected by fork() by kernel");
 
 SYSCTL_INT(_vm_stats_misc, OID_AUTO, zero_page_count, CTLFLAG_RD,
 	&vm_page_zero_count, 0, "Number of zero-ed free pages");
Index: head/sys/vm/vm_page.c
===================================================================
--- head/sys/vm/vm_page.c	(revision 287639)
+++ head/sys/vm/vm_page.c	(revision 287640)
@@ -1,3337 +1,3336 @@
 /*-
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
  */
 
 /*-
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /*
  *			GENERAL RULES ON VM_PAGE MANIPULATION
  *
  *	- A page queue lock is required when adding or removing a page from a
  *	  page queue regardless of other locks or the busy state of a page.
  *
  *		* In general, no thread besides the page daemon can acquire or
  *		  hold more than one page queue lock at a time.
  *
  *		* The page daemon can acquire and hold any pair of page queue
  *		  locks in any order.
  *
  *	- The object lock is required when inserting or removing
  *	  pages from an object (vm_page_insert() or vm_page_remove()).
  *
  */
 
 /*
  *	Resident memory management module.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/linker.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_radix.h>
 #include <vm/vm_reserv.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 
 #include <machine/md_var.h>
 
 /*
  *	Associated with each page of user-allocatable memory is a
  *	page structure.
  */
 
 struct vm_domain vm_dom[MAXMEMDOM];
 struct mtx_padalign vm_page_queue_free_mtx;
 
 struct mtx_padalign pa_lock[PA_LOCK_COUNT];
 
 vm_page_t vm_page_array;
 long vm_page_array_size;
 long first_page;
 int vm_page_zero_count;
 
 static int boot_pages = UMA_BOOT_PAGES;
 SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &boot_pages, 0,
     "number of pages allocated for bootstrapping the VM system");
 
 static int pa_tryrelock_restart;
 SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
     &pa_tryrelock_restart, 0, "Number of tryrelock restarts");
 
 static TAILQ_HEAD(, vm_page) blacklist_head;
 static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
     CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages");
 
 
 static uma_zone_t fakepg_zone;
 
 static struct vnode *vm_page_alloc_init(vm_page_t m);
 static void vm_page_cache_turn_free(vm_page_t m);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_enqueue(uint8_t queue, vm_page_t m);
 static void vm_page_init_fakepg(void *dummy);
 static int vm_page_insert_after(vm_page_t m, vm_object_t object,
     vm_pindex_t pindex, vm_page_t mpred);
 static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
     vm_page_t mpred);
 
 SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL);
 
 static void
 vm_page_init_fakepg(void *dummy)
 {
 
 	fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM);
 }
 
 /* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */
 #if PAGE_SIZE == 32768
 #ifdef CTASSERT
 CTASSERT(sizeof(u_long) >= 8);
 #endif
 #endif
 
 /*
  * Try to acquire a physical address lock while a pmap is locked.  If we
  * fail to trylock we unlock and lock the pmap directly and cache the
  * locked pa in *locked.  The caller should then restart their loop in case
  * the virtual to physical mapping has changed.
  *
  * On entry *locked holds the pa whose lock the caller already owns, or 0 if
  * none; on return it holds pa.  Returns 0 when the lock covering pa was
  * obtained without releasing the pmap lock, and EAGAIN when the pmap lock
  * had to be dropped and re-taken.
  */
 int
 vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked)
 {
 	vm_paddr_t lockpa;
 
 	lockpa = *locked;
 	*locked = pa;
 	if (lockpa) {
 		PA_LOCK_ASSERT(lockpa, MA_OWNED);
 		/* Both addresses share one lock, which is already held. */
 		if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa))
 			return (0);
 		PA_UNLOCK(lockpa);
 	}
 	if (PA_TRYLOCK(pa))
 		return (0);
 	/*
 	 * The trylock failed: release the pmap lock before blocking on the
 	 * pa lock, then take the pmap lock again and count the restart.
 	 */
 	PMAP_UNLOCK(pmap);
 	atomic_add_int(&pa_tryrelock_restart, 1);
 	PA_LOCK(pa);
 	PMAP_LOCK(pmap);
 	return (EAGAIN);
 }
 
 /*
  *	vm_set_page_size:
  *
  *	Establishes vm_cnt.v_page_size, defaulting it to PAGE_SIZE when it
  *	has not been set yet, and panics unless the result is a power of
  *	two.  Must be called before any use of page-size dependent
  *	functions.
  */
 void
 vm_set_page_size(void)
 {
 	unsigned int psize;
 
 	if (vm_cnt.v_page_size == 0)
 		vm_cnt.v_page_size = PAGE_SIZE;
 	psize = vm_cnt.v_page_size;
 	/* A power of two shares no set bits with its predecessor. */
 	if ((psize & (psize - 1)) != 0)
 		panic("vm_set_page_size: page size not a power of two");
 }
 
 /*
  *	vm_page_blacklist_next:
  *
  *	Find the next entry in the provided string of blacklist
  *	addresses.  Entries are separated by space, comma, or newline.
  *	If an invalid integer is encountered then the rest of the
  *	string is skipped.  Updates the list pointer to the next
  *	character, or NULL if the string is exhausted or invalid.
  *
  *	Returns the page-truncated address of the entry found, or 0 when
  *	no entry is returned.  A parsed value of 0 is never returned as an
  *	entry; it is skipped along with the delimiter that follows it.
  */
 static vm_paddr_t
 vm_page_blacklist_next(char **list, char *end)
 {
 	vm_paddr_t bad;
 	char *cp, *pos;
 
 	if (list == NULL || *list == NULL)
 		return (0);
 	if (**list =='\0') {
 		*list = NULL;
 		return (0);
 	}
 
 	/*
 	 * If there's no end pointer then the buffer is coming from
 	 * the kenv and we know it's null-terminated.
 	 */
 	if (end == NULL)
 		end = *list + strlen(*list);
 
 	/* Ensure that strtoq() won't walk off the end */
 	if (*end != '\0') {
 		/* A delimiter at *end is overwritten with a terminator. */
 		if (*end == '\n' || *end == ' ' || *end  == ',')
 			*end = '\0';
 		else {
 			printf("Blacklist not terminated, skipping\n");
 			*list = NULL;
 			return (0);
 		}
 	}
 
 	for (pos = *list; *pos != '\0'; pos = cp) {
 		bad = strtoq(pos, &cp, 0);
 		if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') {
 			if (bad == 0) {
 				/*
 				 * Nothing usable before this delimiter: step
 				 * over it and retry, or give up at the end.
 				 */
 				if (++cp < end)
 					continue;
 				else
 					break;
 			}
 		} else
 			break;
 		/* Found an entry; leave *list just past its delimiter. */
 		if (*cp == '\0' || ++cp >= end)
 			*list = NULL;
 		else
 			*list = cp;
 		return (trunc_page(bad));
 	}
 	/*
 	 * Reached on an unexpected character, and also when the input runs
 	 * out before a non-zero address is found.
 	 */
 	printf("Garbage in RAM blacklist, skipping\n");
 	*list = NULL;
 	return (0);
 }
 
 /*
  *	vm_page_blacklist_check:
  *
  *	Iterate through the provided string of blacklist addresses, pulling
  *	each entry out of the physical allocator free list and putting it
  *	onto a list for reporting via the vm.page_blacklist sysctl.
  */
 static void
 vm_page_blacklist_check(char *list, char *end)
 {
 	vm_paddr_t pa;
 	vm_page_t m;
 	char *next;
 	int ret;
 
 	next = list;
 	while (next != NULL) {
 		if ((pa = vm_page_blacklist_next(&next, end)) == 0)
 			continue;
 		m = vm_phys_paddr_to_vm_page(pa);
 		if (m == NULL)
 			continue;
 		mtx_lock(&vm_page_queue_free_mtx);
 		ret = vm_phys_unfree_page(m);
 		mtx_unlock(&vm_page_queue_free_mtx);
 		if (ret == TRUE) {
 			TAILQ_INSERT_TAIL(&blacklist_head, m, listq);
 			if (bootverbose)
 				printf("Skipping page with pa 0x%jx\n",
 				    (uintmax_t)pa);
 		}
 	}
 }
 
 /*
  *	vm_page_blacklist_load:
  *
  *	Look up the preloaded module of type "ram_blacklist", a plain text
  *	file supplied by the user through the loader directive of the same
  *	name.  On return *list points at its data and *end just past it;
  *	both are NULL when no such data is available.
  */
 static void
 vm_page_blacklist_load(char **list, char **end)
 {
 	void *mod;
 	u_char *base;
 	u_int size;
 
 	base = NULL;
 	size = 0;
 	mod = preload_search_by_type("ram_blacklist");
 	if (mod != NULL) {
 		base = preload_fetch_addr(mod);
 		size = preload_fetch_size(mod);
 	}
 	*list = base;
 	*end = (base != NULL) ? base + size : NULL;
 }
 
 /*
  * Sysctl handler for vm.page_blacklist: emit the physical address of every
  * page on blacklist_head as a comma-separated list of "%#jx" values.
  */
 static int
 sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
 	const char *sep;
 	vm_page_t m;
 	int error;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 	/* No separator before the first entry, a comma before the rest. */
 	sep = "";
 	TAILQ_FOREACH(m, &blacklist_head, listq) {
 		sbuf_printf(&sbuf, "%s%#jx", sep, (uintmax_t)m->phys_addr);
 		sep = ",";
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 static void
 vm_page_domain_init(struct vm_domain *vmd)
 {
 	struct vm_pagequeue *pq;
 	int i;
 
 	*__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) =
 	    "vm inactive pagequeue";
 	*__DECONST(int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) =
 	    &vm_cnt.v_inactive_count;
 	*__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) =
 	    "vm active pagequeue";
 	*__DECONST(int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) =
 	    &vm_cnt.v_active_count;
 	vmd->vmd_page_count = 0;
 	vmd->vmd_free_count = 0;
 	vmd->vmd_segs = 0;
 	vmd->vmd_oom = FALSE;
 	vmd->vmd_pass = 0;
 	for (i = 0; i < PQ_COUNT; i++) {
 		pq = &vmd->vmd_pagequeues[i];
 		TAILQ_INIT(&pq->pq_pl);
 		mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
 		    MTX_DEF | MTX_DUPOK);
 	}
 }
 
 /*
  *	vm_page_startup:
  *
  *	Initializes the resident memory module.
  *
  *	Allocates memory for the page cells, and
  *	for the object/offset-to-page hash table headers.
  *	Each page cell is initialized and placed on the free list.
  */
 vm_offset_t
 vm_page_startup(vm_offset_t vaddr)
 {
 	vm_offset_t mapped;
 	vm_paddr_t page_range;
 	vm_paddr_t new_end;
 	int i;
 	vm_paddr_t pa;
 	vm_paddr_t last_pa;
 	char *list, *listend;
 	vm_paddr_t end;
 	vm_paddr_t biggestsize;
 	vm_paddr_t low_water, high_water;
 	int biggestone;
 
 	biggestsize = 0;
 	biggestone = 0;
 	vaddr = round_page(vaddr);
 
 	/*
 	 * Page-align every phys_avail[] range: starts are rounded up and
 	 * ends are truncated down.
 	 */
 	for (i = 0; phys_avail[i + 1]; i += 2) {
 		phys_avail[i] = round_page(phys_avail[i]);
 		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
 	}
 
 	/*
 	 * Seed the physical address bounds from the first range, then
 	 * widen them over every vm_phys_segs[] and phys_avail[] entry.
 	 */
 	low_water = phys_avail[0];
 	high_water = phys_avail[1];
 
 	for (i = 0; i < vm_phys_nsegs; i++) {
 		if (vm_phys_segs[i].start < low_water)
 			low_water = vm_phys_segs[i].start;
 		if (vm_phys_segs[i].end > high_water)
 			high_water = vm_phys_segs[i].end;
 	}
 	/* While scanning, also remember the largest phys_avail[] range. */
 	for (i = 0; phys_avail[i + 1]; i += 2) {
 		vm_paddr_t size = phys_avail[i + 1] - phys_avail[i];
 
 		if (size > biggestsize) {
 			biggestone = i;
 			biggestsize = size;
 		}
 		if (phys_avail[i] < low_water)
 			low_water = phys_avail[i];
 		if (phys_avail[i + 1] > high_water)
 			high_water = phys_avail[i + 1];
 	}
 
 	/*
 	 * The boot-time allocations below are carved downward from the top
 	 * of the largest range; "new_end" tracks the shrinking upper bound.
 	 */
 	end = phys_avail[biggestone+1];
 
 	/*
 	 * Initialize the page and queue locks.
 	 */
 	mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF);
 	for (i = 0; i < PA_LOCK_COUNT; i++)
 		mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF);
 	for (i = 0; i < vm_ndomains; i++)
 		vm_page_domain_init(&vm_dom[i]);
 
 	/*
 	 * Allocate memory for use when boot strapping the kernel memory
 	 * allocator.
 	 *
 	 * CTFLAG_RDTUN doesn't work during the early boot process, so we must
 	 * manually fetch the value.
 	 */
 	TUNABLE_INT_FETCH("vm.boot_pages", &boot_pages);
 	new_end = end - (boot_pages * UMA_SLAB_SIZE);
 	new_end = trunc_page(new_end);
 	mapped = pmap_map(&vaddr, new_end, end,
 	    VM_PROT_READ | VM_PROT_WRITE);
 	bzero((void *)mapped, end - new_end);
 	uma_startup((void *)mapped, boot_pages);
 
 #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \
     defined(__i386__) || defined(__mips__)
 	/*
 	 * Allocate a bitmap to indicate that a random physical page
 	 * needs to be included in a minidump.
 	 *
 	 * The amd64 port needs this to indicate which direct map pages
 	 * need to be dumped, via calls to dump_add_page()/dump_drop_page().
 	 *
 	 * However, i386 still needs this workspace internally within the
 	 * minidump code.  In theory, they are not needed on i386, but are
 	 * included should the sf_buf code decide to use them.
 	 *
 	 * The bitmap holds one bit per page up to the highest dump_avail[]
 	 * address, and its size is rounded up to whole pages.
 	 */
 	last_pa = 0;
 	for (i = 0; dump_avail[i + 1] != 0; i += 2)
 		if (dump_avail[i + 1] > last_pa)
 			last_pa = dump_avail[i + 1];
 	page_range = last_pa / PAGE_SIZE;
 	vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY);
 	new_end -= vm_page_dump_size;
 	vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
 	    new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
 	bzero((void *)vm_page_dump, vm_page_dump_size);
 #endif
 #ifdef __amd64__
 	/*
 	 * Request that the physical pages underlying the message buffer be
 	 * included in a crash dump.  Since the message buffer is accessed
 	 * through the direct map, they are not automatically included.
 	 */
 	pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr);
 	last_pa = pa + round_page(msgbufsize);
 	while (pa < last_pa) {
 		dump_add_page(pa);
 		pa += PAGE_SIZE;
 	}
 #endif
 	/*
 	 * Compute the number of pages of memory that will be available for
 	 * use (taking into account the overhead of a page structure per
 	 * page).
 	 *
 	 * With VM_PHYSSEG_SPARSE the count is the sum of the segment and
 	 * phys_avail[] sizes; with VM_PHYSSEG_DENSE it is the whole span
 	 * from low_water to high_water.
 	 */
 	first_page = low_water / PAGE_SIZE;
 #ifdef VM_PHYSSEG_SPARSE
 	page_range = 0;
 	for (i = 0; i < vm_phys_nsegs; i++) {
 		page_range += atop(vm_phys_segs[i].end -
 		    vm_phys_segs[i].start);
 	}
 	for (i = 0; phys_avail[i + 1] != 0; i += 2)
 		page_range += atop(phys_avail[i + 1] - phys_avail[i]);
 #elif defined(VM_PHYSSEG_DENSE)
 	page_range = high_water / PAGE_SIZE - first_page;
 #else
 #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
 #endif
 	end = new_end;
 
 	/*
 	 * Reserve an unmapped guard page to trap access to vm_page_array[-1].
 	 */
 	vaddr += PAGE_SIZE;
 
 	/*
 	 * Initialize the mem entry structures now, and put them in the free
 	 * queue.
 	 */
 	new_end = trunc_page(end - page_range * sizeof(struct vm_page));
 	mapped = pmap_map(&vaddr, new_end, end,
 	    VM_PROT_READ | VM_PROT_WRITE);
 	vm_page_array = (vm_page_t) mapped;
 #if VM_NRESERVLEVEL > 0
 	/*
 	 * Allocate memory for the reservation management system's data
 	 * structures.
 	 */
 	new_end = vm_reserv_startup(&vaddr, new_end, high_water);
 #endif
 #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__)
 	/*
 	 * pmap_map on arm64, amd64, and mips can come out of the direct-map,
 	 * not kvm like i386, so the pages must be tracked for a crashdump to
 	 * include this data.  This includes the vm_page_array and the early
 	 * UMA bootstrap pages.
 	 */
 	for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE)
 		dump_add_page(pa);
 #endif
 	/*
 	 * Everything above new_end in the largest range is now consumed by
 	 * the boot-time allocations, so shrink the range accordingly.
 	 */
 	phys_avail[biggestone + 1] = new_end;
 
 	/*
 	 * Add physical memory segments corresponding to the available
 	 * physical pages.
 	 */
 	for (i = 0; phys_avail[i + 1] != 0; i += 2)
 		vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]);
 
 	/*
 	 * Clear all of the page structures
 	 */
 	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
 	for (i = 0; i < page_range; i++)
 		vm_page_array[i].order = VM_NFREEORDER;
 	vm_page_array_size = page_range;
 
 	/*
 	 * Initialize the physical memory allocator.
 	 */
 	vm_phys_init();
 
 	/*
 	 * Add every available physical page that is not blacklisted to
 	 * the free lists.
 	 */
 	vm_cnt.v_page_count = 0;
 	vm_cnt.v_free_count = 0;
 	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
 		pa = phys_avail[i];
 		last_pa = phys_avail[i + 1];
 		while (pa < last_pa) {
 			vm_phys_add_page(pa);
 			pa += PAGE_SIZE;
 		}
 	}
 
 	/*
 	 * Process two blacklists: first the one supplied by
 	 * vm_page_blacklist_load(), then the "vm.blacklist" kernel
 	 * environment variable.
 	 */
 	TAILQ_INIT(&blacklist_head);
 	vm_page_blacklist_load(&list, &listend);
 	vm_page_blacklist_check(list, listend);
 
 	list = kern_getenv("vm.blacklist");
 	vm_page_blacklist_check(list, NULL);
 
 	freeenv(list);
 #if VM_NRESERVLEVEL > 0
 	/*
 	 * Initialize the reservation management system.
 	 */
 	vm_reserv_init();
 #endif
 	/* Hand back the first kernel virtual address not consumed above. */
 	return (vaddr);
 }
 
 /*
  *	vm_page_reference:
  *
  *	Set PGA_REFERENCED on the given page via vm_page_aflag_set().
  */
 void
 vm_page_reference(vm_page_t m)
 {
 
 	vm_page_aflag_set(m, PGA_REFERENCED);
 }
 
 /*
  *	vm_page_busy_downgrade:
  *
  *	Downgrade an exclusive busy page into a single shared busy page.
  */
 void
 vm_page_busy_downgrade(vm_page_t m)
 {
 	u_int waiters;
 
 	vm_page_assert_xbusied(m);
 
 	/*
 	 * Swap the exclusive-owner word for a one-sharer word, carrying
 	 * over only the waiters bit.  Take a fresh snapshot and retry
 	 * whenever the compare-and-set fails.
 	 */
 	do {
 		waiters = m->busy_lock & VPB_BIT_WAITERS;
 	} while (!atomic_cmpset_rel_int(&m->busy_lock,
 	    VPB_SINGLE_EXCLUSIVER | waiters, VPB_SHARERS_WORD(1) | waiters));
 }
 
 /*
  *	vm_page_sbusied:
  *
  *	Return a positive value if the page is shared busied, 0 otherwise.
  */
 int
 vm_page_sbusied(vm_page_t m)
 {
 	u_int state;
 
 	/* Snapshot the lock word once so both tests see the same value. */
 	state = m->busy_lock;
 	if ((state & VPB_BIT_SHARED) == 0)
 		return (0);
 	return (state != VPB_UNBUSIED);
 }
 
 /*
  *	vm_page_sunbusy:
  *
  *	Shared unbusy a page.
  */
 void
 vm_page_sunbusy(vm_page_t m)
 {
 	u_int x;
 
 	vm_page_assert_sbusied(m);
 
 	/*
 	 * Each pass works from a fresh snapshot of the lock word; any
 	 * failed compare-and-set restarts the loop.
 	 */
 	for (;;) {
 		x = m->busy_lock;
 		/* Other sharers remain: just drop our share. */
 		if (VPB_SHARERS(x) > 1) {
 			if (atomic_cmpset_int(&m->busy_lock, x,
 			    x - VPB_ONE_SHARER))
 				break;
 			continue;
 		}
 		/* Last sharer, no waiters: go straight to unbusied. */
 		if ((x & VPB_BIT_WAITERS) == 0) {
 			KASSERT(x == VPB_SHARERS_WORD(1),
 			    ("vm_page_sunbusy: invalid lock state"));
 			if (atomic_cmpset_int(&m->busy_lock,
 			    VPB_SHARERS_WORD(1), VPB_UNBUSIED))
 				break;
 			continue;
 		}
 		KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS),
 		    ("vm_page_sunbusy: invalid lock state for waiters"));
 
 		/*
 		 * Last sharer with waiters: unbusy the page and wake the
 		 * sleepers while holding the page lock.
 		 */
 		vm_page_lock(m);
 		if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) {
 			vm_page_unlock(m);
 			continue;
 		}
 		wakeup(m);
 		vm_page_unlock(m);
 		break;
 	}
 }
 
 /*
  *	vm_page_busy_sleep:
  *
  *	Sleep and release the page lock, using the page pointer as wchan.
  *	This is used to implement the hard-path of busying mechanism.
  *
  *	The given page must be locked.
  */
 void
 vm_page_busy_sleep(vm_page_t m, const char *wmesg)
 {
 	u_int x;
 
 	vm_page_lock_assert(m, MA_OWNED);
 
 	/*
 	 * Every return path leaves the page lock released: the two early
 	 * exits unlock explicitly and msleep() is called with PDROP.
 	 */
 	x = m->busy_lock;
 	/* The page is no longer busied, so there is nothing to wait for. */
 	if (x == VPB_UNBUSIED) {
 		vm_page_unlock(m);
 		return;
 	}
 	/*
 	 * Advertise a waiter if the bit is not already set.  If the lock
 	 * word changed under us, return without sleeping.
 	 */
 	if ((x & VPB_BIT_WAITERS) == 0 &&
 	    !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS)) {
 		vm_page_unlock(m);
 		return;
 	}
 	/* Sleep on the page pointer with no timeout. */
 	msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0);
 }
 
 /*
  *	vm_page_trysbusy:
  *
  *	Try to shared busy a page.
  *	If the operation succeeds 1 is returned otherwise 0.
  *	The operation never sleeps.
  */
 int
 vm_page_trysbusy(vm_page_t m)
 {
 	u_int state;
 
 	/*
 	 * Add one sharer to the lock word.  Give up as soon as a snapshot
 	 * lacks the shared bit; otherwise retry until the compare-and-set
 	 * succeeds.
 	 */
 	do {
 		state = m->busy_lock;
 		if ((state & VPB_BIT_SHARED) == 0)
 			return (0);
 	} while (!atomic_cmpset_acq_int(&m->busy_lock, state,
 	    state + VPB_ONE_SHARER));
 	return (1);
 }
 
 /*
  *	vm_page_xunbusy_hard:
  *
  *	Called after the first try the exclusive unbusy of a page failed.
  *	It is assumed that the waiters bit is on.
  */
 void
 vm_page_xunbusy_hard(vm_page_t m)
 {
 
 	vm_page_assert_xbusied(m);
 
 	/*
 	 * Reset the lock word to unbusied (which also clears the waiters
 	 * bit) and wake the sleepers, all under the page lock.
 	 */
 	vm_page_lock(m);
 	atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
 	wakeup(m);
 	vm_page_unlock(m);
 }
 
 /*
  *	vm_page_flash:
  *
  *	Wakeup anyone waiting for the page.
  *	The ownership bits do not change.
  *
  *	The given page must be locked.
  */
 void
 vm_page_flash(vm_page_t m)
 {
 	u_int state;
 
 	vm_page_lock_assert(m, MA_OWNED);
 
 	/*
 	 * Clear the waiters bit, leaving every other bit untouched.  If no
 	 * waiter is recorded there is nobody to wake, so return at once.
 	 */
 	do {
 		state = m->busy_lock;
 		if ((state & VPB_BIT_WAITERS) == 0)
 			return;
 	} while (!atomic_cmpset_int(&m->busy_lock, state,
 	    state & (~VPB_BIT_WAITERS)));
 	wakeup(m);
 }
 
 /*
  *	vm_page_hold:
  *
  *	Take a hold on the page by incrementing its hold count.  A hold
  *	keeps the page from being freed by the page daemon.  The effect is
  *	much the same as wiring but with much lower overhead, and it should
  *	be used only for *very* temporary holding ("wiring").
  *
  *	The page must be locked.
  */
 void
 vm_page_hold(vm_page_t mem)
 {
 
 	vm_page_lock_assert(mem, MA_OWNED);
 	++mem->hold_count;
 }
 
 /*
  *	vm_page_unhold:
  *
  *	Drop one hold on the page.  When the last hold goes away and the
  *	page is flagged PG_UNHOLDFREE, the page is freed.
  *
  *	The page must be locked.
  */
 void
 vm_page_unhold(vm_page_t mem)
 {
 
 	vm_page_lock_assert(mem, MA_OWNED);
 	KASSERT(mem->hold_count >= 1, ("vm_page_unhold: hold count < 0!!!"));
 	if (--mem->hold_count == 0 && (mem->flags & PG_UNHOLDFREE) != 0)
 		vm_page_free_toq(mem);
 }
 
 /*
  *	vm_page_unhold_pages:
  *
  *	Unhold each of the pages that is referenced by the given array.
  */
 void
 vm_page_unhold_pages(vm_page_t *ma, int count)
 {
 	struct mtx *held, *needed;
 
 	held = NULL;
 	while (count != 0) {
 		/*
 		 * Switch locks only when this page uses a different page
 		 * lock from the one already held.
 		 */
 		needed = vm_page_lockptr(*ma);
 		if (needed != held) {
 			if (held != NULL)
 				mtx_unlock(held);
 			mtx_lock(needed);
 			held = needed;
 		}
 		vm_page_unhold(*ma);
 		ma++;
 		count--;
 	}
 	/* Release whichever page lock is still held after the last page. */
 	if (held != NULL)
 		mtx_unlock(held);
 }
 
 /*
  *	PHYS_TO_VM_PAGE:
  *
  *	Translate a physical address into its vm_page structure.  When the
  *	address has no ordinary page, the result of the fictitious-page
  *	lookup is returned instead.
  */
 vm_page_t
 PHYS_TO_VM_PAGE(vm_paddr_t pa)
 {
 	vm_page_t m;
 
 #ifdef VM_PHYSSEG_SPARSE
 	/* Ask the physical allocator first, then try fictitious pages. */
 	m = vm_phys_paddr_to_vm_page(pa);
 	if (m != NULL)
 		return (m);
 	return (vm_phys_fictitious_to_vm_page(pa));
 #elif defined(VM_PHYSSEG_DENSE)
 	long pi;
 
 	/* Page indices outside vm_page_array go to the fictitious lookup. */
 	pi = atop(pa);
 	if (pi < first_page || (pi - first_page) >= vm_page_array_size)
 		return (vm_phys_fictitious_to_vm_page(pa));
 	m = &vm_page_array[pi - first_page];
 	return (m);
 #else
 #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
 #endif
 }
 
 /*
  *	vm_page_getfake:
  *
  *	Create a fictitious page with the specified physical address and
  *	memory attribute.  The memory attribute is the only machine-
  *	dependent aspect of a fictitious page that must be initialized.
  *
  *	The page structure is allocated zeroed from fakepg_zone with
  *	M_WAITOK and then set up by vm_page_initfake().
  */
 vm_page_t
 vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr)
 {
 	vm_page_t m;
 
 	m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO);
 	vm_page_initfake(m, paddr, memattr);
 	return (m);
 }
 
 /*
  *	vm_page_initfake:
  *
  *	Initialize "m" as a fictitious page at physical address "paddr"
  *	with memory attribute "memattr".  If the page is already marked
  *	PG_FICTITIOUS, only its memory attribute is updated.
  */
 void
 vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
 {
 
 	if ((m->flags & PG_FICTITIOUS) == 0) {
 		/* First-time setup of the page structure. */
 		m->phys_addr = paddr;
 		m->queue = PQ_NONE;
 		/* Fictitious pages don't use "segind". */
 		m->flags = PG_FICTITIOUS;
 		/* Fictitious pages don't use "order" or "pool". */
 		m->oflags = VPO_UNMANAGED;
 		m->busy_lock = VPB_SINGLE_EXCLUSIVER;
 		m->wire_count = 1;
 		pmap_page_init(m);
 	}
 
 	/*
 	 * Reached on both paths: the page's memattr might have changed
 	 * since a previous initialization, so always update the pmap to
 	 * the requested memattr.
 	 */
 	pmap_page_set_memattr(m, memattr);
 }
 
 /*
  *	vm_page_putfake:
  *
  *	Release a fictitious page.
  */
 void
 vm_page_putfake(vm_page_t m)
 {
 
 	/* Only unmanaged, fictitious pages may be returned to fakepg_zone. */
 	KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m));
 	KASSERT((m->flags & PG_FICTITIOUS) != 0,
 	    ("vm_page_putfake: bad page %p", m));
 	uma_zfree(fakepg_zone, m);
 }
 
 /*
  *	vm_page_updatefake:
  *
  *	Update the given fictitious page to the specified physical address and
  *	memory attribute.
  */
 void
 vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
 {
 
 	/* The page must already have been set up as a fictitious page. */
 	KASSERT((m->flags & PG_FICTITIOUS) != 0,
 	    ("vm_page_updatefake: bad page %p", m));
 	m->phys_addr = paddr;
 	pmap_page_set_memattr(m, memattr);
 }
 
 /*
  *	vm_page_free:
  *
  *	Free a page.
  */
 void
 vm_page_free(vm_page_t m)
 {
 
 	/* Clear PG_ZERO before handing the page to vm_page_free_toq(). */
 	m->flags &= ~PG_ZERO;
 	vm_page_free_toq(m);
 }
 
 /*
  *	vm_page_free_zero:
  *
  *	Free a page to the zeroed-pages queue: the page is flagged PG_ZERO
  *	before being passed to vm_page_free_toq().
  */
 void
 vm_page_free_zero(vm_page_t m)
 {
 
 	m->flags |= PG_ZERO;
 	vm_page_free_toq(m);
 }
 
 /*
  * Unbusy and handle the page queueing for a page from the VOP_GETPAGES()
  * array which is not the request page.
  */
 void
 vm_page_readahead_finish(vm_page_t m)
 {
 
 	if (m->valid == 0) {
 		/*
 		 * The page is completely invalid, so free it.  This state
 		 * arises when a short read did not cover the page at all,
 		 * or when a read error happened.
 		 */
 		vm_page_lock(m);
 		vm_page_free(m);
 		vm_page_unlock(m);
 		return;
 	}
 
 	/*
 	 * The page holds data but was not the one requested, so whether
 	 * to activate or deactivate it is not obvious.  Empirically,
 	 * deactivating is usually the best choice unless another thread
 	 * is already waiting for the page.
 	 */
 	vm_page_lock(m);
 	if ((m->busy_lock & VPB_BIT_WAITERS) == 0)
 		vm_page_deactivate(m);
 	else
 		vm_page_activate(m);
 	vm_page_unlock(m);
 	vm_page_xunbusy(m);
 }
 
 /*
  *	vm_page_sleep_if_busy:
  *
  *	If the page is busied, drop the object lock, wait on the page via
  *	vm_page_busy_sleep(), and reacquire the object lock.  Returns TRUE
  *	when that path was taken (the object lock was released, so the
  *	caller's view of the object may be stale) and FALSE when the page
  *	was not busied.
  *
  *	The given page must be unlocked and the object containing it must
  *	be write-locked.
  */
 int
 vm_page_sleep_if_busy(vm_page_t m, const char *msg)
 {
 	vm_object_t obj;
 
 	vm_page_lock_assert(m, MA_NOTOWNED);
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	if (!vm_page_busied(m))
 		return (FALSE);
 
 	/*
 	 * Remember the object before sleeping: the page's identity can
 	 * change during the sleep, and re-reading m->object afterwards
 	 * could relock a different object.  The caller is assumed to hold
 	 * a reference on the object already.
 	 */
 	obj = m->object;
 	vm_page_lock(m);
 	VM_OBJECT_WUNLOCK(obj);
 	vm_page_busy_sleep(m, msg);
 	VM_OBJECT_WLOCK(obj);
 	return (TRUE);
 }
 
 /*
  *	vm_page_dirty_KBI:		[ internal use only ]
  *
  *	Set all bits in the page's dirty field.
  *
  *	The object containing the specified page must be locked if the
  *	call is made from the machine-independent layer.
  *
  *	See vm_page_clear_dirty_mask().
  *
  *	This function should only be called by vm_page_dirty().
  */
 void
 vm_page_dirty_KBI(vm_page_t m)
 {
 
 	/* These assertions refer to this operation by its public name. */
 	KASSERT((m->flags & PG_CACHED) == 0,
 	    ("vm_page_dirty: page in cache!"));
 	KASSERT(m->valid == VM_PAGE_BITS_ALL,
 	    ("vm_page_dirty: page is invalid!"));
 	/* Only a fully valid, non-cached page may be marked wholly dirty. */
 	m->dirty = VM_PAGE_BITS_ALL;
 }
 
 /*
  *	vm_page_insert:		[ internal use only ]
  *
  *	Inserts the given mem entry into the object and object list.
  *
  *	The object must be locked.
  */
 int
 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	/*
 	 * The predecessor handed to vm_page_insert_after() is the result
 	 * of a less-than-or-equal lookup of "pindex" in the object's trie.
 	 */
 	return (vm_page_insert_after(m, object, pindex,
 	    vm_radix_lookup_le(&object->rtree, pindex)));
 }
 
 /*
  *	vm_page_insert_after:
  *
  *	Inserts the page "m" into the specified object at offset "pindex".
  *
  *	The page "mpred" must immediately precede the offset "pindex" within
  *	the specified object.
  *
  *	The object must be locked.
  */
 static int
 vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
     vm_page_t mpred)
 {
 	vm_pindex_t sidx;
 	vm_object_t sobj;
 	vm_page_t msucc;
 
 	/*
 	 * Returns 0 on success.  Returns 1 when vm_radix_insert() fails,
 	 * in which case the page's object and pindex are restored to the
 	 * values they had on entry.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(m->object == NULL,
 	    ("vm_page_insert_after: page already inserted"));
 	/*
 	 * Sanity-check the neighbours: "mpred" must belong to the object
 	 * and precede pindex, and the page that will follow "m" in the
 	 * list (if any) must lie beyond pindex.
 	 */
 	if (mpred != NULL) {
 		KASSERT(mpred->object == object,
 		    ("vm_page_insert_after: object doesn't contain mpred"));
 		KASSERT(mpred->pindex < pindex,
 		    ("vm_page_insert_after: mpred doesn't precede pindex"));
 		msucc = TAILQ_NEXT(mpred, listq);
 	} else
 		msucc = TAILQ_FIRST(&object->memq);
 	if (msucc != NULL)
 		KASSERT(msucc->pindex > pindex,
 		    ("vm_page_insert_after: msucc doesn't succeed pindex"));
 
 	/*
 	 * Record the object/offset pair in this page
 	 */
 	sobj = m->object;
 	sidx = m->pindex;
 	m->object = object;
 	m->pindex = pindex;
 
 	/*
 	 * Now link into the object's ordered list of backed pages.
 	 */
 	if (vm_radix_insert(&object->rtree, m)) {
 		/* Insertion failed: undo the object/offset assignment. */
 		m->object = sobj;
 		m->pindex = sidx;
 		return (1);
 	}
 	vm_page_insert_radixdone(m, object, mpred);
 	return (0);
 }
 
 /*
  *	vm_page_insert_radixdone:
  *
  *	Complete page "m" insertion into the specified object after the
  *	radix trie hooking.
  *
  *	The page "mpred" must precede the offset "m->pindex" within the
  *	specified object.
  *
  *	The object must be locked.
  */
 static void
 vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(object != NULL && m->object == object,
 	    ("vm_page_insert_radixdone: page %p has inconsistent object", m));
 
 	/*
 	 * Link the page into the object's resident list: directly after
 	 * "mpred" when a predecessor exists, otherwise at the head.  The
 	 * assertion messages name this function so that a panic points at
 	 * the right place.
 	 */
 	if (mpred != NULL) {
 		KASSERT(mpred->object == object,
 		    ("vm_page_insert_radixdone: object doesn't contain mpred"));
 		KASSERT(mpred->pindex < m->pindex,
 		    ("vm_page_insert_radixdone: mpred doesn't precede pindex"));
 		TAILQ_INSERT_AFTER(&object->memq, mpred, m, listq);
 	} else
 		TAILQ_INSERT_HEAD(&object->memq, m, listq);
 
 	/*
 	 * Show that the object has one more resident page.
 	 */
 	object->resident_page_count++;
 
 	/*
 	 * Hold the vnode until the last page is released.
 	 */
 	if (object->resident_page_count == 1 && object->type == OBJT_VNODE)
 		vhold(object->handle);
 
 	/*
 	 * Since we are inserting a new and possibly dirty page,
 	 * update the object's OBJ_MIGHTBEDIRTY flag.
 	 */
 	if (pmap_page_is_write_mapped(m))
 		vm_object_set_writeable_dirty(object);
 }
 
 /*
  *	vm_page_remove:
  *
  *	Removes the given mem entry from the object/offset-page
  *	table and the object page list, but do not invalidate/terminate
  *	the backing store.
  *
  *	The object must be locked.  The page must be locked if it is managed.
  */
 void
 vm_page_remove(vm_page_t m)
 {
 	vm_object_t object;
 	boolean_t lockacq;
 
 	/* Only managed pages are required to have the page lock held. */
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		vm_page_lock_assert(m, MA_OWNED);
 	/* A page that belongs to no object has nothing to unlink. */
 	if ((object = m->object) == NULL)
 		return;
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	if (vm_page_xbusied(m)) {
 		/*
 		 * Wake any busy waiters and drop the exclusive busy state.
 		 * vm_page_flash() asserts that the page lock is owned, so
 		 * acquire it here for an unmanaged page whose lock the
 		 * caller does not already hold, and release it afterwards.
 		 */
 		lockacq = FALSE;
 		if ((m->oflags & VPO_UNMANAGED) != 0 &&
 		    !mtx_owned(vm_page_lockptr(m))) {
 			lockacq = TRUE;
 			vm_page_lock(m);
 		}
 		vm_page_flash(m);
 		atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
 		if (lockacq)
 			vm_page_unlock(m);
 	}
 
 	/*
 	 * Now remove from the object's list of backed pages.
 	 */
 	vm_radix_remove(&object->rtree, m->pindex);
 	TAILQ_REMOVE(&object->memq, m, listq);
 
 	/*
 	 * And show that the object has one fewer resident page.
 	 */
 	object->resident_page_count--;
 
 	/*
 	 * The vnode may now be recycled.
 	 */
 	if (object->resident_page_count == 0 && object->type == OBJT_VNODE)
 		vdrop(object->handle);
 
 	m->object = NULL;
 }
 
 /*
  *	vm_page_lookup:
  *
  *	Returns the page associated with the object/offset
  *	pair specified; if none is found, NULL is returned.
  *
  *	The object must be locked.
  */
 vm_page_t
 vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
 {
 
 	VM_OBJECT_ASSERT_LOCKED(object);
 	/* Exact-match lookup of pindex in the object's radix trie. */
 	return (vm_radix_lookup(&object->rtree, pindex));
 }
 
 /*
  *	vm_page_find_least:
  *
  *	Returns the page associated with the object with least pindex
  *	greater than or equal to the parameter pindex, or NULL.
  *
  *	The object must be locked.
  */
 vm_page_t
 vm_page_find_least(vm_object_t object, vm_pindex_t pindex)
 {
 	vm_page_t found;
 
 	VM_OBJECT_ASSERT_LOCKED(object);
 
 	/*
 	 * Check the head of the resident list first.  If it is missing or
 	 * already at or beyond pindex it is the answer, and the radix trie
 	 * lookup can be skipped.
 	 */
 	found = TAILQ_FIRST(&object->memq);
 	if (found != NULL && found->pindex < pindex)
 		found = vm_radix_lookup_ge(&object->rtree, pindex);
 	return (found);
 }
 
 /*
  * Returns the given page's successor (by pindex) within the object if it is
  * resident; if none is found, NULL is returned.
  *
  * The object must be locked.
  */
 vm_page_t
 vm_page_next(vm_page_t m)
 {
 	vm_page_t succ;
 
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	/* The list neighbour counts only if its pindex is m's plus one. */
 	succ = TAILQ_NEXT(m, listq);
 	if (succ == NULL || succ->pindex == m->pindex + 1)
 		return (succ);
 	return (NULL);
 }
 
 /*
  * Returns the given page's predecessor (by pindex) within the object if it is
  * resident; if none is found, NULL is returned.
  *
  * The object must be locked.
  */
 vm_page_t
 vm_page_prev(vm_page_t m)
 {
 	vm_page_t pred;
 
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	/* The list neighbour counts only if its pindex is m's minus one. */
 	pred = TAILQ_PREV(m, pglist, listq);
 	if (pred == NULL || pred->pindex == m->pindex - 1)
 		return (pred);
 	return (NULL);
 }
 
 /*
  *	vm_page_replace:
  *
  *	Uses the page mnew as a replacement for an existing page at index
  *	pindex which must be already present in the object.
  *
  *	Returns the replaced page, which has been detached from the object
  *	(its object pointer is cleared) and exclusive-unbusied.
  *
  *	The existing page must not be on a paging queue.  The object must
  *	be write-locked.
  */
 vm_page_t
 vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex)
 {
 	vm_page_t mold, mpred;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	/*
 	 * This function mostly follows vm_page_insert() and
 	 * vm_page_remove() without the radix, object count and vnode
 	 * dance.  Double check such functions for more comments.
 	 *
 	 * Find the page currently at pindex, then step back to its list
 	 * predecessor so the new page can be linked at the same position.
 	 */
 	mpred = vm_radix_lookup(&object->rtree, pindex);
 	KASSERT(mpred != NULL,
 	    ("vm_page_replace: replacing page not present with pindex"));
 	mpred = TAILQ_PREV(mpred, respgs, listq);
 	if (mpred != NULL)
 		KASSERT(mpred->pindex < pindex,
 		    ("vm_page_replace: mpred doesn't precede pindex"));
 
 	mnew->object = object;
 	mnew->pindex = pindex;
 	mold = vm_radix_replace(&object->rtree, mnew);
 	KASSERT(mold->queue == PQ_NONE,
 	    ("vm_page_replace: mold is on a paging queue"));
 
 	/* Detach the old page from the resident tailq. */
 	TAILQ_REMOVE(&object->memq, mold, listq);
 
 	mold->object = NULL;
 	vm_page_xunbusy(mold);
 
 	/* Insert the new page in the resident tailq. */
 	if (mpred != NULL)
 		TAILQ_INSERT_AFTER(&object->memq, mpred, mnew, listq);
 	else
 		TAILQ_INSERT_HEAD(&object->memq, mnew, listq);
 	/* As in vm_page_insert(): a write-mapped page may be dirty. */
 	if (pmap_page_is_write_mapped(mnew))
 		vm_object_set_writeable_dirty(object);
 	return (mold);
 }
 
 /*
  *	vm_page_rename:
  *
  *	Move the given memory entry from its
  *	current object to the specified target object/offset.
  *
  *	Note: swap associated with the page must be invalidated by the move.  We
  *	      have to do this for several reasons:  (1) we aren't freeing the
  *	      page, (2) we are dirtying the page, (3) the VM system is probably
  *	      moving the page from object A to B, and will then later move
  *	      the backing store from A to B and we can't have a conflict.
  *
  *	Note: we *always* dirty the page.  It is necessary both for the
  *	      fact that we moved it, and because we may be invalidating
  *	      swap.  If the page is on the cache, we have to deactivate it
  *	      or vm_page_dirty() will panic.  Dirty pages are not allowed
  *	      on the cache.
  *
  *	The objects must be locked.
  */
 int
 vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
 {
 	vm_page_t mpred;
 	vm_pindex_t opidx;
 
 	/*
 	 * Returns 0 on success.  Returns 1 when the insertion into the new
 	 * object's radix trie fails; the page then keeps its original
 	 * object and pindex.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(new_object);
 
 	mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex);
 	KASSERT(mpred == NULL || mpred->pindex != new_pindex,
 	    ("vm_page_rename: pindex already renamed"));
 
 	/*
 	 * Create a custom version of vm_page_insert() which does not depend
 	 * on m_prev and can cheat on the implementation aspects of the
 	 * function.
 	 *
 	 * The page temporarily takes the new pindex so that the trie
 	 * insertion uses it.
 	 */
 	opidx = m->pindex;
 	m->pindex = new_pindex;
 	if (vm_radix_insert(&new_object->rtree, m)) {
 		m->pindex = opidx;
 		return (1);
 	}
 
 	/*
 	 * The operation cannot fail anymore.  The removal must happen before
 	 * the listq iterator is tainted.
 	 *
 	 * Restore the old pindex first: vm_page_remove() uses m->pindex to
 	 * remove the page from the old object's trie.
 	 */
 	m->pindex = opidx;
 	vm_page_lock(m);
 	vm_page_remove(m);
 
 	/* Return back to the new pindex to complete vm_page_insert(). */
 	m->pindex = new_pindex;
 	m->object = new_object;
 	vm_page_unlock(m);
 	vm_page_insert_radixdone(m, new_object, mpred);
 	vm_page_dirty(m);
 	return (0);
 }
 
 /*
  *	Convert all of the given object's cached pages that have a
  *	pindex within the given range into free pages.  If the value
  *	zero is given for "end", then the range's upper bound is
  *	infinity.  If the given object is backed by a vnode and it
  *	transitions from having one or more cached pages to none, the
  *	vnode's hold count is reduced.
  */
 void
 vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
 {
 	vm_page_t victim;
 	boolean_t drained;
 
 	mtx_lock(&vm_page_queue_free_mtx);
 
 	/* Nothing is cached: leave without touching the vnode hold count. */
 	if (__predict_false(vm_radix_is_empty(&object->cache))) {
 		mtx_unlock(&vm_page_queue_free_mtx);
 		return;
 	}
 
 	/*
 	 * Repeatedly take the lowest cached page at or above "start" and
 	 * free it, stopping once none remain or the page lies at or beyond
 	 * a non-zero "end".
 	 */
 	for (victim = vm_radix_lookup_ge(&object->cache, start);
 	    victim != NULL && (end == 0 || victim->pindex < end);
 	    victim = vm_radix_lookup_ge(&object->cache, start)) {
 		vm_radix_remove(&object->cache, victim->pindex);
 		vm_page_cache_turn_free(victim);
 	}
 	drained = vm_radix_is_empty(&object->cache);
 	mtx_unlock(&vm_page_queue_free_mtx);
 
 	/* The last cached page of a vnode-backed object is gone. */
 	if (object->type == OBJT_VNODE && drained)
 		vdrop(object->handle);
 }
 
 /*
  *	Returns the cached page that is associated with the given
  *	object and offset.  If, however, none exists, returns NULL.
  *
  *	The free page queue must be locked.
  */
 static inline vm_page_t
 vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
 {
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	/* Exact-match lookup of pindex in the object's cache trie. */
 	return (vm_radix_lookup(&object->cache, pindex));
 }
 
 /*
  *	Remove the given cached page from its containing object's
  *	collection of cached pages.
  *
  *	The free page queue must be locked.
  */
 static void
 vm_page_cache_remove(vm_page_t m)
 {
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	KASSERT((m->flags & PG_CACHED) != 0,
 	    ("vm_page_cache_remove: page %p is not cached", m));
 	/*
 	 * Unhook the page from its object's cache trie, clear its object
 	 * pointer, and decrement the global cached-page counter.
 	 */
 	vm_radix_remove(&m->object->cache, m->pindex);
 	m->object = NULL;
 	vm_cnt.v_cache_count--;
 }
 
 /*
  *	Transfer all of the cached pages with offset greater than or
  *	equal to 'offidxstart' from the original object's cache to the
  *	new object's cache.  However, any cached pages with offset
  *	greater than or equal to the new object's size are kept in the
  *	original object.  Initially, the new object's cache must be
  *	empty.  Offset 'offidxstart' in the original object must
  *	correspond to offset zero in the new object.
  *
  *	The new object must be locked.
  */
 void
 vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
     vm_object_t new_object)
 {
 	vm_page_t m;
 
 	/*
 	 * Insertion into an object's collection of cached pages
 	 * requires the object to be locked.  In contrast, removal does
 	 * not.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(new_object);
 	KASSERT(vm_radix_is_empty(&new_object->cache),
 	    ("vm_page_cache_transfer: object %p has cached pages",
 	    new_object));
 	mtx_lock(&vm_page_queue_free_mtx);
 	while ((m = vm_radix_lookup_ge(&orig_object->cache,
 	    offidxstart)) != NULL) {
 		/*
 		 * Transfer all of the pages with offset greater than or
 		 * equal to 'offidxstart' from the original object's
 		 * cache to the new object's cache.
 		 *
 		 * Stop at the first page whose rebased offset falls
 		 * outside the new object; it stays in the original.
 		 */
 		if ((m->pindex - offidxstart) >= new_object->size)
 			break;
 		vm_radix_remove(&orig_object->cache, m->pindex);
 		/* Update the page's object and offset. */
 		m->object = new_object;
 		m->pindex -= offidxstart;
 		/*
 		 * If the insert into the new cache fails, the page is
 		 * handed to vm_page_cache_turn_free() instead of being
 		 * kept cached.
 		 */
 		if (vm_radix_insert(&new_object->cache, m))
 			vm_page_cache_turn_free(m);
 	}
 	mtx_unlock(&vm_page_queue_free_mtx);
 }
 
 /*
  *	Returns TRUE if a cached page is associated with the given object and
  *	offset, and FALSE otherwise.
  *
  *	The object must be locked.
  */
 boolean_t
 vm_page_is_cached(vm_object_t object, vm_pindex_t pindex)
 {
 	boolean_t found;
 
 	/*
 	 * Adding a page to an object's cached collection requires the
 	 * object lock.  With the object locked and the collection empty,
 	 * the page cannot exist, so the answer is known without taking
 	 * the free page queues lock.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	if (__predict_true(vm_object_cache_is_empty(object)))
 		return (FALSE);
 
 	/* Otherwise do the lookup under the free page queues lock. */
 	mtx_lock(&vm_page_queue_free_mtx);
 	found = vm_page_cache_lookup(object, pindex) != NULL;
 	mtx_unlock(&vm_page_queue_free_mtx);
 	return (found);
 }
 
 /*
  *	vm_page_alloc:
  *
  *	Allocate and return a page that is associated with the specified
  *	object and offset pair.  By default, this page is exclusive busied.
  *
  *	The caller must always specify an allocation class.
  *
  *	allocation classes:
  *	VM_ALLOC_NORMAL		normal process request
  *	VM_ALLOC_SYSTEM		system *really* needs a page
  *	VM_ALLOC_INTERRUPT	interrupt time request
  *
  *	optional allocation flags:
  *	VM_ALLOC_COUNT(number)	the number of additional pages that the caller
  *				intends to allocate
  *	VM_ALLOC_IFCACHED	return page only if it is cached
  *	VM_ALLOC_IFNOTCACHED	return NULL, do not reactivate if the page
  *				is cached
  *	VM_ALLOC_NOBUSY		do not exclusive busy the page
  *	VM_ALLOC_NODUMP		do not include the page in a kernel core dump
  *	VM_ALLOC_NOOBJ		page is not associated with an object and
  *				should not be exclusive busy
  *	VM_ALLOC_SBUSY		shared busy the allocated page
  *	VM_ALLOC_WIRED		wire the allocated page
  *	VM_ALLOC_ZERO		prefer a zeroed page
  *
  *	This routine may not sleep.
  */
 vm_page_t
 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 {
 	struct vnode *vp = NULL;
 	vm_object_t m_object;
 	vm_page_t m, mpred;
 	int flags, req_class;
 
 	mpred = 0;	/* XXX: pacify gcc */
 	KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
 	    (object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
 	    ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
 	    (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
 	    ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object,
 	    req));
 	if (object != NULL)
 		VM_OBJECT_ASSERT_WLOCKED(object);
 
 	req_class = req & VM_ALLOC_CLASS_MASK;
 
 	/*
 	 * The page daemon is allowed to dig deeper into the free page list.
 	 */
 	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
 		req_class = VM_ALLOC_SYSTEM;
 
 	if (object != NULL) {
 		mpred = vm_radix_lookup_le(&object->rtree, pindex);
 		KASSERT(mpred == NULL || mpred->pindex != pindex,
 		   ("vm_page_alloc: pindex already allocated"));
 	}
 
 	/*
 	 * The page allocation request can came from consumers which already
 	 * hold the free page queue mutex, like vm_page_insert() in
 	 * vm_page_cache().
 	 */
 	mtx_lock_flags(&vm_page_queue_free_mtx, MTX_RECURSE);
 	if (vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_free_reserved ||
 	    (req_class == VM_ALLOC_SYSTEM &&
 	    vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_interrupt_free_min) ||
 	    (req_class == VM_ALLOC_INTERRUPT &&
 	    vm_cnt.v_free_count + vm_cnt.v_cache_count > 0)) {
 		/*
 		 * Allocate from the free queue if the number of free pages
 		 * exceeds the minimum for the request class.
 		 */
 		if (object != NULL &&
 		    (m = vm_page_cache_lookup(object, pindex)) != NULL) {
 			if ((req & VM_ALLOC_IFNOTCACHED) != 0) {
 				mtx_unlock(&vm_page_queue_free_mtx);
 				return (NULL);
 			}
 			if (vm_phys_unfree_page(m))
 				vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, 0);
 #if VM_NRESERVLEVEL > 0
 			else if (!vm_reserv_reactivate_page(m))
 #else
 			else
 #endif
 				panic("vm_page_alloc: cache page %p is missing"
 				    " from the free queue", m);
 		} else if ((req & VM_ALLOC_IFCACHED) != 0) {
 			mtx_unlock(&vm_page_queue_free_mtx);
 			return (NULL);
 #if VM_NRESERVLEVEL > 0
 		} else if (object == NULL || (object->flags & (OBJ_COLORED |
 		    OBJ_FICTITIOUS)) != OBJ_COLORED || (m =
 		    vm_reserv_alloc_page(object, pindex, mpred)) == NULL) {
 #else
 		} else {
 #endif
 			m = vm_phys_alloc_pages(object != NULL ?
 			    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
 #if VM_NRESERVLEVEL > 0
 			if (m == NULL && vm_reserv_reclaim_inactive()) {
 				m = vm_phys_alloc_pages(object != NULL ?
 				    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT,
 				    0);
 			}
 #endif
 		}
 	} else {
 		/*
 		 * Not allocatable, give up.
 		 */
 		mtx_unlock(&vm_page_queue_free_mtx);
 		atomic_add_int(&vm_pageout_deficit,
 		    max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
 		pagedaemon_wakeup();
 		return (NULL);
 	}
 
 	/*
 	 *  At this point we had better have found a good page.
 	 */
 	KASSERT(m != NULL, ("vm_page_alloc: missing page"));
 	KASSERT(m->queue == PQ_NONE,
 	    ("vm_page_alloc: page %p has unexpected queue %d", m, m->queue));
 	KASSERT(m->wire_count == 0, ("vm_page_alloc: page %p is wired", m));
 	KASSERT(m->hold_count == 0, ("vm_page_alloc: page %p is held", m));
 	KASSERT(!vm_page_sbusied(m),
 	    ("vm_page_alloc: page %p is busy", m));
 	KASSERT(m->dirty == 0, ("vm_page_alloc: page %p is dirty", m));
 	KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
 	    ("vm_page_alloc: page %p has unexpected memattr %d", m,
 	    pmap_page_get_memattr(m)));
 	if ((m->flags & PG_CACHED) != 0) {
 		KASSERT((m->flags & PG_ZERO) == 0,
 		    ("vm_page_alloc: cached page %p is PG_ZERO", m));
 		KASSERT(m->valid != 0,
 		    ("vm_page_alloc: cached page %p is invalid", m));
 		if (m->object == object && m->pindex == pindex)
 			vm_cnt.v_reactivated++;
 		else
 			m->valid = 0;
 		m_object = m->object;
 		vm_page_cache_remove(m);
 		if (m_object->type == OBJT_VNODE &&
 		    vm_object_cache_is_empty(m_object))
 			vp = m_object->handle;
 	} else {
 		KASSERT(m->valid == 0,
 		    ("vm_page_alloc: free page %p is valid", m));
 		vm_phys_freecnt_adj(m, -1);
 		if ((m->flags & PG_ZERO) != 0)
 			vm_page_zero_count--;
 	}
 	mtx_unlock(&vm_page_queue_free_mtx);
 
 	/*
 	 * Initialize the page.  Only the PG_ZERO flag is inherited.
 	 */
 	flags = 0;
 	if ((req & VM_ALLOC_ZERO) != 0)
 		flags = PG_ZERO;
 	flags &= m->flags;
 	if ((req & VM_ALLOC_NODUMP) != 0)
 		flags |= PG_NODUMP;
 	m->flags = flags;
 	m->aflags = 0;
 	m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ?
 	    VPO_UNMANAGED : 0;
 	m->busy_lock = VPB_UNBUSIED;
 	if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0)
 		m->busy_lock = VPB_SINGLE_EXCLUSIVER;
 	if ((req & VM_ALLOC_SBUSY) != 0)
 		m->busy_lock = VPB_SHARERS_WORD(1);
 	if (req & VM_ALLOC_WIRED) {
 		/*
 		 * The page lock is not required for wiring a page until that
 		 * page is inserted into the object.
 		 */
 		atomic_add_int(&vm_cnt.v_wire_count, 1);
 		m->wire_count = 1;
 	}
 	m->act_count = 0;
 
 	if (object != NULL) {
 		if (vm_page_insert_after(m, object, pindex, mpred)) {
 			/* See the comment below about hold count. */
 			if (vp != NULL)
 				vdrop(vp);
 			pagedaemon_wakeup();
 			if (req & VM_ALLOC_WIRED) {
 				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
 				m->wire_count = 0;
 			}
 			m->object = NULL;
 			m->oflags = VPO_UNMANAGED;
 			vm_page_free(m);
 			return (NULL);
 		}
 
 		/* Ignore device objects; the pager sets "memattr" for them. */
 		if (object->memattr != VM_MEMATTR_DEFAULT &&
 		    (object->flags & OBJ_FICTITIOUS) == 0)
 			pmap_page_set_memattr(m, object->memattr);
 	} else
 		m->pindex = pindex;
 
 	/*
 	 * The following call to vdrop() must come after the above call
 	 * to vm_page_insert() in case both affect the same object and
 	 * vnode.  Otherwise, the affected vnode's hold count could
 	 * temporarily become zero.
 	 */
 	if (vp != NULL)
 		vdrop(vp);
 
 	/*
 	 * Don't wakeup too often - wakeup the pageout daemon when
 	 * we would be nearly out of memory.
 	 */
 	if (vm_paging_needed())
 		pagedaemon_wakeup();
 
 	return (m);
 }
 
 static void
 vm_page_alloc_contig_vdrop(struct spglist *lst)
 {
 
 	while (!SLIST_EMPTY(lst)) {
 		vdrop((struct vnode *)SLIST_FIRST(lst)-> plinks.s.pv);
 		SLIST_REMOVE_HEAD(lst, plinks.s.ss);
 	}
 }
 
 /*
  *	vm_page_alloc_contig:
  *
  *	Allocate a contiguous set of physical pages of the given size "npages"
  *	from the free lists.  All of the physical pages must be at or above
  *	the given physical address "low" and below the given physical address
  *	"high".  The given value "alignment" determines the alignment of the
  *	first physical page in the set.  If the given value "boundary" is
  *	non-zero, then the set of physical pages cannot cross any physical
  *	address boundary that is a multiple of that value.  Both "alignment"
  *	and "boundary" must be a power of two.
  *
  *	If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT,
  *	then the memory attribute setting for the physical pages is configured
  *	to the object's memory attribute setting.  Otherwise, the memory
  *	attribute setting for the physical pages is configured to "memattr",
  *	overriding the object's memory attribute setting.  However, if the
  *	object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the
  *	memory attribute setting for the physical pages cannot be configured
  *	to VM_MEMATTR_DEFAULT.
  *
  *	The caller must always specify an allocation class.
  *
  *	allocation classes:
  *	VM_ALLOC_NORMAL		normal process request
  *	VM_ALLOC_SYSTEM		system *really* needs a page
  *	VM_ALLOC_INTERRUPT	interrupt time request
  *
  *	optional allocation flags:
  *	VM_ALLOC_NOBUSY		do not exclusive busy the page
  *	VM_ALLOC_NODUMP		do not include the page in a kernel core dump
  *	VM_ALLOC_NOOBJ		page is not associated with an object and
  *				should not be exclusive busy
  *	VM_ALLOC_SBUSY		shared busy the allocated page
  *	VM_ALLOC_WIRED		wire the allocated page
  *	VM_ALLOC_ZERO		prefer a zeroed page
  *
  *	This routine may not sleep.
  */
 vm_page_t
 vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_memattr_t memattr)
 {
 	struct vnode *drop;
 	struct spglist deferred_vdrop_list;
 	vm_page_t m, m_tmp, m_ret;
 	u_int flags;
 	int req_class;
 
 	/*
 	 * An object must be given exactly when VM_ALLOC_NOOBJ is clear,
 	 * VM_ALLOC_SBUSY requires an object, and VM_ALLOC_NOBUSY and
 	 * VM_ALLOC_SBUSY are mutually exclusive.
 	 */
 	KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
 	    (object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
 	    ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
 	    (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
 	    ("vm_page_alloc_contig: inconsistent object(%p)/req(%x)",
 	    (void *)object, req));
 	if (object != NULL) {
 		VM_OBJECT_ASSERT_WLOCKED(object);
 		KASSERT(object->type == OBJT_PHYS,
 		    ("vm_page_alloc_contig: object %p isn't OBJT_PHYS",
 		    object));
 	}
 	KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
 	req_class = req & VM_ALLOC_CLASS_MASK;
 
 	/*
 	 * The page daemon is allowed to dig deeper into the free page list.
 	 */
 	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
 		req_class = VM_ALLOC_SYSTEM;
 
 	SLIST_INIT(&deferred_vdrop_list);
 	mtx_lock(&vm_page_queue_free_mtx);
 	/*
 	 * Proceed only if, after taking "npages", the free + cache count
 	 * would still be at or above the floor for the request class.
 	 */
 	if (vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages +
 	    vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM &&
 	    vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages +
 	    vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT &&
 	    vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages)) {
 #if VM_NRESERVLEVEL > 0
 retry:
 		/*
 		 * Try the reservation allocator first for colored objects;
 		 * fall back to the physical allocator otherwise or on
 		 * failure.
 		 */
 		if (object == NULL || (object->flags & OBJ_COLORED) == 0 ||
 		    (m_ret = vm_reserv_alloc_contig(object, pindex, npages,
 		    low, high, alignment, boundary)) == NULL)
 #endif
 			m_ret = vm_phys_alloc_contig(npages, low, high,
 			    alignment, boundary);
 	} else {
 		/* Too few pages: record the shortfall and wake the daemon. */
 		mtx_unlock(&vm_page_queue_free_mtx);
 		atomic_add_int(&vm_pageout_deficit, npages);
 		pagedaemon_wakeup();
 		return (NULL);
 	}
 	if (m_ret != NULL)
 		for (m = m_ret; m < &m_ret[npages]; m++) {
 			drop = vm_page_alloc_init(m);
 			if (drop != NULL) {
 				/*
 				 * Enqueue the vnode for deferred vdrop().
 				 */
 				m->plinks.s.pv = drop;
 				SLIST_INSERT_HEAD(&deferred_vdrop_list, m,
 				    plinks.s.ss);
 			}
 		}
 	else {
 #if VM_NRESERVLEVEL > 0
 		/* Breaking up a reservation may free a suitable run. */
 		if (vm_reserv_reclaim_contig(npages, low, high, alignment,
 		    boundary))
 			goto retry;
 #endif
 	}
 	mtx_unlock(&vm_page_queue_free_mtx);
 	if (m_ret == NULL)
 		return (NULL);
 
 	/*
 	 * Initialize the pages.  Only the PG_ZERO flag is inherited.
 	 */
 	flags = 0;
 	if ((req & VM_ALLOC_ZERO) != 0)
 		flags = PG_ZERO;
 	if ((req & VM_ALLOC_NODUMP) != 0)
 		flags |= PG_NODUMP;
 	if ((req & VM_ALLOC_WIRED) != 0)
 		atomic_add_int(&vm_cnt.v_wire_count, npages);
 	if (object != NULL) {
 		/* A default "memattr" defers to the object's attribute. */
 		if (object->memattr != VM_MEMATTR_DEFAULT &&
 		    memattr == VM_MEMATTR_DEFAULT)
 			memattr = object->memattr;
 	}
 	for (m = m_ret; m < &m_ret[npages]; m++) {
 		m->aflags = 0;
 		/*
 		 * Keep PG_ZERO only if it was both present and requested;
 		 * set PG_NODUMP whenever it was requested.
 		 */
 		m->flags = (m->flags | PG_NODUMP) & flags;
 		m->busy_lock = VPB_UNBUSIED;
 		if (object != NULL) {
 			if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
 				m->busy_lock = VPB_SINGLE_EXCLUSIVER;
 			if ((req & VM_ALLOC_SBUSY) != 0)
 				m->busy_lock = VPB_SHARERS_WORD(1);
 		}
 		if ((req & VM_ALLOC_WIRED) != 0)
 			m->wire_count = 1;
 		/* Unmanaged pages don't use "act_count". */
 		m->oflags = VPO_UNMANAGED;
 		if (object != NULL) {
 			if (vm_page_insert(m, object, pindex)) {
 				/*
 				 * Insertion failed: undo the wiring and
 				 * release the whole run.  Pages from "m_tmp"
 				 * onward were never inserted, so their object
 				 * pointer is cleared before freeing.
 				 *
 				 * NOTE(review): busy_lock is not reset before
 				 * vm_page_free(); confirm that freeing pages
 				 * left busied above is acceptable here.
 				 */
 				vm_page_alloc_contig_vdrop(
 				    &deferred_vdrop_list);
 				if (vm_paging_needed())
 					pagedaemon_wakeup();
 				if ((req & VM_ALLOC_WIRED) != 0)
 					atomic_subtract_int(&vm_cnt.v_wire_count,
 					    npages);
 				for (m_tmp = m, m = m_ret;
 				    m < &m_ret[npages]; m++) {
 					if ((req & VM_ALLOC_WIRED) != 0)
 						m->wire_count = 0;
 					if (m >= m_tmp)
 						m->object = NULL;
 					vm_page_free(m);
 				}
 				return (NULL);
 			}
 		} else
 			m->pindex = pindex;
 		if (memattr != VM_MEMATTR_DEFAULT)
 			pmap_page_set_memattr(m, memattr);
 		pindex++;
 	}
 	/* Drop the vnodes queued while the free queue mutex was held. */
 	vm_page_alloc_contig_vdrop(&deferred_vdrop_list);
 	if (vm_paging_needed())
 		pagedaemon_wakeup();
 	return (m_ret);
 }
 
 /*
  * Prepare a page that has just been taken off a free list and keep the
  * free/cache accounting in step.
  *
  * Returns NULL, or the handle of the page's former object when the page
  * was a cached page of an OBJT_VNODE object whose cache became empty as a
  * result; the caller has to drop that vnode.
  *
  * This function may only be used to initialize unmanaged pages.
  *
  * To be called with vm_page_queue_free_mtx held.
  */
 static struct vnode *
 vm_page_alloc_init(vm_page_t m)
 {
 	vm_object_t cached_obj;
 
 	KASSERT(m->queue == PQ_NONE,
 	    ("vm_page_alloc_init: page %p has unexpected queue %d",
 	    m, m->queue));
 	KASSERT(m->wire_count == 0,
 	    ("vm_page_alloc_init: page %p is wired", m));
 	KASSERT(m->hold_count == 0,
 	    ("vm_page_alloc_init: page %p is held", m));
 	KASSERT(!vm_page_sbusied(m),
 	    ("vm_page_alloc_init: page %p is busy", m));
 	KASSERT(m->dirty == 0,
 	    ("vm_page_alloc_init: page %p is dirty", m));
 	KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
 	    ("vm_page_alloc_init: page %p has unexpected memattr %d",
 	    m, pmap_page_get_memattr(m)));
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 
 	if ((m->flags & PG_CACHED) == 0) {
 		/* A plain free page: only the counters need adjusting. */
 		KASSERT(m->valid == 0,
 		    ("vm_page_alloc_init: free page %p is valid", m));
 		vm_phys_freecnt_adj(m, -1);
 		if ((m->flags & PG_ZERO) != 0)
 			vm_page_zero_count--;
 		return (NULL);
 	}
 
 	/* A cached page: invalidate it and detach it from its object. */
 	KASSERT((m->flags & PG_ZERO) == 0,
 	    ("vm_page_alloc_init: cached page %p is PG_ZERO", m));
 	m->valid = 0;
 	cached_obj = m->object;
 	vm_page_cache_remove(m);
 	if (cached_obj->type != OBJT_VNODE ||
 	    !vm_object_cache_is_empty(cached_obj))
 		return (NULL);
 	return (cached_obj->handle);
 }
 
 /*
  * 	vm_page_alloc_freelist:
  *
  *	Allocate a physical page from the specified free page list.
  *
  *	The caller must always specify an allocation class.
  *
  *	allocation classes:
  *	VM_ALLOC_NORMAL		normal process request
  *	VM_ALLOC_SYSTEM		system *really* needs a page
  *	VM_ALLOC_INTERRUPT	interrupt time request
  *
  *	optional allocation flags:
  *	VM_ALLOC_COUNT(number)	the number of additional pages that the caller
  *				intends to allocate
  *	VM_ALLOC_WIRED		wire the allocated page
  *	VM_ALLOC_ZERO		prefer a zeroed page
  *
  *	This routine may not sleep.
  */
 vm_page_t
 vm_page_alloc_freelist(int flind, int req)
 {
 	struct vnode *drop;
 	vm_page_t m;
 	u_int flags;
 	int req_class;
 
 	req_class = req & VM_ALLOC_CLASS_MASK;
 
 	/*
 	 * The page daemon is allowed to dig deeper into the free page list.
 	 */
 	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
 		req_class = VM_ALLOC_SYSTEM;
 
 	/*
 	 * Do not allocate reserved pages unless the req has asked for it.
 	 * The free + cache count must exceed v_free_reserved for any
 	 * request, v_interrupt_free_min for VM_ALLOC_SYSTEM, or zero for
 	 * VM_ALLOC_INTERRUPT.
 	 */
 	mtx_lock_flags(&vm_page_queue_free_mtx, MTX_RECURSE);
 	if (vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_free_reserved ||
 	    (req_class == VM_ALLOC_SYSTEM &&
 	    vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_interrupt_free_min) ||
 	    (req_class == VM_ALLOC_INTERRUPT &&
 	    vm_cnt.v_free_count + vm_cnt.v_cache_count > 0))
 		m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0);
 	else {
 		/*
 		 * Below the threshold: add the requested count (at least
 		 * one page) to the pageout deficit and wake the page daemon.
 		 */
 		mtx_unlock(&vm_page_queue_free_mtx);
 		atomic_add_int(&vm_pageout_deficit,
 		    max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
 		pagedaemon_wakeup();
 		return (NULL);
 	}
 	/* The chosen free list had no page to give. */
 	if (m == NULL) {
 		mtx_unlock(&vm_page_queue_free_mtx);
 		return (NULL);
 	}
 	/* Must run while the free queue mutex is still held. */
 	drop = vm_page_alloc_init(m);
 	mtx_unlock(&vm_page_queue_free_mtx);
 
 	/*
 	 * Initialize the page.  Only the PG_ZERO flag is inherited.
 	 */
 	m->aflags = 0;
 	flags = 0;
 	if ((req & VM_ALLOC_ZERO) != 0)
 		flags = PG_ZERO;
 	/* Clears every flag except PG_ZERO, and that too if not requested. */
 	m->flags &= flags;
 	if ((req & VM_ALLOC_WIRED) != 0) {
 		/*
 		 * The page lock is not required for wiring a page that does
 		 * not belong to an object.
 		 */
 		atomic_add_int(&vm_cnt.v_wire_count, 1);
 		m->wire_count = 1;
 	}
 	/* Unmanaged pages don't use "act_count". */
 	m->oflags = VPO_UNMANAGED;
 	/* Release the vnode handed back by vm_page_alloc_init(), if any. */
 	if (drop != NULL)
 		vdrop(drop);
 	if (vm_paging_needed())
 		pagedaemon_wakeup();
 	return (m);
 }
 
 /*
  *	vm_wait:	(also see VM_WAIT macro)
  *
  *	Put the caller to sleep until free pages become available for
  *	allocation.
  *	- Used in various places ahead of memory allocations.
  */
 void
 vm_wait(void)
 {
 
 	mtx_lock(&vm_page_queue_free_mtx);
 	if (curproc == pageproc) {
 		/* The page daemon waits on its own flag and channel. */
 		vm_pageout_pages_needed = 1;
 		msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx,
 		    PDROP | PSWP, "VMWait", 0);
 		return;
 	}
 
 	/* Everyone else flags the shortage once, then waits. */
 	if (vm_pages_needed == 0) {
 		vm_pages_needed = 1;
 		wakeup(&vm_pages_needed);
 	}
 	msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PVM,
 	    "vmwait", 0);
 }
 
 /*
  *	vm_waitpfault:	(also see VM_WAITPFAULT macro)
  *
  *	Put the caller to sleep until free pages become available for
  *	allocation.
  *	- Called only in vm_fault so that processes page faulting
  *	  can be easily tracked.
  *	- Sleeps at PUSER, a lower priority than vm_wait(), so that
  *	  vm_wait()ing processes will be able to grab memory first.
  *	  Do not change this balance without careful testing first.
  */
 void
 vm_waitpfault(void)
 {
 
 	mtx_lock(&vm_page_queue_free_mtx);
 	/* Flag the shortage and post the wakeup only once. */
 	if (vm_pages_needed == 0) {
 		vm_pages_needed = 1;
 		wakeup(&vm_pages_needed);
 	}
 	/* PDROP: the mutex is not reacquired when the sleep ends. */
 	msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PUSER,
 	    "pfault", 0);
 }
 
 /*
  * Return the page queue selected by the page's "queue" field within the
  * page queues of the page's physical memory domain.
  */
 struct vm_pagequeue *
 vm_page_pagequeue(vm_page_t m)
 {
 	struct vm_pagequeue *queues;
 
 	queues = vm_phys_domain(m)->vmd_pagequeues;
 	return (&queues[m->queue]);
 }
 
 /*
  *	vm_page_dequeue:
  *
  *	Take the given page off the page queue it currently sits on,
  *	acquiring and releasing that queue's lock around the removal.
  *
  *	The page must be locked.
  */
 void
 vm_page_dequeue(vm_page_t m)
 {
 	struct vm_pagequeue *srcq;
 
 	vm_page_assert_locked(m);
 	KASSERT(m->queue < PQ_COUNT, ("vm_page_dequeue: page %p is not queued",
 	    m));
 
 	/* Look the queue up before "queue" is overwritten below. */
 	srcq = vm_page_pagequeue(m);
 
 	vm_pagequeue_lock(srcq);
 	m->queue = PQ_NONE;
 	TAILQ_REMOVE(&srcq->pq_pl, m, plinks.q);
 	vm_pagequeue_cnt_dec(srcq);
 	vm_pagequeue_unlock(srcq);
 }
 
 /*
  *	vm_page_dequeue_locked:
  *
  *	Take the given page off the page queue it currently sits on.
  *	Unlike vm_page_dequeue(), the queue lock is not acquired here.
  *
  *	The page and page queue must be locked.
  */
 void
 vm_page_dequeue_locked(vm_page_t m)
 {
 	struct vm_pagequeue *srcq;
 
 	vm_page_lock_assert(m, MA_OWNED);
 
 	/* Look the queue up before "queue" is overwritten below. */
 	srcq = vm_page_pagequeue(m);
 	vm_pagequeue_assert_locked(srcq);
 
 	m->queue = PQ_NONE;
 	TAILQ_REMOVE(&srcq->pq_pl, m, plinks.q);
 	vm_pagequeue_cnt_dec(srcq);
 }
 
 /*
  *	vm_page_enqueue:
  *
  *	Append the given page to the tail of the requested page queue in
  *	the page's physical memory domain.
  *
  *	The page must be locked.
  */
 static void
 vm_page_enqueue(uint8_t queue, vm_page_t m)
 {
 	struct vm_pagequeue *dstq;
 
 	vm_page_lock_assert(m, MA_OWNED);
 	KASSERT(queue < PQ_COUNT,
 	    ("vm_page_enqueue: invalid queue %u request for page %p",
 	    queue, m));
 
 	dstq = &vm_phys_domain(m)->vmd_pagequeues[queue];
 
 	vm_pagequeue_lock(dstq);
 	m->queue = queue;
 	TAILQ_INSERT_TAIL(&dstq->pq_pl, m, plinks.q);
 	vm_pagequeue_cnt_inc(dstq);
 	vm_pagequeue_unlock(dstq);
 }
 
 /*
  *	vm_page_requeue:
  *
  *	Reposition the given page at the tail of the page queue it is
  *	already on, taking that queue's lock for the duration.
  *
  *	The page must be locked.
  */
 void
 vm_page_requeue(vm_page_t m)
 {
 	struct vm_pagequeue *curq;
 
 	vm_page_lock_assert(m, MA_OWNED);
 	KASSERT(m->queue != PQ_NONE,
 	    ("vm_page_requeue: page %p is not queued", m));
 
 	curq = vm_page_pagequeue(m);
 
 	vm_pagequeue_lock(curq);
 	/* The queue's count is unchanged, so it is not touched. */
 	TAILQ_REMOVE(&curq->pq_pl, m, plinks.q);
 	TAILQ_INSERT_TAIL(&curq->pq_pl, m, plinks.q);
 	vm_pagequeue_unlock(curq);
 }
 
 /*
  *	vm_page_requeue_locked:
  *
  *	Reposition the given page at the tail of the page queue it is
  *	already on.  The queue lock is asserted, not acquired.
  *
  *	The page queue must be locked.
  */
 void
 vm_page_requeue_locked(vm_page_t m)
 {
 	struct vm_pagequeue *curq;
 
 	KASSERT(m->queue != PQ_NONE,
 	    ("vm_page_requeue_locked: page %p is not queued", m));
 
 	curq = vm_page_pagequeue(m);
 	vm_pagequeue_assert_locked(curq);
 
 	/* The queue's count is unchanged, so it is not touched. */
 	TAILQ_REMOVE(&curq->pq_pl, m, plinks.q);
 	TAILQ_INSERT_TAIL(&curq->pq_pl, m, plinks.q);
 }
 
 /*
  *	vm_page_activate:
  *
  *	Move the given page to the active queue unless it is wired or
  *	unmanaged.  Whenever the page is, or becomes, active its
  *	act_count is raised to at least ACT_INIT and otherwise left alone.
  *
  *	The page must be locked.
  */
 void
 vm_page_activate(vm_page_t m)
 {
 	int queue;
 
 	vm_page_lock_assert(m, MA_OWNED);
 	queue = m->queue;
 
 	/* Already active: just make sure act_count is high enough. */
 	if (queue == PQ_ACTIVE) {
 		if (m->act_count < ACT_INIT)
 			m->act_count = ACT_INIT;
 		return;
 	}
 
 	/* Wired and unmanaged pages are never placed on a queue. */
 	if (m->wire_count != 0 || (m->oflags & VPO_UNMANAGED) != 0) {
 		KASSERT(queue == PQ_NONE,
 		    ("vm_page_activate: wired page %p is queued", m));
 		return;
 	}
 
 	if (m->act_count < ACT_INIT)
 		m->act_count = ACT_INIT;
 	if (queue != PQ_NONE)
 		vm_page_dequeue(m);
 	vm_page_enqueue(PQ_ACTIVE, m);
 }
 
 /*
  *	vm_page_free_wakeup:
  *
  *	Helper routine for vm_page_free_toq() and vm_page_cache().  This
  *	routine is called when a page has been added to the cache or free
  *	queues.  It wakes up threads that are sleeping while they wait for
  *	pages to become available.
  *
  *	The free page queue mutex must be held.
  */
 static inline void
 vm_page_free_wakeup(void)
 {
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	/*
 	 * If the pageout daemon needs pages, and the cache + free count has
 	 * reached v_pageout_free_min, then tell it that there are some free.
 	 */
 	if (vm_pageout_pages_needed &&
 	    vm_cnt.v_cache_count + vm_cnt.v_free_count >= vm_cnt.v_pageout_free_min) {
 		wakeup(&vm_pageout_pages_needed);
 		vm_pageout_pages_needed = 0;
 	}
 	/*
 	 * Wake up the threads sleeping on v_free_count once
 	 * vm_page_count_min() no longer reports a shortage, and clear
 	 * the "pages needed" flag.
 	 */
 	if (vm_pages_needed && !vm_page_count_min()) {
 		vm_pages_needed = 0;
 		wakeup(&vm_cnt.v_free_count);
 	}
 }
 
 /*
  *	Convert a cached page into a free page: detach it from its object,
  *	invalidate it, clear PG_CACHED, and move one page from the cache
  *	count to the free count.
  *
  *	The free page queue must be locked.
  */
 static void
 vm_page_cache_turn_free(vm_page_t m)
 {
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	KASSERT((m->flags & PG_CACHED) != 0,
 	    ("vm_page_cache_turn_free: page %p is not cached", m));
 
 	/* Strip the page's cached identity. */
 	m->object = NULL;
 	m->valid = 0;
 	m->flags &= ~PG_CACHED;
 
 	/* Keep the statistics up-to-date. */
 	vm_cnt.v_cache_count--;
 	vm_phys_freecnt_adj(m, 1);
 }
 
 /*
  *	vm_page_free_toq:
  *
  *	Returns the given page to the free list,
  *	disassociating it with any VM object.
  *
  *	The object must be locked.  The page must be locked if it is managed.
  */
 void
 vm_page_free_toq(vm_page_t m)
 {
 
 	/*
 	 * Managed pages must be locked and unmapped; unmanaged pages must
 	 * not be on any paging queue.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		vm_page_lock_assert(m, MA_OWNED);
 		KASSERT(!pmap_page_is_mapped(m),
 		    ("vm_page_free_toq: freeing mapped page %p", m));
 	} else
 		KASSERT(m->queue == PQ_NONE,
 		    ("vm_page_free_toq: unmanaged page %p is queued", m));
 	PCPU_INC(cnt.v_tfree);
 
 	if (vm_page_sbusied(m))
 		panic("vm_page_free: freeing busy page %p", m);
 
 	/*
 	 * Unqueue, then remove page.  Note that we cannot destroy
 	 * the page here because we do not want to call the pager's
 	 * callback routine until after we've put the page on the
 	 * appropriate free queue.
 	 */
 	vm_page_remque(m);
 	vm_page_remove(m);
 
 	/*
 	 * If fictitious remove object association and
 	 * return, otherwise delay object association removal.
 	 */
 	if ((m->flags & PG_FICTITIOUS) != 0) {
 		return;
 	}
 
 	m->valid = 0;
 	vm_page_undirty(m);
 
 	if (m->wire_count != 0)
 		panic("vm_page_free: freeing wired page %p", m);
 	if (m->hold_count != 0) {
 		/*
 		 * The page is still held: do not put it on the free queues
 		 * now; only mark it PG_UNHOLDFREE (and no longer PG_ZERO).
 		 */
 		m->flags &= ~PG_ZERO;
 		KASSERT((m->flags & PG_UNHOLDFREE) == 0,
 		    ("vm_page_free: freeing PG_UNHOLDFREE page %p", m));
 		m->flags |= PG_UNHOLDFREE;
 	} else {
 		/*
 		 * Restore the default memory attribute to the page.
 		 */
 		if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
 			pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
 
 		/*
 		 * Insert the page into the physical memory allocator's
 		 * cache/free page queues.
 		 */
 		mtx_lock(&vm_page_queue_free_mtx);
 		vm_phys_freecnt_adj(m, 1);
 		/*
 		 * With reservations compiled in, the page goes to the
 		 * physical allocator only if vm_reserv_free_page() returns
 		 * false; without them it always does.
 		 */
 #if VM_NRESERVLEVEL > 0
 		if (!vm_reserv_free_page(m))
 #else
 		if (TRUE)
 #endif
 			vm_phys_free_pages(m, 0);
 		/* Count pre-zeroed pages; otherwise call the zero-idle wakeup. */
 		if ((m->flags & PG_ZERO) != 0)
 			++vm_page_zero_count;
 		else
 			vm_page_zero_idle_wakeup();
 		vm_page_free_wakeup();
 		mtx_unlock(&vm_page_queue_free_mtx);
 	}
 }
 
 /*
  *	vm_page_wire:
  *
  *	Mark this page as wired down by yet
  *	another map, removing it from paging queues
  *	as necessary.
  *
  *	If the page is fictitious, then its wire count must remain one.
  *
  *	The page must be locked.
  */
 void
 vm_page_wire(vm_page_t m)
 {
 
 	/*
 	 * Only bump the wire statistics if the page is not already wired,
 	 * and only unqueue the page if it is on some queue (if it is unmanaged
 	 * it is already off the queues).
 	 */
 	vm_page_lock_assert(m, MA_OWNED);
 	/* Fictitious pages keep a wire count of one; nothing to do. */
 	if ((m->flags & PG_FICTITIOUS) != 0) {
 		KASSERT(m->wire_count == 1,
 		    ("vm_page_wire: fictitious page %p's wire count isn't one",
 		    m));
 		return;
 	}
 	/* First wiring: leave the paging queues and count the page. */
 	if (m->wire_count == 0) {
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0 ||
 		    m->queue == PQ_NONE,
 		    ("vm_page_wire: unmanaged page %p is queued", m));
 		vm_page_remque(m);
 		atomic_add_int(&vm_cnt.v_wire_count, 1);
 	}
 	m->wire_count++;
 	/* A zero result here means the increment wrapped around. */
 	KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m));
 }
 
 /*
  * vm_page_unwire:
  *
  * Release one wiring of the specified page, potentially enabling it to be
  * paged again.  If paging is enabled, then the value of the parameter
  * "queue" determines the queue to which the page is added.
  *
  * However, unless the page belongs to an object, it is not enqueued because
  * it cannot be paged out.
  *
  * If a page is fictitious, then its wire count must always be one.
  *
  * A managed page must be locked.
  */
 void
 vm_page_unwire(vm_page_t m, uint8_t queue)
 {
 
 	KASSERT(queue < PQ_COUNT,
 	    ("vm_page_unwire: invalid queue %u request for page %p",
 	    queue, m));
 	/* Only managed pages are required to be locked. */
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		vm_page_lock_assert(m, MA_OWNED);
 	/* Fictitious pages keep a wire count of one; nothing to do. */
 	if ((m->flags & PG_FICTITIOUS) != 0) {
 		KASSERT(m->wire_count == 1,
 	    ("vm_page_unwire: fictitious page %p's wire count isn't one", m));
 		return;
 	}
 	if (m->wire_count > 0) {
 		m->wire_count--;
 		if (m->wire_count == 0) {
 			/* Last wiring released: update the global count. */
 			atomic_subtract_int(&vm_cnt.v_wire_count, 1);
 			/*
 			 * Unmanaged pages and pages without an object are
 			 * not put on a paging queue.
 			 */
 			if ((m->oflags & VPO_UNMANAGED) != 0 ||
 			    m->object == NULL)
 				return;
 			if (queue == PQ_INACTIVE)
 				m->flags &= ~PG_WINATCFLS;
 			vm_page_enqueue(queue, m);
 		}
 	} else
 		panic("vm_page_unwire: page %p's wire count is zero", m);
 }
 
 /*
  * Move the specified page to the inactive queue.
  *
  * Many pages placed on the inactive queue should actually go
  * into the cache, but it is difficult to figure out which.  What
  * we do instead, if the inactive target is well met, is to put
  * clean pages at the head of the inactive queue instead of the tail.
  * This will cause them to be moved to the cache more quickly and
  * if not actively re-referenced, reclaimed more quickly.  If we just
  * stick these pages at the end of the inactive queue, heavy filesystem
  * meta-data accesses can cause an unnecessary paging load on memory bound
  * processes.  This optimization causes one-time-use metadata to be
  * reused more quickly.
  *
  * Normally athead is 0 resulting in LRU operation.  athead is set
  * to 1 if we want this page to be 'as if it were placed in the cache',
  * except without unmapping it from the process address space.
  *
  * The page must be locked.
  */
 static inline void
 _vm_page_deactivate(vm_page_t m, int athead)
 {
 	struct vm_pagequeue *pq;
 	int queue;
 
 	vm_page_assert_locked(m);
 
 	/*
 	 * Ignore if the page is already inactive, unless it is unlikely to be
 	 * reactivated.
 	 */
 	if ((queue = m->queue) == PQ_INACTIVE && !athead)
 		return;
 	/* Wired and unmanaged pages are left where they are. */
 	if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
 		pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE];
 		/* Avoid multiple acquisitions of the inactive queue lock. */
 		if (queue == PQ_INACTIVE) {
 			/*
 			 * Already on the inactive queue: lock it once and
 			 * remove the page under that same lock.
 			 */
 			vm_pagequeue_lock(pq);
 			vm_page_dequeue_locked(m);
 		} else {
 			/*
 			 * On another queue or none: vm_page_dequeue() locks
 			 * and unlocks the source queue itself, after which
 			 * the inactive queue is locked.
 			 */
 			if (queue != PQ_NONE)
 				vm_page_dequeue(m);
 			m->flags &= ~PG_WINATCFLS;
 			vm_pagequeue_lock(pq);
 		}
 		/* The inactive queue lock is held from here to the unlock. */
 		m->queue = PQ_INACTIVE;
 		if (athead)
 			TAILQ_INSERT_HEAD(&pq->pq_pl, m, plinks.q);
 		else
 			TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
 		vm_pagequeue_cnt_inc(pq);
 		vm_pagequeue_unlock(pq);
 	}
 }
 
 /*
  * Move the specified page to the tail of the inactive queue.  A page that
  * is already inactive, wired, or unmanaged is left untouched by
  * _vm_page_deactivate() when "athead" is zero.
  *
  * The page must be locked.
  */
 void
 vm_page_deactivate(vm_page_t m)
 {
 
 	/* athead == 0: insert at the tail of the queue. */
 	_vm_page_deactivate(m, 0);
 }
 
 /*
  * vm_page_try_to_cache:
  *
  * Move the page to the cache via vm_page_cache() if it is clean, not held,
  * not wired, managed, and not busy.
  *
  * Returns 0 on failure, 1 on success
  */
 int
 vm_page_try_to_cache(vm_page_t m)
 {
 
 	vm_page_lock_assert(m, MA_OWNED);
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	if (m->dirty != 0 || m->hold_count != 0 || m->wire_count != 0)
 		return (0);
 	if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m))
 		return (0);
 
 	/* The dirty field is rechecked once the mappings are removed. */
 	pmap_remove_all(m);
 	if (m->dirty != 0)
 		return (0);
 
 	vm_page_cache(m);
 	return (1);
 }
 
 /*
  * vm_page_try_to_free()
  *
  *	Free the page via vm_page_free() if it is clean, not held, not
  *	wired, managed, and not busy; otherwise leave it alone.
  *	1 is returned on success, 0 on failure.
  */
 int
 vm_page_try_to_free(vm_page_t m)
 {
 
 	vm_page_lock_assert(m, MA_OWNED);
 	/* A page may have no object; check the lock only when it has one. */
 	if (m->object != NULL)
 		VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	if (m->dirty != 0 || m->hold_count != 0 || m->wire_count != 0)
 		return (0);
 	if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m))
 		return (0);
 
 	/* The dirty field is rechecked once the mappings are removed. */
 	pmap_remove_all(m);
 	if (m->dirty != 0)
 		return (0);
 
 	vm_page_free(m);
 	return (1);
 }
 
 /*
  * vm_page_cache
  *
  * Put the specified page onto the page cache queue (if appropriate).
  * Pages that are invalid, belong to a default object, or belong to a swap
  * object without a backing copy are freed instead.  The page is also freed
  * if it cannot be inserted into the object's cache.
  *
  * For vnode objects the vnode's hold count is adjusted to follow the
  * transitions of the object's resident and cached page sets.
  *
  * The object and page must be locked.
  */
 void
 vm_page_cache(vm_page_t m)
 {
 	vm_object_t object;
 	boolean_t cache_was_empty;
 
 	vm_page_lock_assert(m, MA_OWNED);
 	object = m->object;
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	if (vm_page_busied(m) || (m->oflags & VPO_UNMANAGED) ||
 	    m->hold_count || m->wire_count)
 		panic("vm_page_cache: attempting to cache busy page");
 	KASSERT(!pmap_page_is_mapped(m),
 	    ("vm_page_cache: page %p is mapped", m));
 	KASSERT(m->dirty == 0, ("vm_page_cache: page %p is dirty", m));
 	if (m->valid == 0 || object->type == OBJT_DEFAULT ||
 	    (object->type == OBJT_SWAP &&
 	    !vm_pager_has_page(object, m->pindex, NULL, NULL))) {
 		/*
 		 * Hypothesis: A cache-eligible page belonging to a
 		 * default object or swap object but without a backing
 		 * store must be zero filled.
 		 */
 		vm_page_free(m);
 		return;
 	}
 	KASSERT((m->flags & PG_CACHED) == 0,
 	    ("vm_page_cache: page %p is already cached", m));
 
 	/*
 	 * Remove the page from the paging queues.
 	 */
 	vm_page_remque(m);
 
 	/*
 	 * Remove the page from the object's collection of resident
 	 * pages.
 	 */
 	vm_radix_remove(&object->rtree, m->pindex);
 	TAILQ_REMOVE(&object->memq, m, listq);
 	object->resident_page_count--;
 
 	/*
 	 * Restore the default memory attribute to the page.
 	 */
 	if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
 		pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
 
 	/*
 	 * Insert the page into the object's collection of cached pages
 	 * and the physical memory allocator's cache/free page queues.
 	 */
 	m->flags &= ~PG_ZERO;
 	mtx_lock(&vm_page_queue_free_mtx);
 	cache_was_empty = vm_radix_is_empty(&object->cache);
 	if (vm_radix_insert(&object->cache, m)) {
 		mtx_unlock(&vm_page_queue_free_mtx);
 		/*
 		 * The page could not be cached and is freed instead.  Only
 		 * a vnode object's handle may be passed to vdrop(); a swap
 		 * object can reach this point too, so check the type just
 		 * as the hold count adjustment at the end of this function
 		 * does.
 		 */
 		if (object->type == OBJT_VNODE &&
 		    object->resident_page_count == 0)
 			vdrop(object->handle);
 		m->object = NULL;
 		vm_page_free(m);
 		return;
 	}
 
 	/*
 	 * The above call to vm_radix_insert() could reclaim the one pre-
 	 * existing cached page from this object, resulting in a call to
 	 * vdrop().
 	 */
 	if (!cache_was_empty)
 		cache_was_empty = vm_radix_is_singleton(&object->cache);
 
 	m->flags |= PG_CACHED;
 	vm_cnt.v_cache_count++;
 	PCPU_INC(cnt.v_tcached);
 #if VM_NRESERVLEVEL > 0
 	if (!vm_reserv_free_page(m)) {
 #else
 	if (TRUE) {
 #endif
 		vm_phys_free_pages(m, 0);
 	}
 	vm_page_free_wakeup();
 	mtx_unlock(&vm_page_queue_free_mtx);
 
 	/*
 	 * Increment the vnode's hold count if this is the object's only
 	 * cached page.  Decrement the vnode's hold count if this was
 	 * the object's only resident page.
 	 */
 	if (object->type == OBJT_VNODE) {
 		if (cache_was_empty && object->resident_page_count != 0)
 			vhold(object->handle);
 		else if (!cache_was_empty && object->resident_page_count == 0)
 			vdrop(object->handle);
 	}
 }
 
 /*
  * vm_page_advise
  *
  * 	Deactivate or do nothing, as appropriate.  This routine is used
  * 	by madvise() and vop_stdadvise().
  *
  *	Only MADV_FREE and MADV_DONTNEED have an effect; any other advice
  *	returns without touching the page.
  *
  *	The object and page must be locked.
  */
 void
 vm_page_advise(vm_page_t m, int advice)
 {
 
 	vm_page_assert_locked(m);
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (advice == MADV_FREE)
 		/*
 		 * Mark the page clean.  This will allow the page to be freed
 		 * up by the system.  However, such pages are often reused
 		 * quickly by malloc() so we do not do anything that would
 		 * cause a page fault if we can help it.
 		 *
 		 * Specifically, we do not try to actually free the page now
 		 * nor do we try to put it in the cache (which would cause a
 		 * page fault on reuse).
 		 *
 		 * But we do make the page as freeable as we can without
 		 * actually taking the step of unmapping it.
 		 */
 		m->dirty = 0;
 	else if (advice != MADV_DONTNEED)
 		return;
 
 	/*
 	 * Clear any references to the page.  Otherwise, the page daemon will
 	 * immediately reactivate the page.
 	 */
 	vm_page_aflag_clear(m, PGA_REFERENCED);
 
 	/*
 	 * For MADV_DONTNEED, record a modification reported by the pmap
 	 * in the page's dirty field before choosing its queue position.
 	 */
 	if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m))
 		vm_page_dirty(m);
 
 	/*
 	 * Place clean pages at the head of the inactive queue rather than the
 	 * tail, thus defeating the queue's LRU operation and ensuring that the
 	 * page will be reused quickly.
 	 */
 	_vm_page_deactivate(m, m->dirty == 0);
 }
 
 /*
  * Grab a page, waiting until we are woken up due to the page
  * changing state.  We keep on waiting, if the page continues
  * to be in the object.  If the page doesn't exist, first allocate it
  * and then conditionally zero it.
  *
  * With VM_ALLOC_NOWAIT the routine returns NULL instead of sleeping,
  * both when the page is busy and when the allocation fails.
  *
  * This routine may sleep.
  *
  * The object must be locked on entry.  The lock will, however, be released
  * and reacquired if the routine sleeps.
  */
 vm_page_t
 vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
 {
 	vm_page_t m;
 	int sleep;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	/* VM_ALLOC_SBUSY may only be used together with VM_ALLOC_IGN_SBUSY. */
 	KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
 	    (allocflags & VM_ALLOC_IGN_SBUSY) != 0,
 	    ("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
 retrylookup:
 	if ((m = vm_page_lookup(object, pindex)) != NULL) {
 		/*
 		 * With VM_ALLOC_IGN_SBUSY only an exclusive busy state
 		 * forces a sleep; otherwise any busy state does.
 		 */
 		sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ?
 		    vm_page_xbusied(m) : vm_page_busied(m);
 		if (sleep) {
 			if ((allocflags & VM_ALLOC_NOWAIT) != 0)
 				return (NULL);
 			/*
 			 * Reference the page before unlocking and
 			 * sleeping so that the page daemon is less
 			 * likely to reclaim it.
 			 */
 			vm_page_aflag_set(m, PGA_REFERENCED);
 			vm_page_lock(m);
 			VM_OBJECT_WUNLOCK(object);
 			vm_page_busy_sleep(m, "pgrbwt");
 			VM_OBJECT_WLOCK(object);
 			/* The object was unlocked, so look the page up again. */
 			goto retrylookup;
 		} else {
 			/* Resident and not busy: apply the requested state. */
 			if ((allocflags & VM_ALLOC_WIRED) != 0) {
 				vm_page_lock(m);
 				vm_page_wire(m);
 				vm_page_unlock(m);
 			}
 			if ((allocflags &
 			    (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
 				vm_page_xbusy(m);
 			if ((allocflags & VM_ALLOC_SBUSY) != 0)
 				vm_page_sbusy(m);
 			return (m);
 		}
 	}
 	/* No resident page at this index: allocate one. */
 	m = vm_page_alloc(object, pindex, allocflags);
 	if (m == NULL) {
 		if ((allocflags & VM_ALLOC_NOWAIT) != 0)
 			return (NULL);
 		/* Wait for free pages with the object unlocked, then retry. */
 		VM_OBJECT_WUNLOCK(object);
 		VM_WAIT;
 		VM_OBJECT_WLOCK(object);
 		goto retrylookup;
 	} else if (m->valid != 0)
 		/* The page already holds valid data; do not zero it. */
 		return (m);
 	/* Zero the page only if asked to and it is not already PG_ZERO. */
 	if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
 	return (m);
 }
 
 /*
  * Build the valid/dirty bitmask covering the byte range
  * [base, base + size) of a page, one bit per DEV_BSIZE block.  Blocks
  * that the range only partially touches are included.  An empty range
  * yields 0.
  *
  * The range must lie within a single page.
  */
 vm_page_bits_t
 vm_page_bits(int base, int size)
 {
 	int lo_blk, hi_blk;
 
 	KASSERT(base + size <= PAGE_SIZE,
 	    ("vm_page_bits: illegal base/size %d/%d", base, size));
 
 	/* Degenerate case: nothing is covered. */
 	if (size == 0)
 		return (0);
 
 	/* Index of the first and of the last block touched. */
 	lo_blk = base >> DEV_BSHIFT;
 	hi_blk = (base + size - 1) >> DEV_BSHIFT;
 
 	/* (2 << hi) - (1 << lo) sets exactly the bits lo..hi. */
 	return (((vm_page_bits_t)2 << hi_blk) -
 	    ((vm_page_bits_t)1 << lo_blk));
 }
 
 /*
  *	vm_page_set_valid_range:
  *
  *	Sets portions of a page valid.  The arguments are expected
  *	to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive
  *	of any partial chunks touched by the range.  The invalid portion of
  *	such chunks will be zeroed.
  *
  *	(base + size) must be less than or equal to PAGE_SIZE.
  *
  *	The page's object must be write-locked.
  */
 void
 vm_page_set_valid_range(vm_page_t m, int base, int size)
 {
 	int endoff, frag;
 
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (size == 0)	/* handle degenerate case */
 		return;
 
 	/*
 	 * If the base is not DEV_BSIZE aligned and the valid
 	 * bit is clear, we have to zero out a portion of the
 	 * first block.
 	 *
 	 * The bit is built from a vm_page_bits_t 1, not an int 1, so the
 	 * shift stays defined when the valid field is wider than int
 	 * (matches vm_page_set_validclean()).
 	 */
 	if ((frag = base & ~(DEV_BSIZE - 1)) != base &&
 	    (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0)
 		pmap_zero_page_area(m, frag, base - frag);
 
 	/*
 	 * If the ending offset is not DEV_BSIZE aligned and the
 	 * valid bit is clear, we have to zero out a portion of
 	 * the last block.
 	 */
 	endoff = base + size;
 	if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff &&
 	    (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0)
 		pmap_zero_page_area(m, endoff,
 		    DEV_BSIZE - (endoff & (DEV_BSIZE - 1)));
 
 	/*
 	 * Assert that no previously invalid block that is now being validated
 	 * is already dirty.
 	 */
 	KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0,
 	    ("vm_page_set_valid_range: page %p is dirty", m));
 
 	/*
 	 * Set valid bits inclusive of any overlap.
 	 */
 	m->valid |= vm_page_bits(base, size);
 }
 
 /*
  * Clear the given bits from the specified page's dirty field.
  *
  * The page's object must be write-locked.  A plain store is used when no
  * concurrent setter is possible; otherwise the bits are cleared with an
  * atomic operation whose width is chosen from PAGE_SIZE at compile time.
  */
 static __inline void
 vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits)
 {
 	uintptr_t addr;
 #if PAGE_SIZE < 16384
 	int shift;
 #endif
 
 	/*
 	 * If the object is locked and the page is neither exclusive busy nor
 	 * write mapped, then the page's dirty field cannot possibly be
 	 * set by a concurrent pmap operation.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m))
 		m->dirty &= ~pagebits;
 	else {
 		/*
 		 * The pmap layer can call vm_page_dirty() without
 		 * holding a distinguished lock.  The combination of
 		 * the object's lock and an atomic operation suffice
 		 * to guarantee consistency of the page dirty field.
 		 *
 		 * For PAGE_SIZE == 32768 case, compiler already
 		 * properly aligns the dirty field, so no forcible
 		 * alignment is needed. Only require existence of
 		 * atomic_clear_64 when page size is 32768.
 		 */
 		addr = (uintptr_t)&m->dirty;
 #if PAGE_SIZE == 32768
 		atomic_clear_64((uint64_t *)addr, pagebits);
 #elif PAGE_SIZE == 16384
 		atomic_clear_32((uint32_t *)addr, pagebits);
 #else		/* PAGE_SIZE <= 8192 */
 		/*
 		 * Use a trick to perform a 32-bit atomic on the
 		 * containing aligned word, to not depend on the existence
 		 * of atomic_clear_{8, 16}.
 		 */
 		/* Byte offset of the dirty field inside its 32-bit word. */
 		shift = addr & (sizeof(uint32_t) - 1);
 #if BYTE_ORDER == BIG_ENDIAN
 		/* On big-endian the offset counts from the other end. */
 		shift = (sizeof(uint32_t) - sizeof(m->dirty) - shift) * NBBY;
 #else
 		shift *= NBBY;
 #endif
 		/* Round down to the containing word and clear in place. */
 		addr &= ~(sizeof(uint32_t) - 1);
 		atomic_clear_32((uint32_t *)addr, pagebits << shift);
 #endif		/* PAGE_SIZE */
 	}
 }
 
 /*
  *	vm_page_set_validclean:
  *
  *	Sets portions of a page valid and clean.  The arguments are expected
  *	to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive
  *	of any partial chunks touched by the range.  The invalid portion of
  *	such chunks will be zeroed.
  *
  *	(base + size) must be less than or equal to PAGE_SIZE.
  *
  *	The page's object must be write-locked.
  */
 void
 vm_page_set_validclean(vm_page_t m, int base, int size)
 {
 	vm_page_bits_t oldvalid, pagebits;
 	int endoff, frag;
 
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (size == 0)	/* handle degenerate case */
 		return;
 
 	/*
 	 * If the base is not DEV_BSIZE aligned and the valid
 	 * bit is clear, we have to zero out a portion of the
 	 * first block.
 	 */
 	if ((frag = base & ~(DEV_BSIZE - 1)) != base &&
 	    (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0)
 		pmap_zero_page_area(m, frag, base - frag);
 
 	/*
 	 * If the ending offset is not DEV_BSIZE aligned and the
 	 * valid bit is clear, we have to zero out a portion of
 	 * the last block.
 	 */
 	endoff = base + size;
 	if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff &&
 	    (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0)
 		pmap_zero_page_area(m, endoff,
 		    DEV_BSIZE - (endoff & (DEV_BSIZE - 1)));
 
 	/*
 	 * Set valid, clear dirty bits.  If validating the entire
 	 * page we can safely clear the pmap modify bit.  We also
 	 * use this opportunity to clear the VPO_NOSYNC flag.  If a process
 	 * takes a write fault on a MAP_NOSYNC memory area the flag will
 	 * be set again.
 	 *
 	 * We set valid bits inclusive of any overlap, but we can only
 	 * clear dirty bits for DEV_BSIZE chunks that are fully within
 	 * the range.
 	 */
 	oldvalid = m->valid;
 	pagebits = vm_page_bits(base, size);
 	m->valid |= pagebits;
 	/*
 	 * The disabled code below would narrow pagebits to the chunks that
 	 * lie fully inside the range; as compiled, the inclusive mask
 	 * computed above is also used for clearing dirty bits.
 	 */
 #if 0	/* NOT YET */
 	if ((frag = base & (DEV_BSIZE - 1)) != 0) {
 		frag = DEV_BSIZE - frag;
 		base += frag;
 		size -= frag;
 		if (size < 0)
 			size = 0;
 	}
 	pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1));
 #endif
 	if (base == 0 && size == PAGE_SIZE) {
 		/*
 		 * The page can only be modified within the pmap if it is
 		 * mapped, and it can only be mapped if it was previously
 		 * fully valid.
 		 */
 		if (oldvalid == VM_PAGE_BITS_ALL)
 			/*
 			 * Perform the pmap_clear_modify() first.  Otherwise,
 			 * a concurrent pmap operation, such as
 			 * pmap_protect(), could clear a modification in the
 			 * pmap and set the dirty field on the page before
 			 * pmap_clear_modify() had begun and after the dirty
 			 * field was cleared here.
 			 */
 			pmap_clear_modify(m);
 		m->dirty = 0;
 		m->oflags &= ~VPO_NOSYNC;
 	} else if (oldvalid != VM_PAGE_BITS_ALL)
 		/* Was not fully valid before, so a plain store is used. */
 		m->dirty &= ~pagebits;
 	else
 		/* Was fully valid: use the helper that may clear atomically. */
 		vm_page_clear_dirty_mask(m, pagebits);
 }
 
 /*
  *	vm_page_clear_dirty:
  *
  *	Clears the dirty bits of the DEV_BSIZE chunks touched by the byte
  *	range [base, base + size) of the page.
  */
 void
 vm_page_clear_dirty(vm_page_t m, int base, int size)
 {
 	vm_page_bits_t mask;
 
 	mask = vm_page_bits(base, size);
 	vm_page_clear_dirty_mask(m, mask);
 }
 
 /*
  *	vm_page_set_invalid:
  *
  *	Invalidates DEV_BSIZE'd chunks within a page.  Both the
  *	valid and dirty bits for the affected areas are cleared.
  *
  *	For a vnode-backed object, a range that starts at offset 0 of the
  *	page and reaches the end of the file invalidates the whole page.
  */
 void
 vm_page_set_invalid(vm_page_t m, int base, int size)
 {
 	vm_object_t obj;
 	vm_page_bits_t mask;
 
 	obj = m->object;
 	VM_OBJECT_ASSERT_WLOCKED(obj);
 
 	/* Start from "everything" and narrow unless the EOF rule applies. */
 	mask = VM_PAGE_BITS_ALL;
 	if (obj->type != OBJT_VNODE || base != 0 ||
 	    IDX_TO_OFF(m->pindex) + size < obj->un_pager.vnp.vnp_size)
 		mask = vm_page_bits(base, size);
 
 	/* A fully valid page may be mapped; unmap it before invalidating. */
 	if (mask != 0 && m->valid == VM_PAGE_BITS_ALL)
 		pmap_remove_all(m);
 	KASSERT((mask == 0 && m->valid == VM_PAGE_BITS_ALL) ||
 	    !pmap_page_is_mapped(m),
 	    ("vm_page_set_invalid: page %p is mapped", m));
 
 	m->valid &= ~mask;
 	m->dirty &= ~mask;
 }
 
 /*
  * vm_page_zero_invalid()
  *
  *	The kernel treats the invalid parts of a page as garbage, yet such
  *	a page can end up mapped into user memory.  Before that happens the
  *	invalid parts must be zeroed so that user code sees what it expects.
  *
  *	Partially valid pages arise most often when the tail of a file is
  *	mapped and the file size is not a multiple of the page size.
  */
 void
 vm_page_zero_invalid(vm_page_t m, boolean_t setvalid)
 {
 	int blk, nblks, run_start;
 
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	/*
 	 * Walk the valid bits and zero every run of invalid blocks.  The
 	 * extra iteration at blk == nblks flushes a run that extends to the
 	 * end of the page; it is tested first so that the valid field is
 	 * never shifted by nblks.  Invalid areas smaller than DEV_BSIZE
 	 * (whose valid bit may be set) were already zeroed by
 	 * vm_page_set_validclean().
 	 */
 	nblks = PAGE_SIZE / DEV_BSIZE;
 	run_start = 0;
 	for (blk = 0; blk <= nblks; ++blk) {
 		if (blk != nblks &&
 		    (m->valid & ((vm_page_bits_t)1 << blk)) == 0)
 			continue;
 		if (blk > run_start) {
 			pmap_zero_page_area(m, run_start << DEV_BSHIFT,
 			    (blk - run_start) << DEV_BSHIFT);
 		}
 		run_start = blk + 1;
 	}
 
 	/*
 	 * setvalid is TRUE when the zeroed areas may safely be marked
 	 * valid, i.e. when there are no cache consistency issues.  For
 	 * example, this is fine for UFS but not for NFS.
 	 */
 	if (setvalid)
 		m->valid = VM_PAGE_BITS_ALL;
 }
 
 /*
  *	vm_page_is_valid:
  *
  *	Reports whether every DEV_BSIZE chunk touched by the byte range
  *	[base, base + size) of the page is valid.  With size == 0 the
  *	result is FALSE only when the page is entirely invalid, and TRUE
  *	otherwise.
  */
 int
 vm_page_is_valid(vm_page_t m, int base, int size)
 {
 	vm_page_bits_t wanted;
 
 	VM_OBJECT_ASSERT_LOCKED(m->object);
 	wanted = vm_page_bits(base, size);
 	if (m->valid == 0)
 		return (0);
 	return ((m->valid & wanted) == wanted);
 }
 
 /*
  *	vm_page_ps_is_valid:
  *
  *	Returns TRUE if the entire (super)page is valid and FALSE otherwise.
  */
 boolean_t
 vm_page_ps_is_valid(vm_page_t m)
 {
 	int count, idx;
 
 	VM_OBJECT_ASSERT_LOCKED(m->object);
 	count = atop(pagesizes[m->psind]);
 
 	/*
 	 * A superpage, i.e., a page whose page size index ("psind") is
 	 * greater than zero, consists of physically contiguous pages that
 	 * occupy adjacent entries in vm_page_array[], so they can be
 	 * reached by indexing from m.
 	 */
 	idx = 0;
 	while (idx < count) {
 		if (m[idx].valid != VM_PAGE_BITS_ALL)
 			return (FALSE);
 		idx++;
 	}
 	return (TRUE);
 }
 
 /*
  * Mark the page dirty if the pmap layer reports it as modified.  Nothing
  * is queried when the page is already fully dirty.
  *
  * The page's object must be write-locked.
  */
 void
 vm_page_test_dirty(vm_page_t m)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (m->dirty == VM_PAGE_BITS_ALL)
 		return;
 	if (pmap_is_modified(m))
 		vm_page_dirty(m);
 }
 
 /*
  * Out-of-line entry point that acquires the mutex returned by
  * vm_page_lockptr(m), passing the caller's file and line through to the
  * mutex code.
  */
 void
 vm_page_lock_KBI(vm_page_t m, const char *file, int line)
 {
 
 	mtx_lock_flags_(vm_page_lockptr(m), 0, file, line);
 }
 
 /*
  * Out-of-line entry point that releases the mutex returned by
  * vm_page_lockptr(m), passing the caller's file and line through to the
  * mutex code.
  */
 void
 vm_page_unlock_KBI(vm_page_t m, const char *file, int line)
 {
 
 	mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line);
 }
 
 /*
  * Out-of-line entry point that tries to acquire the mutex returned by
  * vm_page_lockptr(m) and returns the result of mtx_trylock_flags_()
  * unchanged.
  */
 int
 vm_page_trylock_KBI(vm_page_t m, const char *file, int line)
 {
 
 	return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line));
 }
 
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 /*
  * Assert that the current thread owns the page lock for m (MA_OWNED).
  * Only built with INVARIANTS or INVARIANT_SUPPORT.
  */
 void
 vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line)
 {
 
 	vm_page_lock_assert_KBI(m, MA_OWNED, file, line);
 }
 
 /*
  * Assert condition `a' (a mtx_assert() flag such as MA_OWNED) on the
  * mutex returned by vm_page_lockptr(m).  Only built with INVARIANTS or
  * INVARIANT_SUPPORT.
  */
 void
 vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line)
 {
 
 	mtx_assert_(vm_page_lockptr(m), a, file, line);
 }
 #endif
 
 #ifdef INVARIANTS
 void
 vm_page_object_lock_assert(vm_page_t m)
 {
 
 	/*
 	 * Certain of the page's fields may only be modified by the holder
 	 * of the containing object's lock or by the holder of the exclusive
 	 * busy.  Unfortunately, the holder of the exclusive busy is not
 	 * recorded, and thus cannot be checked here; an exclusively busied
 	 * page is therefore accepted without further checks.
 	 */
 	if (m->object == NULL || vm_page_xbusied(m))
 		return;
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 }
 
 /*
  * Check the preconditions for setting PGA_WRITEABLE on a page.  Does
  * nothing when `bits' does not include PGA_WRITEABLE.
  */
 void
 vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits)
 {
 
 	if ((bits & PGA_WRITEABLE) != 0) {
 		/*
 		 * PGA_WRITEABLE may only be set on a managed page, and only
 		 * while the page is exclusively busied or its object is
 		 * locked.  At present pmap_enter() is the only place that
 		 * sets this flag.
 		 */
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 		    ("PGA_WRITEABLE on unmanaged page"));
 		if (!vm_page_xbusied(m))
 			VM_OBJECT_ASSERT_LOCKED(m->object);
 	}
 }
 #endif
 
 #include "opt_ddb.h"
 #ifdef DDB
 #include <sys/kernel.h>
 
 #include <ddb/ddb.h>
 
 /*
  * DDB "show page": print the global page counters and paging thresholds
  * held in vm_cnt, one per line.
  */
 DB_SHOW_COMMAND(page, vm_page_print_page_info)
 {
 	db_printf("vm_cnt.v_free_count: %d\n", vm_cnt.v_free_count);
 	db_printf("vm_cnt.v_cache_count: %d\n", vm_cnt.v_cache_count);
 	db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count);
 	db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count);
 	db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count);
 	db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved);
 	db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min);
 	db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target);
-	db_printf("vm_cnt.v_cache_min: %d\n", vm_cnt.v_cache_min);
 	db_printf("vm_cnt.v_inactive_target: %d\n", vm_cnt.v_inactive_target);
 }
 
 /*
  * DDB "show pageq": print the global free and cache page counts, then
  * one line per VM domain with its page count, free count, active and
  * inactive queue lengths and pass value.
  */
 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
 {
 	int dom;
 
 	db_printf("pq_free %d pq_cache %d\n",
 	    vm_cnt.v_free_count, vm_cnt.v_cache_count);
 
 	dom = 0;
 	while (dom < vm_ndomains) {
 		db_printf(
 	"dom %d page_cnt %d free %d pq_act %d pq_inact %d pass %d\n",
 		    dom,
 		    vm_dom[dom].vmd_page_count,
 		    vm_dom[dom].vmd_free_count,
 		    vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt,
 		    vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt,
 		    vm_dom[dom].vmd_pass);
 		dom++;
 	}
 }
 
 /*
  * DDB "show pginfo[/p] addr": dump the fields of one vm_page.
  *
  * Without a modifier `addr' is taken as the address of the vm_page
  * structure itself; with the 'p' modifier it is taken as a physical
  * address and translated with PHYS_TO_VM_PAGE().
  */
 DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo)
 {
 	vm_page_t m;
 	boolean_t phys;
 
 	if (!have_addr) {
 		db_printf("show pginfo addr\n");
 		return;
 	}
 
 	phys = strchr(modif, 'p') != NULL;
 	if (phys)
 		m = PHYS_TO_VM_PAGE(addr);
 	else
 		m = (vm_page_t)addr;
 
 	/*
 	 * The valid and dirty fields are vm_page_bits_t, whose width
 	 * depends on PAGE_SIZE.  Widen them to uintmax_t and print with
 	 * %jx so the format always matches the argument.
 	 */
 	db_printf(
     "page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n"
     "  af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%jx dirty 0x%jx\n",
 	    m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr,
 	    m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags,
 	    m->flags, m->act_count, m->busy_lock, (uintmax_t)m->valid,
 	    (uintmax_t)m->dirty);
 }
 #endif /* DDB */
Index: head/sys/vm/vm_param.h
===================================================================
--- head/sys/vm/vm_param.h	(revision 287639)
+++ head/sys/vm/vm_param.h	(revision 287640)
@@ -1,134 +1,134 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_param.h	8.1 (Berkeley) 6/11/93
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  *
  * $FreeBSD$
  */
 
 /*
  *	Machine independent virtual memory parameters.
  */
 
 #ifndef	_VM_PARAM_
 #define	_VM_PARAM_
 
 #include <machine/vmparam.h>
 
 /*
  * CTL_VM identifiers
  */
 #define	VM_TOTAL		1	/* struct vmtotal */
 #define	VM_METER                VM_TOTAL/* deprecated, use VM_TOTAL */
 #define	VM_LOADAVG	 	2	/* struct loadavg */
 #define VM_V_FREE_MIN		3	/* vm_cnt.v_free_min */
 #define VM_V_FREE_TARGET	4	/* vm_cnt.v_free_target */
 #define VM_V_FREE_RESERVED	5	/* vm_cnt.v_free_reserved */
 #define VM_V_INACTIVE_TARGET	6	/* vm_cnt.v_inactive_target */
-#define	VM_V_CACHE_MIN		7	/* vm_cnt.v_cache_min */
-#define	VM_V_CACHE_MAX		8	/* vm_cnt.v_cache_max */
+#define	VM_OBSOLETE_7		7	/* unused, formerly v_cache_min */
+#define	VM_OBSOLETE_8		8	/* unused, formerly v_cache_max */
 #define VM_V_PAGEOUT_FREE_MIN	9	/* vm_cnt.v_pageout_free_min */
 #define	VM_OBSOLETE_10		10	/* pageout algorithm */
 #define VM_SWAPPING_ENABLED	11	/* swapping enabled */
 #define	VM_MAXID		12	/* number of valid vm ids */
 
 /*
  * Structure for swap device statistics
  *
  * NOTE(review): the per-field notes below are inferred from the field
  * names only; confirm against the code that fills this structure.
  */
 #define XSWDEV_VERSION	1
 struct xswdev {
 	u_int	xsw_version;	/* presumably XSWDEV_VERSION -- TODO confirm */
 	dev_t	xsw_dev;	/* presumably the swap device */
 	int	xsw_flags;	/* flags; meaning not visible here */
 	int	xsw_nblks;	/* presumably total size; units unconfirmed */
 	int     xsw_used;	/* presumably amount in use; units unconfirmed */
 };
 
 /*
  *	Return values from the VM routines.
  */
 #define	KERN_SUCCESS		0
 #define	KERN_INVALID_ADDRESS	1
 #define	KERN_PROTECTION_FAILURE	2
 #define	KERN_NO_SPACE		3
 #define	KERN_INVALID_ARGUMENT	4
 #define	KERN_FAILURE		5
 #define	KERN_RESOURCE_SHORTAGE	6
 #define	KERN_NOT_RECEIVER	7
 #define	KERN_NO_ACCESS		8
 
 /*
  * Default for PA_LOCK_COUNT when it has not been defined already:
  * 32 on SMP kernels, 1 otherwise.
  * NOTE(review): presumably the size of the page lock array -- confirm
  * against its users.
  */
 #ifndef PA_LOCK_COUNT
 #ifdef SMP
 #define	PA_LOCK_COUNT	32
 #else
 #define PA_LOCK_COUNT	1
 #endif	/* !SMP */
 #endif	/* !PA_LOCK_COUNT */
 
 #ifndef ASSEMBLER
 #ifdef _KERNEL
 #define num_pages(x) \
 	((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT))
 extern	unsigned long maxtsiz;
 extern	unsigned long dfldsiz;
 extern	unsigned long maxdsiz;
 extern	unsigned long dflssiz;
 extern	unsigned long maxssiz;
 extern	unsigned long sgrowsiz;
 #endif				/* _KERNEL */
 #endif				/* ASSEMBLER */
 #endif				/* _VM_PARAM_ */
Index: head/usr.bin/vmstat/vmstat.c
===================================================================
--- head/usr.bin/vmstat/vmstat.c	(revision 287639)
+++ head/usr.bin/vmstat/vmstat.c	(revision 287640)
@@ -1,1585 +1,1583 @@
 /*
  * Copyright (c) 1980, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 static const char copyright[] =
 "@(#) Copyright (c) 1980, 1986, 1991, 1993\n\
 	The Regents of the University of California.  All rights reserved.\n";
 #endif /* not lint */
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)vmstat.c	8.1 (Berkeley) 6/6/93";
 #endif /* not lint */
 #endif
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/uio.h>
 #include <sys/namei.h>
 #include <sys/malloc.h>
 #include <sys/signal.h>
 #include <sys/fcntl.h>
 #include <sys/ioctl.h>
 #include <sys/resource.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
 #include <sys/user.h>
 #include <sys/vmmeter.h>
 #include <sys/pcpu.h>
 
 #include <vm/vm_param.h>
 
 #include <ctype.h>
 #include <devstat.h>
 #include <err.h>
 #include <errno.h>
 #include <inttypes.h>
 #include <kvm.h>
 #include <limits.h>
 #include <memstat.h>
 #include <nlist.h>
 #include <paths.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sysexits.h>
 #include <time.h>
 #include <unistd.h>
 #include <libutil.h>
 
 static char da[] = "da";
 
 /*
  * Kernel symbols looked up with kvm_nlist() when reading from a kernel
  * image or core.  Each X_* macro is the index of the entry that follows
  * it; the list ends with an empty name.  The entries under "notyet" are
  * placeholders and are not compiled.
  */
 static struct nlist namelist[] = {
 #define X_SUM		0
 	{ "_vm_cnt" },
 #define X_HZ		1
 	{ "_hz" },
 #define X_STATHZ	2
 	{ "_stathz" },
 #define X_NCHSTATS	3
 	{ "_nchstats" },
 #define	X_INTRNAMES	4
 	{ "_intrnames" },
 #define	X_SINTRNAMES	5
 	{ "_sintrnames" },
 #define	X_INTRCNT	6
 	{ "_intrcnt" },
 #define	X_SINTRCNT	7
 	{ "_sintrcnt" },
 #ifdef notyet
 #define	X_DEFICIT	XXX
 	{ "_deficit" },
 #define X_REC		XXX
 	{ "_rectime" },
 #define X_PGIN		XXX
 	{ "_pgintime" },
 #define	X_XSTATS	XXX
 	{ "_xstats" },
 #define X_END		XXX
 #else
 #define X_END		8
 #endif
 	{ "" },
 };
 
 static struct statinfo cur, last;
 static int num_devices, maxshowdevs;
 static long generation;
 static struct device_selection *dev_select;
 static int num_selected;
 static struct devstat_match *matches;
 static int num_matches = 0;
 static int num_devices_specified, num_selections;
 static long select_generation;
 static char **specified_devices;
 static devstat_select_mode select_mode;
 
 static struct	vmmeter sum, osum;
 
 #define	VMSTAT_DEFAULT_LINES	20	/* Default number of `winlines'. */
 volatile sig_atomic_t wresized;		/* Tty resized, when non-zero. */
 static int winlines = VMSTAT_DEFAULT_LINES; /* Current number of tty rows. */
 
 static int	aflag;
 static int	nflag;
 static int	Pflag;
 static int	hflag;
 
 static kvm_t   *kd;
 
 #define	FORKSTAT	0x01
 #define	INTRSTAT	0x02
 #define	MEMSTAT		0x04
 #define	SUMSTAT		0x08
 #define	TIMESTAT	0x10
 #define	VMSTAT		0x20
 #define ZMEMSTAT	0x40
 #define	OBJSTAT		0x80
 
 static void	cpustats(void);
 static void	pcpustats(int, u_long, int);
 static void	devstats(void);
 static void	doforkst(void);
 static void	dointr(unsigned int, int);
 static void	doobjstat(void);
 static void	dosum(void);
 static void	dovmstat(unsigned int, int);
 static void	domemstat_malloc(void);
 static void	domemstat_zone(void);
 static void	kread(int, void *, size_t);
 static void	kreado(int, void *, size_t, size_t);
 static char    *kgetstr(const char *);
 static void	needhdr(int);
 static void	needresize(int);
 static void	doresize(void);
 static void	printhdr(int, u_long);
 static void	usage(void);
 
 static long	pct(long, long);
 static long long	getuptime(void);
 
 static char   **getdrivedata(char **);
 
 /*
  * Parse the command line, optionally open a kernel image or core through
  * libkvm (-M/-N), and run every report selected in `todo'.  When no
  * report option is given the periodic VMSTAT display is used.
  *
  * The interval is kept in milliseconds.  It comes from -w or from the
  * first remaining operand; the repeat count comes from -c or from the
  * second remaining operand.
  */
 int
 main(int argc, char *argv[])
 {
 	int c, todo;
 	unsigned int interval;
 	float f;
 	int reps;
 	char *memf, *nlistf;
 	char errbuf[_POSIX2_LINE_MAX];
 
 	memf = nlistf = NULL;
 	interval = reps = todo = 0;
 	maxshowdevs = 2;
 	/* Default hflag follows whether stdout is a terminal; -h/-H override. */
 	hflag = isatty(1);
 	while ((c = getopt(argc, argv, "ac:fhHiM:mN:n:oPp:stw:z")) != -1) {
 		switch (c) {
 		case 'a':
 			aflag++;
 			break;
 		case 'c':
 			reps = atoi(optarg);
 			break;
 		case 'P':
 			Pflag++;
 			break;
 		case 'f':
 			todo |= FORKSTAT;
 			break;
 		case 'h':
 			hflag = 1;
 			break;
 		case 'H':
 			hflag = 0;
 			break;
 		case 'i':
 			todo |= INTRSTAT;
 			break;
 		case 'M':
 			memf = optarg;
 			break;
 		case 'm':
 			todo |= MEMSTAT;
 			break;
 		case 'N':
 			nlistf = optarg;
 			break;
 		case 'n':
 			nflag = 1;
 			maxshowdevs = atoi(optarg);
 			if (maxshowdevs < 0)
 				errx(1, "number of devices %d is < 0",
 				     maxshowdevs);
 			break;
 		case 'o':
 			todo |= OBJSTAT;
 			break;
 		case 'p':
 			if (devstat_buildmatch(optarg, &matches, &num_matches) != 0)
 				errx(1, "%s", devstat_errbuf);
 			break;
 		case 's':
 			todo |= SUMSTAT;
 			break;
 		case 't':
 #ifdef notyet
 			todo |= TIMESTAT;
 #else
 			errx(EX_USAGE, "sorry, -t is not (re)implemented yet");
 #endif
 			break;
 		case 'w':
 			/* Convert to milliseconds. */
 			f = atof(optarg);
 			interval = f * 1000;
 			break;
 		case 'z':
 			todo |= ZMEMSTAT;
 			break;
 		case '?':
 		default:
 			usage();
 		}
 	}
 	argc -= optind;
 	argv += optind;
 
 	if (todo == 0)
 		todo = VMSTAT;
 
 	/* kd stays NULL unless a memory file was named with -M. */
 	if (memf != NULL) {
 		kd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, errbuf);
 		if (kd == NULL)
 			errx(1, "kvm_openfiles: %s", errbuf);
 	}
 
 retry_nlist:
 	if (kd != NULL && (c = kvm_nlist(kd, namelist)) != 0) {
 		if (c > 0) {
 			/*
 			 * 'cnt' was renamed to 'vm_cnt'. If 'vm_cnt' is not
 			 * found try looking up older 'cnt' symbol.
 			 */
 			if (namelist[X_SUM].n_type == 0 &&
 			    strcmp(namelist[X_SUM].n_name, "_vm_cnt") == 0) {
 				namelist[X_SUM].n_name = "_cnt";
 				goto retry_nlist;
 			}
 			/* List every symbol that is still unresolved. */
 			warnx("undefined symbols:");
 			for (c = 0;
 			     c < (int)(sizeof(namelist)/sizeof(namelist[0]));
 			     c++)
 				if (namelist[c].n_type == 0)
 					(void)fprintf(stderr, " %s",
 					    namelist[c].n_name);
 			(void)fputc('\n', stderr);
 		} else
 			warnx("kvm_nlist: %s", kvm_geterr(kd));
 		exit(1);
 	}
 	if (kd && Pflag)
 		errx(1, "Cannot use -P with crash dumps");
 
 	if (todo & VMSTAT) {
 		/*
 		 * Make sure that the userland devstat version matches the
 		 * kernel devstat version.  If not, exit and print a
 		 * message informing the user of his mistake.
 		 */
 		if (devstat_checkversion(NULL) < 0)
 			errx(1, "%s", devstat_errbuf);
 
 
 		/* Consumes leading device names; returns the rest of argv. */
 		argv = getdrivedata(argv);
 	}
 
 	/* Remaining operands: [interval-in-seconds [count]]. */
 	if (*argv) {
 		f = atof(*argv);
 		interval = f * 1000;
 		if (*++argv)
 			reps = atoi(*argv);
 	}
 
 	/*
 	 * An interval without a count sets reps to -1; a count without an
 	 * interval implies a one second interval.
 	 */
 	if (interval) {
 		if (!reps)
 			reps = -1;
 	} else if (reps)
 		interval = 1 * 1000;
 
 	if (todo & FORKSTAT)
 		doforkst();
 	if (todo & MEMSTAT)
 		domemstat_malloc();
 	if (todo & ZMEMSTAT)
 		domemstat_zone();
 	if (todo & SUMSTAT)
 		dosum();
 	if (todo & OBJSTAT)
 		doobjstat();
 #ifdef notyet
 	if (todo & TIMESTAT)
 		dotimes();
 #endif
 	if (todo & INTRSTAT)
 		dointr(interval, reps);
 	if (todo & VMSTAT)
 		dovmstat(interval, reps);
 	exit(0);
 }
 
 /*
  * sysctlbyname() wrapper.  Any failure other than ENOMEM terminates the
  * program with a message naming the sysctl; success and ENOMEM failures
  * return sysctlbyname()'s result to the caller.
  */
 static int
 mysysctl(const char *name, void *oldp, size_t *oldlenp,
     void *newp, size_t newlen)
 {
 	int rv;
 
 	rv = sysctlbyname(name, oldp, oldlenp, newp, newlen);
 	if (rv == 0)
 		return (rv);
 	if (errno != ENOMEM)
 		err(1, "sysctl(%s)", name);
 	return (rv);
 }
 
 /*
  * Read the current devstat device list into `cur', collect the device
  * names given at the front of argv into specified_devices, and select
  * the devices to display.
  *
  * Device names are taken from argv until the end of the list or the
  * first operand that starts with a digit.  The returned pointer is the
  * first operand that was not consumed.
  *
  * Any devstat error or allocation failure terminates the program.
  */
 static char **
 getdrivedata(char **argv)
 {
 	char **grown;
 
 	if ((num_devices = devstat_getnumdevs(NULL)) < 0)
 		errx(1, "%s", devstat_errbuf);
 
 	cur.dinfo = (struct devinfo *)calloc(1, sizeof(struct devinfo));
 	last.dinfo = (struct devinfo *)calloc(1, sizeof(struct devinfo));
 	if (cur.dinfo == NULL || last.dinfo == NULL)
 		err(1, "calloc");
 
 	if (devstat_getdevs(NULL, &cur) == -1)
 		errx(1, "%s", devstat_errbuf);
 
 	num_devices = cur.dinfo->numdevs;
 	generation = cur.dinfo->generation;
 
 	specified_devices = (char **)malloc(sizeof(char *));
 	if (specified_devices == NULL)
 		err(1, "malloc");
 	for (num_devices_specified = 0; *argv; ++argv) {
 		/* isdigit() requires an unsigned char value (or EOF). */
 		if (isdigit((unsigned char)**argv))
 			break;
 		num_devices_specified++;
 		/* Grow through a temporary so the old array is never lost. */
 		grown = (char **)realloc(specified_devices,
 		    sizeof(char *) * num_devices_specified);
 		if (grown == NULL)
 			err(1, "realloc");
 		specified_devices = grown;
 		specified_devices[num_devices_specified - 1] = *argv;
 	}
 	dev_select = NULL;
 
 	/* Without -n, show at least every device that was named. */
 	if (nflag == 0 && maxshowdevs < num_devices_specified)
 		maxshowdevs = num_devices_specified;
 
 	/*
 	 * People are generally only interested in disk statistics when
 	 * they're running vmstat.  So, that's what we're going to give
 	 * them if they don't specify anything by default.  We'll also give
 	 * them any other random devices in the system so that we get to
 	 * maxshowdevs devices, if that many devices exist.  If the user
 	 * specifies devices on the command line, either through a pattern
 	 * match or by naming them explicitly, we will give the user only
 	 * those devices.
 	 */
 	if ((num_devices_specified == 0) && (num_matches == 0)) {
 		if (devstat_buildmatch(da, &matches, &num_matches) != 0)
 			errx(1, "%s", devstat_errbuf);
 
 		select_mode = DS_SELECT_ADD;
 	} else
 		select_mode = DS_SELECT_ONLY;
 
 	/*
 	 * At this point, selectdevs will almost surely indicate that the
 	 * device list has changed, so we don't look for return values of 0
 	 * or 1.  If we get back -1, though, there is an error.
 	 */
 	if (devstat_selectdevs(&dev_select, &num_selected, &num_selections,
 		       &select_generation, generation, cur.dinfo->devices,
 		       num_devices, matches, num_matches, specified_devices,
 		       num_devices_specified, select_mode,
 		       maxshowdevs, 0) == -1)
 		errx(1, "%s", devstat_errbuf);
 
 	return(argv);
 }
 
 /*
  * Return the CLOCK_UPTIME clock reading converted to nanoseconds.  The
  * result of clock_gettime() is deliberately not checked.
  */
 static long long
 getuptime(void)
 {
 	struct timespec now;
 	long long nsec;
 
 	(void)clock_gettime(CLOCK_UPTIME, &now);
 
 	nsec = (long long)now.tv_sec * 1000000000LL;
 	nsec += now.tv_nsec;
 	return (nsec);
 }
 
 /*
  * Fetch the per-CPU structures from the kvm descriptor `kd'.
  *
  * On return *pcpup is a calloc()ed array with *maxcpup slots, each
  * holding whatever kvm_getpcpu() returned for that CPU index; release
  * it with free_pcpu().  When no kvm descriptor is open, *pcpup is NULL
  * and *maxcpup is 0.
  *
  * Any kvm or allocation failure terminates the program.
  */
 static void
 fill_pcpu(struct pcpu ***pcpup, int* maxcpup)
 {
 	struct pcpu **pcpu;
 	int maxcpu, i;
 
 	/* Give both outputs a defined value on the early-return path. */
 	*pcpup = NULL;
 	*maxcpup = 0;
 
 	if (kd == NULL)
 		return;
 
 	maxcpu = kvm_getmaxcpu(kd);
 	if (maxcpu < 0)
 		errx(1, "kvm_getmaxcpu: %s", kvm_geterr(kd));
 
 	pcpu = calloc(maxcpu, sizeof(struct pcpu *));
 	if (pcpu == NULL)
 		err(1, "calloc");
 
 	for (i = 0; i < maxcpu; i++) {
 		pcpu[i] = kvm_getpcpu(kd, i);
 		/* (struct pcpu *)-1 is the error return checked for here. */
 		if (pcpu[i] == (struct pcpu *)-1)
 			errx(1, "kvm_getpcpu: %s", kvm_geterr(kd));
 	}
 
 	*maxcpup = maxcpu;
 	*pcpup = pcpu;
 }
 
 /*
  * Free the first `maxcpu' entries of the array `pcpu' and then the
  * array itself.
  */
 static void
 free_pcpu(struct pcpu **pcpu, int maxcpu)
 {
 	int cpu;
 
 	cpu = 0;
 	while (cpu < maxcpu) {
 		free(pcpu[cpu]);
 		cpu++;
 	}
 	free(pcpu);
 }
 
 /*
  * Fill *vmmp with the system's VM meter counters.
  *
  * With a kvm descriptor open (kd != NULL) the global counters are read from
  * the X_SUM symbol and the per-CPU pc_cnt counters of every CPU returned by
  * fill_pcpu() are added on top.  Otherwise each counter is fetched
  * individually from the corresponding vm.stats.<category>.<name> sysctl.
  */
 static void
 fill_vmmeter(struct vmmeter *vmmp)
 {
 	struct pcpu **pcpu;
 	int maxcpu, i;
 
 	if (kd != NULL) {
 		kread(X_SUM, vmmp, sizeof(*vmmp));
 		fill_pcpu(&pcpu, &maxcpu);
 		for (i = 0; i < maxcpu; i++) {
 			/* Slots without per-CPU data are skipped. */
 			if (pcpu[i] == NULL)
 				continue;
 #define ADD_FROM_PCPU(i, name) \
 			vmmp->name += pcpu[i]->pc_cnt.name
 			ADD_FROM_PCPU(i, v_swtch);
 			ADD_FROM_PCPU(i, v_trap);
 			ADD_FROM_PCPU(i, v_syscall);
 			ADD_FROM_PCPU(i, v_intr);
 			ADD_FROM_PCPU(i, v_soft);
 			ADD_FROM_PCPU(i, v_vm_faults);
 			ADD_FROM_PCPU(i, v_io_faults);
 			ADD_FROM_PCPU(i, v_cow_faults);
 			ADD_FROM_PCPU(i, v_cow_optim);
 			ADD_FROM_PCPU(i, v_zfod);
 			ADD_FROM_PCPU(i, v_ozfod);
 			ADD_FROM_PCPU(i, v_swapin);
 			ADD_FROM_PCPU(i, v_swapout);
 			ADD_FROM_PCPU(i, v_swappgsin);
 			ADD_FROM_PCPU(i, v_swappgsout);
 			ADD_FROM_PCPU(i, v_vnodein);
 			ADD_FROM_PCPU(i, v_vnodeout);
 			ADD_FROM_PCPU(i, v_vnodepgsin);
 			ADD_FROM_PCPU(i, v_vnodepgsout);
 			ADD_FROM_PCPU(i, v_intrans);
 			ADD_FROM_PCPU(i, v_tfree);
 			ADD_FROM_PCPU(i, v_forks);
 			ADD_FROM_PCPU(i, v_vforks);
 			ADD_FROM_PCPU(i, v_rforks);
 			ADD_FROM_PCPU(i, v_kthreads);
 			ADD_FROM_PCPU(i, v_forkpages);
 			ADD_FROM_PCPU(i, v_vforkpages);
 			ADD_FROM_PCPU(i, v_rforkpages);
 			ADD_FROM_PCPU(i, v_kthreadpages);
 #undef ADD_FROM_PCPU
 		}
 		free_pcpu(pcpu, maxcpu);
 	} else {
 		/*
 		 * Every counter is requested with a buffer the size of an
 		 * unsigned int; the return value of mysysctl() is not checked.
 		 */
 		size_t size = sizeof(unsigned int);
 #define GET_VM_STATS(cat, name) \
 	mysysctl("vm.stats." #cat "." #name, &vmmp->name, &size, NULL, 0)
 		/* sys */
 		GET_VM_STATS(sys, v_swtch);
 		GET_VM_STATS(sys, v_trap);
 		GET_VM_STATS(sys, v_syscall);
 		GET_VM_STATS(sys, v_intr);
 		GET_VM_STATS(sys, v_soft);
 
 		/* vm */
 		GET_VM_STATS(vm, v_vm_faults);
 		GET_VM_STATS(vm, v_io_faults);
 		GET_VM_STATS(vm, v_cow_faults);
 		GET_VM_STATS(vm, v_cow_optim);
 		GET_VM_STATS(vm, v_zfod);
 		GET_VM_STATS(vm, v_ozfod);
 		GET_VM_STATS(vm, v_swapin);
 		GET_VM_STATS(vm, v_swapout);
 		GET_VM_STATS(vm, v_swappgsin);
 		GET_VM_STATS(vm, v_swappgsout);
 		GET_VM_STATS(vm, v_vnodein);
 		GET_VM_STATS(vm, v_vnodeout);
 		GET_VM_STATS(vm, v_vnodepgsin);
 		GET_VM_STATS(vm, v_vnodepgsout);
 		GET_VM_STATS(vm, v_intrans);
 		GET_VM_STATS(vm, v_reactivated);
 		GET_VM_STATS(vm, v_pdwakeups);
 		GET_VM_STATS(vm, v_pdpages);
 		GET_VM_STATS(vm, v_tcached);
 		GET_VM_STATS(vm, v_dfree);
 		GET_VM_STATS(vm, v_pfree);
 		GET_VM_STATS(vm, v_tfree);
 		GET_VM_STATS(vm, v_page_size);
 		GET_VM_STATS(vm, v_page_count);
 		GET_VM_STATS(vm, v_free_reserved);
 		GET_VM_STATS(vm, v_free_target);
 		GET_VM_STATS(vm, v_free_min);
 		GET_VM_STATS(vm, v_free_count);
 		GET_VM_STATS(vm, v_wire_count);
 		GET_VM_STATS(vm, v_active_count);
 		GET_VM_STATS(vm, v_inactive_target);
 		GET_VM_STATS(vm, v_inactive_count);
 		GET_VM_STATS(vm, v_cache_count);
-		GET_VM_STATS(vm, v_cache_min);
-		GET_VM_STATS(vm, v_cache_max);
 		GET_VM_STATS(vm, v_pageout_free_min);
 		GET_VM_STATS(vm, v_interrupt_free_min);
 		/*GET_VM_STATS(vm, v_free_severe);*/
 		GET_VM_STATS(vm, v_forks);
 		GET_VM_STATS(vm, v_vforks);
 		GET_VM_STATS(vm, v_rforks);
 		GET_VM_STATS(vm, v_kthreads);
 		GET_VM_STATS(vm, v_forkpages);
 		GET_VM_STATS(vm, v_vforkpages);
 		GET_VM_STATS(vm, v_rforkpages);
 		GET_VM_STATS(vm, v_kthreadpages);
 #undef GET_VM_STATS
 	}
 }
 
 /*
  * Fetch the vm.vmtotal sysctl into *vmtp.
  *
  * Reading from a kvm descriptor (kd != NULL) is not implemented and
  * terminates the program, as does a reply whose size differs from
  * sizeof(struct vmtotal).
  */
 static void
 fill_vmtotal(struct vmtotal *vmtp)
 {
 	size_t size;
 
 	if (kd != NULL) {
 		/* XXX fill vmtp */
 		errx(1, "not implemented");
 	} else {
 		size = sizeof(*vmtp);
 		mysysctl("vm.vmtotal", vmtp, &size, NULL, 0);
 		/* Name the sysctl that was actually queried. */
 		if (size != sizeof(*vmtp))
 			errx(1, "vm.vmtotal size mismatch");
 	}
 }
 
 /*
  * Determine how many cpu columns, and what index they are in kern.cp_times.
  *
  * A CPU is counted when at least one of its CPUSTATES counters in
  * kern.cp_times is non-zero.  If maskp is not NULL, *maskp receives a bitmask
  * with bit i set for every counted CPU i; if maxidp is not NULL, *maxidp
  * receives the highest CPU index present in the sysctl reply.
  * Returns the number of counted CPUs.  Not implemented for kvm (kd != NULL).
  */
 static int
 getcpuinfo(u_long *maskp, int *maxidp)
 {
 	int maxcpu;
 	int maxid;
 	int ncpus;
 	int i, j;
 	int empty;
 	size_t size;
 	long *times;
 	u_long mask;
 
 	if (kd != NULL)
 		errx(1, "not implemented");
 	mask = 0;
 	ncpus = 0;
 	size = sizeof(maxcpu);
 	mysysctl("kern.smp.maxcpus", &maxcpu, &size, NULL, 0);
 	if (size != sizeof(maxcpu))
 		errx(1, "sysctl kern.smp.maxcpus");
 	size = sizeof(long) * maxcpu * CPUSTATES;
 	times = malloc(size);
 	if (times == NULL)
 		err(1, "malloc %zu bytes", size);
 	mysysctl("kern.cp_times", times, &size, NULL, 0);
 	/* size now holds the length of the reply, which fixes maxid. */
 	maxid = (size / CPUSTATES / sizeof(long)) - 1;
 	for (i = 0; i <= maxid; i++) {
 		empty = 1;
 		for (j = 0; empty && j < CPUSTATES; j++) {
 			if (times[i * CPUSTATES + j] != 0)
 				empty = 0;
 		}
 		if (!empty) {
 			mask |= (1ul << i);
 			ncpus++;
 		}
 	}
 	/* The counters were only needed to find the populated slots. */
 	free(times);
 	if (maskp)
 		*maskp = mask;
 	if (maxidp)
 		*maxidp = maxid;
 	return (ncpus);
 }
 
 
 /*
  * Print val through humanize_number() with autoscaling, right-justified in
  * a field of `size' characters.  size must lie between 5 and 9 inclusive;
  * anything else terminates the program.
  */
 static void
 prthuman(u_int64_t val, int size)
 {
 	char text[10];
 
 	if (!(size >= 5 && size <= 9))
 		errx(1, "doofus");
 	humanize_number(text, size, val, "", HN_AUTOSCALE,
 	    HN_B | HN_NOSPACE | HN_DECIMAL);
 	printf("%*s", size, text);
 }
 
 /*
  * hz is the clock rate read by dovmstat(); hdrcnt counts down the output
  * lines left before the column header is printed again.
  */
 static int hz, hdrcnt;
 
 /*
  * Per-CPU time counters from kern.cp_times for the current and the previous
  * sample, and the size in bytes of each of the two buffers.
  */
 static long *cur_cp_times;
 static long *last_cp_times;
 static size_t size_cp_times;
 
 /*
  * Main vmstat display loop: print one line of process, memory, paging,
  * disk, fault and CPU statistics per sample.
  *
  * interval is the delay between samples in milliseconds (it is handed to
  * usleep() multiplied by 1000).  reps is the number of lines to print; a
  * negative value keeps the loop running until the process is terminated.
  *
  * For the first line the rates are divided by the uptime in seconds; for
  * every later line they are scaled to events per second over `interval'.
  */
 static void
 dovmstat(unsigned int interval, int reps)
 {
 	struct vmtotal total;
 	time_t uptime, halfuptime;
 	struct devinfo *tmp_dinfo;
 	size_t size;
 	int ncpus, maxid;
 	u_long cpumask;
 	int rate_adj;
 
 	uptime = getuptime() / 1000000000LL;
 	halfuptime = uptime / 2;
 	rate_adj = 1;
 	ncpus = 1;
 	maxid = 0;
 	/* Only filled in by getcpuinfo() under -P, but always handed on. */
 	cpumask = 0;
 
 	/*
 	 * If the user stops the program (control-Z) and then resumes it,
 	 * print out the header again.
 	 */
 	(void)signal(SIGCONT, needhdr);
 
 	/*
 	 * If our standard output is a tty, then install a SIGWINCH handler
 	 * and set wresized so that our first iteration through the main
 	 * vmstat loop will peek at the terminal's current rows to find out
 	 * how many lines can fit in a screenful of output.
 	 */
 	if (isatty(fileno(stdout)) != 0) {
 		wresized = 1;
 		(void)signal(SIGWINCH, needresize);
 	} else {
 		wresized = 0;
 		winlines = VMSTAT_DEFAULT_LINES;
 	}
 
 	if (kd != NULL) {
 		if (namelist[X_STATHZ].n_type != 0 &&
 		    namelist[X_STATHZ].n_value != 0)
 			kread(X_STATHZ, &hz, sizeof(hz));
 		if (!hz)
 			kread(X_HZ, &hz, sizeof(hz));
 	} else {
 		struct clockinfo clockrate;
 
 		size = sizeof(clockrate);
 		mysysctl("kern.clockrate", &clockrate, &size, NULL, 0);
 		if (size != sizeof(clockrate))
 			errx(1, "clockrate size mismatch");
 		hz = clockrate.hz;
 	}
 
 	if (Pflag) {
 		ncpus = getcpuinfo(&cpumask, &maxid);
 		size_cp_times = sizeof(long) * (maxid + 1) * CPUSTATES;
 		cur_cp_times = calloc(1, size_cp_times);
 		last_cp_times = calloc(1, size_cp_times);
 		/* Both buffers are written by every later sample. */
 		if (cur_cp_times == NULL || last_cp_times == NULL)
 			err(1, "calloc");
 	}
 	for (hdrcnt = 1;;) {
 		if (!--hdrcnt)
 			printhdr(maxid, cpumask);
 		if (kd != NULL) {
 			if (kvm_getcptime(kd, cur.cp_time) < 0)
 				errx(1, "kvm_getcptime: %s", kvm_geterr(kd));
 		} else {
 			size = sizeof(cur.cp_time);
 			mysysctl("kern.cp_time", &cur.cp_time, &size, NULL, 0);
 			if (size != sizeof(cur.cp_time))
 				errx(1, "cp_time size mismatch");
 		}
 		if (Pflag) {
 			size = size_cp_times;
 			mysysctl("kern.cp_times", cur_cp_times, &size, NULL, 0);
 			if (size != size_cp_times)
 				errx(1, "cp_times mismatch");
 		}
 
 		/* The previous sample becomes `last'; its buffer is reused. */
 		tmp_dinfo = last.dinfo;
 		last.dinfo = cur.dinfo;
 		cur.dinfo = tmp_dinfo;
 		last.snap_time = cur.snap_time;
 
 		/*
 		 * Here what we want to do is refresh our device stats.
 		 * getdevs() returns 1 when the device list has changed.
 		 * If the device list has changed, we want to go through
 		 * the selection process again, in case a device that we
 		 * were previously displaying has gone away.
 		 */
 		switch (devstat_getdevs(NULL, &cur)) {
 		case -1:
 			errx(1, "%s", devstat_errbuf);
 			break;
 		case 1: {
 			int retval;
 
 			num_devices = cur.dinfo->numdevs;
 			generation = cur.dinfo->generation;
 
 			retval = devstat_selectdevs(&dev_select, &num_selected,
 					    &num_selections, &select_generation,
 					    generation, cur.dinfo->devices,
 					    num_devices, matches, num_matches,
 					    specified_devices,
 					    num_devices_specified, select_mode,
 					    maxshowdevs, 0);
 			switch (retval) {
 			case -1:
 				errx(1, "%s", devstat_errbuf);
 				break;
 			case 1:
 				printhdr(maxid, cpumask);
 				break;
 			default:
 				break;
 			}
 		}
 			/* FALLTHROUGH */
 		default:
 			break;
 		}
 
 		fill_vmmeter(&sum);
 		fill_vmtotal(&total);
 		(void)printf("%1d %1d %1d",
 		    total.t_rq - 1, total.t_dw + total.t_pw, total.t_sw);
 #define vmstat_pgtok(a) ((a) * (sum.v_page_size >> 10))
 #define	rate(x)	(((x) * rate_adj + halfuptime) / uptime)	/* round */
 		if (hflag) {
 			prthuman(total.t_avm * (u_int64_t)sum.v_page_size, 5);
 			printf(" ");
 			prthuman(total.t_free * (u_int64_t)sum.v_page_size, 5);
 			printf(" ");
 			(void)printf("%5lu ",
 			    (unsigned long)rate(sum.v_vm_faults -
 			    osum.v_vm_faults));
 		} else {
 			printf(" %7d", vmstat_pgtok(total.t_avm));
 			printf(" %7d ", vmstat_pgtok(total.t_free));
 			(void)printf("%4lu ",
 			    (unsigned long)rate(sum.v_vm_faults -
 			    osum.v_vm_faults));
 		}
 		(void)printf("%3lu ",
 		    (unsigned long)rate(sum.v_reactivated - osum.v_reactivated));
 		(void)printf("%3lu ",
 		    (unsigned long)rate(sum.v_swapin + sum.v_vnodein -
 		    (osum.v_swapin + osum.v_vnodein)));
 		(void)printf("%3lu ",
 		    (unsigned long)rate(sum.v_swapout + sum.v_vnodeout -
 		    (osum.v_swapout + osum.v_vnodeout)));
 		(void)printf("%5lu ",
 		    (unsigned long)rate(sum.v_tfree - osum.v_tfree));
 		(void)printf("%4lu ",
 		    (unsigned long)rate(sum.v_pdpages - osum.v_pdpages));
 		devstats();
 		(void)printf("%4lu %5lu %5lu",
 		    (unsigned long)rate(sum.v_intr - osum.v_intr),
 		    (unsigned long)rate(sum.v_syscall - osum.v_syscall),
 		    (unsigned long)rate(sum.v_swtch - osum.v_swtch));
 		if (Pflag)
 			pcpustats(ncpus, cpumask, maxid);
 		else
 			cpustats();
 		(void)printf("\n");
 		(void)fflush(stdout);
 		if (reps >= 0 && --reps <= 0)
 			break;
 		osum = sum;
 		/* From now on rates cover `interval' milliseconds. */
 		uptime = interval;
 		rate_adj = 1000;
 		/*
 		 * We round upward to avoid losing low-frequency events
 		 * (i.e., >= 1 per interval but < 1 per millisecond).
 		 */
 		if (interval != 1)
 			halfuptime = (uptime + 1) / 2;
 		else
 			halfuptime = 0;
 		(void)usleep(interval * 1000);
 	}
 }
 
 /*
  * Print the two-line column header.  With Pflag set, one cpu group is
  * emitted for every CPU index up to maxid whose bit is set in cpumask;
  * otherwise a single cpu group is printed.  Afterwards the terminal size is
  * re-read if a resize is pending and hdrcnt is reloaded from winlines.
  */
 static void
 printhdr(int maxid, u_long cpumask)
 {
 	int cpu, dn, num_shown;
 
 	num_shown = num_selected;
 	if (num_shown >= maxshowdevs)
 		num_shown = maxshowdevs;
 
 	/* First line: group titles. */
 	if (hflag)
 		(void)printf("procs  memory      page%*s ", 19, "");
 	else
 		(void)printf("procs     memory       page%*s ", 19, "");
 	if (num_shown == 1)
 		(void)printf("   disk");
 	else if (num_shown > 1)
 		(void)printf("   disks %*s", num_shown * 4 - 7, "");
 	(void)printf("   faults      ");
 	if (!Pflag)
 		printf("   cpu\n");
 	else {
 		for (cpu = 0; cpu <= maxid; cpu++) {
 			if ((cpumask & (1ul << cpu)) != 0)
 				printf("  cpu%d   ", cpu);
 		}
 		printf("\n");
 	}
 
 	/* Second line: individual column names. */
 	if (hflag)
 		(void)printf("r b w  avm   fre   flt  re  pi  po    fr   sr ");
 	else
 		(void)printf("r b w     avm     fre  flt  re  pi  po    fr   sr ");
 	for (dn = 0; dn < num_devices; dn++) {
 		if (!dev_select[dn].selected)
 			continue;
 		if (dev_select[dn].selected > maxshowdevs)
 			continue;
 		(void)printf("%c%c%d ", dev_select[dn].device_name[0],
 		    dev_select[dn].device_name[1],
 		    dev_select[dn].unit_number);
 	}
 	(void)printf("  in    sy    cs");
 	if (!Pflag)
 		printf(" us sy id\n");
 	else {
 		for (cpu = 0; cpu <= maxid; cpu++) {
 			if ((cpumask & (1ul << cpu)) != 0)
 				printf(" us sy id");
 		}
 		printf("\n");
 	}
 
 	if (wresized != 0)
 		doresize();
 	hdrcnt = winlines;
 }
 
 /*
  * Signal handler (installed for SIGCONT by dovmstat()): force a header to
  * be prepended to the next output line by setting hdrcnt to 1.
  * The signal number is not used.
  */
 static void
 needhdr(int dummy __unused)
 {
 
 	hdrcnt = 1;
 }
 
 /*
  * Signal handler (installed for SIGWINCH by dovmstat()).
  * When the terminal is resized, force an update of the maximum number of rows
  * printed between each header repetition (wresized makes printhdr() call
  * doresize()).  Then force a new header to be prepended to the next output.
  * The signal number is not used.
  */
 void
 needresize(int signo)
 {
 
 	wresized = 1;
 	hdrcnt = 1;
 }
 
 /*
  * Refresh the global `winlines' from the terminal attached to stdout:
  * three rows fewer than the window height, or VMSTAT_DEFAULT_LINES when the
  * window has three rows or less.  A failing ioctl other than EINTR is fatal.
  */
 void
 doresize(void)
 {
 	struct winsize ws;
 	int rv;
 
 	/* Retry for as long as the ioctl is interrupted by a signal. */
 	do {
 		rv = ioctl(fileno(stdout), TIOCGWINSZ, &ws);
 	} while (rv == -1 && errno == EINTR);
 	if (rv == -1)
 		err(1, "ioctl");
 
 	if (ws.ws_row <= 3)
 		winlines = VMSTAT_DEFAULT_LINES;
 	else
 		winlines = ws.ws_row - 3;
 
 	/*
 	 * Inhibit doresize() calls until we are rescheduled by SIGWINCH.
 	 */
 	wresized = 0;
 }
 
 #ifdef notyet
 /*
  * Disabled code (only built when `notyet' is defined): print the number of
  * page reclaims and page-ins together with their total and average times,
  * read from the X_REC, X_PGIN and X_SUM kernel symbols.
  * NOTE(review): both averages divide by a counter (v_pgrec, v_pgin) that is
  * not checked for zero -- confirm before enabling.
  */
 static void
 dotimes(void)
 {
 	unsigned int pgintime, rectime;
 
 	kread(X_REC, &rectime, sizeof(rectime));
 	kread(X_PGIN, &pgintime, sizeof(pgintime));
 	kread(X_SUM, &sum, sizeof(sum));
 	(void)printf("%u reclaims, %u total time (usec)\n",
 	    sum.v_pgrec, rectime);
 	(void)printf("average: %u usec / reclaim\n", rectime / sum.v_pgrec);
 	(void)printf("\n");
 	(void)printf("%u page ins, %u total time (msec)\n",
 	    sum.v_pgin, pgintime / 10);
 	(void)printf("average: %8.1f msec / page in\n",
 	    pgintime / (sum.v_pgin * 10.0));
 }
 #endif
 
 static long
 pct(long top, long bot)
 {
 	long ans;
 
 	if (bot == 0)
 		return(0);
 	ans = (quad_t)top * 100 / bot;
 	return (ans);
 }
 
 #define	PCT(top, bot) pct((long)(top), (long)(bot))
 
 /*
  * Print the summary report: every counter gathered by fill_vmmeter(), one
  * per line, followed by the name cache statistics (read from the
  * X_NCHSTATS symbol with kvm, or from vfs.cache.nchstats otherwise) as a
  * total and a set of percentages.
  */
 static void
 dosum(void)
 {
 	struct nchstats lnchstats;
 	size_t size;
 	long nchtotal;
 
 	fill_vmmeter(&sum);
 	(void)printf("%9u cpu context switches\n", sum.v_swtch);
 	(void)printf("%9u device interrupts\n", sum.v_intr);
 	(void)printf("%9u software interrupts\n", sum.v_soft);
 	(void)printf("%9u traps\n", sum.v_trap);
 	(void)printf("%9u system calls\n", sum.v_syscall);
 	(void)printf("%9u kernel threads created\n", sum.v_kthreads);
 	(void)printf("%9u  fork() calls\n", sum.v_forks);
 	(void)printf("%9u vfork() calls\n", sum.v_vforks);
 	(void)printf("%9u rfork() calls\n", sum.v_rforks);
 	(void)printf("%9u swap pager pageins\n", sum.v_swapin);
 	(void)printf("%9u swap pager pages paged in\n", sum.v_swappgsin);
 	(void)printf("%9u swap pager pageouts\n", sum.v_swapout);
 	(void)printf("%9u swap pager pages paged out\n", sum.v_swappgsout);
 	(void)printf("%9u vnode pager pageins\n", sum.v_vnodein);
 	(void)printf("%9u vnode pager pages paged in\n", sum.v_vnodepgsin);
 	(void)printf("%9u vnode pager pageouts\n", sum.v_vnodeout);
 	(void)printf("%9u vnode pager pages paged out\n", sum.v_vnodepgsout);
 	(void)printf("%9u page daemon wakeups\n", sum.v_pdwakeups);
 	(void)printf("%9u pages examined by the page daemon\n", sum.v_pdpages);
 	(void)printf("%9u pages reactivated\n", sum.v_reactivated);
 	(void)printf("%9u copy-on-write faults\n", sum.v_cow_faults);
 	(void)printf("%9u copy-on-write optimized faults\n", sum.v_cow_optim);
 	(void)printf("%9u zero fill pages zeroed\n", sum.v_zfod);
 	(void)printf("%9u zero fill pages prezeroed\n", sum.v_ozfod);
 	(void)printf("%9u intransit blocking page faults\n", sum.v_intrans);
 	(void)printf("%9u total VM faults taken\n", sum.v_vm_faults);
 	(void)printf("%9u page faults requiring I/O\n", sum.v_io_faults);
 	(void)printf("%9u pages affected by kernel thread creation\n",
 	    sum.v_kthreadpages);
 	(void)printf("%9u pages affected by  fork()\n", sum.v_forkpages);
 	(void)printf("%9u pages affected by vfork()\n", sum.v_vforkpages);
 	(void)printf("%9u pages affected by rfork()\n", sum.v_rforkpages);
 	(void)printf("%9u pages cached\n", sum.v_tcached);
 	(void)printf("%9u pages freed\n", sum.v_tfree);
 	(void)printf("%9u pages freed by daemon\n", sum.v_dfree);
 	(void)printf("%9u pages freed by exiting processes\n", sum.v_pfree);
 	(void)printf("%9u pages active\n", sum.v_active_count);
 	(void)printf("%9u pages inactive\n", sum.v_inactive_count);
 	(void)printf("%9u pages in VM cache\n", sum.v_cache_count);
 	(void)printf("%9u pages wired down\n", sum.v_wire_count);
 	(void)printf("%9u pages free\n", sum.v_free_count);
 	(void)printf("%9u bytes per page\n", sum.v_page_size);
 
 	/* Name cache statistics. */
 	if (kd == NULL) {
 		size = sizeof(lnchstats);
 		mysysctl("vfs.cache.nchstats", &lnchstats, &size, NULL, 0);
 		if (size != sizeof(lnchstats))
 			errx(1, "vfs.cache.nchstats size mismatch");
 	} else
 		kread(X_NCHSTATS, &lnchstats, sizeof(lnchstats));
 
 	nchtotal = lnchstats.ncs_goodhits + lnchstats.ncs_neghits +
 	    lnchstats.ncs_badhits + lnchstats.ncs_falsehits +
 	    lnchstats.ncs_miss + lnchstats.ncs_long;
 	(void)printf("%9ld total name lookups\n", nchtotal);
 	(void)printf(
 	    "%9s cache hits (%ld%% pos + %ld%% neg) system %ld%% per-directory\n",
 	    "",
 	    PCT(lnchstats.ncs_goodhits, nchtotal),
 	    PCT(lnchstats.ncs_neghits, nchtotal),
 	    PCT(lnchstats.ncs_pass2, nchtotal));
 	(void)printf("%9s deletions %ld%%, falsehits %ld%%, toolong %ld%%\n",
 	    "",
 	    PCT(lnchstats.ncs_badhits, nchtotal),
 	    PCT(lnchstats.ncs_falsehits, nchtotal),
 	    PCT(lnchstats.ncs_long, nchtotal));
 }
 
 /*
  * Print, for fork, vfork and rfork, the number of calls, the number of
  * pages affected and the average pages per call (0.00 when there were no
  * calls).
  */
 static void
 doforkst(void)
 {
 	double fork_avg, vfork_avg, rfork_avg;
 
 	fill_vmmeter(&sum);
 
 	fork_avg = vfork_avg = rfork_avg = 0.0;
 	if (sum.v_forks != 0)
 		fork_avg = (double)sum.v_forkpages / sum.v_forks;
 	if (sum.v_vforks != 0)
 		vfork_avg = (double)sum.v_vforkpages / sum.v_vforks;
 	if (sum.v_rforks != 0)
 		rfork_avg = (double)sum.v_rforkpages / sum.v_rforks;
 
 	(void)printf("%u forks, %u pages, average %.2f\n",
 	    sum.v_forks, sum.v_forkpages, fork_avg);
 	(void)printf("%u vforks, %u pages, average %.2f\n",
 	    sum.v_vforks, sum.v_vforkpages, vfork_avg);
 	(void)printf("%u rforks, %u pages, average %.2f\n",
 	    sum.v_rforks, sum.v_rforkpages, rfork_avg);
 }
 
 static void
 devstats(void)
 {
 	int dn, state;
 	long double transfers_per_second;
 	long double busy_seconds;
 	long tmp;
 
 	for (state = 0; state < CPUSTATES; ++state) {
 		tmp = cur.cp_time[state];
 		cur.cp_time[state] -= last.cp_time[state];
 		last.cp_time[state] = tmp;
 	}
 
 	busy_seconds = cur.snap_time - last.snap_time;
 
 	for (dn = 0; dn < num_devices; dn++) {
 		int di;
 
 		if ((dev_select[dn].selected == 0)
 		 || (dev_select[dn].selected > maxshowdevs))
 			continue;
 
 		di = dev_select[dn].position;
 
 		if (devstat_compute_statistics(&cur.dinfo->devices[di],
 		    &last.dinfo->devices[di], busy_seconds,
 		    DSM_TRANSFERS_PER_SECOND, &transfers_per_second,
 		    DSM_NONE) != 0)
 			errx(1, "%s", devstat_errbuf);
 
 		(void)printf("%3.0Lf ", transfers_per_second);
 	}
 }
 
 /*
  * Print a percentage rounded to no decimals in a two-character column.
  * *over counts the characters by which earlier values overflowed their
  * column: a three-or-more digit value increments it, and a later
  * single-digit value is printed without padding to pay one back.
  */
 static void
 percent(double pct, int *over)
 {
 	char text[10];
 	int len;
 
 	len = snprintf(text, sizeof(text), "%.0f", pct);
 	if (len != 1 || *over == 0)
 		printf("%2s", text);
 	else {
 		printf("%s",  text);
 		--*over;
 	}
 	if (len > 2)
 		++*over;
 }
 
 /*
  * Print the user+nice, system+interrupt and idle shares of cur.cp_time as
  * percentages of the sum over all CPUSTATES entries (all zeros when that
  * sum is zero).
  */
 static void
 cpustats(void)
 {
 	double scale, total;
 	int over, st;
 
 	total = 0;
 	for (st = 0; st < CPUSTATES; st++)
 		total += cur.cp_time[st];
 	scale = (total != 0) ? 100.0 / total : 0.0;
 
 	over = 0;
 	printf(" ");
 	percent((cur.cp_time[CP_USER] + cur.cp_time[CP_NICE]) * scale, &over);
 	printf(" ");
 	percent((cur.cp_time[CP_SYS] + cur.cp_time[CP_INTR]) * scale, &over);
 	printf(" ");
 	percent(cur.cp_time[CP_IDLE] * scale, &over);
 }
 
 /*
  * Per-CPU variant of cpustats(): for every CPU index up to maxid whose bit
  * is set in cpumask, turn its cur_cp_times slice into deltas against
  * last_cp_times and print the user+nice, system+interrupt and idle
  * percentages.  ncpus is accepted but not used.
  */
 static void
 pcpustats(int ncpus, u_long cpumask, int maxid)
 {
 	long *now, *prev, saved;
 	double scale, total;
 	int cpu, over, st;
 
 	/* devstats does this for cp_time */
 	for (cpu = 0; cpu <= maxid; cpu++) {
 		if ((cpumask & (1ul << cpu)) == 0)
 			continue;
 		now = &cur_cp_times[cpu * CPUSTATES];
 		prev = &last_cp_times[cpu * CPUSTATES];
 		for (st = 0; st < CPUSTATES; st++) {
 			saved = now[st];
 			now[st] = saved - prev[st];
 			prev[st] = saved;
 		}
 	}
 
 	over = 0;
 	for (cpu = 0; cpu <= maxid; cpu++) {
 		if ((cpumask & (1ul << cpu)) == 0)
 			continue;
 		now = &cur_cp_times[cpu * CPUSTATES];
 		total = 0;
 		for (st = 0; st < CPUSTATES; st++)
 			total += now[st];
 		scale = (total != 0) ? 100.0 / total : 0.0;
 		printf(" ");
 		percent((now[CP_USER] + now[CP_NICE]) * scale, &over);
 		printf(" ");
 		percent((now[CP_SYS] + now[CP_INTR]) * scale, &over);
 		printf(" ");
 		percent(now[CP_IDLE] * scale, &over);
 	}
 }
 
 /*
  * Read the interrupt counters into a newly allocated buffer stored in
  * *intrcnts and return the number of unsigned long entries read.
  *
  * With kvm the length comes from the X_SINTRCNT symbol and the data from
  * X_INTRCNT.  Otherwise hw.intrcnt is queried with a buffer that starts at
  * 1024 bytes and is doubled for as long as mysysctl() reports failure.
  */
 static unsigned int
 read_intrcnts(unsigned long **intrcnts)
 {
 	size_t intrcntlen;
 
 	if (kd == NULL) {
 		*intrcnts = NULL;
 		intrcntlen = 1024;
 		for (;;) {
 			*intrcnts = reallocf(*intrcnts, intrcntlen);
 			if (*intrcnts == NULL)
 				err(1, "reallocf()");
 			if (mysysctl("hw.intrcnt", *intrcnts, &intrcntlen,
 			    NULL, 0) == 0)
 				break;
 			intrcntlen *= 2;
 		}
 	} else {
 		kread(X_SINTRCNT, &intrcntlen, sizeof(intrcntlen));
 		*intrcnts = malloc(intrcntlen);
 		if (*intrcnts == NULL)
 			err(1, "malloc()");
 		kread(X_INTRCNT, *intrcnts, intrcntlen);
 	}
 
 	return (intrcntlen / sizeof(unsigned long));
 }
 
 /*
  * Print one line per interrupt source with the number of interrupts since
  * the previous snapshot and the resulting rate per second, followed by a
  * "Total" line.
  *
  * intrcnts and old_intrcnts hold nintr counters each; intrnames is a
  * sequence of NUL-terminated names walked in step with the counters.
  * Sources with an empty name are never printed and sources whose current
  * count is zero are printed only when aflag is set, but all of them count
  * towards the total.  istrnamlen is the width of the name column and
  * period_ms the length of the sampling period in milliseconds.
  */
 static void
 print_intrcnts(unsigned long *intrcnts, unsigned long *old_intrcnts,
 		char *intrnames, unsigned int nintr,
 		size_t istrnamlen, long long period_ms)
 {
 	unsigned long *intrcnt, *old_intrcnt;
 	uint64_t inttotal, old_inttotal, total_count, total_rate;
 	char *intrname;
 	unsigned int i;
 
 	/*
 	 * A period shorter than one millisecond arrives here as 0; treat it
 	 * as one millisecond rather than dividing by zero below.
 	 */
 	if (period_ms <= 0)
 		period_ms = 1;
 
 	inttotal = 0;
 	old_inttotal = 0;
 	intrname = intrnames;
 	for (i = 0, intrcnt=intrcnts, old_intrcnt=old_intrcnts; i < nintr; i++) {
 		if (intrname[0] != '\0' && (*intrcnt != 0 || aflag)) {
 			unsigned long count, rate;
 
 			count = *intrcnt - *old_intrcnt;
 			/* Events per second, rounded to nearest. */
 			rate = (count * 1000 + period_ms / 2) / period_ms;
 			(void)printf("%-*s %20lu %10lu\n", (int)istrnamlen,
 			    intrname, count, rate);
 		}
 		intrname += strlen(intrname) + 1;
 		inttotal += *intrcnt++;
 		old_inttotal += *old_intrcnt++;
 	}
 	total_count = inttotal - old_inttotal;
 	total_rate = (total_count * 1000 + period_ms / 2) / period_ms;
 	(void)printf("%-*s %20" PRIu64 " %10" PRIu64 "\n", (int)istrnamlen,
 	    "Total", total_count, total_rate);
 }
 
 /*
  * Print interrupt statistics.  The first pass shows the totals since boot;
  * every later pass shows the counts accumulated since the previous pass.
  *
  * interval is the delay between passes in milliseconds (it is handed to
  * usleep() multiplied by 1000).  reps is the number of passes; zero runs a
  * single pass and a negative value loops until the process is terminated.
  */
 static void
 dointr(unsigned int interval, int reps)
 {
 	unsigned long *intrcnts;
 	long long uptime, period_ms;
 	unsigned long *old_intrcnts = NULL;
 	size_t clen, inamlen, istrnamlen;
 	char *intrnames, *intrname;
 
 	uptime = getuptime();
 
 	/* Get the names of each interrupt source */
 	if (kd != NULL) {
 		kread(X_SINTRNAMES, &inamlen, sizeof(inamlen));
 		if ((intrnames = malloc(inamlen)) == NULL)
 			err(1, "malloc()");
 		kread(X_INTRNAMES, intrnames, inamlen);
 	} else {
 		for (intrnames = NULL, inamlen = 1024; ; inamlen *= 2) {
 			if ((intrnames = reallocf(intrnames, inamlen)) == NULL)
 				err(1, "reallocf()");
 			if (mysysctl("hw.intrnames",
 			    intrnames, &inamlen, NULL, 0) == 0)
 				break;
 		}
 	}
 
 	/* Determine the length of the longest interrupt name */
 	intrname = intrnames;
 	istrnamlen = strlen("interrupt");
 	while(*intrname != '\0') {
 		clen = strlen(intrname);
 		if (clen > istrnamlen)
 			istrnamlen = clen;
 		intrname += strlen(intrname) + 1;
 	}
 	(void)printf("%-*s %20s %10s\n", (int)istrnamlen, "interrupt", "total",
 	    "rate");
 
 	/* 
 	 * Loop reps times printing differential interrupt counts.  If reps is
 	 * zero, then run just once, printing total counts
 	 */
 	period_ms = uptime / 1000000;
 	while(1) {
 		unsigned int nintr;
 		long long old_uptime;
 
 		nintr = read_intrcnts(&intrcnts);
 		/* 
 		 * Initialize old_intrcnts to 0 for the first pass, so
 		 * print_intrcnts will print total interrupts since boot
 		 */
 		if (old_intrcnts == NULL) {
 			old_intrcnts = calloc(nintr, sizeof(unsigned long));
 			if (old_intrcnts == NULL)
 				err(1, "calloc()");
 		}
 
 		print_intrcnts(intrcnts, old_intrcnts, intrnames, nintr,
 		    istrnamlen, period_ms);
 
 		/* The snapshot just printed is the baseline of the next pass. */
 		free(old_intrcnts);
 		old_intrcnts = intrcnts;
 		if (reps >= 0 && --reps <= 0)
 			break;
 		usleep(interval * 1000);
 		old_uptime = uptime;
 		uptime = getuptime();
 		period_ms = (uptime - old_uptime) / 1000000;
 	}
 
 	/* Release the last snapshot and the name table. */
 	free(old_intrcnts);
 	free(intrnames);
 }
 
 /*
  * Print the malloc(9) type statistics obtained through libmemstat, from
  * sysctl or, when a kvm descriptor is open, from kvm: for every type with
  * allocations or a non-zero count, its name, count, memory in use,
  * number of requests and the allocation sizes set in its size mask.
  */
 static void
 domemstat_malloc(void)
 {
 	struct memory_type_list *mtlp;
 	struct memory_type *mtp;
 	int error, first, i;
 
 	mtlp = memstat_mtl_alloc();
 	if (mtlp == NULL) {
 		warn("memstat_mtl_alloc");
 		return;
 	}
 	if (kd == NULL) {
 		if (memstat_sysctl_malloc(mtlp, 0) < 0) {
 			warnx("memstat_sysctl_malloc: %s",
 			    memstat_strerror(memstat_mtl_geterror(mtlp)));
 			/* Do not leak the list on the error return. */
 			memstat_mtl_free(mtlp);
 			return;
 		}
 	} else {
 		if (memstat_kvm_malloc(mtlp, kd) < 0) {
 			error = memstat_mtl_geterror(mtlp);
 			if (error == MEMSTAT_ERROR_KVM)
 				warnx("memstat_kvm_malloc: %s",
 				    kvm_geterr(kd));
 			else
 				warnx("memstat_kvm_malloc: %s",
 				    memstat_strerror(error));
 		}
 	}
 	printf("%13s %5s %6s %7s %8s  Size(s)\n", "Type", "InUse", "MemUse",
 	    "HighUse", "Requests");
 	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
 	    mtp = memstat_mtl_next(mtp)) {
 		if (memstat_get_numallocs(mtp) == 0 &&
 		    memstat_get_count(mtp) == 0)
 			continue;
 		printf("%13s %5" PRIu64 " %5" PRIu64 "K %7s %8" PRIu64 "  ",
 		    memstat_get_name(mtp), memstat_get_count(mtp),
 		    (memstat_get_bytes(mtp) + 1023) / 1024, "-",
 		    memstat_get_numallocs(mtp));
 		first = 1;
 		for (i = 0; i < 32; i++) {
 			/*
 			 * Shift in an unsigned 64-bit type: `1 << 31' overflows
 			 * int and would sign-extend into the upper mask bits.
 			 * Bit i stands for an allocation size of 2^(i + 4).
 			 */
 			if (memstat_get_sizemask(mtp) & ((uint64_t)1 << i)) {
 				if (!first)
 					printf(",");
 				printf("%ju", (uintmax_t)1 << (i + 4));
 				first = 0;
 			}
 		}
 		printf("\n");
 	}
 	memstat_mtl_free(mtlp);
 }
 
 /*
  * Print the UMA zone statistics obtained through libmemstat, from sysctl
  * or, when a kvm descriptor is open, from kvm: one comma-separated line per
  * zone with its item size, limit, used and free counts, requests, failures
  * and sleeps.
  */
 static void
 domemstat_zone(void)
 {
 	struct memory_type_list *mtlp;
 	struct memory_type *mtp;
 	char name[MEMTYPE_MAXNAME + 1];
 	int error;
 
 	mtlp = memstat_mtl_alloc();
 	if (mtlp == NULL) {
 		warn("memstat_mtl_alloc");
 		return;
 	}
 	if (kd == NULL) {
 		if (memstat_sysctl_uma(mtlp, 0) < 0) {
 			warnx("memstat_sysctl_uma: %s",
 			    memstat_strerror(memstat_mtl_geterror(mtlp)));
 			/* Do not leak the list on the error return. */
 			memstat_mtl_free(mtlp);
 			return;
 		}
 	} else {
 		if (memstat_kvm_uma(mtlp, kd) < 0) {
 			error = memstat_mtl_geterror(mtlp);
 			if (error == MEMSTAT_ERROR_KVM)
 				warnx("memstat_kvm_uma: %s",
 				    kvm_geterr(kd));
 			else
 				warnx("memstat_kvm_uma: %s",
 				    memstat_strerror(error));
 		}
 	}
 	printf("%-20s %6s %6s %8s %8s %8s %4s %4s\n\n", "ITEM", "SIZE",
 	    "LIMIT", "USED", "FREE", "REQ", "FAIL", "SLEEP");
 	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
 	    mtp = memstat_mtl_next(mtp)) {
 		/*
 		 * Copying at most MEMTYPE_MAXNAME bytes into the one-byte
 		 * larger buffer leaves room for the ':' appended below.
 		 */
 		strlcpy(name, memstat_get_name(mtp), MEMTYPE_MAXNAME);
 		strcat(name, ":");
 		printf("%-20s %6" PRIu64 ", %6" PRIu64 ",%8" PRIu64 ",%8" PRIu64
 		    ",%8" PRIu64 ",%4" PRIu64 ",%4" PRIu64 "\n", name,
 		    memstat_get_size(mtp), memstat_get_countlimit(mtp),
 		    memstat_get_count(mtp), memstat_get_free(mtp),
 		    memstat_get_numallocs(mtp), memstat_get_failures(mtp),
 		    memstat_get_sleeps(mtp));
 	}
 	memstat_mtl_free(mtlp);
 	printf("\n");
 }
 
 /*
  * Print one line describing a VM object: resident, active and inactive
  * page counts, reference and shadow counts, a short code for the memory
  * attribute, a two-letter code for the object type, and the path.
  * Memory attributes that the platform headers do not define are compiled
  * out; unknown attributes and types print as "??".
  */
 static void
 display_object(struct kinfo_vmobject *kvo)
 {
 	const char *str;
 
 	/* The counts are cast to uintmax_t, so use the unsigned conversion. */
 	printf("%5ju ", (uintmax_t)kvo->kvo_resident);
 	printf("%5ju ", (uintmax_t)kvo->kvo_active);
 	printf("%5ju ", (uintmax_t)kvo->kvo_inactive);
 	printf("%3d ", kvo->kvo_ref_count);
 	printf("%3d ", kvo->kvo_shadow_count);
 	switch (kvo->kvo_memattr) {
 #ifdef VM_MEMATTR_UNCACHEABLE
 	case VM_MEMATTR_UNCACHEABLE:
 		str = "UC";
 		break;
 #endif
 #ifdef VM_MEMATTR_WRITE_COMBINING
 	case VM_MEMATTR_WRITE_COMBINING:
 		str = "WC";
 		break;
 #endif
 #ifdef VM_MEMATTR_WRITE_THROUGH
 	case VM_MEMATTR_WRITE_THROUGH:
 		str = "WT";
 		break;
 #endif
 #ifdef VM_MEMATTR_WRITE_PROTECTED
 	case VM_MEMATTR_WRITE_PROTECTED:
 		str = "WP";
 		break;
 #endif
 #ifdef VM_MEMATTR_WRITE_BACK
 	case VM_MEMATTR_WRITE_BACK:
 		str = "WB";
 		break;
 #endif
 #ifdef VM_MEMATTR_WEAK_UNCACHEABLE
 	case VM_MEMATTR_WEAK_UNCACHEABLE:
 		str = "UC-";
 		break;
 #endif
 #ifdef VM_MEMATTR_WB_WA
 	case VM_MEMATTR_WB_WA:
 		str = "WB";
 		break;
 #endif
 #ifdef VM_MEMATTR_NOCACHE
 	case VM_MEMATTR_NOCACHE:
 		str = "NC";
 		break;
 #endif
 #ifdef VM_MEMATTR_DEVICE
 	case VM_MEMATTR_DEVICE:
 		str = "DEV";
 		break;
 #endif
 #ifdef VM_MEMATTR_CACHEABLE
 	case VM_MEMATTR_CACHEABLE:
 		str = "C";
 		break;
 #endif
 #ifdef VM_MEMATTR_PREFETCHABLE
 	case VM_MEMATTR_PREFETCHABLE:
 		str = "PRE";
 		break;
 #endif
 	default:
 		str = "??";
 		break;
 	}
 	printf("%-3s ", str);
 	switch (kvo->kvo_type) {
 	case KVME_TYPE_NONE:
 		str = "--";
 		break;
 	case KVME_TYPE_DEFAULT:
 		str = "df";
 		break;
 	case KVME_TYPE_VNODE:
 		str = "vn";
 		break;
 	case KVME_TYPE_SWAP:
 		str = "sw";
 		break;
 	case KVME_TYPE_DEVICE:
 		str = "dv";
 		break;
 	case KVME_TYPE_PHYS:
 		str = "ph";
 		break;
 	case KVME_TYPE_DEAD:
 		str = "dd";
 		break;
 	case KVME_TYPE_SG:
 		str = "sg";
 		break;
 	case KVME_TYPE_UNKNOWN:
 	default:
 		str = "??";
 		break;
 	}
 	printf("%-2s ", str);
 	printf("%-s\n", kvo->kvo_path);
 }
 
 /*
  * Fetch the list of VM objects with kinfo_getvmobject() and print a header
  * followed by one display_object() line per entry.  A failed fetch is
  * reported with warn() and nothing else is printed.
  */
 static void
 doobjstat(void)
 {
 	struct kinfo_vmobject *objs;
 	int cnt, idx;
 
 	objs = kinfo_getvmobject(&cnt);
 	if (objs == NULL) {
 		warn("Failed to fetch VM object list");
 		return;
 	}
 
 	printf("%5s %5s %5s %3s %3s %3s %2s %s\n", "RES", "ACT", "INACT",
 	    "REF", "SHD", "CM", "TP", "PATH");
 	idx = 0;
 	while (idx < cnt) {
 		display_object(&objs[idx]);
 		idx++;
 	}
 	free(objs);
 }
 
 /*
  * kreado reads `size' bytes into addr from the kernel, starting `offset'
  * bytes past the address of the symbol with nlist index nlx.  An undefined
  * symbol or a short read terminates the program; the symbol name is
  * reported without its leading underscore.
  */
 static void
 kreado(int nlx, void *addr, size_t size, size_t offset)
 {
 	const char *sym;
 	int missing;
 
 	missing = (namelist[nlx].n_type == 0 || namelist[nlx].n_value == 0);
 	if (!missing &&
 	    (size_t)kvm_read(kd, namelist[nlx].n_value + offset, addr,
 	    size) == size)
 		return;
 
 	/* Failure: work out the printable symbol name, then report. */
 	sym = namelist[nlx].n_name;
 	if (*sym == '_')
 		++sym;
 	if (missing)
 		errx(1, "symbol %s not defined", sym);
 	errx(1, "%s: %s", sym, kvm_geterr(kd));
 }
 
 /*
  * kread reads something from the kernel, given its nlist index: kreado()
  * with an offset of zero.
  */
 static void
 kread(int nlx, void *addr, size_t size)
 {
 
 	kreado(nlx, addr, size, 0);
 }
 
 /*
  * Read the NUL-terminated string at kernel address strp, one byte at a
  * time through kvm_read(), into a freshly allocated buffer that the caller
  * owns.  Allocation and read failures terminate the program.
  */
 static char *
 kgetstr(const char *strp)
 {
 	size_t cap = 0, n = 0;
 	char *ret = NULL;
 
 	do {
 		/*
 		 * Grow before writing: `cap' always equals the number of
 		 * bytes really allocated, so ret[n] stays inside the buffer.
 		 */
 		if (n == cap) {
 			cap = (cap == 0) ? 16 : cap * 2;
 			ret = realloc(ret, cap);
 			if (ret == NULL)
 				err(1, "%s: realloc", __func__);
 		}
 		if (kvm_read(kd, (u_long)strp + n, &ret[n], 1) != 1)
 			errx(1, "%s: %s", __func__, kvm_geterr(kd));
 	} while (ret[n++] != '\0');
 	return (ret);
 }
 
 /*
  * Print the command synopsis on stderr and exit with status 1.
  */
 static void
 usage(void)
 {
 	const char *first, *second;
 
 	first =
 	    "usage: vmstat [-afHhimoPsz] [-M core [-N system]] [-c count] [-n devs]\n";
 	second =
 	    "              [-p type,if,pass] [-w wait] [disks] [wait [count]]\n";
 	(void)fprintf(stderr, "%s%s", first, second);
 	exit(1);
 }