Index: head/share/man/man9/zone.9
===================================================================
--- head/share/man/man9/zone.9	(revision 356533)
+++ head/share/man/man9/zone.9	(revision 356534)
@@ -1,606 +1,598 @@
 .\"-
 .\" Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 22, 2019
+.Dd January 8, 2020
 .Dt UMA 9
 .Os
 .Sh NAME
 .Nm UMA
 .Nd general-purpose kernel object allocator
 .Sh SYNOPSIS
 .In sys/param.h
 .In sys/queue.h
 .In vm/uma.h
 .Cd "options UMA_FIRSTTOUCH"
 .Cd "options UMA_XDOMAIN"
 .Bd -literal
 typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
 typedef void (*uma_dtor)(void *mem, int size, void *arg);
 typedef int (*uma_init)(void *mem, int size, int flags);
 typedef void (*uma_fini)(void *mem, int size);
 typedef int (*uma_import)(void *arg, void **store, int count, int domain,
     int flags);
 typedef void (*uma_release)(void *arg, void **store, int count);
 typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
     uint8_t *pflag, int wait);
 typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
 
 .Ed
 .Ft uma_zone_t
 .Fo uma_zcreate
 .Fa "char *name" "int size"
 .Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
 .Fa "int align" "uint16_t flags"
 .Fc
 .Ft uma_zone_t
 .Fo uma_zcache_create
 .Fa "char *name" "int size"
 .Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
 .Fa "uma_import zimport" "uma_release zrelease"
 .Fa "void *arg" "int flags"
 .Fc
 .Ft uma_zone_t
 .Fo uma_zsecond_create
 .Fa "char *name"
 .Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
 .Fa "uma_zone_t master"
 .Fc
 .Ft void
 .Fn uma_zdestroy "uma_zone_t zone"
 .Ft "void *"
 .Fn uma_zalloc "uma_zone_t zone" "int flags"
 .Ft "void *"
 .Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags"
 .Ft "void *"
 .Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags"
 .Ft "void *"
 .Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags"
 .Ft "void *"
 .Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags"
 .Ft void
 .Fn uma_zfree "uma_zone_t zone" "void *item"
 .Ft void
 .Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg"
 .Ft void
 .Fn uma_zfree_domain "uma_zone_t zone" "void *item" "void *arg"
 .Ft void
 .Fn uma_zfree_pcpu "uma_zone_t zone" "void *item"
 .Ft void
 .Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg"
 .Ft void
 .Fn uma_prealloc "uma_zone_t zone" "int nitems"
 .Ft void
 .Fn uma_zone_reserve "uma_zone_t zone" "int nitems"
 .Ft void
 .Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems"
 .Ft void
 .Fn uma_reclaim "int req"
 .Ft void
 .Fn uma_zone_reclaim "uma_zone_t zone" "int req"
 .Ft void
 .Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf"
 .Ft void
 .Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef"
 .Ft int
 .Fn uma_zone_set_max "uma_zone_t zone" "int nitems"
 .Ft void
 .Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems"
 .Ft int
 .Fn uma_zone_get_max "uma_zone_t zone"
 .Ft int
 .Fn uma_zone_get_cur "uma_zone_t zone"
 .Ft void
 .Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning"
 .Ft void
 .Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)"
 .Ft void
 .Fn uma_reclaim
 .In sys/sysctl.h
 .Fn SYSCTL_UMA_MAX parent nbr name access zone descr
 .Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
 .Fn SYSCTL_UMA_CUR parent nbr name access zone descr
 .Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
 .Sh DESCRIPTION
 UMA (Universal Memory Allocator) provides an efficient interface for managing
 dynamically-sized collections of items of identical size, referred to as zones.
 Zones keep track of which items are in use and which
 are not, and UMA provides functions for allocating items from a zone and
 for releasing them back, making them available for subsequent allocation requests.
 Zones maintain per-CPU caches with linear scalability on SMP
 systems as well as round-robin and first-touch policies for NUMA
 systems.
 The number of items cached per CPU is bounded, and each zone additionally
 maintains an unbounded cache of items that is used to quickly satisfy
 per-CPU cache allocation misses.
 .Pp
 Two types of zones exist: regular zones and cache zones.
 In a regular zone, items are allocated from a slab, which is one or more
 virtually contiguous memory pages that have been allocated from the kernel's
 page allocator.
 Internally, slabs are managed by a UMA keg, which is responsible for allocating
 slabs and keeping track of their usage by one or more zones.
 In typical usage, there is one keg per zone, so slabs are not shared among
 multiple zones.
 .Pp
 Normal zones import items from a keg, and release items back to that keg if
 requested.
 Cache zones do not have a keg, and instead use custom import and release
 methods.
 For example, some collections of kernel objects are statically allocated
 at boot-time, and the size of the collection does not change.
 A cache zone can be used to implement an efficient allocator for the objects in
 such a collection.
 .Pp
 The
 .Fn uma_zcreate
 and
 .Fn uma_zcache_create
 functions create a new regular zone and cache zone, respectively.
 The
 .Fn uma_zsecond_create
 function creates a regular zone which shares the keg of the zone
 specified by the
 .Fa master
 argument.
 The
 .Fa name
 argument is a text name of the zone for debugging and stats; this memory
 should not be freed until the zone has been deallocated.
 .Pp
 The
 .Fa ctor
 and
 .Fa dtor
 arguments are callback functions that are called by
 the UMA subsystem at the time of the call to
 .Fn uma_zalloc
 and
 .Fn uma_zfree
 respectively.
 Their purpose is to provide hooks for initializing or
 destroying things that need to be done at the time of the allocation
 or release of a resource.
 A good usage for the
 .Fa ctor
 and
 .Fa dtor
 callbacks might be to initialize a data structure embedded in the item,
 such as a
 .Xr queue 3
 head.
 .Pp
 The
 .Fa zinit
 and
 .Fa zfini
 arguments are used to optimize the allocation of items from the zone.
 They are called by the UMA subsystem whenever
 it needs to allocate or free items to satisfy requests or memory pressure.
 A good use for the
 .Fa zinit
 and
 .Fa zfini
 callbacks might be to
 initialize and destroy a mutex contained within an item.
 This would allow one to avoid destroying and re-initializing the mutex
 each time the item is freed and re-allocated.
 They are not called on each call to
 .Fn uma_zalloc
 and
 .Fn uma_zfree
 but rather when an item is imported into a zone's cache, and when a zone
 releases an item to the slab allocator, typically as a response to memory
 pressure.
 .Pp
 For
 .Fn uma_zcache_create ,
 the
 .Fa zimport
 and
 .Fa zrelease
 functions are called to import items into the zone and to release items
 from the zone, respectively.
 The
 .Fa zimport
 function should store pointers to items in the
 .Fa store
 array, which contains a maximum of
 .Fa count
 entries.
 The function must return the number of imported items, which may be less than
 the maximum.
 Similarly, the
 .Fa store
 parameter to the
 .Fa zrelease
 function contains an array of
 .Fa count
 pointers to items.
 The
 .Fa arg
 parameter passed to
 .Fn uma_zcache_create
 is provided to the import and release functions.
 The
 .Fa domain
 parameter to
 .Fa zimport
 specifies the requested
 .Xr numa 4
 domain for the allocation.
 It is either a NUMA domain number or the special value
 .Dv UMA_ANYDOMAIN .
 .Pp
 The
 .Fa flags
 argument of
 .Fn uma_zcreate
 and
 .Fn uma_zcache_create
 is a subset of the following flags:
 .Bl -tag -width "foo"
 .It Dv UMA_ZONE_NOFREE
 Slabs allocated to the zone's keg are never freed.
 .It Dv UMA_ZONE_NODUMP
 Pages belonging to the zone will not be included in minidumps.
 .It Dv UMA_ZONE_PCPU
 An allocation from zone would have
 .Va mp_ncpu
 shadow copies, that are privately assigned to CPUs.
 A CPU can address its private copy using base the allocation address plus
 a multiple of the current CPU ID and
 .Fn sizeof "struct pcpu" :
 .Bd -literal -offset indent
 foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
  ...
 foo_base = uma_zalloc(foo_zone, ...);
  ...
 critical_enter();
 foo_pcpu = (foo_t *)zpcpu_get(foo_base);
 /* do something with foo_pcpu */
 critical_exit();
 
 .Ed
 Note that
 .Dv M_ZERO
 cannot be used when allocating items from a PCPU zone.
 To obtain zeroed memory from a PCPU zone, use the
 .Fn uma_zalloc_pcpu
 function and its variants instead, and pass
 .Dv M_ZERO .
-.It Dv UMA_ZONE_OFFPAGE
-By default book-keeping of items within a slab is done in the slab page itself.
-This flag explicitly tells subsystem that book-keeping structure should be
-allocated separately from special internal zone.
-This flag requires either
-.Dv UMA_ZONE_VTOSLAB
-or
-.Dv UMA_ZONE_HASH ,
-since subsystem requires a mechanism to find a book-keeping structure
-to an item being freed.
-The subsystem may choose to prefer offpage book-keeping for certain zones
-implicitly.
+.It Dv UMA_ZONE_NOTOUCH
+The UMA subsystem must not directly touch (i.e., read or write) the slab memory.
+Otherwise, by default, book-keeping of items within a slab may be done in the
+slab page itself, and
+.Dv INVARIANTS
+kernels may also do use-after-free checking by accessing the slab memory.
 .It Dv UMA_ZONE_ZINIT
 The zone will have its
 .Ft uma_init
 method set to internal method that initializes a new allocated slab
 to all zeros.
 Do not mistake
 .Ft uma_init
 method with
 .Ft uma_ctor .
 A zone with
 .Dv UMA_ZONE_ZINIT
 flag would not return zeroed memory on every
 .Fn uma_zalloc .
-.It Dv UMA_ZONE_HASH
-The zone should use an internal hash table to find slab book-keeping
-structure where an allocation being freed belongs to.
-.It Dv UMA_ZONE_VTOSLAB
-The zone should use special field of
-.Vt vm_page_t
-to find slab book-keeping structure where an allocation being freed belongs to.
+.It Dv UMA_ZONE_NOTPAGE
+An allocator function will be supplied with
+.Fn uma_zone_set_allocf
+and the memory that it returns might not be kernel virtual memory backed by VM
+pages in the page array.
 .It Dv UMA_ZONE_MALLOC
 The zone is for the
 .Xr malloc 9
 subsystem.
 .It Dv UMA_ZONE_VM
 The zone is for the VM subsystem.
 .It Dv UMA_ZONE_NUMA
 The zone should use a first-touch NUMA policy rather than the round-robin
 default.
 If the
 .Dv UMA_FIRSTTOUCH
 kernel option is configured, all zones implicitly use a first-touch policy,
 and the
 .Dv UMA_ZONE_NUMA
 flag has no effect.
 The
 .Dv UMA_XDOMAIN
 kernel option, when configured, causes UMA to do the extra tracking to ensure
 that allocations from first-touch zones are always local.
 Otherwise, consumers that do not free memory on the same domain from which it
 was allocated will cause mixing in per-CPU caches.
 See
 .Xr numa 4
 for more details.
 .El
 .Pp
 Zones can be destroyed using
 .Fn uma_zdestroy ,
 freeing all memory that is cached in the zone.
 All items allocated from the zone must be freed to the zone before the zone
 may be safely destroyed.
 .Pp
 To allocate an item from a zone, simply call
 .Fn uma_zalloc
 with a pointer to that zone and set the
 .Fa flags
 argument to selected flags as documented in
 .Xr malloc 9 .
 It will return a pointer to an item if successful, or
 .Dv NULL
 in the rare case where all items in the zone are in use and the
 allocator is unable to grow the zone and
 .Dv M_NOWAIT
 is specified.
 .Pp
 Items are released back to the zone from which they were allocated by
 calling
 .Fn uma_zfree
 with a pointer to the zone and a pointer to the item.
 If
 .Fa item
 is
 .Dv NULL ,
 then
 .Fn uma_zfree
 does nothing.
 .Pp
 The variants
 .Fn uma_zalloc_arg
 and
 .Fn uma_zfree_arg
 allow callers to
 specify an argument for the
 .Dv ctor
 and
 .Dv dtor
 functions of the zone, respectively.
 The
 .Fn uma_zalloc_domain
 function allows callers to specify a fixed
 .Xr numa 4
 domain to allocate from.
 This uses a guaranteed but slow path in the allocator which reduces
 concurrency.
 The
 .Fn uma_zfree_domain
 function should be used to return memory allocated in this fashion.
 This function infers the domain from the pointer and does not require it as an
 argument.
 .Pp
 The
 .Fn uma_zone_prealloc
 function allocates slabs for the requested number of items, typically following
 the initial creation of a zone.
 Subsequent allocations from the zone will be satisfied using the pre-allocated
 slabs.
 Note that slab allocation is performed with the
 .Dv M_WAITOK
 flag, so
 .Fn uma_zone_prealloc
 may sleep.
 .Pp
 The
 .Fn uma_zone_reserve
 function sets the number of reserved items for the zone.
 .Fn uma_zalloc
 and variants will ensure that the zone contains at least the reserved number
 of free items.
 Reserved items may be allocated by specifying
 .Dv M_USE_RESERVE
 in the allocation request flags.
 .Fn uma_zone_reserve
 does not perform any pre-allocation by itself.
 .Pp
 The
 .Fn uma_zone_reserve_kva
 function pre-allocates kernel virtual address space for the requested
 number of items.
 Subsequent allocations from the zone will be satisfied using the pre-allocated
 address space.
 Note that unlike
 .Fn uma_zone_reserve ,
 .Fn uma_zone_reserve_kva
 does not restrict the use of the pre-allocation to
 .Dv M_USE_RESERVE
 requests.
 .Pp
 The
 .Fn uma_reclaim
 and
 .Fn uma_zone_reclaim
 functions reclaim cached items from UMA zones, releasing unused memory.
 The
 .Fn uma_reclaim
 function reclaims items from all regular zones, while
 .Fn uma_zone_reclaim
 reclaims items only from the specified zone.
 The
 .Fa req
 parameter must be one of three values which specify how aggressively
 items are to be reclaimed:
 .Bl -tag -width indent
 .It Dv UMA_RECLAIM_TRIM
 Reclaim items only in excess of the zone's estimated working set size.
 The working set size is periodically updated and tracks the recent history
 of the zone's usage.
 .It Dv UMA_RECLAIM_DRAIN
 Reclaim all items from the unbounded cache.
 Free items in the per-CPU caches are left alone.
 .It Dv UMA_RECLAIM_DRAIN_CPU
 Reclaim all cached items.
 .El
 .Pp
 The
 .Fn uma_zone_set_allocf
 and
 .Fn uma_zone_set_freef
 functions allow a zone's default slab allocation and free functions to be
 overridden.
 This is useful if the zone's items have special memory allocation constraints.
 For example, if multi-page objects are required to be physically contiguous,
 an
 .Fa allocf
 function which requests contiguous memory from the kernel's page allocator
 may be used.
 .Pp
 The
 .Fn uma_zone_set_max
 function limits the number of items
 .Pq and therefore memory
 that can be allocated to
 .Fa zone .
 The
 .Fa nitems
 argument specifies the requested upper limit number of items.
 The effective limit is returned to the caller, as it may end up being higher
 than requested due to the implementation rounding up to ensure all memory pages
 allocated to the zone are utilised to capacity.
 The limit applies to the total number of items in the zone, which includes
 allocated items, free items and free items in the per-cpu caches.
 On systems with more than one CPU it may not be possible to allocate
 the specified number of items even when there is no shortage of memory,
 because all of the remaining free items may be in the caches of the
 other CPUs when the limit is hit.
 .Pp
 The
 .Fn uma_zone_set_maxcache
 function limits the number of free items which may be cached in the zone.
 This limit applies to both the per-CPU caches and the cache of free buckets.
 .Pp
 The
 .Fn uma_zone_get_max
 function returns the effective upper limit number of items for a zone.
 .Pp
 The
 .Fn uma_zone_get_cur
 function returns an approximation of the number of items currently allocated
 from the zone.
 The returned value is approximate because appropriate synchronisation to
 determine an exact value is not performed by the implementation.
 This ensures low overhead at the expense of potentially stale data being used
 in the calculation.
 .Pp
 The
 .Fn uma_zone_set_warning
 function sets a warning that will be printed on the system console when the
 given zone becomes full and fails to allocate an item.
 The warning will be printed no more often than every five minutes.
 Warnings can be turned off globally by setting the
 .Va vm.zone_warnings
 sysctl tunable to
 .Va 0 .
 .Pp
 The
 .Fn uma_zone_set_maxaction
 function sets a function that will be called when the given zone becomes full
 and fails to allocate an item.
 The function will be called with the zone locked.
 Also, the function
 that called the allocation function may have held additional locks.
 Therefore,
 this function should do very little work (similar to a signal handler).
 .Pp
 The
 .Fn SYSCTL_UMA_MAX parent nbr name access zone descr
 macro declares a static
 .Xr sysctl 9
 oid that exports the effective upper limit number of items for a zone.
 The
 .Fa zone
 argument should be a pointer to
 .Vt uma_zone_t .
 A read of the oid returns value obtained through
 .Fn uma_zone_get_max .
 A write to the oid sets new value via
 .Fn uma_zone_set_max .
 The
 .Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
 macro is provided to create this type of oid dynamically.
 .Pp
 The
 .Fn SYSCTL_UMA_CUR parent nbr name access zone descr
 macro declares a static read-only
 .Xr sysctl 9
 oid that exports the approximate current occupancy of the zone.
 The
 .Fa zone
 argument should be a pointer to
 .Vt uma_zone_t .
 A read of the oid returns value obtained through
 .Fn uma_zone_get_cur .
 The
 .Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name zone descr
 macro is provided to create this type of oid dynamically.
 .Sh IMPLEMENTATION NOTES
 The memory that these allocation calls return is not executable.
 The
 .Fn uma_zalloc
 function does not support the
 .Dv M_EXEC
 flag to allocate executable memory.
 Not all platforms enforce a distinction between executable and
 non-executable memory.
 .Sh SEE ALSO
 .Xr numa 4 ,
 .Xr vmstat 8 ,
 .Xr malloc 9
 .Rs
 .%A Jeff Bonwick
 .%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator"
 .%D 1994
 .Re
 .Sh HISTORY
 The zone allocator first appeared in
 .Fx 3.0 .
 It was radically changed in
 .Fx 5.0
 to function as a slab allocator.
 .Sh AUTHORS
 .An -nosplit
 The zone allocator was written by
 .An John S. Dyson .
 The zone allocator was rewritten in large parts by
 .An Jeff Roberson Aq Mt jeff@FreeBSD.org
 to function as a slab allocator.
 .Pp
 This manual page was written by
 .An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .
 Changes for UMA by
 .An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org .
Index: head/sys/arm/arm/busdma_machdep-v6.c
===================================================================
--- head/sys/arm/arm/busdma_machdep-v6.c	(revision 356533)
+++ head/sys/arm/arm/busdma_machdep-v6.c	(revision 356534)
@@ -1,1784 +1,1784 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2012-2015 Ian Lepore
  * Copyright (c) 2010 Mark Tinguely
  * Copyright (c) 2004 Olivier Houchard
  * Copyright (c) 2002 Peter Grehan
  * Copyright (c) 1997, 1998 Justin T. Gibbs.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *  From i386/busdma_machdep.c 191438 2009-04-23 20:24:19Z jhb
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/busdma_bufalloc.h>
 #include <sys/counter.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/memdesc.h>
 #include <sys/proc.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_page.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 
 #define	BUSDMA_DCACHE_ALIGN	cpuinfo.dcache_line_size
 #define	BUSDMA_DCACHE_MASK	cpuinfo.dcache_line_mask
 
 #define	MAX_BPAGES		64
 #define	MAX_DMA_SEGMENTS	4096
 #define	BUS_DMA_EXCL_BOUNCE	BUS_DMA_BUS2
 #define	BUS_DMA_ALIGN_BOUNCE	BUS_DMA_BUS3
 #define	BUS_DMA_COULD_BOUNCE	(BUS_DMA_EXCL_BOUNCE | BUS_DMA_ALIGN_BOUNCE)
 #define	BUS_DMA_MIN_ALLOC_COMP	BUS_DMA_BUS4
 
 struct bounce_zone;
 
 struct bus_dma_tag {
 	bus_dma_tag_t		parent;
 	bus_size_t		alignment;
 	bus_addr_t		boundary;
 	bus_addr_t		lowaddr;
 	bus_addr_t		highaddr;
 	bus_dma_filter_t	*filter;
 	void			*filterarg;
 	bus_size_t		maxsize;
 	u_int			nsegments;
 	bus_size_t		maxsegsz;
 	int			flags;
 	int			ref_count;
 	int			map_count;
 	bus_dma_lock_t		*lockfunc;
 	void			*lockfuncarg;
 	struct bounce_zone	*bounce_zone;
 };
 
 struct bounce_page {
 	vm_offset_t	vaddr;		/* kva of bounce buffer */
 	bus_addr_t	busaddr;	/* Physical address */
 	vm_offset_t	datavaddr;	/* kva of client data */
 	vm_page_t	datapage;	/* physical page of client data */
 	vm_offset_t	dataoffs;	/* page offset of client data */
 	bus_size_t	datacount;	/* client data count */
 	STAILQ_ENTRY(bounce_page) links;
 };
 
 struct sync_list {
 	vm_offset_t	vaddr;		/* kva of client data */
 	bus_addr_t	paddr;		/* physical address */
 	vm_page_t	pages;		/* starting page of client data */
 	bus_size_t	datacount;	/* client data count */
 };
 
 int busdma_swi_pending;
 
 struct bounce_zone {
 	STAILQ_ENTRY(bounce_zone) links;
 	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
 	int		total_bpages;
 	int		free_bpages;
 	int		reserved_bpages;
 	int		active_bpages;
 	int		total_bounced;
 	int		total_deferred;
 	int		map_count;
 	bus_size_t	alignment;
 	bus_addr_t	lowaddr;
 	char		zoneid[8];
 	char		lowaddrid[20];
 	struct sysctl_ctx_list sysctl_tree;
 	struct sysctl_oid *sysctl_tree_top;
 };
 
 static struct mtx bounce_lock;
 static int total_bpages;
 static int busdma_zonecount;
 static uint32_t tags_total;
 static uint32_t maps_total;
 static uint32_t maps_dmamem;
 static uint32_t maps_coherent;
 static counter_u64_t maploads_total;
 static counter_u64_t maploads_bounced;
 static counter_u64_t maploads_coherent;
 static counter_u64_t maploads_dmamem;
 static counter_u64_t maploads_mbuf;
 static counter_u64_t maploads_physmem;
 
 static STAILQ_HEAD(, bounce_zone) bounce_zone_list;
 
 SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0,
    "Number of active tags");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0,
    "Number of active maps");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0,
    "Number of active maps for bus_dmamem_alloc buffers");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0,
    "Number of active maps with BUS_DMA_COHERENT flag set");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD,
     &maploads_total, "Number of load operations performed");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD,
     &maploads_bounced, "Number of load operations that used bounce buffers");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD,
     &maploads_dmamem, "Number of load operations on BUS_DMA_COHERENT memory");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD,
     &maploads_dmamem, "Number of load operations on bus_dmamem_alloc buffers");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD,
     &maploads_mbuf, "Number of load operations for mbufs");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD,
     &maploads_physmem, "Number of load operations on physical buffers");
 SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
    "Total bounce pages");
 
 struct bus_dmamap {
 	struct bp_list		bpages;
 	int			pagesneeded;
 	int			pagesreserved;
 	bus_dma_tag_t		dmat;
 	struct memdesc		mem;
 	bus_dmamap_callback_t	*callback;
 	void			*callback_arg;
 	int			flags;
 #define	DMAMAP_COHERENT		(1 << 0)
 #define	DMAMAP_DMAMEM_ALLOC	(1 << 1)
 #define	DMAMAP_MBUF		(1 << 2)
 	STAILQ_ENTRY(bus_dmamap) links;
 	bus_dma_segment_t	*segments;
 	int			sync_count;
 	struct sync_list	slist[];
 };
 
 static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
 static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
 
 static void init_bounce_pages(void *dummy);
 static int alloc_bounce_zone(bus_dma_tag_t dmat);
 static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
 static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     int commit);
 static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_offset_t vaddr, bus_addr_t addr, bus_size_t size);
 static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
 static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap,
     bus_dmamap_t map, void *buf, bus_size_t buflen, int flags);
 static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_paddr_t buf, bus_size_t buflen, int flags);
 static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     int flags);
 static void dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size);
 static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op);
 
 static busdma_bufalloc_t coherent_allocator;	/* Cache of coherent buffers */
 static busdma_bufalloc_t standard_allocator;	/* Cache of standard buffers */
 
 MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata");
 MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages");
 
 static void
 busdma_init(void *dummy)
 {
 	int uma_flags;
 
 	maploads_total    = counter_u64_alloc(M_WAITOK);
 	maploads_bounced  = counter_u64_alloc(M_WAITOK);
 	maploads_coherent = counter_u64_alloc(M_WAITOK);
 	maploads_dmamem   = counter_u64_alloc(M_WAITOK);
 	maploads_mbuf     = counter_u64_alloc(M_WAITOK);
 	maploads_physmem  = counter_u64_alloc(M_WAITOK);
 
 	uma_flags = 0;
 
 	/* Create a cache of buffers in standard (cacheable) memory. */
 	standard_allocator = busdma_bufalloc_create("buffer",
 	    BUSDMA_DCACHE_ALIGN,/* minimum_alignment */
 	    NULL,		/* uma_alloc func */
 	    NULL,		/* uma_free func */
 	    uma_flags);		/* uma_zcreate_flags */
 
 #ifdef INVARIANTS
 	/*
 	 * Force UMA zone to allocate service structures like
 	 * slabs using own allocator. uma_debug code performs
 	 * atomic ops on uma_slab_t fields and safety of this
 	 * operation is not guaranteed for write-back caches
 	 */
-	uma_flags = UMA_ZONE_OFFPAGE;
+	uma_flags = UMA_ZONE_NOTOUCH;
 #endif
 	/*
 	 * Create a cache of buffers in uncacheable memory, to implement the
 	 * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag.
 	 */
 	coherent_allocator = busdma_bufalloc_create("coherent",
 	    BUSDMA_DCACHE_ALIGN,/* minimum_alignment */
 	    busdma_bufalloc_alloc_uncacheable,
 	    busdma_bufalloc_free_uncacheable,
 	    uma_flags);	/* uma_zcreate_flags */
 }
 
 /*
  * This init historically used SI_SUB_VM, but now the init code requires
  * malloc(9) using M_BUSDMA memory and the pcpu zones for counter(9), which get
  * set up by SI_SUB_KMEM and SI_ORDER_LAST, so we'll go right after that by
  * using SI_SUB_KMEM+1.
  */
 SYSINIT(busdma, SI_SUB_KMEM+1, SI_ORDER_FIRST, busdma_init, NULL);
 
 /*
  * This routine checks the exclusion zone constraints from a tag against the
  * physical RAM available on the machine.  If a tag specifies an exclusion zone
  * but there's no RAM in that zone, then we avoid allocating resources to bounce
  * a request, and we can use any memory allocator (as opposed to needing
  * kmem_alloc_contig() just because it can allocate pages in an address range).
  *
  * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the
  * same value on 32-bit architectures) as their lowaddr constraint, and we can't
  * possibly have RAM at an address higher than the highest address we can
  * express, so we take a fast out.
  */
 static int
 exclusion_bounce_check(vm_offset_t lowaddr, vm_offset_t highaddr)
 {
 	int i;
 
 	if (lowaddr >= BUS_SPACE_MAXADDR)
 		return (0);
 
 	for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) {
 		if ((lowaddr >= phys_avail[i] && lowaddr < phys_avail[i + 1]) ||
 		    (lowaddr < phys_avail[i] && highaddr >= phys_avail[i]))
 			return (1);
 	}
 	return (0);
 }
 
 /*
  * Return true if the tag has an exclusion zone that could lead to bouncing.
  */
 static __inline int
 exclusion_bounce(bus_dma_tag_t dmat)
 {
 
 	return (dmat->flags & BUS_DMA_EXCL_BOUNCE);
 }
 
 /*
  * Return true if the given address does not fall on the alignment boundary.
  */
 static __inline int
 alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr)
 {
 
 	return (addr & (dmat->alignment - 1));
 }
 
 /*
  * Decide whether a DMA has to bounce because its start address or its size
  * is not a multiple of the cacheline size, which would otherwise require a
  * partial cacheline flush.
  *
  * Three kinds of maps never bounce for this reason:
  *  - COHERENT memory, which doesn't trigger cacheline flushes;
  *  - memory from bus_dmamem_alloc(), which is always aligned to cacheline
  *    boundaries and must not be touched by the CPU (or another DMA engine)
  *    while DMA is in progress, so flushing whole cachelines is always safe
  *    even for a transfer that covers only part of the buffer;
  *  - mbufs, which follow the same rules as a buffer we allocated.
  */
 static __inline int
 cacheline_bounce(bus_dmamap_t map, bus_addr_t addr, bus_size_t size)
 {
 
 	if ((map->flags &
 	    (DMAMAP_DMAMEM_ALLOC | DMAMAP_COHERENT | DMAMAP_MBUF)) == 0)
 		return ((size | addr) & BUSDMA_DCACHE_MASK);
 	return (0);
 }
 
 /*
  * Quick check: could the DMA described by addr and size need bouncing?
  *
  * A nonzero answer only means the caller has to do the more expensive
  * page-by-page scan for alignment and exclusion bounces.
  *
  * The addr argument may be a virtual or a physical address.  Only the
  * low-order bits are examined, and those are identical in both address
  * spaces because the alignment of a generic buffer is limited to the page
  * size.
  *
  * Buffers allocated by bus_dmamem_alloc() never need bouncing; they always
  * comply with the required rules (alignment, boundary, and address range).
  */
 static __inline int
 might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr,
     bus_size_t size)
 {
 
 	KASSERT(map->flags & DMAMAP_DMAMEM_ALLOC ||
 	    dmat->alignment <= PAGE_SIZE,
 	    ("%s: unsupported alignment (0x%08lx) for buffer not "
 	    "allocated by bus_dmamem_alloc()",
 	    __func__, dmat->alignment));
 
 	/* Memory we allocated ourselves is exempt from every check. */
 	if (map->flags & DMAMAP_DMAMEM_ALLOC)
 		return (0);
 
 	return ((dmat->flags & BUS_DMA_EXCL_BOUNCE) ||
 	    alignment_bounce(dmat, addr) ||
 	    cacheline_bounce(map, addr, size));
 }
 
 /*
  * Decide whether the DMA described by paddr and size has to bounce.
  *
  * A bounce is required when the DMA doesn't begin and end on cacheline
  * boundaries, when it doesn't begin on an alignment boundary, or when paddr
  * falls within the exclusion zone of any tag in the ancestry chain.
  *
  * Returns 1 if the DMA must bounce and 0 otherwise.
  */
 static int
 must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
     bus_size_t size)
 {
 	bus_dma_tag_t tag;
 
 	/*
 	 * The tag already contains its ancestors' alignment restrictions, so
 	 * the alignment test is done once here rather than per ancestor.
 	 */
 	if (cacheline_bounce(map, paddr, size) ||
 	    alignment_bounce(dmat, paddr))
 		return (1);
 
 	/*
 	 * Each tag's exclusion zone is a superset of its own and all of its
 	 * ancestors' exclusions.  The zone of every tag up the chain is still
 	 * compared individually, because the busdma rules say a filter
 	 * function is called only when the address lies within the
 	 * low-highaddr range of the tag that the filter belongs to.  Without
 	 * a filter, any address inside the zone bounces.
 	 */
 	for (tag = dmat; tag != NULL && exclusion_bounce(tag);
 	    tag = tag->parent) {
 		if (paddr < tag->lowaddr || paddr > tag->highaddr)
 			continue;
 		if (tag->filter == NULL ||
 		    tag->filter(tag->filterarg, paddr) != 0)
 			return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Helper that lets busdma take and drop a driver's mutex (during busdma_swi,
  * for example).  arg is the mutex to operate on.  Drivers without a lock of
  * their own should specify &Giant to dmat->lockfuncarg; drivers with a
  * non-mutex locking scheme don't have to use this function at all.
  *
  * Any op other than BUS_DMA_LOCK or BUS_DMA_UNLOCK panics.
  */
 void
 busdma_lock_mutex(void *arg, bus_dma_lock_op_t op)
 {
 	struct mtx *lock;
 
 	lock = (struct mtx *)arg;
 	if (op == BUS_DMA_LOCK) {
 		mtx_lock(lock);
 	} else if (op == BUS_DMA_UNLOCK) {
 		mtx_unlock(lock);
 	} else {
 		panic("Unknown operation 0x%x for busdma_lock_mutex!", op);
 	}
 }
 
 /*
  * dflt_lock should never get called.  It gets put into the dma tag when
  * lockfunc == NULL, which is only valid if the maps that are associated
  * with the tag are meant to never be deferred.  Both arguments are ignored;
  * reaching this function always panics.
  * XXX Should have a way to identify which driver is responsible here.
  */
 static void
 dflt_lock(void *arg, bus_dma_lock_op_t op)
 {
 
 	panic("driver error: busdma dflt_lock called");
 }
 
 /*
  * Allocate a device specific dma_tag.
  *
  * The new tag records the caller's constraints, combined with whatever the
  * parent tag (if any) already imposes.  lowaddr and highaddr are rounded to
  * the last byte of the page they fall in.  When BUS_DMA_ALLOCNOW is given, a
  * bounce zone and an initial pool of bounce pages are set up right away.
  *
  * On success 0 is returned and *dmat points at the new tag.  On failure
  * *dmat is NULL, any reference taken on the parent is dropped again, and
  * either ENOMEM or the error from alloc_bounce_zone() is returned.
  */
 int
 bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, bus_dma_tag_t *dmat)
 {
 	bus_dma_tag_t newtag;
 	int error = 0;
 	int parent_refd = 0;	/* nonzero once parent->ref_count was bumped */
 
 	/* Basic sanity checking. */
 	KASSERT(boundary == 0 || powerof2(boundary),
 	    ("dma tag boundary %lu, must be a power of 2", boundary));
 	KASSERT(boundary == 0 || boundary >= maxsegsz,
 	    ("dma tag boundary %lu is < maxsegsz %lu\n", boundary, maxsegsz));
 	KASSERT(alignment != 0 && powerof2(alignment),
 	    ("dma tag alignment %lu, must be non-zero power of 2", alignment));
 	KASSERT(maxsegsz != 0, ("dma tag maxsegsz must not be zero"));
 
 	/* Return a NULL tag on failure */
 	*dmat = NULL;
 
 	newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA,
 	    M_ZERO | M_NOWAIT);
 	if (newtag == NULL) {
 		/* Trace the error that is actually returned. */
 		CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 		    __func__, newtag, 0, ENOMEM);
 		return (ENOMEM);
 	}
 
 	newtag->parent = parent;
 	newtag->alignment = alignment;
 	newtag->boundary = boundary;
 	newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1);
 	newtag->highaddr = trunc_page((vm_paddr_t)highaddr) +
 	    (PAGE_SIZE - 1);
 	newtag->filter = filter;
 	newtag->filterarg = filterarg;
 	newtag->maxsize = maxsize;
 	newtag->nsegments = nsegments;
 	newtag->maxsegsz = maxsegsz;
 	newtag->flags = flags;
 	newtag->ref_count = 1; /* Count ourself */
 	newtag->map_count = 0;
 	if (lockfunc != NULL) {
 		newtag->lockfunc = lockfunc;
 		newtag->lockfuncarg = lockfuncarg;
 	} else {
 		newtag->lockfunc = dflt_lock;
 		newtag->lockfuncarg = NULL;
 	}
 
 	/* Take into account any restrictions imposed by our parent tag */
 	if (parent != NULL) {
 		newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr);
 		newtag->highaddr = MAX(parent->highaddr, newtag->highaddr);
 		newtag->alignment = MAX(parent->alignment, newtag->alignment);
 		newtag->flags |= parent->flags & BUS_DMA_COULD_BOUNCE;
 		newtag->flags |= parent->flags & BUS_DMA_COHERENT;
 		if (newtag->boundary == 0)
 			newtag->boundary = parent->boundary;
 		else if (parent->boundary != 0)
 			newtag->boundary = MIN(parent->boundary,
 					       newtag->boundary);
 		if (newtag->filter == NULL) {
 			/*
 			 * Short circuit to looking at our parent directly
 			 * since we have encapsulated all of its information
 			 */
 			newtag->filter = parent->filter;
 			newtag->filterarg = parent->filterarg;
 			newtag->parent = parent->parent;
 		}
 		if (newtag->parent != NULL) {
 			atomic_add_int(&parent->ref_count, 1);
 			parent_refd = 1;
 		}
 	}
 
 	if (exclusion_bounce_check(newtag->lowaddr, newtag->highaddr))
 		newtag->flags |= BUS_DMA_EXCL_BOUNCE;
 	if (alignment_bounce(newtag, 1))
 		newtag->flags |= BUS_DMA_ALIGN_BOUNCE;
 
 	/*
 	 * Any request can auto-bounce due to cacheline alignment, in addition
 	 * to any alignment or boundary specifications in the tag, so if the
 	 * ALLOCNOW flag is set, there's always work to do.
 	 */
 	if ((flags & BUS_DMA_ALLOCNOW) != 0) {
 		struct bounce_zone *bz;
 		/*
 		 * Round size up to a full page, and add one more page because
 		 * there can always be one more boundary crossing than the
 		 * number of pages in a transfer.
 		 */
 		maxsize = roundup2(maxsize, PAGE_SIZE) + PAGE_SIZE;
 
 		/*
 		 * A failure here falls through to the common error handling
 		 * below so that the parent reference is released and the
 		 * outcome is traced like every other failure.
 		 */
 		error = alloc_bounce_zone(newtag);
 		if (error == 0) {
 			bz = newtag->bounce_zone;
 
 			if (ptoa(bz->total_bpages) < maxsize) {
 				int pages;
 
 				pages = atop(maxsize) - bz->total_bpages;
 
 				/* Add pages to our bounce pool */
 				if (alloc_bounce_pages(newtag, pages) < pages)
 					error = ENOMEM;
 			}
 			/* Performed initial allocation */
 			newtag->flags |= BUS_DMA_MIN_ALLOC_COMP;
 		}
 	} else
 		newtag->bounce_zone = NULL;
 
 	if (error != 0) {
 		/* Undo the reference taken on the parent above. */
 		if (parent_refd)
 			atomic_subtract_int(&parent->ref_count, 1);
 		free(newtag, M_BUSDMA);
 		/* Don't let the trace below touch the freed tag. */
 		newtag = NULL;
 	} else {
 		atomic_add_32(&tags_total, 1);
 		*dmat = newtag;
 	}
 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 	    __func__, newtag, (newtag != NULL ? newtag->flags : 0), error);
 	return (error);
 }
 
 /*
  * Fill a tag template with default values and record the given parent in
  * it.  The defaults are: alignment 1, boundary 0, lowaddr and highaddr at
  * BUS_SPACE_MAXADDR, maxsize and maxsegsize at BUS_SPACE_MAXSIZE, nsegments
  * at BUS_SPACE_UNRESTRICTED, no lock function and no flags.
  * A NULL template is silently ignored.
  */
 void
 bus_dma_template_init(bus_dma_tag_template_t *t, bus_dma_tag_t parent)
 {
 
 	if (t == NULL)
 		return;
 
 	t->parent = parent;
 
 	/* Address and alignment constraints. */
 	t->alignment = 1;
 	t->boundary = 0;
 	t->highaddr = BUS_SPACE_MAXADDR;
 	t->lowaddr = BUS_SPACE_MAXADDR;
 
 	/* Size and segment limits. */
 	t->maxsegsize = BUS_SPACE_MAXSIZE;
 	t->maxsize = BUS_SPACE_MAXSIZE;
 	t->nsegments = BUS_SPACE_UNRESTRICTED;
 
 	/* Locking and flags. */
 	t->flags = 0;
 	t->lockfunc = NULL;
 	t->lockfuncarg = NULL;
 }
 
 /*
  * Create a dma tag from a template by handing the template's fields to
  * bus_dma_tag_create().  No filter function or filter argument is passed.
  * Returns EINVAL if either argument is NULL, otherwise whatever
  * bus_dma_tag_create() returns.
  */
 int
 bus_dma_template_tag(bus_dma_tag_template_t *t, bus_dma_tag_t *dmat)
 {
 	int error;
 
 	if (t != NULL && dmat != NULL) {
 		error = bus_dma_tag_create(t->parent, t->alignment,
 		    t->boundary, t->lowaddr, t->highaddr, NULL, NULL,
 		    t->maxsize, t->nsegments, t->maxsegsize, t->flags,
 		    t->lockfunc, t->lockfuncarg, dmat);
 	} else {
 		error = EINVAL;
 	}
 	return (error);
 }
 
 /*
  * Copy the settings of an existing dma tag into a template.  Nothing is
  * done when either argument is NULL.
  */
 void
 bus_dma_template_clone(bus_dma_tag_template_t *t, bus_dma_tag_t dmat)
 {
 
 	if (t != NULL && dmat != NULL) {
 		t->parent = dmat->parent;
 
 		/* Address and alignment constraints. */
 		t->lowaddr = dmat->lowaddr;
 		t->highaddr = dmat->highaddr;
 		t->alignment = dmat->alignment;
 		t->boundary = dmat->boundary;
 
 		/* Size and segment limits. */
 		t->maxsize = dmat->maxsize;
 		t->maxsegsize = dmat->maxsegsz;
 		t->nsegments = dmat->nsegments;
 
 		/* Flags and locking. */
 		t->flags = dmat->flags;
 		t->lockfunc = dmat->lockfunc;
 		t->lockfuncarg = dmat->lockfuncarg;
 	}
 }
 
 /*
  * Set the domain of a dma tag.  This implementation ignores both arguments
  * and always reports success.
  */
 int
 bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain)
 {
 
 	return (0);
 }
 
 /*
  * Drop a reference on a dma tag.  When the count reaches zero the tag is
  * freed and the reference it held on its parent is dropped in turn, and so
  * on up the chain.  Returns EBUSY if the tag still has maps, 0 otherwise
  * (including for a NULL tag).
  */
 int
 bus_dma_tag_destroy(bus_dma_tag_t dmat)
 {
 	bus_dma_tag_t orig_tag, next;
 	int error;
 
 	orig_tag = dmat;
 	error = 0;
 
 	if (dmat != NULL && dmat->map_count != 0) {
 		error = EBUSY;
 	} else {
 		for (; dmat != NULL; dmat = next) {
 			/* Fetch the parent before the tag can go away. */
 			next = dmat->parent;
 			atomic_subtract_int(&dmat->ref_count, 1);
 			if (dmat->ref_count != 0)
 				break;
 			/*
 			 * That was the last reference: free the tag and go
 			 * on to release its reference on the parent.
 			 */
 			atomic_subtract_32(&tags_total, 1);
 			free(dmat, M_BUSDMA);
 		}
 	}
 
 	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, orig_tag, error);
 	return (error);
 }
 
 /*
  * Make sure the tag has a bounce zone, top up the zone's pool of bounce
  * pages if needed, and count the new map against the zone.  Returns 0 on
  * success, ENOMEM if not enough bounce pages could be added, or the error
  * from alloc_bounce_zone().
  */
 static int
 allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t mapp)
 {
 	struct bounce_zone *zone;
 	int error, limit, npages, want_more;
 
 	if (dmat->bounce_zone == NULL) {
 		error = alloc_bounce_zone(dmat);
 		if (error != 0)
 			return (error);
 	}
 	zone = dmat->bounce_zone;
 
 	/* Initialize the new map */
 	STAILQ_INIT(&(mapp->bpages));
 
 	/*
 	 * Attempt to add pages to our pool on a per-instance basis up to a sane
 	 * limit.  Even if the tag isn't flagged as COULD_BOUNCE due to
 	 * alignment and boundary constraints, it could still auto-bounce due to
 	 * cacheline alignment, which requires at most two bounce pages.
 	 */
 	if (dmat->flags & BUS_DMA_COULD_BOUNCE)
 		limit = MAX_BPAGES;
 	else
 		limit = 2 * zone->map_count;
 
 	want_more = (dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 ||
 	    (zone->map_count > 0 && zone->total_bpages < limit);
 	if (want_more) {
 		/* Enough for maxsize plus one, capped by the limit, at least 2. */
 		npages = atop(roundup2(dmat->maxsize, PAGE_SIZE)) + 1;
 		if (limit - zone->total_bpages < npages)
 			npages = limit - zone->total_bpages;
 		if (npages < 2)
 			npages = 2;
 		if (alloc_bounce_pages(dmat, npages) < npages)
 			return (ENOMEM);
 
 		if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0)
 			dmat->flags |= BUS_DMA_MIN_ALLOC_COMP;
 	}
 	zone->map_count++;
 	return (0);
 }
 
 /*
  * Allocate and zero a map for the given tag using the malloc flags in
  * mflags.  Returns the new map, or NULL if the allocation fails.
  */
 static bus_dmamap_t
 allocate_map(bus_dma_tag_t dmat, int mflags)
 {
 	bus_dmamap_t newmap;
 	int hdrbytes, segbytes;
 
 	KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS,
 	   ("cannot allocate %u dma segments (max is %u)",
 	    dmat->nsegments, MAX_DMA_SEGMENTS));
 
 	/*
 	 * One allocation holds three things back to back: the map structure,
 	 * its trailing variable-sized array of sync_list entries, and after
 	 * that the array of bus_dma_segments.
 	 */
 	hdrbytes = sizeof(*newmap) + sizeof(struct sync_list) * dmat->nsegments;
 	segbytes = sizeof(struct bus_dma_segment) * dmat->nsegments;
 	newmap = malloc(hdrbytes + segbytes, M_BUSDMA, mflags | M_ZERO);
 	if (newmap != NULL) {
 		/* The segment array starts right behind the sync_list array. */
 		newmap->segments =
 		    (bus_dma_segment_t *)((uintptr_t)newmap + hdrbytes);
 		STAILQ_INIT(&newmap->bpages);
 		return (newmap);
 	}
 
 	CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
 	return (NULL);
 }
 
 /*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  *
  * On success the new map is stored in *mapp and 0 is returned.  On failure
  * *mapp is NULL and an error is returned: ENOMEM if the map could not be
  * allocated, otherwise the error from allocate_bz_and_pages().
  */
 int
 bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	bus_dmamap_t newmap;
 	int rc;
 
 	newmap = allocate_map(dmat, M_NOWAIT);
 	*mapp = newmap;
 	if (newmap == NULL) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
 		return (ENOMEM);
 	}
 
 	/*
 	 * Bouncing might be required if the driver asks for an exclusion
 	 * region, a data alignment that is stricter than 1, or DMA that begins
 	 * or ends with a partial cacheline.  Whether bouncing will actually
 	 * happen can't be known until mapping time, but we need to pre-allocate
 	 * resources now because we might not be allowed to at mapping time.
 	 */
 	rc = allocate_bz_and_pages(dmat, newmap);
 	if (rc != 0) {
 		free(newmap, M_BUSDMA);
 		*mapp = NULL;
 		return (rc);
 	}
 
 	/* Account for the new map in the statistics and on the tag. */
 	if (newmap->flags & DMAMAP_COHERENT)
 		atomic_add_32(&maps_coherent, 1);
 	atomic_add_32(&maps_total, 1);
 	dmat->map_count++;
 
 	return (0);
 }
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  *
  * Returns EBUSY, leaving the map untouched, if the map still holds bounce
  * pages or has sync list entries; otherwise frees the map and returns 0.
  */
 int
 bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	int in_use;
 
 	in_use = (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0);
 	if (in_use) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 		    __func__, dmat, EBUSY);
 		return (EBUSY);
 	}
 
 	/* Undo the accounting done when the map was created. */
 	if (dmat->bounce_zone != NULL)
 		dmat->bounce_zone->map_count--;
 	if ((map->flags & DMAMAP_COHERENT) != 0)
 		atomic_subtract_32(&maps_coherent, 1);
 	atomic_subtract_32(&maps_total, 1);
 
 	free(map, M_BUSDMA);
 	dmat->map_count--;
 	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
 	return (0);
 }
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into bus device
  * space based on the constraints listed in the dma tag.  Returns a pointer to
  * the allocated memory, and a pointer to an associated bus_dmamap.
  *
  * flags may contain BUS_DMA_NOWAIT (don't sleep for memory), BUS_DMA_ZERO
  * (zero the memory) and BUS_DMA_COHERENT (request coherent memory).
  *
  * Returns 0 on success with *vaddr and *mapp filled in.  Returns ENOMEM if
  * either the map or the buffer cannot be allocated; *mapp is NULL then.
  */
 int
 bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
     bus_dmamap_t *mapp)
 {
 	busdma_bufalloc_t ba;
 	struct busdma_bufzone *bufzone;
 	bus_dmamap_t map;
 	vm_memattr_t memattr;
 	int mflags;
 
 	/* Translate the busdma flags into malloc(9)-style flags. */
 	if (flags & BUS_DMA_NOWAIT)
 		mflags = M_NOWAIT;
 	else
 		mflags = M_WAITOK;
 	if (flags & BUS_DMA_ZERO)
 		mflags |= M_ZERO;
 
 	*mapp = map = allocate_map(dmat, mflags);
 	if (map == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->flags, ENOMEM);
 		return (ENOMEM);
 	}
 	/* Mark the map so the bounce checks know we allocated the buffer. */
 	map->flags = DMAMAP_DMAMEM_ALLOC;
 
 	/* For coherent memory, set the map flag that disables sync ops. */
 	if (flags & BUS_DMA_COHERENT)
 		map->flags |= DMAMAP_COHERENT;
 
 	/*
 	 * Choose a busdma buffer allocator based on memory type flags.
 	 * If the tag's COHERENT flag is set, that means normal memory
 	 * is already coherent, use the normal allocator.
 	 */
 	if ((flags & BUS_DMA_COHERENT) &&
 	    ((dmat->flags & BUS_DMA_COHERENT) == 0)) {
 		memattr = VM_MEMATTR_UNCACHEABLE;
 		ba = coherent_allocator;
 	} else {
 		memattr = VM_MEMATTR_DEFAULT;
 		ba = standard_allocator;
 	}
 
 	/*
 	 * Try to find a bufzone in the allocator that holds a cache of buffers
 	 * of the right size for this request.  If the buffer is too big to be
 	 * held in the allocator cache, this returns NULL.
 	 */
 	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);
 
 	/*
 	 * Allocate the buffer from the uma(9) allocator if...
 	 *  - It's small enough to be in the allocator (bufzone not NULL).
 	 *  - The alignment constraint isn't larger than the allocation size
 	 *    (the allocator aligns buffers to their size boundaries).
 	 *  - There's no need to handle lowaddr/highaddr exclusion zones.
 	 * else allocate non-contiguous pages if...
 	 *  - The page count that could get allocated doesn't exceed
 	 *    nsegments also when the maximum segment size is less
 	 *    than PAGE_SIZE.
 	 *  - The alignment constraint isn't larger than a page boundary.
 	 *  - There are no boundary-crossing constraints.
 	 * else allocate a block of contiguous pages because one or more of the
 	 * constraints is something that only the contig allocator can fulfill.
 	 *
 	 * bus_dmamem_free() repeats the first test to decide how to release
 	 * the buffer, so the two must stay in agreement.
 	 */
 	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
 	    !exclusion_bounce(dmat)) {
 		*vaddr = uma_zalloc(bufzone->umazone, mflags);
 	} else if (dmat->nsegments >=
 	    howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) &&
 	    dmat->alignment <= PAGE_SIZE &&
 	    (dmat->boundary % PAGE_SIZE) == 0) {
 		*vaddr = (void *)kmem_alloc_attr(dmat->maxsize, mflags, 0,
 		    dmat->lowaddr, memattr);
 	} else {
 		*vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags, 0,
 		    dmat->lowaddr, dmat->alignment, dmat->boundary, memattr);
 	}
 	if (*vaddr == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->flags, ENOMEM);
 		/* No buffer: release the map again and report failure. */
 		free(map, M_BUSDMA);
 		*mapp = NULL;
 		return (ENOMEM);
 	}
 	/* Account for the new map in the statistics and on the tag. */
 	if (map->flags & DMAMAP_COHERENT)
 		atomic_add_32(&maps_coherent, 1);
 	atomic_add_32(&maps_dmamem, 1);
 	atomic_add_32(&maps_total, 1);
 	dmat->map_count++;
 
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->flags, 0);
 	return (0);
 }
 
 /*
  * Free a piece of memory that was allocated via bus_dmamem_alloc, along with
  * its associated map.
  */
 void
 bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
 {
 	struct busdma_bufzone *zone;
 	busdma_bufalloc_t allocator;
 	int from_uma;
 
 	/*
 	 * The coherent allocator applies only to a coherent map on a tag
 	 * that is not itself flagged coherent.
 	 */
 	allocator = standard_allocator;
 	if ((map->flags & DMAMAP_COHERENT) != 0 &&
 	    (dmat->flags & BUS_DMA_COHERENT) == 0)
 		allocator = coherent_allocator;
 
 	zone = busdma_bufalloc_findzone(allocator, dmat->maxsize);
 
 	/* Buffers that passed this test came from uma; the rest from kmem. */
 	from_uma = (zone != NULL && dmat->alignment <= zone->size &&
 	    !exclusion_bounce(dmat));
 	if (from_uma) {
 		uma_zfree(zone->umazone, vaddr);
 	} else {
 		kmem_free((vm_offset_t)vaddr, dmat->maxsize);
 	}
 
 	/* Undo the accounting done at allocation time. */
 	dmat->map_count--;
 	if ((map->flags & DMAMAP_COHERENT) != 0)
 		atomic_subtract_32(&maps_coherent, 1);
 	atomic_subtract_32(&maps_total, 1);
 	atomic_subtract_32(&maps_dmamem, 1);
 
 	free(map, M_BUSDMA);
 	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags);
 }
 
 /*
  * Count the bounce pages needed to transfer the physical range starting at
  * buf, buflen bytes long, and store the count in map->pagesneeded.  Nothing
  * is done if the map already has a nonzero count.
  */
 static void
 _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags)
 {
 	bus_addr_t pa;
 	bus_size_t chunk;
 
 	if (map->pagesneeded != 0)
 		return;
 
 	CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d"
 	    " map= %p, pagesneeded= %d",
 	    dmat->lowaddr, dmat->boundary, dmat->alignment,
 	    map, map->pagesneeded);
 
 	/*
 	 * Walk the range in chunks of at most maxsegsz.  A chunk that has to
 	 * bounce is cut at the end of its page and costs one bounce page.
 	 */
 	for (pa = buf; buflen != 0; pa += chunk, buflen -= chunk) {
 		chunk = MIN(buflen, dmat->maxsegsz);
 		if (must_bounce(dmat, map, pa, chunk) != 0) {
 			chunk = MIN(chunk, PAGE_SIZE - (pa & PAGE_MASK));
 			map->pagesneeded++;
 		}
 	}
 	CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded);
 }
 
 /*
  * Count the bounce pages needed to transfer the virtual range starting at
  * buf, buflen bytes long, in the given pmap, and store the count in
  * map->pagesneeded.  Nothing is done if the map already has a nonzero count.
  */
 static void
 _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, bus_dmamap_t map,
     void *buf, bus_size_t buflen, int flags)
 {
 	vm_offset_t va, va_end, to_page_end;
 	bus_addr_t pa;
 
 	if (map->pagesneeded != 0)
 		return;
 
 	CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d"
 	    " map= %p, pagesneeded= %d",
 	    dmat->lowaddr, dmat->boundary, dmat->alignment,
 	    map, map->pagesneeded);
 
 	/*
 	 * Visit the buffer one page at a time; every page whose portion of
 	 * the buffer has to bounce costs one bounce page.
 	 */
 	va = (vm_offset_t)buf;
 	va_end = (vm_offset_t)buf + buflen;
 	for (; va < va_end; va += to_page_end) {
 		to_page_end = PAGE_SIZE - (va & PAGE_MASK);
 		if (__predict_true(pmap == kernel_pmap))
 			pa = pmap_kextract(va);
 		else
 			pa = pmap_extract(pmap, va);
 		if (must_bounce(dmat, map, pa,
 		    min(va_end - va, to_page_end)) != 0)
 			map->pagesneeded++;
 	}
 	CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded);
 }
 
 /*
  * Reserve the bounce pages the map needs, under bounce_lock.
  *
  * With BUS_DMA_NOWAIT a failed reservation clears map->pagesneeded and
  * yields ENOMEM.  Without it, a failed reservation queues the map on
  * bounce_map_waitinglist and yields EINPROGRESS.  Returns 0 on success.
  */
 static int
 _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags)
 {
 	int error;
 
 	error = 0;
 	mtx_lock(&bounce_lock);
 	if ((flags & BUS_DMA_NOWAIT) != 0) {
 		if (reserve_bounce_pages(dmat, map, 0) != 0) {
 			map->pagesneeded = 0;
 			error = ENOMEM;
 		}
 	} else if (reserve_bounce_pages(dmat, map, 1) != 0) {
 		/* Queue us for resources */
 		STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links);
 		error = EINPROGRESS;
 	}
 	mtx_unlock(&bounce_lock);
 
 	return (error);
 }
 
 /*
  * Add a single contiguous physical range to the segment list.
  *
  * *segp is the index of the last segment in use (-1 if none) and is updated
  * on success.  The return value is the number of bytes actually placed,
  * which can be less than sgsize when the range had to be cut at a boundary,
  * or 0 when the tag's segment limit has been reached.
  */
 static int
 _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr,
     bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t next_bndry, bndry_mask;
 	int cur;
 
 	/* Trim the chunk so it doesn't cross the tag's boundary. */
 	bndry_mask = ~(dmat->boundary - 1);
 	if (dmat->boundary > 0) {
 		next_bndry = (curaddr + dmat->boundary) & bndry_mask;
 		if (sgsize > (next_bndry - curaddr))
 			sgsize = (next_bndry - curaddr);
 	}
 
 	cur = *segp;
 	if (cur == -1) {
 		/* Very first segment. */
 		cur = 0;
 		segs[cur].ds_addr = curaddr;
 		segs[cur].ds_len = sgsize;
 	} else if (curaddr == segs[cur].ds_addr + segs[cur].ds_len &&
 	    (segs[cur].ds_len + sgsize) <= dmat->maxsegsz &&
 	    (dmat->boundary == 0 ||
 	    (segs[cur].ds_addr & bndry_mask) == (curaddr & bndry_mask))) {
 		/*
 		 * The chunk directly follows the previous segment, the sum
 		 * fits in maxsegsz and stays within the same boundary window:
 		 * coalesce.
 		 */
 		segs[cur].ds_len += sgsize;
 	} else {
 		/* Start a new segment, if there is still one to be had. */
 		if (++cur >= dmat->nsegments)
 			return (0);
 		segs[cur].ds_addr = curaddr;
 		segs[cur].ds_len = sgsize;
 	}
 	*segp = cur;
 	return (sgsize);
 }
 
 /*
  * Utility function to load a physical buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  *
  * If segs is NULL the map's own segment array is used.  Returns 0 on
  * success, EFBIG (after unloading the map) if the buffer does not fit, or
  * the error from _bus_dmamap_reserve_pages() when bounce pages are needed
  * but could not be reserved.
  */
 int
 _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t curaddr;
 	bus_addr_t sl_end = 0;
 	bus_size_t sgsize;
 	struct sync_list *sl;
 	int error;
 
 	if (segs == NULL)
 		segs = map->segments;
 
 	counter_u64_add(maploads_total, 1);
 	counter_u64_add(maploads_physmem, 1);
 
 	/*
 	 * Only do the page-by-page bounce count when the quick check says
 	 * bouncing is possible at all.
 	 */
 	if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
 		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			counter_u64_add(maploads_bounced, 1);
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	/* Point at the last sync list entry in use (one before the first if none). */
 	sl = map->slist + map->sync_count - 1;
 
 	while (buflen > 0) {
 		curaddr = buf;
 		sgsize = MIN(buflen, dmat->maxsegsz);
 		if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr,
 		    sgsize)) {
 			/* Bounced chunks never extend past the end of a page. */
 			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
 			curaddr = add_bounce_page(dmat, map, 0, curaddr,
 			    sgsize);
 		} else if ((dmat->flags & BUS_DMA_COHERENT) == 0) {
 			/*
 			 * Not bounced and not coherent: record the range in
 			 * the sync list, extending the current entry when the
 			 * range is physically contiguous with it.
 			 */
 			if (map->sync_count > 0)
 				sl_end = sl->paddr + sl->datacount;
 
 			if (map->sync_count == 0 || curaddr != sl_end) {
 				/* Out of entries: leave with buflen != 0. */
 				if (++map->sync_count > dmat->nsegments)
 					break;
 				sl++;
 				sl->vaddr = 0;
 				sl->paddr = curaddr;
 				sl->datacount = sgsize;
 				sl->pages = PHYS_TO_VM_PAGE(curaddr);
 				KASSERT(sl->pages != NULL,
 				    ("%s: page at PA:0x%08lx is not in "
 				    "vm_page_array", __func__, curaddr));
 			} else
 				sl->datacount += sgsize;
 		}
 		/* addseg may place fewer bytes than asked for; 0 means full. */
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		buf += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	if (buflen != 0) {
 		bus_dmamap_unload(dmat, map);
 		return (EFBIG); /* XXX better return value here? */
 	}
 	return (0);
 }
 
 /*
  * Load an array of vm pages.  This simply forwards all arguments to
  * bus_dmamap_load_ma_triv() and returns its result.
  */
 int
 _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
     bus_dma_segment_t *segs, int *segp)
 {
 
 	return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags,
 	    segs, segp));
 }
 
 /*
  * Utility function to load a linear buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  *
  * buf is a virtual address in the address space described by pmap.  If
  * segs is NULL the map's own segment array is used.  Returns 0 on success,
  * EFBIG (after unloading the map) if the buffer does not fit, or the error
  * from _bus_dmamap_reserve_pages() when bounce pages are needed but could
  * not be reserved.
  */
 int
 _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
     bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	bus_size_t sgsize;
 	bus_addr_t curaddr;
 	bus_addr_t sl_pend = 0;
 	vm_offset_t kvaddr, vaddr, sl_vend = 0;
 	struct sync_list *sl;
 	int error;
 
 	counter_u64_add(maploads_total, 1);
 	if (map->flags & DMAMAP_COHERENT)
 		counter_u64_add(maploads_coherent, 1);
 	if (map->flags & DMAMAP_DMAMEM_ALLOC)
 		counter_u64_add(maploads_dmamem, 1);
 
 	if (segs == NULL)
 		segs = map->segments;
 
 	/* Remember that this is an mbuf load; bus_dmamap_unload() clears it. */
 	if (flags & BUS_DMA_LOAD_MBUF) {
 		counter_u64_add(maploads_mbuf, 1);
 		map->flags |= DMAMAP_MBUF;
 	}
 
 	/*
 	 * Only do the page-by-page bounce count when the quick check says
 	 * bouncing is possible at all.
 	 */
 	if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
 		_bus_dmamap_count_pages(dmat, pmap, map, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			counter_u64_add(maploads_bounced, 1);
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	/* Point at the last sync list entry in use (one before the first if none). */
 	sl = map->slist + map->sync_count - 1;
 	vaddr = (vm_offset_t)buf;
 
 	while (buflen > 0) {
 		/*
 		 * Get the physical address for this segment.
 		 * kvaddr is left at 0 for buffers outside the kernel pmap.
 		 */
 		if (__predict_true(pmap == kernel_pmap)) {
 			curaddr = pmap_kextract(vaddr);
 			kvaddr = vaddr;
 		} else {
 			curaddr = pmap_extract(pmap, vaddr);
 			kvaddr = 0;
 		}
 
 		/*
 		 * Compute the segment size, and adjust counts.
 		 * A chunk never extends past the end of its page, past
 		 * maxsegsz, or past the end of the buffer.
 		 */
 		sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
 		if (sgsize > dmat->maxsegsz)
 			sgsize = dmat->maxsegsz;
 		if (buflen < sgsize)
 			sgsize = buflen;
 
 		if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr,
 		    sgsize)) {
 			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
 			    sgsize);
 		} else if ((dmat->flags & BUS_DMA_COHERENT) == 0) {
 			/*
 			 * Not bounced and not coherent: record the range in
 			 * the sync list.  The current entry is extended only
 			 * when the range continues it physically and, for
 			 * kernel addresses, virtually as well.
 			 */
 			if (map->sync_count > 0) {
 				sl_pend = sl->paddr + sl->datacount;
 				sl_vend = sl->vaddr + sl->datacount;
 			}
 
 			if (map->sync_count == 0 ||
 			    (kvaddr != 0 && kvaddr != sl_vend) ||
 			    (curaddr != sl_pend)) {
 
 				/* Out of entries: leave with buflen != 0. */
 				if (++map->sync_count > dmat->nsegments)
 					goto cleanup;
 				sl++;
 				sl->vaddr = kvaddr;
 				sl->paddr = curaddr;
 				if (kvaddr != 0) {
 					sl->pages = NULL;
 				} else {
 					sl->pages = PHYS_TO_VM_PAGE(curaddr);
 					KASSERT(sl->pages != NULL,
 					    ("%s: page at PA:0x%08lx is not "
 					    "in vm_page_array", __func__,
 					    curaddr));
 				}
 				sl->datacount = sgsize;
 			} else
 				sl->datacount += sgsize;
 		}
 		/* addseg may place fewer bytes than asked for; 0 means full. */
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		vaddr += sgsize;
 		buflen -= sgsize;
 	}
 
 cleanup:
 	/*
 	 * Did we fit?
 	 */
 	if (buflen != 0) {
 		bus_dmamap_unload(dmat, map);
 		return (EFBIG); /* XXX better return value here? */
 	}
 	return (0);
 }
 
 /*
  * Record in the map everything that describes a load request: a copy of
  * the memory descriptor, the tag, and the callback with its argument.
  */
 void
 _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem,
     bus_dmamap_callback_t *callback, void *callback_arg)
 {
 
 	map->dmat = dmat;
 	map->mem = *mem;
 	map->callback_arg = callback_arg;
 	map->callback = callback;
 }
 
 /*
  * Return the segment array that belongs to a load: the caller's array if
  * one was supplied, otherwise the array embedded in the map.  The remaining
  * arguments are not used.
  */
 bus_dma_segment_t *
 _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 
 	return (segs != NULL ? segs : map->segments);
 }
 
 /*
  * Release the mapping held by map: return its bounce pages and its page
  * reservation to the tag's bounce zone (if the tag has one), empty the sync
  * list and clear the mbuf marker.
  */
 void
 bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bounce_page *bp;
 	struct bounce_zone *zone;
 
 	if (dmat->bounce_zone != NULL) {
 		/* Hand back every bounce page still attached to the map. */
 		for (;;) {
 			bp = STAILQ_FIRST(&map->bpages);
 			if (bp == NULL)
 				break;
 			STAILQ_REMOVE_HEAD(&map->bpages, links);
 			free_bounce_page(dmat, bp);
 		}
 
 		/* Move the map's reserved pages back to the free count. */
 		zone = dmat->bounce_zone;
 		zone->free_bpages += map->pagesreserved;
 		zone->reserved_bpages -= map->pagesreserved;
 		map->pagesreserved = 0;
 		map->pagesneeded = 0;
 	}
 	map->sync_count = 0;
 	map->flags &= ~DMAMAP_MBUF;
 }
 
 /*
  * Prepare a buffer for a DMA read when its start or end may share a
  * cacheline with other data: write back the partial lines at either edge,
  * then invalidate the buffer itself.
  */
 static void
 dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
 {
 
 	/*
 	 * There is no need to round the address down to a cacheline or to
 	 * pass an exact size for the edges: dcache_wb_poc() does the rounding
 	 * itself and works at cacheline granularity, so a length of 1 covers
 	 * the whole line.
 	 */
 	if ((va & BUSDMA_DCACHE_MASK) != 0)
 		dcache_wb_poc(va, pa, 1);
 	if (((va + size) & BUSDMA_DCACHE_MASK) != 0)
 		dcache_wb_poc(va + size, pa + size, 1);
 
 	dcache_inv_poc_dma(va, pa, size);
 }
 
 /*
  * Perform the data cache maintenance that op calls for on the range
  * described by one sync list entry.
  *
  * When the entry has a kernel virtual address (sl->vaddr != 0) the whole
  * range is handled in a single pass through that address.  Otherwise the
  * range is walked page by page, with each page temporarily mapped via
  * pmap_quick_enter_page() for the duration of the cache operation.
  *
  * Panics on a combination of sync operations that is not handled below.
  */
 static void
 dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op)
 {
 	uint32_t len, offset;
 	vm_page_t m;
 	vm_paddr_t pa;
 	vm_offset_t va, tempva;
 	bus_size_t size;
 
 	offset = sl->paddr & PAGE_MASK;
 	m = sl->pages;
 	size = sl->datacount;
 	pa = sl->paddr;
 
 	/* Only the first page can start at a nonzero offset. */
 	for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) {
 		tempva = 0;
 		if (sl->vaddr == 0) {
 			/* No kva: map this page temporarily. */
 			len = min(PAGE_SIZE - offset, size);
 			tempva = pmap_quick_enter_page(m);
 			va = tempva | offset;
 			KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset),
 			    ("unexpected vm_page_t phys: 0x%08x != 0x%08x",
 			    VM_PAGE_TO_PHYS(m) | offset, pa));
 		} else {
 			/* Whole range at once; the loop ends after this pass. */
 			len = sl->datacount;
 			va = sl->vaddr;
 		}
 
 		switch (op) {
 		case BUS_DMASYNC_PREWRITE:
 		case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
 			dcache_wb_poc(va, pa, len);
 			break;
 		case BUS_DMASYNC_PREREAD:
 			/*
 			 * An mbuf may start in the middle of a cacheline. There
 			 * will be no cpu writes to the beginning of that line
 			 * (which contains the mbuf header) while dma is in
 			 * progress.  Handle that case by doing a writeback of
 			 * just the first cacheline before invalidating the
 			 * overall buffer.  Any mbuf in a chain may have this
 			 * misalignment.  Buffers which are not mbufs bounce if
 			 * they are not aligned to a cacheline.
 			 */
 			dma_preread_safe(va, pa, len);
 			break;
 		case BUS_DMASYNC_POSTREAD:
 		case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
 			dcache_inv_poc(va, pa, len);
 			break;
 		default:
 			panic("unsupported combination of sync operations: "
                               "0x%08x\n", op);
 		}
 
 		/* Drop the temporary mapping, if one was set up above. */
 		if (tempva != 0)
 			pmap_quick_remove_page(tempva);
 	}
 }
 
 /*
  * Perform the CPU-side work required before or after a DMA transfer on 'map'.
  *
  * Arguments:
  *	dmat  The tag the map was created from; its flags and bounce zone are
  *	      consulted.
  *	map   The map being synchronized; its bounce page list, flags and sync
  *	      list are consulted.
  *	op    A mask of BUS_DMASYNC_* operations.
  *
  * The work is done in three stages: bounce pages queued on the map are
  * copied and cache-maintained, then maps flagged DMAMAP_COHERENT get a
  * barrier only, and otherwise each sync_list entry is handed to
  * dma_dcache_sync().
  */
 void
 bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 {
 	struct bounce_page *bpage;
 	struct sync_list *sl, *end;
 	vm_offset_t datavaddr, tempvaddr;
 
 	/* A POSTWRITE on its own requires no work at all. */
 	if (op == BUS_DMASYNC_POSTWRITE)
 		return;
 
 	/*
 	 * If the buffer was from user space, it is possible that this is not
 	 * the same vm map, especially on a POST operation.  It's not clear that
 	 * dma on userland buffers can work at all right now.  To be safe, until
 	 * we're able to test direct userland dma, panic on a map mismatch.
 	 *
 	 * NOTE(review): no vm map comparison or panic is performed anywhere in
 	 * this function; the paragraph above appears to be stale.
 	 */
 	if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 		    "performing bounce", __func__, dmat, dmat->flags, op);
 
 		/*
 		 * For PREWRITE do a writeback.  Clean the caches from the
 		 * innermost to the outermost levels.
 		 */
 		if (op & BUS_DMASYNC_PREWRITE) {
 			while (bpage != NULL) {
 				/*
 				 * A zero datavaddr means no kernel address was
 				 * recorded for the client data; obtain one for
 				 * its page with pmap_quick_enter_page() and
 				 * release it again right after the copy.
 				 */
 				tempvaddr = 0;
 				datavaddr = bpage->datavaddr;
 				if (datavaddr == 0) {
 					tempvaddr = pmap_quick_enter_page(
 					    bpage->datapage);
 					datavaddr = tempvaddr | bpage->dataoffs;
 				}
 				/* Client data -> bounce page. */
 				bcopy((void *)datavaddr, (void *)bpage->vaddr,
 				    bpage->datacount);
 				if (tempvaddr != 0)
 					pmap_quick_remove_page(tempvaddr);
 				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 					dcache_wb_poc(bpage->vaddr,
 					    bpage->busaddr, bpage->datacount);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 			/* Counted once per sync call, not once per page. */
 			dmat->bounce_zone->total_bounced++;
 		}
 
 		/*
 		 * Do an invalidate for PREREAD unless a writeback was already
 		 * done above due to PREWRITE also being set.  The reason for a
 		 * PREREAD invalidate is to prevent dirty lines currently in the
 		 * cache from being evicted during the DMA.  If a writeback was
 		 * done due to PREWRITE also being set there will be no dirty
 		 * lines and the POSTREAD invalidate handles the rest. The
 		 * invalidate is done from the innermost to outermost level. If
 		 * L2 were done first, a dirty cacheline could be automatically
 		 * evicted from L1 before we invalidated it, re-dirtying the L2.
 		 */
 		if ((op & BUS_DMASYNC_PREREAD) && !(op & BUS_DMASYNC_PREWRITE)) {
 			bpage = STAILQ_FIRST(&map->bpages);
 			while (bpage != NULL) {
 				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 					dcache_inv_poc_dma(bpage->vaddr,
 					    bpage->busaddr, bpage->datacount);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 		}
 
 		/*
 		 * Re-invalidate the caches on a POSTREAD, even though they were
 		 * already invalidated at PREREAD time.  Aggressive prefetching
 		 * due to accesses to other data near the dma buffer could have
 		 * brought buffer data into the caches which is now stale.  The
 		 * caches are invalidated from the outermost to innermost; the
 		 * prefetches could be happening right now, and if L1 were
 		 * invalidated first, stale L2 data could be prefetched into L1.
 		 *
 		 * NOTE(review): bpage is not re-fetched from the list head
 		 * here.  If either PRE loop above ran, bpage is already NULL
 		 * and this loop does nothing, so the copy-back only happens
 		 * when op carries no PRE bits -- confirm callers never mix PRE
 		 * and POST operations in one call.
 		 */
 		if (op & BUS_DMASYNC_POSTREAD) {
 			while (bpage != NULL) {
 				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 					dcache_inv_poc(bpage->vaddr,
 					    bpage->busaddr, bpage->datacount);
 				/* Same temporary-address dance as PREWRITE. */
 				tempvaddr = 0;
 				datavaddr = bpage->datavaddr;
 				if (datavaddr == 0) {
 					tempvaddr = pmap_quick_enter_page(
 					    bpage->datapage);
 					datavaddr = tempvaddr | bpage->dataoffs;
 				}
 				/* Bounce page -> client data. */
 				bcopy((void *)bpage->vaddr, (void *)datavaddr,
 				    bpage->datacount);
 				if (tempvaddr != 0)
 					pmap_quick_remove_page(tempvaddr);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 			dmat->bounce_zone->total_bounced++;
 		}
 	}
 
 	/*
 	 * For COHERENT memory no cache maintenance is necessary, but ensure all
 	 * writes have reached memory for the PREWRITE case.  No action is
 	 * needed for a PREREAD without PREWRITE also set, because that would
 	 * imply that the cpu had written to the COHERENT buffer and expected
 	 * the dma device to see that change, and by definition a PREWRITE sync
 	 * is required to make that happen.
 	 */
 	if (map->flags & DMAMAP_COHERENT) {
 		if (op & BUS_DMASYNC_PREWRITE) {
 			dsb();
 			/* The L2 write buffer drain is skipped for coherent tags. */
 			if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 				cpu_l2cache_drain_writebuf();
 		}
 		return;
 	}
 
 	/*
 	 * Cache maintenance for normal (non-COHERENT non-bounce) buffers.  All
 	 * the comments about the sequences for flushing cache levels in the
 	 * bounce buffer code above apply here as well.  In particular, the fact
 	 * that the sequence is inner-to-outer for PREREAD invalidation and
 	 * outer-to-inner for POSTREAD invalidation is not a mistake.
 	 */
 	if (map->sync_count != 0) {
 		/* Walk slist[0 .. sync_count - 1]; 'end' is one past the last. */
 		sl = &map->slist[0];
 		end = &map->slist[map->sync_count];
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 		    "performing sync", __func__, dmat, dmat->flags, op);
 
 		for ( ; sl != end; ++sl)
 			dma_dcache_sync(sl, op);
 	}
 }
 
 /*
  * One-time setup of the global bounce page state: the lock, the three
  * global lists and the page counter.  Registered through SYSINIT below.
  */
 static void
 init_bounce_pages(void *dummy __unused)
 {
 
 	mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF);
 
 	STAILQ_INIT(&bounce_map_callbacklist);
 	STAILQ_INIT(&bounce_map_waitinglist);
 	STAILQ_INIT(&bounce_zone_list);
 
 	total_bpages = 0;
 }
 SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL);
 
 /* Return the sysctl context embedded in the bounce zone 'bz'. */
 static struct sysctl_ctx_list *
 busdma_sysctl_tree(struct bounce_zone *bz)
 {
 	struct sysctl_ctx_list *ctx;
 
 	ctx = &bz->sysctl_tree;
 	return (ctx);
 }
 
 /* Return the top-level sysctl node recorded in the bounce zone 'bz'. */
 static struct sysctl_oid *
 busdma_sysctl_tree_top(struct bounce_zone *bz)
 {
 	struct sysctl_oid *top;
 
 	top = bz->sysctl_tree_top;
 	return (top);
 }
 
 /*
  * Attach a bounce zone to 'dmat'.
  *
  * An existing zone is reused when its alignment is at least the tag's and
  * its lowaddr is no higher than the tag's.  Otherwise a new zone is
  * allocated, appended to bounce_zone_list and given a set of read-only
  * sysctl nodes under hw.busdma.
  *
  * Returns 0 on success (including the case where the sysctl node could not
  * be created) and ENOMEM if the zone itself could not be allocated.
  */
 static int
 alloc_bounce_zone(bus_dma_tag_t dmat)
 {
 	struct sysctl_ctx_list *ctx;
 	struct sysctl_oid_list *kids;
 	struct bounce_zone *zone;
 
 	/* Look for a zone that already satisfies this tag. */
 	STAILQ_FOREACH(zone, &bounce_zone_list, links) {
 		if (dmat->alignment > zone->alignment ||
 		    dmat->lowaddr < zone->lowaddr)
 			continue;
 		dmat->bounce_zone = zone;
 		return (0);
 	}
 
 	zone = malloc(sizeof(*zone), M_BUSDMA, M_NOWAIT | M_ZERO);
 	if (zone == NULL)
 		return (ENOMEM);
 
 	STAILQ_INIT(&zone->bounce_page_list);
 	zone->free_bpages = 0;
 	zone->reserved_bpages = 0;
 	zone->active_bpages = 0;
 	zone->map_count = 0;
 	zone->lowaddr = dmat->lowaddr;
 	zone->alignment = MAX(dmat->alignment, PAGE_SIZE);
 	snprintf(zone->zoneid, 8, "zone%d", busdma_zonecount++);
 	snprintf(zone->lowaddrid, 18, "%#jx", (uintmax_t)zone->lowaddr);
 	STAILQ_INSERT_TAIL(&bounce_zone_list, zone, links);
 	dmat->bounce_zone = zone;
 
 	/* Publish the zone's counters; failure here is not fatal. */
 	sysctl_ctx_init(&zone->sysctl_tree);
 	zone->sysctl_tree_top = SYSCTL_ADD_NODE(&zone->sysctl_tree,
 	    SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, zone->zoneid,
 	    CTLFLAG_RD, 0, "");
 	if (zone->sysctl_tree_top == NULL) {
 		sysctl_ctx_free(&zone->sysctl_tree);
 		return (0);	/* XXX error code? */
 	}
 
 	ctx = busdma_sysctl_tree(zone);
 	kids = SYSCTL_CHILDREN(busdma_sysctl_tree_top(zone));
 
 	SYSCTL_ADD_INT(ctx, kids, OID_AUTO, "total_bpages", CTLFLAG_RD,
 	    &zone->total_bpages, 0, "Total bounce pages");
 	SYSCTL_ADD_INT(ctx, kids, OID_AUTO, "free_bpages", CTLFLAG_RD,
 	    &zone->free_bpages, 0, "Free bounce pages");
 	SYSCTL_ADD_INT(ctx, kids, OID_AUTO, "reserved_bpages", CTLFLAG_RD,
 	    &zone->reserved_bpages, 0, "Reserved bounce pages");
 	SYSCTL_ADD_INT(ctx, kids, OID_AUTO, "active_bpages", CTLFLAG_RD,
 	    &zone->active_bpages, 0, "Active bounce pages");
 	SYSCTL_ADD_INT(ctx, kids, OID_AUTO, "total_bounced", CTLFLAG_RD,
 	    &zone->total_bounced, 0, "Total bounce requests (pages bounced)");
 	SYSCTL_ADD_INT(ctx, kids, OID_AUTO, "total_deferred", CTLFLAG_RD,
 	    &zone->total_deferred, 0,
 	    "Total bounce requests that were deferred");
 	SYSCTL_ADD_STRING(ctx, kids, OID_AUTO, "lowaddr", CTLFLAG_RD,
 	    zone->lowaddrid, 0, "");
 	SYSCTL_ADD_ULONG(ctx, kids, OID_AUTO, "alignment", CTLFLAG_RD,
 	    &zone->alignment, "");
 
 	return (0);
 }
 
 /*
  * Try to add up to 'numpages' bounce pages to the zone attached to 'dmat'.
  *
  * Each page is a PAGE_SIZE contigmalloc() allocation bounded above by the
  * zone's lowaddr.  Allocation stops at the first failure.
  *
  * Returns the number of pages actually added.
  */
 static int
 alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages)
 {
 	struct bounce_zone *zone;
 	struct bounce_page *page;
 	int added;
 
 	zone = dmat->bounce_zone;
 	for (added = 0; numpages > 0; added++, numpages--) {
 		page = malloc(sizeof(*page), M_BUSDMA, M_NOWAIT | M_ZERO);
 		if (page == NULL)
 			break;
 
 		page->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE,
 		    M_NOWAIT, 0ul, zone->lowaddr, PAGE_SIZE, 0);
 		if (page->vaddr == 0) {
 			/* Do not leak the descriptor when the page fails. */
 			free(page, M_BUSDMA);
 			break;
 		}
 		page->busaddr = pmap_kextract(page->vaddr);
 
 		/* The zone list and the counters are changed under bounce_lock. */
 		mtx_lock(&bounce_lock);
 		STAILQ_INSERT_TAIL(&zone->bounce_page_list, page, links);
 		total_bpages++;
 		zone->total_bpages++;
 		zone->free_bpages++;
 		mtx_unlock(&bounce_lock);
 	}
 	return (added);
 }
 
 /*
  * Reserve free bounce pages from the tag's zone for 'map'.
  *
  * Must be called with bounce_lock held.  When 'commit' is zero and the zone
  * cannot cover everything the map still needs, nothing is reserved.  In all
  * other cases as many pages as are available (up to the outstanding need)
  * are moved from the zone's free count to its reserved count.
  *
  * Returns the number of pages the map is still short of; 0 means the
  * reservation is complete.
  */
 static int
 reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit)
 {
 	struct bounce_zone *zone;
 	int grab;
 
 	mtx_assert(&bounce_lock, MA_OWNED);
 	zone = dmat->bounce_zone;
 
 	grab = MIN(zone->free_bpages,
 	    map->pagesneeded - map->pagesreserved);
 	if (commit == 0 && map->pagesneeded > (map->pagesreserved + grab)) {
 		/* All-or-nothing request that cannot be met: report only. */
 		return (map->pagesneeded - (map->pagesreserved + grab));
 	}
 
 	zone->free_bpages -= grab;
 	zone->reserved_bpages += grab;
 	map->pagesreserved += grab;
 
 	return (map->pagesneeded - map->pagesreserved);
 }
 
 /*
  * Take one page from the tag's bounce zone, bind it to a piece of client
  * data and queue it on the map's bounce page list.
  *
  * Arguments:
  *	dmat   Tag whose bounce zone supplies the page.
  *	map    Map the page is queued on; one unit each of pagesneeded and
  *	       pagesreserved is consumed.
  *	vaddr  Recorded as the page's datavaddr (bus_dmamap_sync() treats 0
  *	       as "no kernel address available").
  *	addr   Physical address of the client data; its vm_page and in-page
  *	       offset are recorded.
  *	size   Recorded as the page's datacount.
  *
  * Returns:
  *	The bus address of the bounce page, including the client's page
  *	offset when the tag has BUS_DMA_KEEP_PG_OFFSET set.
  *
  * Panics if the map has no outstanding need, no reserved pages, or if the
  * zone's page list is empty.
  */
 static bus_addr_t
 add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
     bus_addr_t addr, bus_size_t size)
 {
 	struct bounce_zone *bz;
 	struct bounce_page *bpage;
 
 	KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag"));
 	KASSERT(map != NULL, ("add_bounce_page: bad map %p", map));
 
 	bz = dmat->bounce_zone;
 	if (map->pagesneeded == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesneeded--;
 
 	/*
 	 * This used to repeat the "doesn't need any pages" text, which hid
 	 * which of the two counters had actually run out.
 	 */
 	if (map->pagesreserved == 0)
 		panic("add_bounce_page: map doesn't have any reserved pages");
 	map->pagesreserved--;
 
 	/* Pull the page off the zone list and move it reserved -> active. */
 	mtx_lock(&bounce_lock);
 	bpage = STAILQ_FIRST(&bz->bounce_page_list);
 	if (bpage == NULL)
 		panic("add_bounce_page: free page list is empty");
 
 	STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links);
 	bz->reserved_bpages--;
 	bz->active_bpages++;
 	mtx_unlock(&bounce_lock);
 
 	if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/* Page offset needs to be preserved. */
 		bpage->vaddr |= addr & PAGE_MASK;
 		bpage->busaddr |= addr & PAGE_MASK;
 	}
 	bpage->datavaddr = vaddr;
 	bpage->datapage = PHYS_TO_VM_PAGE(addr);
 	bpage->dataoffs = addr & PAGE_MASK;
 	bpage->datacount = size;
 	STAILQ_INSERT_TAIL(&(map->bpages), bpage, links);
 	return (bpage->busaddr);
 }
 
 /*
  * Return 'bpage' to the bounce zone of 'dmat'.
  *
  * The page's client-data fields are cleared and, for tags with
  * BUS_DMA_KEEP_PG_OFFSET, its addresses are rewound to the page boundary.
  * If a map is waiting for pages and its reservation can now be completed,
  * it is moved to the callback list and the busdma software interrupt is
  * scheduled.
  */
 static void
 free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage)
 {
 	struct bounce_zone *zone;
 	struct bus_dmamap *waiter;
 
 	zone = dmat->bounce_zone;
 
 	bpage->datacount = 0;
 	bpage->datavaddr = 0;
 	if ((dmat->flags & BUS_DMA_KEEP_PG_OFFSET) != 0) {
 		/*
 		 * The next user may want the whole page, or may assume it
 		 * begins on a page boundary, so drop the stored offset.
 		 */
 		bpage->busaddr &= ~PAGE_MASK;
 		bpage->vaddr &= ~PAGE_MASK;
 	}
 
 	mtx_lock(&bounce_lock);
 	STAILQ_INSERT_HEAD(&zone->bounce_page_list, bpage, links);
 	zone->free_bpages++;
 	zone->active_bpages--;
 
 	/* Only the first waiter is considered on each free. */
 	waiter = STAILQ_FIRST(&bounce_map_waitinglist);
 	if (waiter != NULL &&
 	    reserve_bounce_pages(waiter->dmat, waiter, 1) == 0) {
 		STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
 		STAILQ_INSERT_TAIL(&bounce_map_callbacklist, waiter, links);
 		busdma_swi_pending = 1;
 		zone->total_deferred++;
 		swi_sched(vm_ih, 0);
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 /*
  * Drain bounce_map_callbacklist: for every queued map, retry the deferred
  * load through bus_dmamap_load_mem() while holding the tag's lock function.
  * bounce_lock is dropped around each load and re-taken before the list is
  * examined again.
  */
 void
 busdma_swi(void)
 {
 	struct bus_dmamap *pending;
 	bus_dma_tag_t tag;
 
 	mtx_lock(&bounce_lock);
 	for (;;) {
 		pending = STAILQ_FIRST(&bounce_map_callbacklist);
 		if (pending == NULL)
 			break;
 		STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
 		mtx_unlock(&bounce_lock);
 
 		tag = pending->dmat;
 		tag->lockfunc(tag->lockfuncarg, BUS_DMA_LOCK);
 		bus_dmamap_load_mem(pending->dmat, pending, &pending->mem,
 		    pending->callback, pending->callback_arg, BUS_DMA_WAITOK);
 		tag->lockfunc(tag->lockfuncarg, BUS_DMA_UNLOCK);
 
 		mtx_lock(&bounce_lock);
 	}
 	mtx_unlock(&bounce_lock);
 }
Index: head/sys/vm/uma.h
===================================================================
--- head/sys/vm/uma.h	(revision 356533)
+++ head/sys/vm/uma.h	(revision 356534)
@@ -1,710 +1,703 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff@FreeBSD.org>
  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  *
  */
 
 /*
  * uma.h - External definitions for the Universal Memory Allocator
  *
 */
 
 #ifndef _VM_UMA_H_
 #define _VM_UMA_H_
 
 #include <sys/param.h>		/* For NULL */
 #include <sys/malloc.h>		/* For M_* */
 
 /* User visible parameters */
 #define UMA_SMALLEST_UNIT       (PAGE_SIZE / 256) /* Smallest item allocated */
 
 /* Types and type defs */
 
 struct uma_zone;
 /* Opaque type used as a handle to the zone */
 typedef struct uma_zone * uma_zone_t;
 
 /*
  * Item constructor
  *
  * Arguments:
  *	item  A pointer to the memory which has been allocated.
  *	arg   The arg field passed to uma_zalloc_arg
  *	size  The size of the allocated item
  *	flags See zalloc flags
  *
  * Returns:
  *	0      on success
  *      errno  on failure
  *
  * Discussion:
  *	The constructor is called just before the memory is returned
  *	to the user. It may block if necessary.
  */
 typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
 
 /*
  * Item destructor
  *
  * Arguments:
  *	item  A pointer to the memory which has been allocated.
  *	size  The size of the item being destructed.
  *	arg   Argument passed through uma_zfree_arg
  *
  * Returns:
  *	Nothing
  *
  * Discussion:
  *	The destructor may perform operations that differ from those performed
  *	by the initializer, but it must leave the object in the same state.
  *	This IS type stable storage.  This is called after EVERY zfree call.
  */
 typedef void (*uma_dtor)(void *mem, int size, void *arg);
 
 /*
  * Item initializer
  *
  * Arguments:
  *	item  A pointer to the memory which has been allocated.
  *	size  The size of the item being initialized.
  *	flags See zalloc flags
  *
  * Returns:
  *	0      on success
  *      errno  on failure
  *
  * Discussion:
  *	The initializer is called when the memory is cached in the uma zone.
  *	The initializer and the destructor should leave the object in the same
  *	state.
  */
 typedef int (*uma_init)(void *mem, int size, int flags);
 
 /*
  * Item discard function
  *
  * Arguments:
  *	item  A pointer to memory which has been 'freed' but has not left the
  *	      zone's cache.
  *	size  The size of the item being discarded.
  *
  * Returns:
  *	Nothing
  *
  * Discussion:
  *	This routine is called when memory leaves a zone and is returned to the
  *	system for other uses.  It is the counter-part to the init function.
  */
 typedef void (*uma_fini)(void *mem, int size);
 
 /*
  * Import new memory into a cache zone.
  */
 typedef int (*uma_import)(void *arg, void **store, int count, int domain,
     int flags);
 
 /*
  * Free memory from a cache zone.
  */
 typedef void (*uma_release)(void *arg, void **store, int count);
 
 /*
  * What's the difference between initializing and constructing?
  *
  * The item is initialized when it is cached, and this is the state that the
  * object should be in when returned to the allocator. The purpose of this is
  * to remove some code which would otherwise be called on each allocation by
  * utilizing a known, stable state.  This differs from the constructor which
  * will be called on EVERY allocation.
  *
  * For example, in the initializer you may want to initialize embedded locks,
  * NULL list pointers, set up initial states, magic numbers, etc.  This way if
  * the object is held in the allocator and re-used it won't be necessary to
  * re-initialize it.
  *
  * The constructor may be used to lock a data structure, link it on to lists,
  * bump reference counts or total counts of outstanding structures, etc.
  *
  */
 
 
 /* Function proto types */
 
 /*
  * Create a new uma zone
  *
  * Arguments:
  *	name  The text name of the zone for debugging and stats. This memory
  *		should not be freed until the zone has been deallocated.
  *	size  The size of the object that is being created.
  *	ctor  The constructor that is called when the object is allocated.
  *	dtor  The destructor that is called when the object is freed.
  *	init  An initializer that sets up the initial state of the memory.
  *	fini  A discard function that undoes initialization done by init.
  *		ctor/dtor/init/fini may all be null, see notes above.
  *	align A bitmask that corresponds to the requested alignment
  *		eg 4 would be 0x3
  *	flags A set of parameters that control the behavior of the zone.
  *
  * Returns:
  *	A pointer to a structure which is intended to be opaque to users of
  *	the interface.  The value may be null if the wait flag is not set.
  */
 uma_zone_t uma_zcreate(const char *name, size_t size, uma_ctor ctor,
 		    uma_dtor dtor, uma_init uminit, uma_fini fini,
 		    int align, uint32_t flags);
 
 /*
  * Create a secondary uma zone
  *
  * Arguments:
  *	name  The text name of the zone for debugging and stats. This memory
  *		should not be freed until the zone has been deallocated.
  *	ctor  The constructor that is called when the object is allocated.
  *	dtor  The destructor that is called when the object is freed.
  *	zinit  An initializer that sets up the initial state of the memory
  *		as the object passes from the Keg's slab to the Zone's cache.
  *	zfini  A discard function that undoes initialization done by init
  *		as the object passes from the Zone's cache to the Keg's slab.
  *
  *		ctor/dtor/zinit/zfini may all be null, see notes above.
  *		Note that the zinit and zfini specified here are NOT
  *		exactly the same as the init/fini specified to uma_zcreate()
  *		when creating a master zone.  These zinit/zfini are called
  *		on the TRANSITION from keg to zone (and vice-versa). Once
  *		these are set, the primary zone may alter its init/fini
  *		(which are called when the object passes from VM to keg)
  *		using uma_zone_set_init/fini()) as well as its own
  *		zinit/zfini (unset by default for master zone) with
  *		uma_zone_set_zinit/zfini() (note subtle 'z' prefix).
  *
  *	master  A reference to this zone's Master Zone (Primary Zone),
  *		which contains the backing Keg for the Secondary Zone
  *		being added.
  *
  * Returns:
  *	A pointer to a structure which is intended to be opaque to users of
  *	the interface.  The value may be null if the wait flag is not set.
  */
 uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_zone_t master);
 
 /*
  * Create cache-only zones.
  *
  * This allows uma's per-cpu cache facilities to handle arbitrary
  * pointers.  Consumers must specify the import and release functions to
  * fill and destroy caches.  UMA does not allocate any memory for these
  * zones.  The 'arg' parameter is passed to import/release and is caller
  * specific.
  */
 uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_import zimport,
 		    uma_release zrelease, void *arg, int flags);
 
 /*
  * Definitions for uma_zcreate flags
  *
  * These flags share space with UMA_ZFLAGs in uma_int.h.  Be careful not to
- * overlap when adding new features.  0xff000000 is in use by uma_int.h.
+ * overlap when adding new features.
  */
-#define UMA_ZONE_PAGEABLE	0x0001	/* Return items not fully backed by
-					   physical memory XXX Not yet */
 #define UMA_ZONE_ZINIT		0x0002	/* Initialize with zeros */
-#define UMA_ZONE_STATIC		0x0004	/* Statically sized zone */
-#define UMA_ZONE_OFFPAGE	0x0008	/* Force the slab structure allocation
-					   off of the real memory */
+#define UMA_ZONE_NOTOUCH	0x0008	/* UMA may not access the memory */
 #define UMA_ZONE_MALLOC		0x0010	/* For use by malloc(9) only! */
 #define UMA_ZONE_NOFREE		0x0020	/* Do not free slabs of this type! */
 #define UMA_ZONE_MTXCLASS	0x0040	/* Create a new lock class */
 #define	UMA_ZONE_VM		0x0080	/*
 					 * Used for internal vm datastructures
 					 * only.
 					 */
-#define	UMA_ZONE_HASH		0x0100	/*
-					 * Use a hash table instead of caching
-					 * information in the vm_page.
-					 */
+#define	UMA_ZONE_NOTPAGE	0x0100	/* allocf memory not vm pages */
 #define	UMA_ZONE_SECONDARY	0x0200	/* Zone is a Secondary Zone */
 #define	UMA_ZONE_NOBUCKET	0x0400	/* Do not use buckets. */
 #define	UMA_ZONE_MAXBUCKET	0x0800	/* Use largest buckets. */
-#define	UMA_ZONE_CACHESPREAD	0x1000	/*
+#define	UMA_ZONE_MINBUCKET	0x1000	/* Use smallest buckets. */
+#define	UMA_ZONE_CACHESPREAD	0x2000	/*
 					 * Spread memory start locations across
 					 * all possible cache lines.  May
 					 * require many virtually contiguous
 					 * backend pages and can fail early.
 					 */
-#define	UMA_ZONE_VTOSLAB	0x2000	/* Zone uses vtoslab for lookup. */
 #define	UMA_ZONE_NODUMP		0x4000	/*
 					 * Zone's pages will not be included in
 					 * mini-dumps.
 					 */
 #define	UMA_ZONE_PCPU		0x8000	/*
 					 * Allocates mp_maxid + 1 slabs of PAGE_SIZE
 					 */
-#define	UMA_ZONE_MINBUCKET	0x10000	/* Use smallest buckets. */
-#define	UMA_ZONE_FIRSTTOUCH	0x20000	/* First touch NUMA policy */
-#define	UMA_ZONE_ROUNDROBIN	0x40000	/* Round-robin NUMA policy. */
+#define	UMA_ZONE_FIRSTTOUCH	0x10000	/* First touch NUMA policy */
+#define	UMA_ZONE_ROUNDROBIN	0x20000	/* Round-robin NUMA policy. */
+/* In use by UMA_ZFLAGs:	0xffe00000 */
 
 /*
  * These flags are shared between the keg and zone.  In zones wishing to add
  * new kegs these flags must be compatible.  Some are determined based on
  * physical parameters of the request and may not be provided by the consumer.
  */
 #define	UMA_ZONE_INHERIT						\
-    (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE |		\
-    UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU |			\
-    UMA_ZONE_FIRSTTOUCH | UMA_ZONE_ROUNDROBIN)
+    (UMA_ZONE_NOTOUCH | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE |		\
+     UMA_ZONE_NOTPAGE | UMA_ZONE_PCPU | UMA_ZONE_FIRSTTOUCH |		\
+     UMA_ZONE_ROUNDROBIN)
 
 /* Definitions for align */
 #define UMA_ALIGN_PTR	(sizeof(void *) - 1)	/* Alignment fit for ptr */
 #define UMA_ALIGN_LONG	(sizeof(long) - 1)	/* "" long */
 #define UMA_ALIGN_INT	(sizeof(int) - 1)	/* "" int */
 #define UMA_ALIGN_SHORT	(sizeof(short) - 1)	/* "" short */
 #define UMA_ALIGN_CHAR	(sizeof(char) - 1)	/* "" char */
 #define UMA_ALIGN_CACHE	(0 - 1)			/* Cache line size align */
 #define	UMA_ALIGNOF(type) (_Alignof(type) - 1)	/* Alignment fit for 'type' */
 
 #define	UMA_ANYDOMAIN	-1	/* Special value for domain search. */
 
 /*
  * Destroys an empty uma zone.  If the zone is not empty uma complains loudly.
  *
  * Arguments:
  *	zone  The zone we want to destroy.
  *
  */
 void uma_zdestroy(uma_zone_t zone);
 
 /*
  * Allocates an item out of a zone
  *
  * Arguments:
  *	zone  The zone we are allocating from
  *	arg   This data is passed to the ctor function
  *	flags See sys/malloc.h for available flags.
  *
  * Returns:
  *	A non-null pointer to an initialized element from the zone is
  *	guaranteed if the wait flag is M_WAITOK.  Otherwise a null pointer
  *	may be returned if the zone is empty or the ctor failed.
  */
 
 void *uma_zalloc_arg(uma_zone_t zone, void *arg, int flags);
 void *uma_zalloc_pcpu_arg(uma_zone_t zone, void *arg, int flags);
 
 /*
  * Allocate an item from a specific NUMA domain.  This uses a slow path in
  * the allocator but is guaranteed to allocate memory from the requested
  * domain if M_WAITOK is set.
  *
  * Arguments:
  *	zone  The zone we are allocating from
  *	arg   This data is passed to the ctor function
  *	domain The domain to allocate from.
  *	flags See sys/malloc.h for available flags.
  */
 void *uma_zalloc_domain(uma_zone_t zone, void *arg, int domain, int flags);
 
 /*
  * Allocates an item out of a zone without supplying an argument
  *
  * This is just a wrapper for uma_zalloc_arg for convenience.
  *
  */
 static __inline void *uma_zalloc(uma_zone_t zone, int flags);
 static __inline void *uma_zalloc_pcpu(uma_zone_t zone, int flags);
 
 /* Allocate an item from 'zone', passing a NULL argument to the allocator. */
 static __inline void *
 uma_zalloc(uma_zone_t zone, int flags)
 {
 	void *item;
 
 	item = uma_zalloc_arg(zone, NULL, flags);
 	return (item);
 }
 
 /* Per-CPU variant of uma_zalloc(): forwards with a NULL argument. */
 static __inline void *
 uma_zalloc_pcpu(uma_zone_t zone, int flags)
 {
 	void *item;
 
 	item = uma_zalloc_pcpu_arg(zone, NULL, flags);
 	return (item);
 }
 
 /*
  * Frees an item back into the specified zone.
  *
  * Arguments:
  *	zone  The zone the item was originally allocated out of.
  *	item  The memory to be freed.
  *	arg   Argument passed to the destructor
  *
  * Returns:
  *	Nothing.
  */
 
 void uma_zfree_arg(uma_zone_t zone, void *item, void *arg);
 void uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *arg);
 
 /*
  * Frees an item back to the specified zone's domain specific pool.
  *
  * Arguments:
  *	zone  The zone the item was originally allocated out of.
  *	item  The memory to be freed.
  *	arg   Argument passed to the destructor
  */
 void uma_zfree_domain(uma_zone_t zone, void *item, void *arg);
 
 /*
  * Frees an item back to a zone without supplying an argument
  *
  * This is just a wrapper for uma_zfree_arg for convenience.
  *
  */
 static __inline void uma_zfree(uma_zone_t zone, void *item);
 static __inline void uma_zfree_pcpu(uma_zone_t zone, void *item);
 
 /* Free 'item' back to 'zone', passing a NULL argument along. */
 static __inline void
 uma_zfree(uma_zone_t zone, void *item)
 {
 	void *const no_arg = NULL;
 
 	uma_zfree_arg(zone, item, no_arg);
 }
 
 /* Per-CPU variant of uma_zfree(): forwards with a NULL argument. */
 static __inline void
 uma_zfree_pcpu(uma_zone_t zone, void *item)
 {
 	void *const no_arg = NULL;
 
 	uma_zfree_pcpu_arg(zone, item, no_arg);
 }
 
 /*
  * Wait until the specified zone can allocate an item.
  */
 void uma_zwait(uma_zone_t zone);
 
 /*
  * Backend page supplier routines
  *
  * Arguments:
  *	zone  The zone that is requesting pages.
  *	size  The number of bytes being requested.
  *	pflag Flags for these memory pages, see below.
  *	domain The NUMA domain that we prefer for this allocation.
  *	wait  Indicates our willingness to block.
  *
  * Returns:
  *	A pointer to the allocated memory or NULL on failure.
  */
 
 typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
     uint8_t *pflag, int wait);
 
 /*
  * Backend page free routines
  *
  * Arguments:
  *	item  A pointer to the previously allocated pages.
  *	size  The original size of the allocation.
  *	pflag The flags for the slab.  See UMA_SLAB_* below.
  *
  * Returns:
  *	None
  */
 typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
 
 /*
  * Reclaims unused memory
  *
  * Arguments:
  *	req  Reclamation request type.
  * Returns:
  *	None
  */
 #define	UMA_RECLAIM_DRAIN	1	/* release bucket cache */
 #define	UMA_RECLAIM_DRAIN_CPU	2	/* release bucket and per-CPU caches */
 #define	UMA_RECLAIM_TRIM	3	/* trim bucket cache to WSS */
 void uma_reclaim(int req);
 void uma_zone_reclaim(uma_zone_t, int req);
 
 /*
  * Sets the alignment mask to be used for all zones requesting cache
  * alignment.  Should be called by MD boot code prior to starting VM/UMA.
  *
  * Arguments:
  *	align The alignment mask
  *
  * Returns:
  *	Nothing
  */
 void uma_set_align(int align);
 
 /*
  * Set a reserved number of items to hold for M_USE_RESERVE allocations.  All
  * other requests must allocate new backing pages.
  */
 void uma_zone_reserve(uma_zone_t zone, int nitems);
 
 /*
  * Reserves the maximum KVA space required by the zone and configures the zone
  * to use a VM_ALLOC_NOOBJ-based backend allocator.
  *
  * Arguments:
  *	zone  The zone to update.
  *	nitems  The upper limit on the number of items that can be allocated.
  *
  * Returns:
  *	0  if KVA space can not be allocated
  *	1  if successful
  *
  * Discussion:
  *	When the machine supports a direct map and the zone's items are smaller
  *	than a page, the zone will use the direct map instead of allocating KVA
  *	space.
  */
 int uma_zone_reserve_kva(uma_zone_t zone, int nitems);
 
 /*
  * Sets a high limit on the number of items allowed in a zone
  *
  * Arguments:
  *	zone  The zone to limit
  *	nitems  The requested upper limit on the number of items allowed
  *
  * Returns:
  *	int  The effective value of nitems
  */
 int uma_zone_set_max(uma_zone_t zone, int nitems);
 
 /*
  * Sets a high limit on the number of items allowed in zone's bucket cache
  *
  * Arguments:
  *      zone  The zone to limit
  *      nitems  The requested upper limit on the number of items allowed
  */
 void uma_zone_set_maxcache(uma_zone_t zone, int nitems);
 
 /*
  * Obtains the effective limit on the number of items in a zone
  *
  * Arguments:
  *	zone  The zone to obtain the effective limit from
  *
  * Return:
  *	0  No limit
  *	int  The effective limit of the zone
  */
 int uma_zone_get_max(uma_zone_t zone);
 
 /*
  * Sets a warning to be printed when limit is reached
  *
  * Arguments:
  *	zone  The zone we will warn about
  *	warning  Warning content
  *
  * Returns:
  *	Nothing
  */
 void uma_zone_set_warning(uma_zone_t zone, const char *warning);
 
 /*
  * Sets a function to run when limit is reached
  *
  * Arguments:
  *	zone  The zone to which this applies
  *	fx  The function to run
  *
  * Returns:
  *	Nothing
  */
 typedef void (*uma_maxaction_t)(uma_zone_t, int);
 void uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t);
 
 /*
  * Obtains the approximate current number of items allocated from a zone
  *
  * Arguments:
  *	zone  The zone to obtain the current allocation count from
  *
  * Return:
  *	int  The approximate current number of items allocated from the zone
  */
 int uma_zone_get_cur(uma_zone_t zone);
 
 /*
  * The following two routines (uma_zone_set_init/fini)
  * are used to set the backend init/fini pair which acts on an
  * object as it becomes allocated and is placed in a slab within
  * the specified zone's backing keg.  These should probably not
  * be changed once allocations have already begun, but only be set
  * immediately upon zone creation.
  */
 void uma_zone_set_init(uma_zone_t zone, uma_init uminit);
 void uma_zone_set_fini(uma_zone_t zone, uma_fini fini);
 
 /*
  * The following two routines (uma_zone_set_zinit/zfini) are
  * used to set the zinit/zfini pair which acts on an object as
  * it passes from the backing Keg's slab cache to the
  * specified Zone's bucket cache.  These should probably not
  * be changed once allocations have already begun, but only be set
  * immediately upon zone creation.
  */
 void uma_zone_set_zinit(uma_zone_t zone, uma_init zinit);
 void uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini);
 
 /*
  * Replaces the standard backend allocator for this zone.
  *
  * Arguments:
  *	zone   The zone whose backend allocator is being changed.
  *	allocf A pointer to the allocation function
  *
  * Returns:
  *	Nothing
  *
  * Discussion:
  *	This could be used to implement pageable allocation, or perhaps
  *	even DMA allocators if used in conjunction with the NOTOUCH
  *	zone flag (UMA_ZONE_NOTOUCH, formerly OFFPAGE).
  */
 
 void uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf);
 
 /*
  * Used for freeing memory provided by the allocf above
  *
  * Arguments:
  *	zone  The zone that intends to use this free routine.
  *	freef The page freeing routine.
  *
  * Returns:
  *	Nothing
  */
 
 void uma_zone_set_freef(uma_zone_t zone, uma_free freef);
 
 /*
  * These flags are settable in the allocf and visible in the freef.
  */
 #define UMA_SLAB_BOOT	0x01		/* Slab alloced from boot pages */
 #define UMA_SLAB_KERNEL	0x04		/* Slab alloced from kmem */
 #define UMA_SLAB_PRIV	0x08		/* Slab alloced from priv allocator */
 #define UMA_SLAB_OFFP	0x10		/* Slab is managed separately  */
 /* 0x02, 0x40, and 0x80 are available */
 
 /*
  * Used to pre-fill a zone with some number of items
  *
  * Arguments:
  *	zone    The zone to fill
  *	itemcnt The number of items to reserve
  *
  * Returns:
  *	Nothing
  *
  * NOTE: This is blocking and should only be done at startup
  */
 void uma_prealloc(uma_zone_t zone, int itemcnt);
 
 /*
  * Used to determine if a fixed-size zone is exhausted.
  *
  * Arguments:
  *	zone    The zone to check
  *
  * Returns:
  *	Non-zero if zone is exhausted.
  */
 int uma_zone_exhausted(uma_zone_t zone);
 
 /*
  * Common UMA_ZONE_PCPU zones.
  */
 extern uma_zone_t pcpu_zone_int;
 extern uma_zone_t pcpu_zone_64;
 
 /*
  * Exported statistics structures to be used by user space monitoring tools.
  * The statistics stream consists of a uma_stream_header, followed by a series
  * of alternating uma_type_header and uma_percpu_stat structures.
  */
 #define	UMA_STREAM_VERSION	0x00000001
 /*
  * Leading record of the exported statistics stream.  The layout is consumed
  * by user space, so fields must not be reordered or resized.
  */
 struct uma_stream_header {
 	uint32_t	ush_version;	/* Stream format version. */
 	uint32_t	ush_maxcpus;	/* Value of MAXCPU for stream. */
 	uint32_t	ush_count;	/* Number of records. */
 	uint32_t	_ush_pad;	/* Pad/reserved field. */
 };
 
 #define	UTH_MAX_NAME	32
 #define	UTH_ZONE_SECONDARY	0x00000001
 /*
  * Per-zone record of the exported statistics stream.  Fields marked "Keg:"
  * describe the supporting keg, fields marked "Zone:" describe the zone
  * itself.  The layout is consumed by user space, so fields must not be
  * reordered or resized.
  */
 struct uma_type_header {
 	/*
 	 * Static per-zone data, some extracted from the supporting keg.
 	 */
 	char		uth_name[UTH_MAX_NAME];
 	uint32_t	uth_align;	/* Keg: alignment. */
 	uint32_t	uth_size;	/* Keg: requested size of item. */
 	uint32_t	uth_rsize;	/* Keg: real size of item. */
 	uint32_t	uth_maxpages;	/* Keg: maximum number of pages. */
 	uint32_t	uth_limit;	/* Keg: max items to allocate. */
 
 	/*
 	 * Current dynamic zone/keg-derived statistics.
 	 */
 	uint32_t	uth_pages;	/* Keg: pages allocated. */
 	uint32_t	uth_keg_free;	/* Keg: items free. */
 	uint32_t	uth_zone_free;	/* Zone: items free. */
 	uint32_t	uth_bucketsize;	/* Zone: desired bucket size. */
 	uint32_t	uth_zone_flags;	/* Zone: flags. */
 	uint64_t	uth_allocs;	/* Zone: number of allocations. */
 	uint64_t	uth_frees;	/* Zone: number of frees. */
 	uint64_t	uth_fails;	/* Zone: number of alloc failures. */
 	uint64_t	uth_sleeps;	/* Zone: number of alloc sleeps. */
 	uint64_t	uth_xdomain;	/* Zone: Number of cross domain frees. */
 	uint64_t	_uth_reserved1[1];	/* Reserved. */
 };
 
 /*
  * Cache statistics record of the exported statistics stream; presumably
  * one is emitted per CPU for each zone (TODO confirm against the producer).
  * The layout is consumed by user space, so fields must not be reordered or
  * resized.
  */
 struct uma_percpu_stat {
 	uint64_t	ups_allocs;	/* Cache: number of allocations. */
 	uint64_t	ups_frees;	/* Cache: number of frees. */
 	uint64_t	ups_cache_free;	/* Cache: free items in cache. */
 	uint64_t	_ups_reserved[5];	/* Reserved. */
 };
 
 void uma_reclaim_wakeup(void);
 void uma_reclaim_worker(void *);
 
 unsigned long uma_limit(void);
 
 /* Return the amount of memory managed by UMA. */
 unsigned long uma_size(void);
 
 /* Return the amount of memory remaining.  May be negative. */
 long uma_avail(void);
 
 #endif	/* _VM_UMA_H_ */
Index: head/sys/vm/uma_core.c
===================================================================
--- head/sys/vm/uma_core.c	(revision 356533)
+++ head/sys/vm/uma_core.c	(revision 356534)
@@ -1,5044 +1,5049 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2019 Jeffrey Roberson <jeff@FreeBSD.org>
  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
  * Copyright (c) 2004-2006 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * uma_core.c  Implementation of the Universal Memory allocator
  *
  * This allocator is intended to replace the multitude of similar object caches
  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
  * efficient.  A primary design goal is to return unused memory to the rest of
  * the system.  This will make the system as a whole more flexible due to the
  * ability to move memory to subsystems which most need it instead of leaving
  * pools of reserved memory unused.
  *
  * The basic ideas stem from similar slab/zone based allocators whose algorithms
  * are well known.
  *
  */
 
 /*
  * TODO:
  *	- Improve memory usage for large allocations
  *	- Investigate cache size adjustments
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_param.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitset.h>
 #include <sys/domainset.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/types.h>
 #include <sys/limits.h>
 #include <sys/queue.h>
 #include <sys/malloc.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/sysctl.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/random.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/sleepqueue.h>
 #include <sys/smp.h>
 #include <sys/taskqueue.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_domainset.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_param.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_pagequeue.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 #include <vm/uma_dbg.h>
 
 #include <ddb/ddb.h>
 
 #ifdef DEBUG_MEMGUARD
 #include <vm/memguard.h>
 #endif
 
 /*
  * This is the zone and keg from which all zones are spawned.
  */
 static uma_zone_t kegs;
 static uma_zone_t zones;
 
 /* This is the zone from which all offpage uma_slab_ts are allocated. */
 static uma_zone_t slabzone;
 
 /*
  * The initial hash tables come out of this zone so they can be allocated
  * prior to malloc coming up.
  */
 static uma_zone_t hashzone;
 
 /* The boot-time adjusted value for cache line alignment. */
 int uma_align_cache = 64 - 1;
 
 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
 static MALLOC_DEFINE(M_UMA, "UMA", "UMA Misc");
 
 /*
  * Are we allowed to allocate buckets?
  */
 static int bucketdisable = 1;
 
 /* Linked list of all kegs in the system */
 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
 
 /* Linked list of all cache-only zones in the system */
 static LIST_HEAD(,uma_zone) uma_cachezones =
     LIST_HEAD_INITIALIZER(uma_cachezones);
 
 /* This RW lock protects the keg list */
 static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
 
 /*
  * Pointer to, and page count of, the pool of pages that is preallocated
  * at startup to bootstrap UMA.
  */
 static char *bootmem;
 static int boot_pages;
 
 static struct sx uma_reclaim_lock;
 
 /*
  * kmem soft limit, initialized by uma_set_limit().  Ensure that early
  * allocations don't trigger a wakeup of the reclaim thread.
  */
 unsigned long uma_kmem_limit = LONG_MAX;
 SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0,
     "UMA kernel memory soft limit");
 unsigned long uma_kmem_total;
 SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0,
     "UMA kernel memory usage");
 
 /*
  * How far has startup progressed?  The stages are declared in ascending
  * order so that code can test for "at least this far" with a relational
  * comparison (e.g. booted >= BOOT_BUCKETS).
  */
 static enum { BOOT_COLD = 0, BOOT_STRAPPED, BOOT_PAGEALLOC, BOOT_BUCKETS,
     BOOT_RUNNING } booted = BOOT_COLD;
 
 /*
  * This is the handle used to schedule events that need to happen
  * outside of the allocation fast path.
  */
 static struct callout uma_callout;
 #define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
 
 /*
  * Argument bundle handed to the zone constructor.  Passing the parameters
  * through the generic ctor "arg" pointer avoids the need for a dedicated
  * allocation function just for zones.
  */
 struct uma_zctor_args {
 	const char *name;
 	size_t size;
 	uma_ctor ctor;
 	uma_dtor dtor;
 	uma_init uminit;
 	uma_fini fini;
 	uma_import import;
 	uma_release release;
 	void *arg;
 	uma_keg_t keg;
 	int align;
 	uint32_t flags;
 };
 
 /*
  * Argument bundle for keg construction.  The members mirror the parameters
  * of uma_kcreate() one for one.
  * NOTE(review): presumably passed as the keg ctor argument - confirm.
  */
 struct uma_kctor_args {
 	uma_zone_t zone;
 	size_t size;
 	uma_init uminit;
 	uma_fini fini;
 	int align;
 	uint32_t flags;
 };
 
 /*
  * Describes one size class of bucket: the zone that buckets of this class
  * are allocated from, its name, and its sizing parameters.
  */
 struct uma_bucket_zone {
 	uma_zone_t	ubz_zone;	/* Backing zone, filled in by bucket_init(). */
 	char		*ubz_name;	/* Name given to the backing zone. */
 	int		ubz_entries;	/* Number of items it can hold. */
 	int		ubz_maxsize;	/* Maximum allocation size per-item. */
 };
 
 /*
  * Compute the actual number of bucket entries to pack them in power
  * of two sizes for more efficient space utilization.
  *
  * "n" is the total footprint of the bucket measured in pointer-sized
  * slots; the bucket header is subtracted from that footprint and the
  * remainder is the number of item pointers that fit.
  */
 #define	BUCKET_SIZE(n)						\
     (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
 
 #define	BUCKET_MAX	BUCKET_SIZE(256)
 #define	BUCKET_MIN	BUCKET_SIZE(4)
 
 /*
  * Table of bucket size classes, ordered by increasing entry count and
  * decreasing per-item size limit.  The final entry has ubz_entries == 0
  * and acts as the terminator that every walker of this table tests for.
  */
 struct uma_bucket_zone bucket_zones[] = {
 	{ NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
 	{ NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
 	{ NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
 	{ NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
 	{ NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
 	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
 	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
 	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
 	{ NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
 	{ NULL, NULL, 0}
 };
 
 /*
  * Flags and enumerations to be passed to internal functions.
  *
  * zfreeskip is the last argument of zone_free_item().
  * NOTE(review): the names suggest each value selects a step of the free
  * path to skip (counter update, dtor, fini) - confirm against
  * zone_free_item().
  */
 enum zfreeskip {
 	SKIP_NONE =	0,
 	SKIP_CNT =	0x00000001,
 	SKIP_DTOR =	0x00010000,
 	SKIP_FINI =	0x00020000,
 };
 
 /* Prototypes.. */
 
 int	uma_startup_count(int);
 void	uma_startup(void *, int);
 void	uma_startup1(void);
 void	uma_startup2(void);
 
 static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void page_free(void *, vm_size_t, uint8_t);
 static void pcpu_page_free(void *, vm_size_t, uint8_t);
 static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
 static void cache_drain(uma_zone_t);
 static void bucket_drain(uma_zone_t, uma_bucket_t);
 static void bucket_cache_reclaim(uma_zone_t zone, bool);
 static int keg_ctor(void *, int, void *, int);
 static void keg_dtor(void *, int, void *);
 static int zone_ctor(void *, int, void *, int);
 static void zone_dtor(void *, int, void *);
 static int zero_init(void *, int, int);
 static void keg_small_init(uma_keg_t keg);
 static void keg_large_init(uma_keg_t keg);
 static void zone_foreach(void (*zfunc)(uma_zone_t, void *), void *);
 static void zone_timeout(uma_zone_t zone, void *);
 static int hash_alloc(struct uma_hash *, u_int);
 static int hash_expand(struct uma_hash *, struct uma_hash *);
 static void hash_free(struct uma_hash *hash);
 static void uma_timeout(void *);
 static void uma_startup3(void);
 static void *zone_alloc_item(uma_zone_t, void *, int, int);
 static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
 static int zone_alloc_limit(uma_zone_t zone, int count, int flags);
 static void zone_free_limit(uma_zone_t zone, int count);
 static void bucket_enable(void);
 static void bucket_init(void);
 static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
 static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
 static void bucket_zone_drain(void);
 static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
 static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
 static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
 static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
     uma_fini fini, int align, uint32_t flags);
 static int zone_import(void *, void **, int, int, int);
 static void zone_release(void *, void **, int);
 static bool cache_alloc(uma_zone_t, uma_cache_t, void *, int);
 static bool cache_free(uma_zone_t, uma_cache_t, void *, void *, int);
 
 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS);
 
 static uint64_t uma_zone_get_allocs(uma_zone_t zone);
 
 #ifdef INVARIANTS
 static uint64_t uma_keg_get_allocs(uma_keg_t zone);
 static inline struct noslabbits *slab_dbg_bits(uma_slab_t slab, uma_keg_t keg);
 
 static bool uma_dbg_kskip(uma_keg_t keg, void *mem);
 static bool uma_dbg_zskip(uma_zone_t zone, void *mem);
 static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
 static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
 
 static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD, 0,
     "Memory allocation debugging");
 
 static u_int dbg_divisor = 1;
 SYSCTL_UINT(_vm_debug, OID_AUTO, divisor,
     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0,
     "Debug & thrash every this item in memory allocator");
 
 static counter_u64_t uma_dbg_cnt = EARLY_COUNTER;
 static counter_u64_t uma_skip_cnt = EARLY_COUNTER;
 SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD,
     &uma_dbg_cnt, "memory items debugged");
 SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD,
     &uma_skip_cnt, "memory items skipped, not debugged");
 #endif
 
 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
 
 SYSCTL_NODE(_vm, OID_AUTO, uma, CTLFLAG_RW, 0, "Universal Memory Allocator");
 
 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
 
 SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
     0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
 
 static int zone_warnings = 1;
 SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
     "Warn when UMA zones becomes full");
 
 /*
  * Re-evaluate whether it is safe to hand out buckets.
  *
  * bucketdisable is set from vm_page_count_min(); bucket_alloc() refuses to
  * allocate while it is non-zero.  Must not be called before the boot state
  * has reached BOOT_BUCKETS.
  */
 static void
 bucket_enable(void)
 {
 
 	KASSERT(booted >= BOOT_BUCKETS, ("Bucket enable before init"));
 	bucketdisable = vm_page_count_min();
 }
 
 /*
  * Create the backing zone for every size class listed in bucket_zones.
  *
  * The item size of each zone is the bucket header, rounded up to pointer
  * alignment, followed by one pointer slot per bucket entry.
  */
 static void
 bucket_init(void)
 {
 	struct uma_bucket_zone *bz;
 	int bytes;
 
 	bz = bucket_zones;
 	while (bz->ubz_entries != 0) {
 		bytes = roundup(sizeof(struct uma_bucket), sizeof(void *));
 		bytes += sizeof(void *) * bz->ubz_entries;
 		bz->ubz_zone = uma_zcreate(bz->ubz_name, bytes,
 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET |
 		    UMA_ZONE_FIRSTTOUCH);
 		bz++;
 	}
 }
 
 /*
  * Map a desired entry count to a bucket size class.
  *
  * Returns the first class able to hold at least "entries" items, or the
  * largest class when none is big enough.
  */
 static struct uma_bucket_zone *
 bucket_zone_lookup(int entries)
 {
 	struct uma_bucket_zone *bz;
 
 	for (bz = bucket_zones; bz->ubz_entries != 0; bz++) {
 		if (entries <= bz->ubz_entries)
 			return (bz);
 	}
 
 	/* Ran onto the terminator: step back to the last real class. */
 	return (bz - 1);
 }
 
 /*
  * Find the largest bucket size class whose per-CPU buckets, summed over
  * all CPUs, hold no more than "nitems" items.
  *
  * Two buckets per CPU are assumed, plus a third (the cross-domain bucket)
  * for UMA_ZONE_FIRSTTOUCH zones.  Returns NULL when even the smallest
  * class would exceed nitems.
  */
 static struct uma_bucket_zone *
 bucket_zone_max(uma_zone_t zone, int nitems)
 {
 	struct uma_bucket_zone *bz;
 	int per_cpu;
 
 	per_cpu = (zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 ? 3 : 2;
 
 	bz = &bucket_zones[0];
 	while (bz->ubz_entries != 0 &&
 	    bz->ubz_entries * per_cpu * mp_ncpus <= nitems)
 		bz++;
 
 	if (bz == &bucket_zones[0])
 		return (NULL);
 	return (bz - 1);
 }
 
 /*
  * Choose a bucket entry count for items of "size" bytes.
  *
  * Items larger than the first class's per-item limit get a scaled-down
  * count (never less than one).  Otherwise the result is the entry count of
  * the last class whose per-item limit is still at least "size".
  */
 static int
 bucket_select(int size)
 {
 	struct uma_bucket_zone *first, *bz;
 
 	first = &bucket_zones[0];
 	if (size > first->ubz_maxsize)
 		return (MAX((first->ubz_maxsize * first->ubz_entries) / size,
 		    1));
 
 	bz = first;
 	while (bz->ubz_entries != 0 && bz->ubz_maxsize >= size)
 		bz++;
 	return ((bz - 1)->ubz_entries);
 }
 
 /*
  * Allocate an empty bucket sized for the given zone.
  *
  * Arguments:
  *	zone   The zone the bucket will serve.
  *	udata  Recursion cookie; only consulted when zone is itself a
  *	       bucket zone, otherwise it is replaced by the zone's flags.
  *	flags  Allocation flags forwarded to uma_zalloc_arg().
  *
  * Returns:
  *	A bucket with ub_cnt == 0 and ub_entries set, or NULL when buckets
  *	are disabled, further recursion is refused, or the allocation fails.
  */
 static uma_bucket_t
 bucket_alloc(uma_zone_t zone, void *udata, int flags)
 {
 	struct uma_bucket_zone *ubz;
 	uma_bucket_t bucket;
 
 	/*
 	 * This is to stop us from allocating per cpu buckets while we're
 	 * running out of vm.boot_pages.  Otherwise, we would exhaust the
 	 * boot pages.  This also prevents us from allocating buckets in
 	 * low memory situations.
 	 */
 	if (bucketdisable)
 		return (NULL);
 	/*
 	 * To limit bucket recursion we store the original zone flags
 	 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
 	 * NOVM flag to persist even through deep recursions.  We also
 	 * store ZFLAG_BUCKET once we have recursed attempting to allocate
 	 * a bucket for a bucket zone so we do not allow infinite bucket
 	 * recursion.  This cookie will even persist to frees of unused
 	 * buckets via the allocation path or bucket allocations in the
 	 * free path.
 	 */
 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
 		udata = (void *)(uintptr_t)zone->uz_flags;
 	else {
 		if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
 			return (NULL);
 		udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
 	}
 	if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
 		flags |= M_NOVM;
 	ubz = bucket_zone_lookup(zone->uz_bucket_size);
 	/*
 	 * If the selected bucket zone is the very zone being served, move
 	 * on to the next larger class when one exists.
 	 */
 	if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
 		ubz++;
 	bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
 	if (bucket) {
 #ifdef INVARIANTS
 		/* Cleared so the push/pop assertions start from NULL slots. */
 		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
 #endif
 		bucket->ub_cnt = 0;
 		bucket->ub_entries = ubz->ubz_entries;
 	}
 
 	return (bucket);
 }
 
 /*
  * Return an empty bucket to the bucket zone matching its capacity.
  *
  * For ordinary zones the caller's udata is replaced by the zone's flags,
  * mirroring the cookie built by bucket_alloc().  The bucket must already
  * have been emptied.
  */
 static void
 bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
 {
 	uma_zone_t bzone;
 
 	KASSERT(bucket->ub_cnt == 0,
 	    ("bucket_free: Freeing a non free bucket."));
 
 	bzone = bucket_zone_lookup(bucket->ub_entries)->ubz_zone;
 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
 		udata = (void *)(uintptr_t)zone->uz_flags;
 	uma_zfree_arg(bzone, bucket, udata);
 }
 
 /*
  * Ask every bucket zone to drain itself (UMA_RECLAIM_DRAIN).
  */
 static void
 bucket_zone_drain(void)
 {
 	struct uma_bucket_zone *bz;
 
 	bz = bucket_zones;
 	while (bz->ubz_entries != 0) {
 		uma_zone_reclaim(bz->ubz_zone, UMA_RECLAIM_DRAIN);
 		bz++;
 	}
 }
 
 /*
  * Take the first full bucket, if any, off a domain's bucket cache.
  *
  * The domain and zone item counts are reduced by the bucket's contents and
  * the domain's low-water mark for the current interval is lowered when the
  * new item count drops below it.  Returns NULL when the cache is empty.
  * The zone lock must be held.
  */
 static uma_bucket_t
 zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom)
 {
 	uma_bucket_t head;
 
 	ZONE_LOCK_ASSERT(zone);
 
 	head = TAILQ_FIRST(&zdom->uzd_buckets);
 	if (head == NULL)
 		return (NULL);
 
 	MPASS(zdom->uzd_nitems >= head->ub_cnt);
 	TAILQ_REMOVE(&zdom->uzd_buckets, head, ub_link);
 	zdom->uzd_nitems -= head->ub_cnt;
 	if (zdom->uzd_nitems < zdom->uzd_imin)
 		zdom->uzd_imin = zdom->uzd_nitems;
 	zone->uz_bkt_count -= head->ub_cnt;
 	return (head);
 }
 
 /*
  * Add a full bucket to a domain's bucket cache.
  *
  * When "ws" is true the bucket counts toward the working set: it goes to
  * the head of the list and may raise the interval's high-water mark.
  * Otherwise it is appended to the tail and the high-water mark is left
  * alone.  The zone lock must be held.
  */
 static void
 zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
     const bool ws)
 {
 
 	ZONE_LOCK_ASSERT(zone);
 	KASSERT(!ws || zone->uz_bkt_count < zone->uz_bkt_max,
 	    ("%s: zone %p overflow", __func__, zone));
 
 	if (!ws) {
 		TAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
 	} else {
 		TAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
 	}
 	zdom->uzd_nitems += bucket->ub_cnt;
 	if (ws) {
 		if (zdom->uzd_nitems > zdom->uzd_imax)
 			zdom->uzd_imax = zdom->uzd_nitems;
 	}
 	zone->uz_bkt_count += bucket->ub_cnt;
 }
 
 /*
  * Remove and return the most recently stored item of a per-CPU cache
  * bucket, and count the allocation.  Must run inside a critical section.
  */
 static inline void *
 cache_bucket_pop(uma_cache_t cache, uma_cache_bucket_t bucket)
 {
 	uma_bucket_t b;
 	void *item;
 
 	CRITICAL_ASSERT(curthread);
 
 	b = bucket->ucb_bucket;
 	item = b->ub_bucket[--bucket->ucb_cnt];
 #ifdef INVARIANTS
 	b->ub_bucket[bucket->ucb_cnt] = NULL;
 	KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
 #endif
 	cache->uc_allocs++;
 
 	return (item);
 }
 
 /*
  * Store an item in the next free slot of a per-CPU cache bucket and count
  * the free.  Must run inside a critical section.
  */
 static inline void
 cache_bucket_push(uma_cache_t cache, uma_cache_bucket_t bucket, void *item)
 {
 	uma_bucket_t b;
 
 	CRITICAL_ASSERT(curthread);
 
 	b = bucket->ucb_bucket;
 	KASSERT(b->ub_bucket[bucket->ucb_cnt] == NULL,
 	    ("uma_zfree: Freeing to non free bucket index."));
 
 	b->ub_bucket[bucket->ucb_cnt++] = item;
 	cache->uc_frees++;
 }
 
 /*
  * Detach the UMA bucket, if any, from a per-CPU cache bucket.
  *
  * The cached item count is written back into the bucket before the
  * per-CPU slot is cleared.  Returns the detached bucket or NULL.
  */
 static inline uma_bucket_t
 cache_bucket_unload(uma_cache_bucket_t bucket)
 {
 	uma_bucket_t detached;
 
 	detached = bucket->ucb_bucket;
 	if (detached == NULL)
 		return (NULL);
 
 	MPASS(detached->ub_entries == bucket->ucb_entries);
 	detached->ub_cnt = bucket->ucb_cnt;
 	bucket->ucb_bucket = NULL;
 	bucket->ucb_cnt = 0;
 	bucket->ucb_entries = 0;
 
 	return (detached);
 }
 
 /* Detach and return the bucket held in the cache's alloc slot, if any. */
 static inline uma_bucket_t
 cache_bucket_unload_alloc(uma_cache_t cache)
 {
 
 	return (cache_bucket_unload(&cache->uc_allocbucket));
 }
 
 /* Detach and return the bucket held in the cache's free slot, if any. */
 static inline uma_bucket_t
 cache_bucket_unload_free(uma_cache_t cache)
 {
 
 	return (cache_bucket_unload(&cache->uc_freebucket));
 }
 
 /* Detach and return the bucket held in the cache's cross slot, if any. */
 static inline uma_bucket_t
 cache_bucket_unload_cross(uma_cache_t cache)
 {
 
 	return (cache_bucket_unload(&cache->uc_crossbucket));
 }
 
 /*
  * Load a bucket into a per-cpu cache bucket.
  *
  * The slot must currently be empty.  The bucket's item count and capacity
  * are copied into the per-CPU slot.  Must run inside a critical section.
  */
 static inline void
 cache_bucket_load(uma_cache_bucket_t bucket, uma_bucket_t b)
 {
 
 	CRITICAL_ASSERT(curthread);
 	MPASS(bucket->ucb_bucket == NULL);
 
 	bucket->ucb_bucket = b;
 	bucket->ucb_cnt = b->ub_cnt;
 	bucket->ucb_entries = b->ub_entries;
 }
 
 /* Install b in the cache's (empty) alloc slot. */
 static inline void
 cache_bucket_load_alloc(uma_cache_t cache, uma_bucket_t b)
 {
 
 	cache_bucket_load(&cache->uc_allocbucket, b);
 }
 
 /* Install b in the cache's (empty) free slot. */
 static inline void
 cache_bucket_load_free(uma_cache_t cache, uma_bucket_t b)
 {
 
 	cache_bucket_load(&cache->uc_freebucket, b);
 }
 
 #ifdef NUMA
 /* Install b in the cache's (empty) cross slot.  Only built with NUMA. */
 static inline void 
 cache_bucket_load_cross(uma_cache_t cache, uma_bucket_t b)
 {
 
 	cache_bucket_load(&cache->uc_crossbucket, b);
 }
 #endif
 
 /*
  * Copy and preserve ucb_spare.
  *
  * Only the bucket pointer, capacity and count are copied from b2 to b1;
  * a whole-structure assignment is avoided so that any other member of b1
  * keeps its value.
  */
 static inline void
 cache_bucket_copy(uma_cache_bucket_t b1, uma_cache_bucket_t b2)
 {
 
 	b1->ucb_bucket = b2->ucb_bucket;
 	b1->ucb_entries = b2->ucb_entries;
 	b1->ucb_cnt = b2->ucb_cnt;
 }
 
 /*
  * Exchange the contents of two per-CPU cache buckets using a temporary
  * and cache_bucket_copy().  Must run inside a critical section.
  */
 static inline void
 cache_bucket_swap(uma_cache_bucket_t b1, uma_cache_bucket_t b2)
 {
 	struct uma_cache_bucket saved;
 
 	CRITICAL_ASSERT(curthread);
 
 	cache_bucket_copy(&saved, b1);
 	cache_bucket_copy(b1, b2);
 	cache_bucket_copy(b2, &saved);
 }
 
 /*
  * Print the zone's warning message, rate-limited to once per 300 seconds
  * per zone.  Nothing is printed when zone warnings are turned off or the
  * zone has no warning message.
  */
 static void
 zone_log_warning(uma_zone_t zone)
 {
 	static const struct timeval warninterval = { 300, 0 };
 
 	if (zone_warnings == 0 || zone->uz_warning == NULL)
 		return;
 	if (!ratecheck(&zone->uz_ratecheck, &warninterval))
 		return;
 
 	printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
 }
 
 /*
  * Queue the zone's maxaction task on taskqueue_thread, provided a task
  * function has been set.
  */
 static inline void
 zone_maxaction(uma_zone_t zone)
 {
 
 	if (zone->uz_maxaction.ta_func == NULL)
 		return;
 	taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
 }
 
 /*
  * Routine called by timeout which is used to fire off some time interval
  * based calculations.  (stats, hash size, etc.)
  *
  * Each run re-evaluates whether buckets may be allocated, runs
  * zone_timeout() on every zone, and re-arms the callout for another
  * UMA_TIMEOUT seconds.
  *
  * Arguments:
  *	unused  Unused
  *
  * Returns:
  *	Nothing
  */
 static void
 uma_timeout(void *unused)
 {
 	bucket_enable();
 	zone_foreach(zone_timeout, NULL);
 
 	/* Reschedule this event */
 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 }
 
 /*
  * Fold the latest interval into the working set size estimate of a zone
  * domain's bucket cache and start a new interval.
  *
  * The new estimate is a weighted average giving the just-finished
  * interval (imax - imin) four fifths of the weight and the previous
  * estimate one fifth.  The constants are somewhat arbitrary; with an
  * update period of 20s (UMA_TIMEOUT) the estimate is dominated by zone
  * activity over the last 100s.
  */
 static void
 zone_domain_update_wss(uma_zone_domain_t zdom)
 {
 	long interval;
 
 	MPASS(zdom->uzd_imax >= zdom->uzd_imin);
 
 	interval = zdom->uzd_imax - zdom->uzd_imin;
 	/* Both marks restart from the current item count. */
 	zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
 	zdom->uzd_wss = (zdom->uzd_wss + 4 * interval) / 5;
 }
 
 /*
  * Routine to perform timeout driven calculations.
  *
  * For zones that carry the hash flag this grows the keg's slab hash table
  * when the slab count exceeds the current hash size.  For every zone it
  * then refreshes the working set size estimate of each domain.
  *
  *  Returns nothing.
  */
 static void
 zone_timeout(uma_zone_t zone, void *unused)
 {
 	uma_keg_t keg;
 	u_int slabs, pages;
 
-	if ((zone->uz_flags & UMA_ZONE_HASH) == 0)
+	if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0)
 		goto update_wss;
 
 	keg = zone->uz_keg;
 
 	/*
 	 * Hash zones are non-numa by definition so the first domain
 	 * is the only one present.
 	 */
 	KEG_LOCK(keg, 0);
 	pages = keg->uk_domain[0].ud_pages;
 
 	/*
 	 * Expand the keg hash table.
 	 *
 	 * This is done if the number of slabs is larger than the hash size.
 	 * What I'm trying to do here is completely reduce collisions.  This
 	 * may be a little aggressive.  Should I allow for two collisions max?
 	 */
 	if ((slabs = pages / keg->uk_ppera) > keg->uk_hash.uh_hashsize) {
 		struct uma_hash newhash;
 		struct uma_hash oldhash;
 		int ret;
 
 		/*
 		 * This is so involved because allocating and freeing
 		 * while the keg lock is held will lead to deadlock.
 		 * I have to do everything in stages and check for
 		 * races.
 		 */
 		KEG_UNLOCK(keg, 0);
 		ret = hash_alloc(&newhash, 1 << fls(slabs));
 		KEG_LOCK(keg, 0);
 		if (ret) {
 			/*
 			 * Whichever table is not kept (the old one after a
 			 * successful expansion, the new one otherwise) is
 			 * freed once the keg lock has been dropped.
 			 */
 			if (hash_expand(&keg->uk_hash, &newhash)) {
 				oldhash = keg->uk_hash;
 				keg->uk_hash = newhash;
 			} else
 				oldhash = newhash;
 
 			KEG_UNLOCK(keg, 0);
 			hash_free(&oldhash);
 			goto update_wss;
 		}
 	}
 	/* No expansion needed, or hash_alloc() failed: just drop the lock. */
 	KEG_UNLOCK(keg, 0);
 
 update_wss:
 	ZONE_LOCK(zone);
 	for (int i = 0; i < vm_ndomains; i++)
 		zone_domain_update_wss(&zone->uz_domain[i]);
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * Allocate and zero fill a hash table of the requested size from the
  * appropriate backing store.
  *
  * Requests larger than UMA_HASH_SIZE_INIT are served by malloc() with
  * M_NOWAIT and may fail.  Smaller requests always get a table of
  * UMA_HASH_SIZE_INIT entries from hashzone, allocated with M_WAITOK.
  *
  * Arguments:
  *	hash  The hash structure to fill in
  *	size  Requested number of hash entries; must be a power of 2
  *
  * Returns:
  *	1 on success and 0 on failure.
  */
 static int
 hash_alloc(struct uma_hash *hash, u_int size)
 {
 	size_t alloc;
 
 	KASSERT(powerof2(size), ("hash size must be power of 2"));
 	if (size > UMA_HASH_SIZE_INIT)  {
 		hash->uh_hashsize = size;
 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
 		hash->uh_slab_hash = malloc(alloc, M_UMAHASH, M_NOWAIT);
 	} else {
 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
 		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
 		    UMA_ANYDOMAIN, M_WAITOK);
 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
 	}
 	if (hash->uh_slab_hash) {
 		bzero(hash->uh_slab_hash, alloc);
 		/* Valid as a mask because the size is a power of 2. */
 		hash->uh_hashmask = hash->uh_hashsize - 1;
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Expands the hash table for HASH zones.  This is done from zone_timeout
  * to reduce collisions.  This must not be done in the regular allocation
  * path, otherwise, we can recurse on the vm while allocating pages.
  *
  * Every slab is unlinked from the old table and rehashed into the new
  * one.  Neither table is freed here.
  *
  * Arguments:
  *	oldhash  The hash you want to expand
  *	newhash  The hash structure for the new table
  *
  * Returns:
  *	1 if the entries were moved into newhash, 0 if newhash has no table
  *	or is not larger than oldhash (in which case nothing is moved).
  */
 static int
 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
 {
 	uma_hash_slab_t slab;
 	u_int hval;
 	u_int idx;
 
 	if (!newhash->uh_slab_hash)
 		return (0);
 
 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
 		return (0);
 
 	/*
 	 * I need to investigate hash algorithms for resizing without a
 	 * full rehash.
 	 */
 
 	for (idx = 0; idx < oldhash->uh_hashsize; idx++)
 		while (!LIST_EMPTY(&oldhash->uh_slab_hash[idx])) {
 			slab = LIST_FIRST(&oldhash->uh_slab_hash[idx]);
 			LIST_REMOVE(slab, uhs_hlink);
 			hval = UMA_HASH(newhash, slab->uhs_data);
 			LIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
 			    slab, uhs_hlink);
 		}
 
 	return (1);
 }
 
 /*
  * Free a hash table to the appropriate backing store.
  *
  * A table of exactly UMA_HASH_SIZE_INIT entries goes back to hashzone;
  * any other size is released with free(9).  This mirrors the choice made
  * in hash_alloc().
  *
  * Arguments:
  *	hash  The hash whose table is freed; a NULL table is ignored
  *
  * Returns:
  *	Nothing
  */
 static void
 hash_free(struct uma_hash *hash)
 {
 	if (hash->uh_slab_hash == NULL)
 		return;
 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
 		zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
 	else
 		free(hash->uh_slab_hash, M_UMAHASH);
 }
 
 /*
  * Release every item held in a bucket back to its zone.
  *
  * The zone's fini routine, when set, runs on each item first; the items
  * are then handed to the zone's release routine in one call, and the
  * zone's item limit accounting is updated when a limit is configured.
  * The bucket itself is left allocated but empty.
  *
  * Arguments:
  *	zone   The zone to free to, must be unlocked.
  *	bucket The free/alloc bucket with items; NULL or empty is a no-op.
  *
  * Returns:
  *	Nothing
  */
 
 static void
 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
 {
 	int idx;
 
 	if (bucket == NULL)
 		return;
 	if (bucket->ub_cnt == 0)
 		return;
 
 	if (zone->uz_fini) {
 		for (idx = 0; idx < bucket->ub_cnt; idx++)
 			zone->uz_fini(bucket->ub_bucket[idx], zone->uz_size);
 	}
 	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
 	if (zone->uz_max_items > 0)
 		zone_free_limit(zone, bucket->ub_cnt);
 	bucket->ub_cnt = 0;
 }
 
 /*
  * Drains the per cpu caches for a zone.
  *
  * For every CPU the alloc, free and cross buckets are detached, emptied
  * and freed; afterwards the zone's bucket cache is fully drained.
  *
  * NOTE: This may only be called while the zone is being torn down, and not
  * during normal operation.  This is necessary in order that we do not have
  * to migrate CPUs to drain the per-CPU caches.
  *
  * Arguments:
  *	zone     The zone to drain, must be unlocked.
  *
  * Returns:
  *	Nothing
  */
 static void
 cache_drain(uma_zone_t zone)
 {
 	uma_cache_t cache;
 	uma_bucket_t bucket;
 	int cpu;
 
 	/*
 	 * XXX: It is safe to not lock the per-CPU caches, because we're
 	 * tearing down the zone anyway.  I.e., there will be no further use
 	 * of the caches at this point.
 	 *
 	 * XXX: It would be good to be able to assert that the zone is being
 	 * torn down to prevent improper use of cache_drain().
 	 */
 	CPU_FOREACH(cpu) {
 		cache = &zone->uz_cpu[cpu];
 		bucket = cache_bucket_unload_alloc(cache);
 		if (bucket != NULL) {
 			bucket_drain(zone, bucket);
 			bucket_free(zone, bucket, NULL);
 		}
 		bucket = cache_bucket_unload_free(cache);
 		if (bucket != NULL) {
 			bucket_drain(zone, bucket);
 			bucket_free(zone, bucket, NULL);
 		}
 		bucket = cache_bucket_unload_cross(cache);
 		if (bucket != NULL) {
 			bucket_drain(zone, bucket);
 			bucket_free(zone, bucket, NULL);
 		}
 	}
 	bucket_cache_reclaim(zone, true);
 }
 
 /*
  * Move a zone's bucket size halfway toward its minimum.  Internal zones
  * are left alone.  The second argument exists only to match the
  * zone_foreach() callback signature.
  */
 static void
 cache_shrink(uma_zone_t zone, void *unused)
 {
 
 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) != 0)
 		return;
 
 	ZONE_LOCK(zone);
 	zone->uz_bucket_size =
 	    (zone->uz_bucket_size + zone->uz_bucket_size_min) / 2;
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * Flush the current CPU's per-CPU cache of a zone.
  *
  * Non-empty alloc and free buckets are moved to the tail of the zone's
  * bucket cache (not counted as working set).  Empty buckets and the cross
  * bucket are released only after the critical section and zone lock have
  * been left.  Internal zones are skipped.
  *
  * The caller is expected to have bound the thread to the CPU of interest;
  * see pcpu_cache_drain_safe().  The second argument exists only to match
  * the zone_foreach() callback signature.
  */
 static void
 cache_drain_safe_cpu(uma_zone_t zone, void *unused)
 {
 	uma_cache_t cache;
 	uma_bucket_t b1, b2, b3;
 	int domain;
 
 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
 		return;
 
 	b1 = b2 = b3 = NULL;
 	ZONE_LOCK(zone);
 	critical_enter();
 	/* First-touch zones cache per domain; all others use domain 0. */
 	if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH)
 		domain = PCPU_GET(domain);
 	else
 		domain = 0;
 	cache = &zone->uz_cpu[curcpu];
 	b1 = cache_bucket_unload_alloc(cache);
 	if (b1 != NULL && b1->ub_cnt != 0) {
 		zone_put_bucket(zone, &zone->uz_domain[domain], b1, false);
 		/* Ownership moved to the bucket cache; do not free below. */
 		b1 = NULL;
 	}
 	b2 = cache_bucket_unload_free(cache);
 	if (b2 != NULL && b2->ub_cnt != 0) {
 		zone_put_bucket(zone, &zone->uz_domain[domain], b2, false);
 		b2 = NULL;
 	}
 	b3 = cache_bucket_unload_cross(cache);
 	critical_exit();
 	ZONE_UNLOCK(zone);
 	/* Anything still held here is an empty bucket (b1, b2) or b3. */
 	if (b1)
 		bucket_free(zone, b1, NULL);
 	if (b2)
 		bucket_free(zone, b2, NULL);
 	if (b3) {
 		/* The cross bucket may hold items; empty it before freeing. */
 		bucket_drain(zone, b3);
 		bucket_free(zone, b3, NULL);
 	}
 }
 
 /*
  * Safely drain the per-CPU caches of one zone, or of every zone when
  * "zone" is NULL, into the zone bucket caches.
  *
  * This is an expensive call: the calling thread binds itself to each CPU
  * in turn so that the CPU's cache buckets can be accessed from within a
  * critical section on that CPU.  The thread is unbound again before
  * returning.  The zone lock must not be held when calling this function.
  */
 static void
 pcpu_cache_drain_safe(uma_zone_t zone)
 {
 	int cpu;
 
 	/*
 	 * Polite bucket sizes shrinking was not enough, shrink aggressively.
 	 */
 	if (zone == NULL)
 		zone_foreach(cache_shrink, NULL);
 	else
 		cache_shrink(zone, NULL);
 
 	CPU_FOREACH(cpu) {
 		thread_lock(curthread);
 		sched_bind(curthread, cpu);
 		thread_unlock(curthread);
 
 		if (zone == NULL)
 			zone_foreach(cache_drain_safe_cpu, NULL);
 		else
 			cache_drain_safe_cpu(zone, NULL);
 	}
 
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
 }
 
 /*
  * Reclaim cached buckets from a zone.  All buckets are reclaimed if the
  * caller requested a drain; otherwise each per-domain cache is trimmed
  * down to its estimated working set size.
  *
  * Arguments:
  *	zone   The zone whose bucket caches are trimmed, must be unlocked.
  *	drain  true to empty the caches completely.
  *
  * NOTE(review): the trim loop lowers zdom->uzd_nitems but, unlike
  * zone_fetch_bucket(), does not lower zone->uz_bkt_count.  Confirm that
  * this is intended.
  */
 static void
 bucket_cache_reclaim(uma_zone_t zone, bool drain)
 {
 	uma_zone_domain_t zdom;
 	uma_bucket_t bucket;
 	long target, tofree;
 	int i;
 
 	for (i = 0; i < vm_ndomains; i++) {
 		/*
 		 * The cross bucket is partially filled and not part of
 		 * the item count.  Reclaim it individually here.
 		 */
 		zdom = &zone->uz_domain[i];
 		ZONE_CROSS_LOCK(zone);
 		bucket = zdom->uzd_cross;
 		zdom->uzd_cross = NULL;
 		ZONE_CROSS_UNLOCK(zone);
 		if (bucket != NULL) {
 			bucket_drain(zone, bucket);
 			bucket_free(zone, bucket, NULL);
 		}
 
 		/*
 		 * Shrink the zone bucket size to ensure that the per-CPU caches
 		 * don't grow too large.  This is done once per call (on the
 		 * first domain only), not once per domain.
 		 */
 		ZONE_LOCK(zone);
 		if (i == 0 && zone->uz_bucket_size > zone->uz_bucket_size_min)
 			zone->uz_bucket_size--;
 
 		/*
 		 * If we were asked to drain the zone, we are done only once
 		 * this bucket cache is empty.  Otherwise, we reclaim items in
 		 * excess of the zone's estimated working set size.  If the
 		 * difference nitems - imin is larger than the WSS estimate,
 		 * then the estimate will grow at the end of this interval and
 		 * we ignore the historical average.
 		 */
 		target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems -
 		    zdom->uzd_imin);
 		while (zdom->uzd_nitems > target) {
 			/* Trim from the tail of the bucket list. */
 			bucket = TAILQ_LAST(&zdom->uzd_buckets, uma_bucketlist);
 			if (bucket == NULL)
 				break;
 			tofree = bucket->ub_cnt;
 			TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
 			zdom->uzd_nitems -= tofree;
 
 			/*
 			 * Shift the bounds of the current WSS interval to avoid
 			 * perturbing the estimate.
 			 */
 			zdom->uzd_imax -= lmin(zdom->uzd_imax, tofree);
 			zdom->uzd_imin -= lmin(zdom->uzd_imin, tofree);
 
 			/*
 			 * The zone lock is dropped around the drain and free,
 			 * so the loop condition is re-read after relocking.
 			 */
 			ZONE_UNLOCK(zone);
 			bucket_drain(zone, bucket);
 			bucket_free(zone, bucket, NULL);
 			ZONE_LOCK(zone);
 		}
 		ZONE_UNLOCK(zone);
 	}
 }
 
 /*
  * Release a slab and its backing pages.
  *
  * When the keg has a fini routine it is run on items start-1 down to 0.
  * An off-page slab header is returned to the keg's slab zone, the pages
  * are handed to the keg's free function, and the UMA total is reduced by
  * the slab's size in bytes.
  *
  * Arguments:
  *	keg    The keg the slab belongs to.
  *	slab   The slab to release.
  *	start  Number of leading items on which to run fini.
  */
 static void
 keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
 {
 	uint8_t *mem;
 	int i;
 	uint8_t flags;
 
 	CTR4(KTR_UMA, "keg_free_slab keg %s(%p) slab %p, returning %d bytes",
 	    keg->uk_name, keg, slab, PAGE_SIZE * keg->uk_ppera);
 
 	/* Capture these first; the slab header may be freed below. */
 	mem = slab_data(slab, keg);
 	flags = slab->us_flags;
 	i = start;
 	if (keg->uk_fini != NULL) {
 		for (i--; i > -1; i--)
 #ifdef INVARIANTS
 		/*
 		 * trash_fini implies that dtor was trash_dtor. trash_fini
 		 * would check that memory hasn't been modified since free,
 		 * which executed trash_dtor.
 		 * That's why we need to run uma_dbg_kskip() check here,
 		 * albeit we don't make skip check for other init/fini
 		 * invocations.
 		 */
 		if (!uma_dbg_kskip(keg, slab_item(slab, keg, i)) ||
 		    keg->uk_fini != trash_fini)
 #endif
 			keg->uk_fini(slab_item(slab, keg, i), keg->uk_size);
 	}
-	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
+	if (keg->uk_flags & UMA_ZFLAG_OFFPAGE)
 		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 	keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
 	uma_total_dec(PAGE_SIZE * keg->uk_ppera);
 }
 
 /*
  * Frees pages from a keg back to the system.  This is done on demand from
  * the pageout daemon.
  *
  * Completely free slabs are unlinked from each domain while that domain's
  * keg lock is held and gathered on a private list; their memory is released
  * afterwards with no keg lock held.
  *
  * Returns nothing.
  */
 static void
 keg_drain(uma_keg_t keg)
 {
 	struct slabhead freeslabs = { 0 };
 	uma_domain_t dom;
 	uma_slab_t slab, tmp;
 	int i, n;
 
 	/*
 	 * We don't want to take pages from statically allocated kegs at this
 	 * time
 	 */
 	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
 		return;
 
 	for (i = 0; i < vm_ndomains; i++) {
 		/*
 		 * Resolve the domain before tracing: the trace below reads
 		 * dom->ud_free, so dom must already point at this domain.
 		 */
 		dom = &keg->uk_domain[i];
 		CTR4(KTR_UMA, "keg_drain %s(%p) domain %d free items: %u",
 		    keg->uk_name, keg, i, dom->ud_free);
 		n = 0;
 		KEG_LOCK(keg, i);
 		LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) {
 			/* We have nowhere to free these to. */
 			if (slab->us_flags & UMA_SLAB_BOOT)
 				continue;
-			if (keg->uk_flags & UMA_ZONE_HASH)
+			if (keg->uk_flags & UMA_ZFLAG_HASH)
 				UMA_HASH_REMOVE(&keg->uk_hash, slab);
 			n++;
 			LIST_REMOVE(slab, us_link);
 			LIST_INSERT_HEAD(&freeslabs, slab, us_link);
 		}
 		/* Account for the n slabs that were just unlinked. */
 		dom->ud_pages -= n * keg->uk_ppera;
 		dom->ud_free -= n * keg->uk_ipers;
 		KEG_UNLOCK(keg, i);
 	}
 
 	/* Release the collected slabs; every item gets finalized. */
 	while ((slab = LIST_FIRST(&freeslabs)) != NULL) {
 		LIST_REMOVE(slab, us_link);
 		keg_free_slab(keg, slab, keg->uk_ipers);
 	}
 }
 
 /*
  * Reclaim cached items from a zone and, for zones backed by a keg, free
  * slabs from that keg.
  *
  * Arguments:
  *	zone    The zone to reclaim from
  *	waitok  M_NOWAIT to give up immediately when another reclaim is
  *	        already in progress; otherwise sleep until it has finished
  *	drain   Passed through to bucket_cache_reclaim()
  */
 static void
 zone_reclaim(uma_zone_t zone, int waitok, bool drain)
 {
 
 	/*
 	 * The RECLAIMING flag interlocks with zone_dtor() so that the zone
 	 * lock can be dropped while we work.  Only dtor() should do a WAITOK
 	 * call since it is the only call that knows the structure will still
 	 * be available when it wakes up.
 	 */
 	ZONE_LOCK(zone);
 	for (;;) {
 		if ((zone->uz_flags & UMA_ZFLAG_RECLAIMING) == 0)
 			break;
 		if (waitok == M_NOWAIT) {
 			ZONE_UNLOCK(zone);
 			return;
 		}
 		msleep(zone, &zone->uz_lock, PVM, "zonedrain", 1);
 	}
 	zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
 	ZONE_UNLOCK(zone);
 
 	bucket_cache_reclaim(zone, drain);
 
 	/*
 	 * The RECLAIMING flag protects us from being freed while
 	 * we're running.  Normally the uma_rwlock would protect us but we
 	 * must be able to release and acquire the right lock for each keg.
 	 */
 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0)
 		keg_drain(zone->uz_keg);
 
 	/* Clear the interlock and wake anybody sleeping on it. */
 	ZONE_LOCK(zone);
 	zone->uz_flags &= ~UMA_ZFLAG_RECLAIMING;
 	wakeup(zone);
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * Fully drain a zone: a non-sleeping zone_reclaim() with the drain flag
  * set.  The second argument is ignored.
  */
 static void
 zone_drain(uma_zone_t zone, void *unused)
 {
 
 	zone_reclaim(zone, M_NOWAIT, true);
 }
 
 /*
  * Trim a zone: a non-sleeping zone_reclaim() with the drain flag clear.
  * The second argument is ignored.
  */
 static void
 zone_trim(uma_zone_t zone, void *unused)
 {
 
 	zone_reclaim(zone, M_NOWAIT, false);
 }
 
 /*
  * Allocates a new slab for a keg and inserts it into the partial slab list.
  * The keg should be unlocked on entry.  If the allocation succeeds it will
  * be locked on return; on failure NULL is returned without taking the lock.
  *
  * Arguments:
  *	keg     The keg to allocate the slab for
  *	zone    Passed through to the keg's page allocator and to
  *	        vsetzoneslab()
  *	domain  Domain to allocate from; must be in [0, vm_ndomains)
  *	flags   Wait flags for the item initialization routine
  *	aflags  Wait flags for the slab allocation
  *
  * Returns:
  *	The slab that was allocated or NULL if there is no memory and the
  *	caller specified M_NOWAIT.
  */
 static uma_slab_t
 keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags,
     int aflags)
 {
 	uma_domain_t dom;
 	uma_alloc allocf;
 	uma_slab_t slab;
 	unsigned long size;
 	uint8_t *mem;
 	uint8_t sflags;
 	int i;
 
 	KASSERT(domain >= 0 && domain < vm_ndomains,
 	    ("keg_alloc_slab: domain %d out of range", domain));
 
 	allocf = keg->uk_allocf;
 	slab = NULL;
 	mem = NULL;
 	/* OFFPAGE kegs keep the slab header in a separate slab zone item. */
-	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
+	if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) {
 		slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, aflags);
 		if (slab == NULL)
 			goto fail;
 	}
 
 	/*
 	 * This reproduces the old vm_zone behavior of zero filling pages the
 	 * first time they are added to a zone.
 	 *
 	 * Malloced items are zeroed in uma_zalloc.
 	 */
 
 	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
 		aflags |= M_ZERO;
 	else
 		aflags &= ~M_ZERO;
 
 	if (keg->uk_flags & UMA_ZONE_NODUMP)
 		aflags |= M_NODUMP;
 
 	/* zone is passed for legacy reasons. */
 	size = keg->uk_ppera * PAGE_SIZE;
 	mem = allocf(zone, size, domain, &sflags, aflags);
 	if (mem == NULL) {
 		/* Undo the separate header allocation made above. */
-		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
+		if (keg->uk_flags & UMA_ZFLAG_OFFPAGE)
 			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 		goto fail;
 	}
 	uma_total_inc(size);
 
 	/* For HASH zones all pages go to the same uma_domain. */
-	if ((keg->uk_flags & UMA_ZONE_HASH) != 0)
+	if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0)
 		domain = 0;
 
 	/* Point the slab into the allocated memory */
-	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
+	if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE))
 		slab = (uma_slab_t )(mem + keg->uk_pgoff);
 	else
 		((uma_hash_slab_t)slab)->uhs_data = mem;
 
 	/* Record the zone and slab for every page of the allocation. */
-	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
+	if (keg->uk_flags & UMA_ZFLAG_VTOSLAB)
 		for (i = 0; i < keg->uk_ppera; i++)
 			vsetzoneslab((vm_offset_t)mem + (i * PAGE_SIZE),
 			    zone, slab);
 
 	slab->us_freecount = keg->uk_ipers;
 	slab->us_flags = sflags;
 	slab->us_domain = domain;
 
 	BIT_FILL(keg->uk_ipers, &slab->us_free);
 #ifdef INVARIANTS
 	BIT_ZERO(keg->uk_ipers, slab_dbg_bits(slab, keg));
 #endif
 
 	/*
 	 * Run the keg's init on every item.  If one fails, only the items
 	 * initialized so far (indices below i) are finalized when the slab
 	 * is released.
 	 */
 	if (keg->uk_init != NULL) {
 		for (i = 0; i < keg->uk_ipers; i++)
 			if (keg->uk_init(slab_item(slab, keg, i),
 			    keg->uk_size, flags) != 0)
 				break;
 		if (i != keg->uk_ipers) {
 			keg_free_slab(keg, slab, i);
 			goto fail;
 		}
 	}
 	KEG_LOCK(keg, domain);
 
 	CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)",
 	    slab, keg->uk_name, keg);
 
-	if (keg->uk_flags & UMA_ZONE_HASH)
+	if (keg->uk_flags & UMA_ZFLAG_HASH)
 		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
 
 	/*
 	 * If we got a slab here it's safe to mark it partially used
 	 * and return.  We assume that the caller is going to remove
 	 * at least one item.
 	 */
 	dom = &keg->uk_domain[domain];
 	LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
 	dom->ud_pages += keg->uk_ppera;
 	dom->ud_free += keg->uk_ipers;
 
 	return (slab);
 
 fail:
 	return (NULL);
 }
 
 /*
  * Early-boot page allocator, intended to be used in place of page_alloc()
  * so that the boot time page cache can satisfy allocations before the VM
  * is ready.  Once boot has progressed far enough the keg is switched over
  * to the real allocator and the request is forwarded to it.
  */
 static void *
 startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
     int wait)
 {
 	uma_keg_t keg;
 	void *mem;
 	int pages;
 	int use_bootmem;
 
 	keg = zone->uz_keg;
 
 	/*
 	 * If we are in BOOT_BUCKETS or higher, then switch to the real
 	 * allocator.  Zones with page sized slabs switch at BOOT_PAGEALLOC.
 	 */
 	switch (booted) {
 	case BOOT_PAGEALLOC:
 		use_bootmem = (keg->uk_ppera > 1);
 		break;
 	case BOOT_BUCKETS:
 	case BOOT_RUNNING:
 		use_bootmem = 0;
 		break;
 	default:
 		use_bootmem = 1;
 		break;
 	}
 	if (!use_bootmem) {
 #ifdef UMA_MD_SMALL_ALLOC
 		if (keg->uk_ppera > 1)
 			keg->uk_allocf = page_alloc;
 		else
 			keg->uk_allocf = uma_small_alloc;
 #else
 		keg->uk_allocf = page_alloc;
 #endif
 		return (keg->uk_allocf(zone, bytes, domain, pflag, wait));
 	}
 
 	/*
 	 * Carve the request out of the small startup cache, provided it
 	 * still has enough pages remaining.
 	 */
 	pages = howmany(bytes, PAGE_SIZE);
 	KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__));
 	if (pages > boot_pages)
 		panic("UMA zone \"%s\": Increase vm.boot_pages", zone->uz_name);
 #ifdef DIAGNOSTIC
 	printf("%s from \"%s\", %d boot pages left\n", __func__, zone->uz_name,
 	    boot_pages);
 #endif
 	mem = bootmem;
 	bootmem += pages * PAGE_SIZE;
 	boot_pages -= pages;
 	*pflag = UMA_SLAB_BOOT;
 
 	return (mem);
 }
 
 /*
  * Allocates a number of pages from the system
  *
  * Arguments:
  *	zone    Unused
  *	bytes   The number of bytes requested
  *	domain  The domain the memory is requested from
  *	pflag   Set to UMA_SLAB_KERNEL
  *	wait    Shall we wait?
  *
  * Returns:
  *	A pointer to the alloced memory or possibly
  *	NULL if M_NOWAIT is set.
  */
 static void *
 page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
     int wait)
 {
 
 	*pflag = UMA_SLAB_KERNEL;
 	return ((void *)kmem_malloc_domainset(DOMAINSET_FIXED(domain), bytes,
 	    wait));
 }
 
 /*
  * Page allocator for per-CPU zones: grabs one wired page for every CPU id
  * up to mp_maxid and maps them back to back into freshly allocated KVA.
  * With NUMA, the page for a present CPU is first requested from that CPU's
  * domain, falling back to any domain.
  *
  * Returns the base address of the mapping, or NULL if a page or the KVA
  * could not be allocated (pages obtained so far are released).
  */
 static void *
 pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
     int wait)
 {
 	struct pglist alloctail;
 	vm_offset_t addr, zkva;
 	int cpu, flags;
 	vm_page_t p, p_next;
 #ifdef NUMA
 	struct pcpu *pc;
 #endif
 
 	MPASS(bytes == (mp_maxid + 1) * PAGE_SIZE);
 
 	TAILQ_INIT(&alloctail);
 	flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
 	    malloc2vm_flags(wait);
 	*pflag = UMA_SLAB_KERNEL;
 
 	cpu = 0;
 	while (cpu <= mp_maxid) {
 		p = NULL;
 #ifdef NUMA
 		/* Prefer memory local to the CPU when the CPU exists. */
 		if (!CPU_ABSENT(cpu)) {
 			pc = pcpu_find(cpu);
 			p = vm_page_alloc_domain(NULL, 0, pc->pc_domain, flags);
 		}
 #endif
 		if (p == NULL)
 			p = vm_page_alloc(NULL, 0, flags);
 		if (__predict_false(p == NULL))
 			goto fail;
 		TAILQ_INSERT_TAIL(&alloctail, p, listq);
 		cpu++;
 	}
 
 	addr = kva_alloc(bytes);
 	if (addr == 0)
 		goto fail;
 
 	/* Map the pages in allocation order, one page of KVA each. */
 	zkva = addr;
 	TAILQ_FOREACH(p, &alloctail, listq) {
 		pmap_qenter(zkva, &p, 1);
 		zkva += PAGE_SIZE;
 	}
 	return ((void*)addr);
 
 fail:
 	TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
 		vm_page_unwire_noq(p);
 		vm_page_free(p);
 	}
 	return (NULL);
 }
 
 /*
  * Allocates a number of pages that belong to no VM object and maps them
  * into the keg's private KVA range.
  *
  * Arguments:
  *	zone    The zone whose keg supplies the KVA range
  *	bytes   The number of bytes requested
  *	domain  The domain the pages are requested from
  *	flags   Set to UMA_SLAB_PRIV on success
  *	wait    Shall we wait?
  *
  * Returns:
  *	A pointer to the alloced memory or possibly
  *	NULL if M_NOWAIT is set.
  */
 static void *
 noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
     int wait)
 {
 	TAILQ_HEAD(, vm_page) alloctail;
 	u_long npages;
 	vm_offset_t retkva, zkva;
 	vm_page_t p, p_next;
 	uma_keg_t keg;
 	int req;
 
 	TAILQ_INIT(&alloctail);
 	keg = zone->uz_keg;
 
 	req = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
 	if ((wait & M_WAITOK) != 0)
 		req |= VM_ALLOC_WAITOK;
 	else
 		req |= VM_ALLOC_NOWAIT;
 
 	for (npages = howmany(bytes, PAGE_SIZE); npages > 0; npages--) {
 		p = vm_page_alloc_domain(NULL, 0, domain, req);
 		if (p == NULL)
 			goto fail;
 		/*
 		 * Since the page does not belong to an object, its
 		 * listq is unused.
 		 */
 		TAILQ_INSERT_TAIL(&alloctail, p, listq);
 	}
 
 	*flags = UMA_SLAB_PRIV;
 	/* Claim the next chunk of the keg's KVA range. */
 	retkva = keg->uk_kva +
 	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
 	zkva = retkva;
 	TAILQ_FOREACH(p, &alloctail, listq) {
 		pmap_qenter(zkva, &p, 1);
 		zkva += PAGE_SIZE;
 	}
 
 	return ((void *)retkva);
 
 fail:
 	/*
 	 * Page allocation failed, free intermediate pages and
 	 * exit.
 	 */
 	TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
 		vm_page_unwire_noq(p);
 		vm_page_free(p);
 	}
 	return (NULL);
 }
 
 /*
  * Frees a number of pages to the system
  *
  * Arguments:
  *	mem   A pointer to the memory to be freed
  *	size  The size of the memory being freed
  *	flags The original p->us_flags field; must contain UMA_SLAB_KERNEL
  *
  * Returns:
  *	Nothing
  */
 static void
 page_free(void *mem, vm_size_t size, uint8_t flags)
 {
 
 	if ((flags & UMA_SLAB_KERNEL) != 0) {
 		kmem_free((vm_offset_t)mem, size);
 		return;
 	}
 	panic("UMA: page_free used with invalid flags %x", flags);
 }
 
 /*
  * Frees pcpu zone allocations
  *
  * Each page backing the range is unwired and freed, then the mapping is
  * removed and the KVA returned.
  *
  * Arguments:
  *	mem   A pointer to the memory to be freed
  *	size  The size of the memory being freed
  *	flags The original p->us_flags field
  *
  * Returns:
  *	Nothing
  */
 static void
 pcpu_page_free(void *mem, vm_size_t size, uint8_t flags)
 {
 	vm_offset_t sva, curva;
 	vm_paddr_t paddr;
 	vm_page_t m;
 
 	MPASS(size == (mp_maxid+1)*PAGE_SIZE);
 
 	sva = (vm_offset_t)mem;
 	curva = sva;
 	while (curva < sva + size) {
 		paddr = pmap_kextract(curva);
 		m = PHYS_TO_VM_PAGE(paddr);
 		vm_page_unwire_noq(m);
 		vm_page_free(m);
 		curva += PAGE_SIZE;
 	}
 
 	pmap_qremove(sva, size >> PAGE_SHIFT);
 	kva_free(sva, size);
 }
 
 
 /*
  * Item initializer that clears the item to all zero bytes.
  *
  * Arguments/Returns follow uma_init specifications; this initializer
  * always returns 0.
  */
 static int
 zero_init(void *mem, int size, int flags)
 {
 
 	bzero(mem, size);
 
 	return (0);
 }
 
 #ifdef INVARIANTS
 /*
  * Locate the debug bitset of a slab: it starts BITSET_SIZE(uk_ipers) bytes
  * past the beginning of the slab's free bitset.
  */
 struct noslabbits *
 slab_dbg_bits(uma_slab_t slab, uma_keg_t keg)
 {
 	char *free_bits;
 
 	free_bits = (char *)&slab->us_free;
 	return ((void *)(free_bits + BITSET_SIZE(keg->uk_ipers)));
 }
 #endif
 
 /*
  * Actual size of embedded struct slab (!OFFPAGE): the fixed header plus
  * SLAB_BITSETS bitsets of nitems bits, rounded up to pointer alignment.
  */
 size_t
 slab_sizeof(int nitems)
 {
 	size_t total;
 
 	total = sizeof(struct uma_slab);
 	total += BITSET_SIZE(nitems) * SLAB_BITSETS;
 
 	return (roundup(total, UMA_ALIGN_PTR + 1));
 }
 
 /*
  * Size of memory for embedded slabs (!OFFPAGE): what remains of a
  * UMA_SLAB_SIZE slab once the header for nitems items is subtracted.
  */
 size_t
 slab_space(int nitems)
 {
 	size_t header;
 
 	header = slab_sizeof(nitems);
 	return (UMA_SLAB_SIZE - header);
 }
 
 /*
  * Compute the number of items that will fit in an embedded (!OFFPAGE) slab
  * with a given size and alignment.
  */
 int
 slab_ipers(size_t size, int align)
 {
 	int ideal;
 	int rsize;
 
 	/* Item size once rounded up to a multiple of align + 1. */
 	rsize = roundup(size, align + 1);
 
 	/*
 	 * Compute the ideal number of items that will fit in a page and
 	 * then compute the actual number based on a bitset that many
 	 * items wide.
 	 */
 	ideal = UMA_SLAB_SIZE / rsize;
 
 	return (slab_space(ideal) / rsize);
 }
 
 /*
  * Finish creating a small uma keg.  This calculates ipers, and the keg size.
  * It also decides whether the slab header is kept off-page and, if so, how
  * slabs are looked up (hash or vtoslab).
  *
  * Arguments
  *	keg  The keg we should initialize
  *
  * Returns
  *	Nothing
  */
 static void
 keg_small_init(uma_keg_t keg)
 {
 	u_int rsize;
 	u_int memused;
 	u_int wastedspace;
 	u_int shsize;
 	u_int slabsize;
 
 	/* Per-CPU kegs use one page per CPU; everything else a single slab. */
 	if (keg->uk_flags & UMA_ZONE_PCPU) {
 		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
 
 		slabsize = UMA_PCPU_ALLOC_SIZE;
 		keg->uk_ppera = ncpus;
 	} else {
 		slabsize = UMA_SLAB_SIZE;
 		keg->uk_ppera = 1;
 	}
 
 	/*
 	 * Calculate the size of each allocation (rsize) according to
 	 * alignment.  If the requested size is smaller than we have
 	 * allocation bits for we round it up.
 	 */
 	rsize = keg->uk_size;
 	if (rsize < slabsize / SLAB_MAX_SETSIZE)
 		rsize = slabsize / SLAB_MAX_SETSIZE;
 	if (rsize & keg->uk_align)
 		rsize = roundup(rsize, keg->uk_align + 1);
 	keg->uk_rsize = rsize;
 
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
 	    keg->uk_rsize < UMA_PCPU_ALLOC_SIZE,
 	    ("%s: size %u too large", __func__, keg->uk_rsize));
 
 	/*
 	 * Use a pessimistic bit count for shsize.  It may be possible to
 	 * squeeze one more item in for very particular sizes if we were
 	 * to loop and reduce the bitsize if there is waste.
 	 */
-	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
+	if (keg->uk_flags & (UMA_ZONE_NOTOUCH | UMA_ZONE_PCPU)) {
+		keg->uk_flags |= UMA_ZFLAG_OFFPAGE;
 		shsize = 0;
-	else 
+	} else
 		shsize = slab_sizeof(slabsize / rsize);
 
 	if (rsize <= slabsize - shsize)
 		keg->uk_ipers = (slabsize - shsize) / rsize;
 	else {
 		/* Handle special case when we have 1 item per slab, so
 		 * alignment requirement can be relaxed. */
 		KASSERT(keg->uk_size <= slabsize - shsize,
 		    ("%s: size %u greater than slab", __func__, keg->uk_size));
 		keg->uk_ipers = 1;
 	}
 	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE,
 	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 
 	/* Space lost per slab with the layout chosen so far. */
 	memused = keg->uk_ipers * rsize + shsize;
 	wastedspace = slabsize - memused;
 
 	/*
 	 * We can't do OFFPAGE if we're internal or if we've been
 	 * asked to not go to the VM for buckets.  If we do this we
 	 * may end up going to the VM  for slabs which we do not
 	 * want to do if we're UMA_ZFLAG_CACHEONLY as a result
 	 * of UMA_ZONE_VM, which clearly forbids it.
 	 */
 	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
-	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
+	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY)) {
+		KASSERT((keg->uk_flags & UMA_ZFLAG_OFFPAGE) == 0,
+		    ("%s: incompatible flags 0x%b", __func__, keg->uk_flags,
+		     PRINT_UMA_ZFLAGS));
 		return;
+	}
 
 	/*
 	 * See if using an OFFPAGE slab will limit our waste.  Only do
 	 * this if it permits more items per-slab.
 	 *
 	 * XXX We could try growing slabsize to limit max waste as well.
 	 * Historically this was not done because the VM could not
 	 * efficiently handle contiguous allocations.
 	 */
 	if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
 	    (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
 		keg->uk_ipers = slabsize / keg->uk_rsize;
 		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE,
 		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 		CTR6(KTR_UMA, "UMA decided we need offpage slab headers for "
 		    "keg: %s(%p), calculated wastedspace = %d, "
 		    "maximum wasted space allowed = %d, "
 		    "calculated ipers = %d, "
 		    "new wasted space = %d\n", keg->uk_name, keg, wastedspace,
 		    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
 		    slabsize - keg->uk_ipers * keg->uk_rsize);
 		/*
 		 * If we had access to memory to embed a slab header we
 		 * also have a page structure to use vtoslab() instead of
 		 * hash to find slabs.  If the zone was explicitly created
 		 * OFFPAGE we can't necessarily touch the memory.
 		 */
-		if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0)
-			keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
+		keg->uk_flags |= UMA_ZFLAG_OFFPAGE;
 	}
 
 	/* Pick the slab lookup method for off-page slab headers. */
-	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
-	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
-		keg->uk_flags |= UMA_ZONE_HASH;
+	if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0) {
+		if ((keg->uk_flags & UMA_ZONE_NOTPAGE) != 0)
+			keg->uk_flags |= UMA_ZFLAG_HASH;
+		else
+			keg->uk_flags |= UMA_ZFLAG_VTOSLAB;
+	}
 }
 
 /*
  * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
  * more complicated.
  *
  * Each slab holds exactly one item and spans as many pages as the item
  * needs.
  *
  * Arguments
  *	keg  The keg we should initialize
  *
  * Returns
  *	Nothing
  */
 static void
 keg_large_init(uma_keg_t keg)
 {
 
 	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
 	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
 
 	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
 	keg->uk_ipers = 1;
 	keg->uk_rsize = keg->uk_size;
 
 	/* Check whether we have enough space to not do OFFPAGE. */
-	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0 &&
+	if ((keg->uk_flags & UMA_ZONE_NOTOUCH) == 0 &&
 	    PAGE_SIZE * keg->uk_ppera - keg->uk_rsize <
 	    slab_sizeof(SLAB_MIN_SETSIZE)) {
 		/*
 		 * We can't do OFFPAGE if we're internal, in which case
 		 * we need an extra page per allocation to contain the
 		 * slab header.
 		 */
 		if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) == 0)
-			keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
+			keg->uk_flags |= UMA_ZFLAG_OFFPAGE;
 		else
 			keg->uk_ppera++;
 	}
 
 	/* Pick the slab lookup method for off-page slab headers. */
-	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
-	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
-		keg->uk_flags |= UMA_ZONE_HASH;
+	if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0) {
+		if ((keg->uk_flags & UMA_ZONE_NOTPAGE) != 0)
+			keg->uk_flags |= UMA_ZFLAG_HASH;
+		else
+			keg->uk_flags |= UMA_ZFLAG_VTOSLAB;
+	}
 }
 
 /*
  * Finish creating a keg whose items are spread across alignment
  * boundaries (selected by keg_ctor() for UMA_ZONE_CACHESPREAD kegs).
  * Computes rsize, ppera and ipers; the slab header is always kept
  * off-page and slabs are found through vtoslab.
  *
  * Arguments
  *	keg  The keg we should initialize
  *
  * Returns
  *	Nothing
  */
 static void
 keg_cachespread_init(uma_keg_t keg)
 {
 	int alignsize;
 	int trailer;
 	int pages;
 	int rsize;
 
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
 	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
 
 	alignsize = keg->uk_align + 1;
 	rsize = keg->uk_size;
 	/*
 	 * We want one item to start on every align boundary in a page.  To
 	 * do this we will span pages.  We will also extend the item by the
 	 * size of align if it is an even multiple of align.  Otherwise, it
 	 * would fall on the same boundary every time.
 	 */
 	if (rsize & keg->uk_align)
 		rsize = (rsize & ~keg->uk_align) + alignsize;
 	if ((rsize & alignsize) == 0)
 		rsize += alignsize;
 	/* Padding that the rounding above added to each item. */
 	trailer = rsize - keg->uk_size;
 	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
 	/* Cap the slab at 128KB worth of pages. */
 	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
 	keg->uk_rsize = rsize;
 	keg->uk_ppera = pages;
 	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
-	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
+	keg->uk_flags |= UMA_ZFLAG_OFFPAGE | UMA_ZFLAG_VTOSLAB;
 	KASSERT(keg->uk_ipers <= SLAB_MAX_SETSIZE,
 	    ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
 	    keg->uk_ipers));
 }
 
 /*
  * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
  * the keg onto the global keg list.
  *
  * Arguments/Returns follow uma_ctor specifications
  *	udata  Actually uma_kctor_args
  *
  * Always returns 0.
  */
 static int
 keg_ctor(void *mem, int size, void *udata, int flags)
 {
 	struct uma_kctor_args *arg = udata;
 	uma_keg_t keg = mem;
 	uma_zone_t zone;
 	int i;
 
 	/* Start from a clean slate, then copy the creation arguments. */
 	bzero(keg, size);
 	keg->uk_size = arg->size;
 	keg->uk_init = arg->uminit;
 	keg->uk_fini = arg->fini;
 	keg->uk_align = arg->align;
 	keg->uk_reserve = 0;
 	keg->uk_flags = arg->flags;
 	keg->uk_slabzone = NULL;
 
 	/*
 	 * We use a global round-robin policy by default.  Zones with
 	 * UMA_ZONE_FIRSTTOUCH set will use first-touch instead, in which
 	 * case the iterator is never run.
 	 */
 	keg->uk_dr.dr_policy = DOMAINSET_RR();
 	keg->uk_dr.dr_iter = 0;
 
 	/*
 	 * The master zone is passed to us at keg-creation time.
 	 */
 	zone = arg->zone;
 	keg->uk_name = zone->uz_name;
 
 	if (arg->flags & UMA_ZONE_VM)
 		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
 
 	/* UMA_ZONE_ZINIT overrides any caller-supplied init routine. */
 	if (arg->flags & UMA_ZONE_ZINIT)
 		keg->uk_init = zero_init;
 
 	if (arg->flags & UMA_ZONE_MALLOC)
-		keg->uk_flags |= UMA_ZONE_VTOSLAB;
+		keg->uk_flags |= UMA_ZFLAG_VTOSLAB;
 
-	if (arg->flags & UMA_ZONE_PCPU)
-#ifdef SMP
-		keg->uk_flags |= UMA_ZONE_OFFPAGE;
-#else
-		keg->uk_flags &= ~UMA_ZONE_PCPU;
+#ifndef SMP
+	keg->uk_flags &= ~UMA_ZONE_PCPU;
 #endif
 
 	/* Compute the slab layout: rsize, ipers, ppera and header placement. */
 	if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
 		keg_cachespread_init(keg);
 	} else {
 		if (keg->uk_size > slab_space(SLAB_MIN_SETSIZE))
 			keg_large_init(keg);
 		else
 			keg_small_init(keg);
 	}
 
 	/*
 	 * Use a first-touch NUMA policy for all kegs that pmap_extract()
 	 * will work on with the exception of critical VM structures
 	 * necessary for paging.
 	 *
 	 * Zones may override the default by specifying either.
 	 */
 #ifdef NUMA
 	if ((keg->uk_flags &
-	    (UMA_ZONE_HASH | UMA_ZONE_VM | UMA_ZONE_ROUNDROBIN)) == 0)
+	    (UMA_ZFLAG_HASH | UMA_ZONE_VM | UMA_ZONE_ROUNDROBIN)) == 0)
 		keg->uk_flags |= UMA_ZONE_FIRSTTOUCH;
 	else if ((keg->uk_flags & UMA_ZONE_FIRSTTOUCH) == 0)
 		keg->uk_flags |= UMA_ZONE_ROUNDROBIN;
 #endif
 
 	/* Off-page slab headers are allocated from the slab zone. */
-	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
+	if (keg->uk_flags & UMA_ZFLAG_OFFPAGE)
 		keg->uk_slabzone = slabzone;
 
 	/*
 	 * If we haven't booted yet we need allocations to go through the
 	 * startup cache until the vm is ready.
 	 */
 	if (booted < BOOT_PAGEALLOC)
 		keg->uk_allocf = startup_alloc;
 #ifdef UMA_MD_SMALL_ALLOC
 	else if (keg->uk_ppera == 1)
 		keg->uk_allocf = uma_small_alloc;
 #endif
 	else if (keg->uk_flags & UMA_ZONE_PCPU)
 		keg->uk_allocf = pcpu_page_alloc;
 	else
 		keg->uk_allocf = page_alloc;
 	/* The free routine is chosen to match the final allocator. */
 #ifdef UMA_MD_SMALL_ALLOC
 	if (keg->uk_ppera == 1)
 		keg->uk_freef = uma_small_free;
 	else
 #endif
 	if (keg->uk_flags & UMA_ZONE_PCPU)
 		keg->uk_freef = pcpu_page_free;
 	else
 		keg->uk_freef = page_free;
 
 	/*
 	 * Initialize keg's locks.
 	 */
 	for (i = 0; i < vm_ndomains; i++)
 		KEG_LOCK_INIT(keg, i, (arg->flags & UMA_ZONE_MTXCLASS));
 
 	/*
 	 * If we're putting the slab header in the actual page we need to
 	 * figure out where in each page it goes.  See slab_sizeof
 	 * definition.
 	 */
-	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
+	if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE)) {
 		size_t shsize;
 
 		shsize = slab_sizeof(keg->uk_ipers);
 		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - shsize;
 		/*
 		 * The only way the following is possible is if with our
 		 * UMA_ALIGN_PTR adjustments we are now bigger than
 		 * UMA_SLAB_SIZE.  I haven't checked whether this is
 		 * mathematically possible for all cases, so we make
 		 * sure here anyway.
 		 */
 		KASSERT(keg->uk_pgoff + shsize <= PAGE_SIZE * keg->uk_ppera,
 		    ("zone %s ipers %d rsize %d size %d slab won't fit",
 		    zone->uz_name, keg->uk_ipers, keg->uk_rsize, keg->uk_size));
 	}
 
-	if (keg->uk_flags & UMA_ZONE_HASH)
+	if (keg->uk_flags & UMA_ZFLAG_HASH)
 		hash_alloc(&keg->uk_hash, 0);
 
 	CTR3(KTR_UMA, "keg_ctor %p zone %s(%p)\n", keg, zone->uz_name, zone);
 
 	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
 
 	/* Publish the keg on the global list under the write lock. */
 	rw_wlock(&uma_rwlock);
 	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
 	rw_wunlock(&uma_rwlock);
 	return (0);
 }
 
 /*
  * Allocate the zone's allocs, frees and fails counters with M_WAITOK.
  * The second argument is ignored.
  */
 static void
 zone_alloc_counters(uma_zone_t zone, void *unused)
 {
 
 	zone->uz_allocs = counter_u64_alloc(M_WAITOK);
 	zone->uz_frees = counter_u64_alloc(M_WAITOK);
 	zone->uz_fails = counter_u64_alloc(M_WAITOK);
 }
 
 /*
  * Create the sysctl subtree for a zone under the static _vm_uma node.  The
  * node is named after a sanitized copy of the zone name and has "keg",
  * "limit", "domain" and "stats" children.  The second argument is ignored.
  */
 static void
 zone_alloc_sysctl(uma_zone_t zone, void *unused)
 {
 	uma_zone_domain_t zdom;
 	uma_domain_t dom;
 	uma_keg_t keg;
 	struct sysctl_oid *oid, *domainoid;
 	int domains, i, cnt;
 	static const char *nokeg = "cache zone";
 	char *c;
 
 	/*
 	 * Make a sysctl safe copy of the zone name by removing
 	 * any special characters and handling dups by appending
 	 * an index.
 	 */
 	if (zone->uz_namecnt != 0) {
 		/* Count the number of decimal digits and '_' separator. */
 		for (i = 1, cnt = zone->uz_namecnt; cnt != 0; i++)
 			cnt /= 10;
 		/* Room for the name, "_<index>" and the terminating NUL. */
 		zone->uz_ctlname = malloc(strlen(zone->uz_name) + i + 1,
 		    M_UMA, M_WAITOK);
 		sprintf(zone->uz_ctlname, "%s_%d", zone->uz_name,
 		    zone->uz_namecnt);
 	} else
 		zone->uz_ctlname = strdup(zone->uz_name, M_UMA);
 	/* Replace '.', '/', '\\', ' ' and '-' with '_'. */
 	for (c = zone->uz_ctlname; *c != '\0'; c++)
 		if (strchr("./\\ -", *c) != NULL)
 			*c = '_';
 
 	/*
 	 * Basic parameters at the root.
 	 */
 	zone->uz_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_vm_uma),
 	    OID_AUTO, zone->uz_ctlname, CTLFLAG_RD, NULL, "");
 	oid = zone->uz_oid;
 	SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "size", CTLFLAG_RD, &zone->uz_size, 0, "Allocation size");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "flags", CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE,
 	    zone, 0, sysctl_handle_uma_zone_flags, "A",
 	    "Allocator configuration flags");
 	SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "bucket_size", CTLFLAG_RD, &zone->uz_bucket_size, 0,
 	    "Desired per-cpu cache size");
 	SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "bucket_size_max", CTLFLAG_RD, &zone->uz_bucket_size_max, 0,
 	    "Maximum allowed per-cpu cache size");
 
 	/*
 	 * keg if present.
 	 */
 	/* HASH zones are reported with a single keg domain. */
-	if ((zone->uz_flags & UMA_ZONE_HASH) == 0)
+	if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0)
 		domains = vm_ndomains;
 	else
 		domains = 1;
 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
 	    "keg", CTLFLAG_RD, NULL, "");
 	keg = zone->uz_keg;
 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0) {
 		SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "name", CTLFLAG_RD, keg->uk_name, "Keg name");
 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "rsize", CTLFLAG_RD, &keg->uk_rsize, 0,
 		    "Real object size with alignment");
 		SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "ppera", CTLFLAG_RD, &keg->uk_ppera, 0,
 		    "pages per-slab allocation");
 		SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "ipers", CTLFLAG_RD, &keg->uk_ipers, 0,
 		    "items available per-slab");
 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "align", CTLFLAG_RD, &keg->uk_align, 0,
 		    "item alignment mask");
 		SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "efficiency", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
 		    keg, 0, sysctl_handle_uma_slab_efficiency, "I",
 		    "Slab utilization (100 - internal fragmentation %)");
 		domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(oid),
 		    OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
 		for (i = 0; i < domains; i++) {
 			dom = &keg->uk_domain[i];
 			oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
 			    OID_AUTO, VM_DOMAIN(i)->vmd_name, CTLFLAG_RD,
 			    NULL, "");
 			SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 			    "pages", CTLFLAG_RD, &dom->ud_pages, 0,
 			    "Total pages currently allocated from VM");
 			SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 			    "free", CTLFLAG_RD, &dom->ud_free, 0,
 			    "items free in the slab layer");
 		}
 	} else
 		SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "name", CTLFLAG_RD, nokeg, "Keg name");
 
 	/*
 	 * Information about zone limits.
 	 */
 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
 	    "limit", CTLFLAG_RD, NULL, "");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "items", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
 	    zone, 0, sysctl_handle_uma_zone_items, "QU",
 	    "current number of allocated items if limit is set");
 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "max_items", CTLFLAG_RD, &zone->uz_max_items, 0,
 	    "Maximum number of cached items");
 	SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "sleepers", CTLFLAG_RD, &zone->uz_sleepers, 0,
 	    "Number of threads sleeping at limit");
 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "sleeps", CTLFLAG_RD, &zone->uz_sleeps, 0,
 	    "Total zone limit sleeps");
 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "bucket_max", CTLFLAG_RD, &zone->uz_bkt_max, 0,
 	    "Maximum number of items in the bucket cache");
 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "bucket_cnt", CTLFLAG_RD, &zone->uz_bkt_count, 0,
 	    "Number of items in the bucket cache");
 
 	/*
 	 * Per-domain zone information.
 	 */
 	domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid),
 	    OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
 	/* Zones that are not first-touch report a single zone domain. */
 	if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0)
 		domains = 1;
 	for (i = 0; i < domains; i++) {
 		zdom = &zone->uz_domain[i];
 		oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
 		    OID_AUTO, VM_DOMAIN(i)->vmd_name, CTLFLAG_RD, NULL, "");
 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "nitems", CTLFLAG_RD, &zdom->uzd_nitems,
 		    "number of items in this domain");
 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "imax", CTLFLAG_RD, &zdom->uzd_imax,
 		    "maximum item count in this period");
 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "imin", CTLFLAG_RD, &zdom->uzd_imin,
 		    "minimum item count in this period");
 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "wss", CTLFLAG_RD, &zdom->uzd_wss,
 		    "Working set size");
 	}
 
 	/*
 	 * General statistics.
 	 */
 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
 	    "stats", CTLFLAG_RD, NULL, "");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "current", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
 	    zone, 1, sysctl_handle_uma_zone_cur, "I",
 	    "Current number of allocated items");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "allocs", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
 	    zone, 0, sysctl_handle_uma_zone_allocs, "QU",
 	    "Total allocation calls");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "frees", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
 	    zone, 0, sysctl_handle_uma_zone_frees, "QU",
 	    "Total free calls");
 	SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "fails", CTLFLAG_RD, &zone->uz_fails,
 	    "Number of allocation failures");
 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "xdomain", CTLFLAG_RD, &zone->uz_xdomain, 0,
 	    "Free calls from the wrong domain");
 }
 
 /*
  * Argument block for zone_count(): the zone name to look for and the
  * running result.
  */
 struct uma_zone_count {
 	const char	*name;	/* Zone name being searched for. */
 	int		count;	/* Largest matching uz_namecnt + 1 seen. */
 };
 
 /*
  * Per-zone callback that tracks, for zones named like the one in the
  * struct uma_zone_count passed as arg, the highest uz_namecnt + 1 seen.
  */
 static void
 zone_count(uma_zone_t zone, void *arg)
 {
 	struct uma_zone_count *cnt = arg;
 
 	if (strcmp(zone->uz_name, cnt->name) != 0)
 		return;
 
 	/*
 	 * Some zones are rapidly created with identical names and
 	 * destroyed out of order.  This can lead to gaps in the count.
 	 * Use one greater than the maximum observed for this name.
 	 */
 	if (cnt->count < zone->uz_namecnt + 1)
 		cnt->count = zone->uz_namecnt + 1;
 }
 
 /*
  * Copy the zone's item size and flags into every per-CPU cache
  * (CPU IDs 0 through mp_maxid inclusive).
  */
 static void
 zone_update_caches(uma_zone_t zone)
 {
 	uma_cache_t cache;
 	int cpu;
 
 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
 		cache = &zone->uz_cpu[cpu];
 		cache_set_uz_size(cache, zone->uz_size);
 		cache_set_uz_flags(cache, zone->uz_flags);
 	}
 }
 
 /*
  * Zone header ctor.  This initializes all fields, locks, etc.
  *
  * Arguments/Returns follow uma_ctor specifications
  *	udata  Actually uma_zctor_args
  */
 static int
 zone_ctor(void *mem, int size, void *udata, int flags)
 {
 	struct uma_zone_count cnt;
 	struct uma_zctor_args *arg = udata;
 	uma_zone_t zone = mem;
 	uma_zone_t z;
 	uma_keg_t keg;
 	int i;
 
 	bzero(zone, size);
 	zone->uz_name = arg->name;
 	zone->uz_ctor = arg->ctor;
 	zone->uz_dtor = arg->dtor;
 	zone->uz_init = NULL;
 	zone->uz_fini = NULL;
 	zone->uz_sleeps = 0;
 	zone->uz_xdomain = 0;
 	zone->uz_bucket_size = 0;
 	zone->uz_bucket_size_min = 0;
 	zone->uz_bucket_size_max = BUCKET_MAX;
 	zone->uz_flags = 0;
 	zone->uz_warning = NULL;
 	/* The domain structures follow the cpu structures. */
 	zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
 	zone->uz_bkt_max = ULONG_MAX;
 	timevalclear(&zone->uz_ratecheck);
 
 	/* Count the number of duplicate names. */
 	cnt.name = arg->name;
 	cnt.count = 0;
 	zone_foreach(zone_count, &cnt);
 	zone->uz_namecnt = cnt.count;
 	ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
 	ZONE_CROSS_LOCK_INIT(zone);
 
 	for (i = 0; i < vm_ndomains; i++)
 		TAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
 
 #ifdef INVARIANTS
 	if (arg->uminit == trash_init && arg->fini == trash_fini)
 		zone->uz_flags |= UMA_ZFLAG_TRASH | UMA_ZFLAG_CTORDTOR;
 #endif
 
 	/*
 	 * This is a pure cache zone, no kegs.
 	 */
 	if (arg->import) {
 		KASSERT((arg->flags & UMA_ZFLAG_CACHE) != 0,
 		    ("zone_ctor: Import specified for non-cache zone."));
 		if (arg->flags & UMA_ZONE_VM)
 			arg->flags |= UMA_ZFLAG_CACHEONLY;
 		zone->uz_flags = arg->flags;
 		zone->uz_size = arg->size;
 		zone->uz_import = arg->import;
 		zone->uz_release = arg->release;
 		zone->uz_arg = arg->arg;
 		rw_wlock(&uma_rwlock);
 		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
 		rw_wunlock(&uma_rwlock);
 		goto out;
 	}
 
 	/*
 	 * Use the regular zone/keg/slab allocator.
 	 */
 	zone->uz_import = zone_import;
 	zone->uz_release = zone_release;
 	zone->uz_arg = zone; 
 	keg = arg->keg;
 
 	if (arg->flags & UMA_ZONE_SECONDARY) {
 		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
 		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
 		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
 		zone->uz_init = arg->uminit;
 		zone->uz_fini = arg->fini;
 		zone->uz_flags |= UMA_ZONE_SECONDARY;
 		rw_wlock(&uma_rwlock);
 		ZONE_LOCK(zone);
 		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
 			if (LIST_NEXT(z, uz_link) == NULL) {
 				LIST_INSERT_AFTER(z, zone, uz_link);
 				break;
 			}
 		}
 		ZONE_UNLOCK(zone);
 		rw_wunlock(&uma_rwlock);
 	} else if (keg == NULL) {
 		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
 		    arg->align, arg->flags)) == NULL)
 			return (ENOMEM);
 	} else {
 		struct uma_kctor_args karg;
 		int error;
 
 		/* We should only be here from uma_startup() */
 		karg.size = arg->size;
 		karg.uminit = arg->uminit;
 		karg.fini = arg->fini;
 		karg.align = arg->align;
 		karg.flags = arg->flags;
 		karg.zone = zone;
 		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
 		    flags);
 		if (error)
 			return (error);
 	}
 
 	/* Inherit properties from the keg. */
 	zone->uz_keg = keg;
 	zone->uz_size = keg->uk_size;
 	zone->uz_flags |= (keg->uk_flags &
 	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
 
 out:
 	if (__predict_true(booted == BOOT_RUNNING)) {
 		zone_alloc_counters(zone, NULL);
 		zone_alloc_sysctl(zone, NULL);
 	} else {
 		zone->uz_allocs = EARLY_COUNTER;
 		zone->uz_frees = EARLY_COUNTER;
 		zone->uz_fails = EARLY_COUNTER;
 	}
 
 	KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
 	    (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
 	    ("Invalid zone flag combination"));
 	if (arg->flags & UMA_ZFLAG_INTERNAL)
 		zone->uz_bucket_size_max = zone->uz_bucket_size = 0;
 	if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0)
 		zone->uz_bucket_size = BUCKET_MAX;
 	else if ((arg->flags & UMA_ZONE_MINBUCKET) != 0)
 		zone->uz_bucket_size_max = zone->uz_bucket_size = BUCKET_MIN;
 	else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0)
 		zone->uz_bucket_size = 0;
 	else
 		zone->uz_bucket_size = bucket_select(zone->uz_size);
 	zone->uz_bucket_size_min = zone->uz_bucket_size;
 	if (zone->uz_dtor != NULL || zone->uz_ctor != NULL)
 		zone->uz_flags |= UMA_ZFLAG_CTORDTOR;
 	zone_update_caches(zone);
 
 	return (0);
 }
 
 /*
  * Keg header dtor.  This frees all data, destroys locks, frees the hash
  * table and removes the keg from the global list.
  *
  * Arguments/Returns follow uma_dtor specifications
  *	udata  unused
  */
 static void
 keg_dtor(void *arg, int size, void *udata)
 {
 	uma_keg_t keg;
 	uint32_t free, pages;
 	int i;
 
 	keg = (uma_keg_t)arg;
 	free = pages = 0;
 	for (i = 0; i < vm_ndomains; i++) {
 		free += keg->uk_domain[i].ud_free;
 		pages += keg->uk_domain[i].ud_pages;
 		KEG_LOCK_FINI(keg, i);
 	}
 	if (free != 0)
 		printf("Freed UMA keg (%s) was not empty (%u items). "
 		    " Lost %u pages of memory.\n",
 		    keg->uk_name ? keg->uk_name : "",
 		    free, pages);
 
 	hash_free(&keg->uk_hash);
 }
 
 /*
  * Zone header dtor.  Removes the zone's sysctl tree, drains its caches,
  * unlinks it from its zone list, reclaims its memory and, for zones that
  * own their keg, frees the keg as well.  Finally releases the counters,
  * the sysctl name and the zone locks.
  *
  * Arguments/Returns follow uma_dtor specifications
  *	udata  unused
  */
 static void
 zone_dtor(void *arg, int size, void *udata)
 {
 	uma_zone_t zone;
 	uma_keg_t keg;
 
 	zone = (uma_zone_t)arg;
 
 	sysctl_remove_oid(zone->uz_oid, 1, 1);
 
 	/* Internal zones skip the per-CPU cache drain. */
 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
 		cache_drain(zone);
 
 	rw_wlock(&uma_rwlock);
 	LIST_REMOVE(zone, uz_link);
 	rw_wunlock(&uma_rwlock);
 	/*
 	 * XXX there are some races here where
 	 * the zone can be drained but zone lock
 	 * released and then refilled before we
 	 * remove it... we don't care for now
 	 */
 	zone_reclaim(zone, M_WAITOK, true);
 	/*
 	 * We only destroy kegs from non secondary/non cache zones.
 	 */
 	if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) {
 		keg = zone->uz_keg;
 		rw_wlock(&uma_rwlock);
 		LIST_REMOVE(keg, uk_link);
 		rw_wunlock(&uma_rwlock);
 		zone_free_item(kegs, keg, NULL, SKIP_NONE);
 	}
 	counter_u64_free(zone->uz_allocs);
 	counter_u64_free(zone->uz_frees);
 	counter_u64_free(zone->uz_fails);
 	free(zone->uz_ctlname, M_UMA);
 	ZONE_LOCK_FINI(zone);
 	ZONE_CROSS_LOCK_FINI(zone);
 }
 
 /*
  * Traverses every zone in the system and calls a callback.  Zones
  * attached to kegs on uma_kegs are visited first, followed by the cache
  * zones on uma_cachezones.
  *
  * Arguments:
  *	zfunc  A pointer to a function which accepts a zone
  *		as an argument.
  *	arg    Opaque pointer passed unchanged to every zfunc call.
  *
  * Returns:
  *	Nothing
  */
 static void
 zone_foreach(void (*zfunc)(uma_zone_t, void *arg), void *arg)
 {
 	uma_keg_t keg;
 	uma_zone_t zone;
 
 	/*
 	 * Before BOOT_RUNNING we are guaranteed to be single
 	 * threaded, so locking isn't needed. Startup functions
 	 * are allowed to use M_WAITOK.
 	 */
 	if (__predict_true(booted == BOOT_RUNNING))
 		rw_rlock(&uma_rwlock);
 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
 		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
 			zfunc(zone, arg);
 	}
 	LIST_FOREACH(zone, &uma_cachezones, uz_link)
 		zfunc(zone, arg);
 	/* Only drop the lock if it was taken above. */
 	if (__predict_true(booted == BOOT_RUNNING))
 		rw_runlock(&uma_rwlock);
 }
 
 /*
  * Count how many pages do we need to bootstrap.  VM supplies
  * its need in early zones in the argument, we add up our zones,
  * which consist of the UMA Slabs, UMA Hash and 9 Bucket zones.  The
  * zone of zones and zone of kegs are accounted separately.
  *
  * As a side effect this computes the file-scope zsize and ksize (the
  * aligned sizes of a zone header and a keg header), which uma_startup()
  * uses to carve up the boot memory.
  *
  * Arguments:
  *	vm_zones  Number of zones the VM will create during early boot
  *
  * Returns:
  *	The number of boot pages required
  */
 #define	UMA_BOOT_ZONES	11
 static int zsize, ksize;
 int
 uma_startup_count(int vm_zones)
 {
 	int zones, pages;
 	u_int zppera, zipers;
 	u_int kppera, kipers;
 	size_t space, size;
 
 	/* A keg header is followed by one uma_domain per VM domain. */
 	ksize = sizeof(struct uma_keg) +
 	    (sizeof(struct uma_domain) * vm_ndomains);
 	ksize = roundup(ksize, UMA_SUPER_ALIGN);
 	/*
 	 * A zone header is followed by one cache per CPU ID (0..mp_maxid)
 	 * and one uma_zone_domain per VM domain.
 	 */
 	zsize = sizeof(struct uma_zone) +
 	    (sizeof(struct uma_cache) * (mp_maxid + 1)) +
 	    (sizeof(struct uma_zone_domain) * vm_ndomains);
 	zsize = roundup(zsize, UMA_SUPER_ALIGN);
 
 	/*
 	 * Memory for the zone of kegs and its keg, and for zone
 	 * of zones.  Allocated directly in uma_startup().
 	 */
 	pages = howmany(zsize * 2 + ksize, PAGE_SIZE);
 
 #ifdef	UMA_MD_SMALL_ALLOC
 	zones = UMA_BOOT_ZONES;
 #else
 	zones = UMA_BOOT_ZONES + vm_zones;
 	vm_zones = 0;
 #endif
 	size = slab_sizeof(SLAB_MAX_SETSIZE);
 	space = slab_space(SLAB_MAX_SETSIZE);
 
 	/* Memory for the rest of startup zones, UMA and VM, ... */
 	if (zsize > space) {
 		/* See keg_large_init(). */
 		zppera = howmany(zsize + slab_sizeof(1), PAGE_SIZE);
 		zipers = 1;
 		zones += vm_zones;
 	} else {
 		zppera = 1;
 		zipers = space / zsize;
 	}
 	pages += howmany(zones, zipers) * zppera;
 
 	/* ... and their kegs. Note that zone of zones allocates a keg! */
 	if (ksize > space) {
 		/* See keg_large_init(). */
 		kppera = howmany(ksize + slab_sizeof(1), PAGE_SIZE);
 		kipers = 1;
 	} else {
 		kppera = 1;
 		kipers = space / ksize;
 	}
 	pages += howmany(zones + 1, kipers) * kppera;
 
 	/*
 	 * Allocate an additional slab for zones and kegs on NUMA
 	 * systems.  The round-robin allocation policy will populate at
 	 * least one slab per-domain.
 	 */
 	pages += (vm_ndomains - 1) * (zppera + kppera);
 
 	return (pages);
 }
 
 /*
  * First stage of UMA bootstrap.  Carves the zone of zones, the zone of
  * kegs and the master keg out of the start of 'mem', constructs them by
  * hand, records the remaining boot pages, then creates the slab header
  * and hash zones and advances 'booted' to BOOT_STRAPPED.
  *
  * Arguments:
  *	mem     Start of the boot page memory
  *	npages  Number of boot pages available at 'mem'
  *
  * Relies on zsize and ksize as computed by uma_startup_count().
  */
 void
 uma_startup(void *mem, int npages)
 {
 	struct uma_zctor_args args;
 	uma_keg_t masterkeg;
 	uintptr_t m;
 
 #ifdef DIAGNOSTIC
 	printf("Entering %s with %d boot pages configured\n", __func__, npages);
 #endif
 
 	rw_init(&uma_rwlock, "UMA lock");
 
 	/* Use bootpages memory for the zone of zones and zone of kegs. */
 	m = (uintptr_t)mem;
 	zones = (uma_zone_t)m;
 	m += zsize;
 	kegs = (uma_zone_t)m;
 	m += zsize;
 	masterkeg = (uma_keg_t)m;
 	m += ksize;
 	/* Whatever follows the next page boundary stays in the boot pool. */
 	m = roundup(m, PAGE_SIZE);
 	npages -= (m - (uintptr_t)mem) / PAGE_SIZE;
 	mem = (void *)m;
 
 	/* "manually" create the initial zone */
 	memset(&args, 0, sizeof(args));
 	args.name = "UMA Kegs";
 	args.size = ksize;
 	args.ctor = keg_ctor;
 	args.dtor = keg_dtor;
 	args.uminit = zero_init;
 	args.fini = NULL;
 	args.keg = masterkeg;
 	args.align = UMA_SUPER_ALIGN - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
 	zone_ctor(kegs, zsize, &args, M_WAITOK);
 
 	bootmem = mem;
 	boot_pages = npages;
 
 	/* With args.keg == NULL, zone_ctor() allocates the keg from 'kegs'. */
 	args.name = "UMA Zones";
 	args.size = zsize;
 	args.ctor = zone_ctor;
 	args.dtor = zone_dtor;
 	args.uminit = zero_init;
 	args.fini = NULL;
 	args.keg = NULL;
 	args.align = UMA_SUPER_ALIGN - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
 	zone_ctor(zones, zsize, &args, M_WAITOK);
 
 	/* Now make a zone for slab headers */
 	slabzone = uma_zcreate("UMA Slabs", sizeof(struct uma_hash_slab),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 
 	hashzone = uma_zcreate("UMA Hash",
 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 
 	booted = BOOT_STRAPPED;
 }
 
 /*
  * Second stage of UMA bootstrap: advances 'booted' to BOOT_PAGEALLOC.
  */
 void
 uma_startup1(void)
 {
 
 #ifdef DIAGNOSTIC
 	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
 #endif
 	booted = BOOT_PAGEALLOC;
 }
 
 /*
  * Third stage of UMA bootstrap: initializes uma_reclaim_lock and the
  * bucket zones, advances 'booted' to BOOT_BUCKETS and enables buckets.
  * Zone creation takes uma_reclaim_lock only once 'booted' has reached
  * BOOT_BUCKETS.
  */
 void
 uma_startup2(void)
 {
 
 #ifdef DIAGNOSTIC
 	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
 #endif
 	sx_init(&uma_reclaim_lock, "umareclaim");
 	bucket_init();
 	booted = BOOT_BUCKETS;
 	bucket_enable();
 }
 
 /*
  * Final stage of UMA bootstrap.  Allocates the counters and sysctl nodes
  * for every zone created so far (zone_ctor() defers both until
  * BOOT_RUNNING), initializes and arms our callout handle, and advances
  * 'booted' to BOOT_RUNNING.
  */
 static void
 uma_startup3(void)
 {
 
 #ifdef INVARIANTS
 	TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor);
 	uma_dbg_cnt = counter_u64_alloc(M_WAITOK);
 	uma_skip_cnt = counter_u64_alloc(M_WAITOK);
 #endif
 	zone_foreach(zone_alloc_counters, NULL);
 	zone_foreach(zone_alloc_sysctl, NULL);
 	callout_init(&uma_callout, 1);
 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 	booted = BOOT_RUNNING;
 }
 
 /*
  * Allocate and construct a keg for 'zone' from the zone of kegs.
  * UMA_ALIGN_CACHE is translated to the current uma_align_cache value.
  * Returns whatever zone_alloc_item() returns.
  */
 static uma_keg_t
 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
 		int align, uint32_t flags)
 {
 	struct uma_kctor_args kargs;
 
 	if (align == UMA_ALIGN_CACHE)
 		align = uma_align_cache;
 
 	kargs.zone = zone;
 	kargs.size = size;
 	kargs.uminit = uminit;
 	kargs.fini = fini;
 	kargs.align = align;
 	kargs.flags = flags;
 
 	return (zone_alloc_item(kegs, &kargs, UMA_ANYDOMAIN, M_WAITOK));
 }
 
 /* Public functions */
 /*
  * See uma.h.  Records the alignment substituted for UMA_ALIGN_CACHE;
  * passing UMA_ALIGN_CACHE itself leaves the current value untouched.
  */
 void
 uma_set_align(int align)
 {
 
 	if (align == UMA_ALIGN_CACHE)
 		return;
 	uma_align_cache = align;
 }
 
 /*
  * See uma.h
  *
  * Packs the caller's parameters into a uma_zctor_args and allocates the
  * new zone from the zone of zones, which runs zone_ctor() on it.
  * 'align' must be one less than a power of two.
  */
 uma_zone_t
 uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
 		uma_init uminit, uma_fini fini, int align, uint32_t flags)
 
 {
 	struct uma_zctor_args args;
 	uma_zone_t res;
 	bool locked;
 
 	KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"",
 	    align, name));
 
 	/* This stuff is essential for the zone ctor */
 	memset(&args, 0, sizeof(args));
 	args.name = name;
 	args.size = size;
 	args.ctor = ctor;
 	args.dtor = dtor;
 	args.uminit = uminit;
 	args.fini = fini;
 #ifdef  INVARIANTS
 	/*
 	 * Inject procedures which check for memory use after free if we are
 	 * allowed to scramble the memory while it is not allocated.  This
 	 * requires that: UMA is actually able to access the memory, no init
 	 * or fini procedures, no dependency on the initial value of the
 	 * memory, and no (legitimate) use of the memory after free.  Note,
 	 * the ctor and dtor do not need to be empty.
-	 *
-	 * XXX UMA_ZONE_OFFPAGE.
 	 */
-	if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
-	    uminit == NULL && fini == NULL) {
+	if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOTOUCH |
+	    UMA_ZONE_NOFREE))) && uminit == NULL && fini == NULL) {
 		args.uminit = trash_init;
 		args.fini = trash_fini;
 	}
 #endif
 	args.align = align;
 	args.flags = flags;
 	args.keg = NULL;
 
 	/*
 	 * uma_reclaim_lock is initialized in uma_startup2() just before
 	 * 'booted' reaches BOOT_BUCKETS, so it is only taken from then on.
 	 */
 	if (booted < BOOT_BUCKETS) {
 		locked = false;
 	} else {
 		sx_slock(&uma_reclaim_lock);
 		locked = true;
 	}
 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
 	if (locked)
 		sx_sunlock(&uma_reclaim_lock);
 	return (res);
 }
 
 /*
  * See uma.h.  Creates a secondary zone layered on the keg of 'master':
  * item size, alignment and flags are taken from that keg, and
  * UMA_ZONE_SECONDARY is added to the flags.
  */
 uma_zone_t
 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_zone_t master)
 {
 	struct uma_zctor_args args;
 	uma_keg_t keg;
 	uma_zone_t res;
 	bool locked;
 
 	keg = master->uz_keg;
 
 	memset(&args, 0, sizeof(args));
 	args.name = name;
 	args.ctor = ctor;
 	args.dtor = dtor;
 	args.uminit = zinit;
 	args.fini = zfini;
 	/* Everything below is inherited from the master's keg. */
 	args.keg = keg;
 	args.size = keg->uk_size;
 	args.align = keg->uk_align;
 	args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
 
 	/* The reclaim lock is only taken once BOOT_BUCKETS is reached. */
 	locked = (booted >= BOOT_BUCKETS);
 	if (locked)
 		sx_slock(&uma_reclaim_lock);
 	/* XXX Attaches only one keg of potentially many. */
 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
 	if (locked)
 		sx_sunlock(&uma_reclaim_lock);
 
 	return (res);
 }
 
 /*
  * See uma.h.  Creates a cache zone: items are obtained through 'zimport'
  * and returned through 'zrelease', with 'arg' stored as the zone's
  * import/release argument.  UMA_ZFLAG_CACHE is always added to 'flags'.
  */
 uma_zone_t
 uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_import zimport,
 		    uma_release zrelease, void *arg, int flags)
 {
 	struct uma_zctor_args zargs;
 	uma_zone_t res;
 
 	memset(&zargs, 0, sizeof(zargs));
 	zargs.name = name;
 	zargs.size = size;
 	zargs.align = 0;
 	zargs.flags = flags | UMA_ZFLAG_CACHE;
 
 	/* Item life-cycle callbacks. */
 	zargs.ctor = ctor;
 	zargs.dtor = dtor;
 	zargs.uminit = zinit;
 	zargs.fini = zfini;
 
 	/* Backend supplying and reclaiming items. */
 	zargs.import = zimport;
 	zargs.release = zrelease;
 	zargs.arg = arg;
 
 	res = zone_alloc_item(zones, &zargs, UMA_ANYDOMAIN, M_WAITOK);
 	return (res);
 }
 
 /*
  * See uma.h
  *
  * Frees the zone header back to the zone of zones with SKIP_NONE, so
  * the zone's destructor (zone_dtor, as registered in uma_startup())
  * runs.  uma_reclaim_lock is held shared across the call.
  */
 void
 uma_zdestroy(uma_zone_t zone)
 {
 
 	sx_slock(&uma_reclaim_lock);
 	zone_free_item(zones, zone, NULL, SKIP_NONE);
 	sx_sunlock(&uma_reclaim_lock);
 }
 
 /*
  * Perform one M_WAITOK allocation from 'zone' and hand the item
  * straight back.
  */
 void
 uma_zwait(uma_zone_t zone)
 {
 	void *probe;
 
 	probe = uma_zalloc_arg(zone, NULL, M_WAITOK);
 	uma_zfree(zone, probe);
 }
 
 /*
  * Allocate an item from a per-CPU zone.  M_ZERO is stripped before the
  * underlying allocation and honoured here instead: on SMP kernels the
  * copy for every CPU ID (0..mp_maxid) is zeroed, otherwise just the
  * single item.  Returns NULL if the allocation fails.
  */
 void *
 uma_zalloc_pcpu_arg(uma_zone_t zone, void *udata, int flags)
 {
 	void *item;
 #ifdef SMP
 	int cpu;
 
 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
 #endif
 	item = uma_zalloc_arg(zone, udata, flags & ~M_ZERO);
 	if (item == NULL || (flags & M_ZERO) == 0)
 		return (item);
 
 #ifdef SMP
 	for (cpu = 0; cpu <= mp_maxid; cpu++)
 		bzero(zpcpu_get_cpu(item, cpu), zone->uz_size);
 #else
 	bzero(item, zone->uz_size);
 #endif
 	return (item);
 }
 
 /*
  * A stub while both regular and pcpu cases are identical.
  *
  * Frees 'item' through uma_zfree_arg(); on SMP kernels it first asserts
  * that the zone really is a UMA_ZONE_PCPU zone.
  */
 void
 uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *udata)
 {
 
 #ifdef SMP
 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
 #endif
 	uma_zfree_arg(zone, item, udata);
 }
 
 /*
  * Compile-time switch tested on the uma_zalloc_arg() fast path: when 1
  * (INVARIANTS kernels) every allocation goes through item_ctor(), even
  * for zones without UMA_ZFLAG_CTORDTOR.
  */
 #ifdef INVARIANTS
 #define	UMA_ALWAYS_CTORDTOR	1
 #else
 #define	UMA_ALWAYS_CTORDTOR	0
 #endif
 
 /*
  * Run the allocation-time hooks on a freshly obtained item: the
  * INVARIANTS trash check, the zone's ctor (if any), the INVARIANTS
  * allocation tracking and finally M_ZERO zeroing.
  *
  * Returns the item, or NULL if the zone's ctor failed.  On failure the
  * zone's failure counter is bumped and the item is handed back through
  * zone_free_item() with SKIP_DTOR | SKIP_CNT.
  */
 static void *
 item_ctor(uma_zone_t zone, int size, void *udata, int flags, void *item)
 {
 #ifdef INVARIANTS
 	bool skipdbg;
 
 	skipdbg = uma_dbg_zskip(zone, item);
 	/* Avoid running trash_ctor twice when it is also the zone's ctor. */
 	if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
 	    zone->uz_ctor != trash_ctor)
 		trash_ctor(item, size, udata, flags);
 #endif
 	if (__predict_false(zone->uz_ctor != NULL) &&
 	    zone->uz_ctor(item, size, udata, flags) != 0) {
 		counter_u64_add(zone->uz_fails, 1);
 		zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
 		return (NULL);
 	}
 #ifdef INVARIANTS
 	if (!skipdbg)
 		uma_dbg_alloc(zone, NULL, item);
 #endif
 	/* Zeroing happens after the ctor has run. */
 	if (flags & M_ZERO)
 		bzero(item, size);
 
 	return (item);
 }
 
 /*
  * Run the free-time hooks on an item.  'skip' selects how much is done:
  * the INVARIANTS free tracking runs only for SKIP_NONE, and the zone's
  * dtor (plus the INVARIANTS trash dtor) runs only when skip is below
  * SKIP_DTOR.
  */
 static inline void
 item_dtor(uma_zone_t zone, void *item, int size, void *udata,
     enum zfreeskip skip)
 {
 #ifdef INVARIANTS
 	bool skipdbg;
 
 	skipdbg = uma_dbg_zskip(zone, item);
 	if (skip == SKIP_NONE && !skipdbg) {
 		/* For UMA_ZONE_MALLOC zones udata is passed to the checker. */
 		if ((zone->uz_flags & UMA_ZONE_MALLOC) != 0)
 			uma_dbg_free(zone, udata, item);
 		else
 			uma_dbg_free(zone, NULL, item);
 	}
 #endif
 	if (__predict_true(skip < SKIP_DTOR)) {
 		if (zone->uz_dtor != NULL)
 			zone->uz_dtor(item, size, udata);
 #ifdef INVARIANTS
 		/* Avoid running trash_dtor twice when it is the zone's dtor. */
 		if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
 		    zone->uz_dtor != trash_dtor)
 			trash_dtor(item, size, udata);
 #endif
 	}
 }
 
 /*
  * See uma.h
  *
  * Allocate one item from 'zone'.  The fast path pops an item from the
  * current CPU's alloc bucket inside a critical section; when that bucket
  * is empty cache_alloc() tries to refill it, and if that fails too a
  * single item is allocated through zone_alloc_item().
  *
  * Arguments:
  *	zone   The zone to allocate from
  *	udata  Opaque argument passed to the zone's ctor
  *	flags  M_* allocation flags; M_ZERO zeroes the returned item
  *
  * Returns:
  *	The item, or NULL on failure
  */
 void *
 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 {
 	uma_cache_bucket_t bucket;
 	uma_cache_t cache;
 	void *item;
 	int domain, size, uz_flags;
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
 
 	/* This is the fast path allocation */
 	CTR4(KTR_UMA, "uma_zalloc_arg thread %x zone %s(%p) flags %d",
 	    curthread, zone->uz_name, zone, flags);
 
 #ifdef WITNESS
 	if (flags & M_WAITOK) {
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
 	}
 #endif
 
 #ifdef INVARIANTS
 	KASSERT((flags & M_EXEC) == 0, ("uma_zalloc_arg: called with M_EXEC"));
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zalloc_arg: called with spinlock or critical section held"));
 	if (zone->uz_flags & UMA_ZONE_PCPU)
 		KASSERT((flags & M_ZERO) == 0, ("allocating from a pcpu zone "
 		    "with M_ZERO passed"));
 #endif
 
 #ifdef DEBUG_MEMGUARD
 	if (memguard_cmp_zone(zone)) {
 		item = memguard_alloc(zone->uz_size, flags);
 		if (item != NULL) {
 			/*
 			 * NOTE(review): the failure paths below return
 			 * without releasing the memguard allocation --
 			 * confirm whether it needs to be freed here.
 			 */
 			if (zone->uz_init != NULL &&
 			    zone->uz_init(item, zone->uz_size, flags) != 0)
 				return (NULL);
 			if (zone->uz_ctor != NULL &&
 			    zone->uz_ctor(item, zone->uz_size, udata,
 			    flags) != 0) {
 				counter_u64_add(zone->uz_fails, 1);
 				/*
 				 * A zone may have a ctor without a fini;
 				 * only undo the init when one exists.
 				 */
 				if (zone->uz_fini != NULL)
 					zone->uz_fini(item, zone->uz_size);
 				return (NULL);
 			}
 			return (item);
 		}
 		/* This is unfortunate but should not be fatal. */
 	}
 #endif
 	/*
 	 * If possible, allocate from the per-CPU cache.  There are two
 	 * requirements for safe access to the per-CPU cache: (1) the thread
 	 * accessing the cache must not be preempted or yield during access,
 	 * and (2) the thread must not migrate CPUs without switching which
 	 * cache it accesses.  We rely on a critical section to prevent
 	 * preemption and migration.  We release the critical section in
 	 * order to acquire the zone mutex if we are unable to allocate from
 	 * the current cache; when we re-acquire the critical section, we
 	 * must detect and handle migration if it has occurred.
 	 */
 	critical_enter();
 	do {
 		/* Re-read the cache each pass: cache_alloc() may migrate us. */
 		cache = &zone->uz_cpu[curcpu];
 		bucket = &cache->uc_allocbucket;
 		size = cache_uz_size(cache);
 		uz_flags = cache_uz_flags(cache);
 		if (__predict_true(bucket->ucb_cnt != 0)) {
 			item = cache_bucket_pop(cache, bucket);
 			critical_exit();
 			if (__predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0 ||
 			    UMA_ALWAYS_CTORDTOR))
 				return (item_ctor(zone, size, udata, flags, item));
 			if (flags & M_ZERO)
 				bzero(item, size);
 			return (item);
 		}
 	} while (cache_alloc(zone, cache, udata, flags));
 	critical_exit();
 
 	/*
 	 * We can not get a bucket so try to return a single item.
 	 */
 	if (uz_flags & UMA_ZONE_FIRSTTOUCH)
 		domain = PCPU_GET(domain);
 	else
 		domain = UMA_ANYDOMAIN;
 	return (zone_alloc_item(zone, udata, domain, flags));
 }
 
 /*
  * Replenish an alloc bucket and possibly restore an old one.  Called in
  * a critical section.  Returns in a critical section.
  *
  * The critical section is dropped and re-entered internally, so the
  * caller must re-read its per-CPU cache pointer after this returns.
  *
  * A false return value indicates an allocation failure.
  * A true return value indicates success and the caller should retry.
  */
 static __noinline bool
 cache_alloc(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
 {
 	uma_zone_domain_t zdom;
 	uma_bucket_t bucket;
 	int domain;
 	bool lockfail;
 
 	CRITICAL_ASSERT(curthread);
 
 	/*
 	 * If we have run out of items in our alloc bucket see
 	 * if we can switch with the free bucket.
 	 */
 	if (cache->uc_freebucket.ucb_cnt != 0) {
 		cache_bucket_swap(&cache->uc_freebucket, &cache->uc_allocbucket);
 		return (true);
 	}
 
 	/*
 	 * Discard any empty allocation bucket while we hold no locks.
 	 */
 	bucket = cache_bucket_unload_alloc(cache);
 	critical_exit();
 	if (bucket != NULL)
 		bucket_free(zone, bucket, udata);
 
 	/* Short-circuit for zones without buckets and low memory. */
 	if (zone->uz_bucket_size == 0 || bucketdisable) {
 		critical_enter();
 		return (false);
 	}
 
 	/*
 	 * Attempt to retrieve the item from the per-CPU cache has failed, so
 	 * we must go back to the zone.  This requires the zone lock, so we
 	 * must drop the critical section, then re-acquire it when we go back
 	 * to the cache.  Since the critical section is released, we may be
 	 * preempted or migrate.  As such, make sure not to maintain any
 	 * thread-local state specific to the cache from prior to releasing
 	 * the critical section.
 	 */
 	lockfail = 0;
 	if (ZONE_TRYLOCK(zone) == 0) {
 		/* Record contention to size the buckets. */
 		ZONE_LOCK(zone);
 		lockfail = 1;
 	}
 
 	/* See if we lost the race to fill the cache. */
 	critical_enter();
 	cache = &zone->uz_cpu[curcpu];
 	if (cache->uc_allocbucket.ucb_bucket != NULL) {
 		ZONE_UNLOCK(zone);
 		return (true);
 	}
 
 	/*
 	 * Check the zone's cache of buckets.
 	 */
 	if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH) {
 		domain = PCPU_GET(domain);
 		zdom = &zone->uz_domain[domain];
 	} else {
 		domain = UMA_ANYDOMAIN;
 		zdom = &zone->uz_domain[0];
 	}
 
 	if ((bucket = zone_fetch_bucket(zone, zdom)) != NULL) {
 		ZONE_UNLOCK(zone);
 		KASSERT(bucket->ub_cnt != 0,
 		    ("uma_zalloc_arg: Returning an empty bucket."));
 		cache_bucket_load_alloc(cache, bucket);
 		return (true);
 	}
 	/* We are no longer associated with this CPU. */
 	critical_exit();
 
 	/*
 	 * We bump the uz count when the cache size is insufficient to
 	 * handle the working set.
 	 */
 	if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max)
 		zone->uz_bucket_size++;
 	ZONE_UNLOCK(zone);
 
 	/*
 	 * Fill a bucket and attempt to use it as the alloc bucket.
 	 */
 	bucket = zone_alloc_bucket(zone, udata, domain, flags);
 	CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
 	    zone->uz_name, zone, bucket);
 	if (bucket == NULL) {
 		critical_enter();
 		return (false);
 	}
 
 	/*
 	 * See if we lost the race or were migrated.  Cache the
 	 * initialized bucket to make this less likely or claim
 	 * the memory directly.
 	 */
 	ZONE_LOCK(zone);
 	critical_enter();
 	cache = &zone->uz_cpu[curcpu];
 	if (cache->uc_allocbucket.ucb_bucket == NULL &&
 	    ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0 ||
 	    domain == PCPU_GET(domain))) {
 		cache_bucket_load_alloc(cache, bucket);
 		zdom->uzd_imax += bucket->ub_cnt;
 	} else if (zone->uz_bkt_count >= zone->uz_bkt_max) {
 		/*
 		 * The zone's bucket cache is at its limit: release the
 		 * bucket's items and the bucket itself, outside the lock
 		 * and the critical section.
 		 */
 		critical_exit();
 		ZONE_UNLOCK(zone);
 		bucket_drain(zone, bucket);
 		bucket_free(zone, bucket, udata);
 		critical_enter();
 		return (true);
 	} else
 		zone_put_bucket(zone, zdom, bucket, false);
 	ZONE_UNLOCK(zone);
 	return (true);
 }
 
 /*
  * Allocate one item from 'zone' with an explicit 'domain'.  Unlike
  * uma_zalloc_arg() this does not consult the per-CPU caches; the request
  * goes straight to zone_alloc_item().
  *
  * Must not be called with a spinlock or critical section held.
  */
 void *
 uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags)
 {
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
 
 	/* This is the fast path allocation */
 	CTR5(KTR_UMA,
 	    "uma_zalloc_domain thread %x zone %s(%p) domain %d flags %d",
 	    curthread, zone->uz_name, zone, domain, flags);
 
 	if (flags & M_WAITOK) {
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "uma_zalloc_domain: zone \"%s\"", zone->uz_name);
 	}
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zalloc_domain: called with spinlock or critical section held"));
 
 	return (zone_alloc_item(zone, udata, domain, flags));
 }
 
 /*
  * Locate a slab that still has free items.  A partially used slab is
  * preferred over a completely free one, which helps to reduce
  * fragmentation; a completely free slab that gets picked is moved onto
  * the partial list before it is returned.
  *
  * With 'rr' set every domain is examined, starting at 'domain' and
  * wrapping around; otherwise only 'domain' itself is examined.
  * Returns NULL if no slab was found.
  */
 static uma_slab_t
 keg_first_slab(uma_keg_t keg, int domain, bool rr)
 {
 	uma_domain_t dom;
 	uma_slab_t slab;
 	int first;
 
 	KASSERT(domain >= 0 && domain < vm_ndomains,
 	    ("keg_first_slab: domain %d out of range", domain));
 	KEG_LOCK_ASSERT(keg, domain);
 
 	first = domain;
 	do {
 		dom = &keg->uk_domain[domain];
 
 		/* A partially used slab can be handed out as is. */
 		slab = LIST_FIRST(&dom->ud_part_slab);
 		if (slab != NULL)
 			return (slab);
 
 		/* Otherwise promote a free slab to the partial list. */
 		slab = LIST_FIRST(&dom->ud_free_slab);
 		if (slab != NULL) {
 			LIST_REMOVE(slab, us_link);
 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
 			return (slab);
 		}
 
 		if (rr)
 			domain = (domain + 1) % vm_ndomains;
 	} while (domain != first);
 
 	return (NULL);
 }
 
 /*
  * Fetch an existing slab from a free or partial list.  Returns with the
  * keg domain lock held if a slab was found or unlocked if not.
  *
  * No slab is returned while the domain's free item count is at or below
  * the keg's reserve, unless M_USE_RESERVE is set in 'flags'.
  */
 static uma_slab_t
 keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags)
 {
 	uma_slab_t slab;
 	uint32_t reserve;
 
 	/* HASH has a single free list. */
-	if ((keg->uk_flags & UMA_ZONE_HASH) != 0)
+	if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0)
 		domain = 0;
 
 	KEG_LOCK(keg, domain);
 	reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
 	if (keg->uk_domain[domain].ud_free <= reserve ||
 	    (slab = keg_first_slab(keg, domain, rr)) == NULL) {
 		KEG_UNLOCK(keg, domain);
 		return (NULL);
 	}
 	return (slab);
 }
 
 /*
  * Obtain a slab with free items for 'keg', either an existing one or a
  * newly allocated one.
  *
  * Arguments:
  *	keg      The keg to take the slab from
  *	zone     The zone on whose behalf the slab is allocated
  *	rdomain  Requested domain, or UMA_ANYDOMAIN to follow the keg policy
  *	flags    M_* allocation flags
  *
  * Returns the slab, or NULL on failure.  A slab obtained from
  * keg_fetch_free_slab() is returned with the keg domain lock held.
  */
 static uma_slab_t
 keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags)
 {
 	struct vm_domainset_iter di;
 	uma_slab_t slab;
 	int aflags, domain;
 	bool rr;
 
 restart:
 	/*
 	 * Use the keg's policy if upper layers haven't already specified a
 	 * domain (as happens with first-touch zones).
 	 *
 	 * To avoid races we run the iterator with the keg lock held, but that
 	 * means that we cannot allow the vm_domainset layer to sleep.  Thus,
 	 * clear M_WAITOK and handle low memory conditions locally.
 	 */
 	rr = rdomain == UMA_ANYDOMAIN;
 	if (rr) {
 		aflags = (flags & ~M_WAITOK) | M_NOWAIT;
 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
 		    &aflags);
 	} else {
 		aflags = flags;
 		domain = rdomain;
 	}
 
 	for (;;) {
 		slab = keg_fetch_free_slab(keg, domain, rr, flags);
 		if (slab != NULL)
 			return (slab);
 
 		/*
 		 * M_NOVM means don't ask at all!
 		 */
 		if (flags & M_NOVM)
 			break;
 
 		slab = keg_alloc_slab(keg, zone, domain, flags, aflags);
 		if (slab != NULL)
 			return (slab);
 		/* A fixed-domain request only loops when it may sleep. */
 		if (!rr && (flags & M_WAITOK) == 0)
 			break;
 		/* Policy exhausted: wait for memory if allowed, else give up. */
 		if (rr && vm_domainset_iter_policy(&di, &domain) != 0) {
 			if ((flags & M_WAITOK) != 0) {
 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
 				goto restart;
 			}
 			break;
 		}
 	}
 
 	/*
 	 * We might not have been able to get a slab but another cpu
 	 * could have while we were unlocked.  Check again before we
 	 * fail.
 	 */
 	if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL)
 		return (slab);
 
 	return (NULL);
 }
 
 /*
  * Take one free item out of 'slab'.  The first set bit of the slab's
  * free bitset selects the item; the slab and domain free counts are
  * decremented, and a slab with no free items left is moved to its
  * domain's full list.  The keg lock for the slab's domain must be held.
  */
 static void *
 slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
 {
 	uma_domain_t dom;
 	void *item;
 	uint8_t index;
 
 	KEG_LOCK_ASSERT(keg, slab->us_domain);
 
 	/* Claim the lowest-numbered free slot. */
 	index = BIT_FFS(keg->uk_ipers, &slab->us_free) - 1;
 	BIT_CLR(keg->uk_ipers, index, &slab->us_free);
 	item = slab_item(slab, keg, index);
 
 	/* Account for the item in both the slab and its domain. */
 	dom = &keg->uk_domain[slab->us_domain];
 	dom->ud_free--;
 	if (--slab->us_freecount == 0) {
 		/* Move this slab to the full list */
 		LIST_REMOVE(slab, us_link);
 		LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link);
 	}
 
 	return (item);
 }
 
 /*
  * Import function for keg-backed zones (installed as uz_import by
  * zone_ctor()).  Fills 'bucket' with up to 'max' items taken from the
  * zone's keg.
  *
  * Arguments:
  *	arg     The zone
  *	bucket  Array receiving the item pointers
  *	max     Maximum number of items to import
  *	domain  Requested domain, passed on to keg_fetch_slab()
  *	flags   M_* allocation flags
  *
  * Returns the number of items stored in 'bucket', which may be fewer
  * than 'max' (including zero).
  */
 static int
 zone_import(void *arg, void **bucket, int max, int domain, int flags)
 {
 	uma_domain_t dom;
 	uma_zone_t zone;
 	uma_slab_t slab;
 	uma_keg_t keg;
 #ifdef NUMA
 	int stripe;
 #endif
 	int i;
 
 	zone = arg;
 	slab = NULL;
 	keg = zone->uz_keg;
 	/* Try to keep the buckets totally full */
 	for (i = 0; i < max; ) {
 		if ((slab = keg_fetch_slab(keg, zone, domain, flags)) == NULL)
 			break;
 #ifdef NUMA
 		stripe = howmany(max, vm_ndomains);
 #endif
 		dom = &keg->uk_domain[slab->us_domain];
 		while (slab->us_freecount && i < max) { 
 			bucket[i++] = slab_alloc_item(keg, slab);
 			/* Stop once the domain is down to the keg reserve. */
 			if (dom->ud_free <= keg->uk_reserve)
 				break;
 #ifdef NUMA
 			/*
 			 * If the zone is striped we pick a new slab for every
 			 * N allocations.  Eliminating this conditional will
 			 * instead pick a new domain for each bucket rather
 			 * than stripe within each bucket.  The current option
 			 * produces more fragmentation and requires more cpu
 			 * time but yields better distribution.
 			 */
 			if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0 &&
 			    vm_ndomains > 1 && --stripe == 0)
 				break;
 #endif
 		}
 		KEG_UNLOCK(keg, slab->us_domain);
 		/* Don't block if we allocated any successfully. */
 		flags &= ~M_WAITOK;
 		flags |= M_NOWAIT;
 	}
 
 	return i;
 }
 
 /*
  * Slow path for reserving 'count' items against the zone's uz_max_items
  * limit.  Gives back the caller's reservation, then either returns 0
  * (M_NOWAIT) or sleeps on the zone until items are available.
  *
  * Returns the number of items actually reserved, which may be fewer
  * than 'count'.
  */
 static int
 zone_alloc_limit_hard(uma_zone_t zone, int count, int flags)
 {
 	uint64_t old, new, total, max;
 
 	/*
 	 * The hard case.  We're going to sleep because there were existing
 	 * sleepers or because we ran out of items.  This routine enforces
 	 * fairness by keeping fifo order.
 	 *
 	 * First release our ill gotten gains and make some noise.
 	 */
 	for (;;) {
 		zone_free_limit(zone, count);
 		zone_log_warning(zone);
 		zone_maxaction(zone);
 		if (flags & M_NOWAIT)
 			return (0);
 
 		/*
 		 * We need to allocate an item or set ourself as a sleeper
 		 * while the sleepq lock is held to avoid wakeup races.  This
 		 * is essentially a home rolled semaphore.
 		 */
 		sleepq_lock(&zone->uz_max_items);
 		old = zone->uz_items;
 		do {
 			MPASS(UZ_ITEMS_SLEEPERS(old) < UZ_ITEMS_SLEEPERS_MAX);
 			/* Cache the max since we will evaluate twice. */
 			max = zone->uz_max_items;
 			if (UZ_ITEMS_SLEEPERS(old) != 0 ||
 			    UZ_ITEMS_COUNT(old) >= max)
 				new = old + UZ_ITEMS_SLEEPER;
 			else
 				new = old + MIN(count, max - old);
 		} while (atomic_fcmpset_64(&zone->uz_items, &old, new) == 0);
 
 		/* We may have successfully allocated under the sleepq lock. */
 		if (UZ_ITEMS_SLEEPERS(new) == 0) {
 			sleepq_release(&zone->uz_max_items);
 			return (new - old);
 		}
 
 		/*
 		 * This is in a different cacheline from uz_items so that we
 		 * don't constantly invalidate the fastpath cacheline when we
 		 * adjust item counts.  This could be limited to toggling on
 		 * transitions.
 		 */
 		atomic_add_32(&zone->uz_sleepers, 1);
 		atomic_add_64(&zone->uz_sleeps, 1);
 
 		/*
 		 * We have added ourselves as a sleeper.  The sleepq lock
 		 * protects us from wakeup races.  Sleep now and then retry.
 		 */
 		sleepq_add(&zone->uz_max_items, NULL, "zonelimit", 0, 0);
 		sleepq_wait(&zone->uz_max_items, PVM);
 
 		/*
 		 * After wakeup, remove ourselves as a sleeper and try
 		 * again.  We no longer have the sleepq lock for protection.
 		 *
 		 * Subtract ourselves as a sleeper while attempting to add
 		 * our count.
 		 */
 		atomic_subtract_32(&zone->uz_sleepers, 1);
 		old = atomic_fetchadd_64(&zone->uz_items,
 		    -(UZ_ITEMS_SLEEPER - count));
 		/* We're no longer a sleeper. */
 		old -= UZ_ITEMS_SLEEPER;
 
 		/*
 		 * If we're still at the limit, restart.  Notably do not
 		 * block on other sleepers.  Cache the max value to protect
 		 * against changes via sysctl.
 		 */
 		total = UZ_ITEMS_COUNT(old);
 		max = zone->uz_max_items;
 		if (total >= max)
 			continue;
 		/* Truncate if necessary, otherwise wake other sleepers. */
 		if (total + count > max) {
 			zone_free_limit(zone, total + count - max);
 			count = max - total;
 		} else if (total + count < max && UZ_ITEMS_SLEEPERS(old) != 0)
 			wakeup_one(&zone->uz_max_items);
 
 		return (count);
 	}
 }
 
 /*
  * Allocate 'count' items from our max_items limit.  Returns the number
  * available.  If M_NOWAIT is not specified it will sleep until at least
  * one item can be allocated.
  */
 static int
 zone_alloc_limit(uma_zone_t zone, int count, int flags)
 {
 	uint64_t max, prev;
 
 	max = zone->uz_max_items;
 	MPASS(max > 0);
 
 	/*
 	 * Fast path: optimistically claim the whole request with one
 	 * fetchadd and keep it if we stayed within the limit.
 	 */
 	prev = atomic_fetchadd_64(&zone->uz_items, count);
 	if (__predict_true(prev + count <= max))
 		return (count);
 
 	/*
 	 * The counter was already at or beyond the limit before our
 	 * claim.  Hand off to the slow path, which releases the claim
 	 * and either fails or sleeps depending on 'flags'.
 	 */
 	if (prev >= max)
 		return (zone_alloc_limit_hard(zone, count, flags));
 
 	/*
 	 * Only part of the request fit.  Give back the overshoot; this
 	 * must go through zone_free_limit() because sleepers may have
 	 * missed a wakeup while we were temporarily over the limit.
 	 */
 	zone_free_limit(zone, (prev + count) - max);
 	return (max - prev);
 }
 
 /*
  * Free a number of items back to the limit.
  */
 static void
 zone_free_limit(uma_zone_t zone, int count)
 {
 	uint64_t prev;
 
 	MPASS(count > 0);
 
 	/* Drop 'count' items from the limit accounting. */
 	prev = atomic_fetchadd_64(&zone->uz_items, -count);
 
 	/*
 	 * A wakeup is only needed in the uncommon case where sleepers
 	 * are recorded and the release brought the zone back under its
 	 * limit.  One wakeup is issued at a time to moderate the rate;
 	 * sleepers will continue to generate wakeups if necessary.
 	 */
 	if (__predict_false(UZ_ITEMS_SLEEPERS(prev) != 0 &&
 	    UZ_ITEMS_COUNT(prev) - count < zone->uz_max_items))
 		wakeup_one(&zone->uz_max_items);
 }
 
 /*
  * Allocate a bucket and fill it with items obtained from zone->uz_import.
  *
  * When the zone has an item limit, room for up to uz_bucket_size items is
  * reserved first (never sleeping) and any unused part of that reservation
  * is released again before returning.
  *
  * Returns the filled bucket, or NULL if no limit headroom was available,
  * no bucket could be allocated, or no item could be imported/initialized.
  */
 static uma_bucket_t
 zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
 {
 	uma_bucket_t bucket;
 	int maxbucket, cnt;
 
 	CTR1(KTR_UMA, "zone_alloc:_bucket domain %d)", domain);
 
 	/* Avoid allocs targeting empty domains. */
 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
 		domain = UMA_ANYDOMAIN;
 
 	if (zone->uz_max_items > 0)
 		maxbucket = zone_alloc_limit(zone, zone->uz_bucket_size,
 		    M_NOWAIT);
 	else
 		maxbucket = zone->uz_bucket_size;
 	/* This function returns a pointer: report failure with NULL. */
 	if (maxbucket == 0)
 		return (NULL);
 
 	/* Don't wait for buckets, preserve caller's NOVM setting. */
 	bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
 	if (bucket == NULL) {
 		/* Nothing was imported; 'out' releases the whole reservation. */
 		cnt = 0;
 		goto out;
 	}
 
 	bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
 	    MIN(maxbucket, bucket->ub_entries), domain, flags);
 
 	/*
 	 * Initialize the memory if necessary.
 	 */
 	if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
 		int i;
 
 		for (i = 0; i < bucket->ub_cnt; i++)
 			if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
 			    flags) != 0)
 				break;
 		/*
 		 * If we couldn't initialize the whole bucket, put the
 		 * rest back onto the freelist.
 		 */
 		if (i != bucket->ub_cnt) {
 			zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
 			    bucket->ub_cnt - i);
 #ifdef INVARIANTS
 			bzero(&bucket->ub_bucket[i],
 			    sizeof(void *) * (bucket->ub_cnt - i));
 #endif
 			bucket->ub_cnt = i;
 		}
 	}
 
 	cnt = bucket->ub_cnt;
 	if (bucket->ub_cnt == 0) {
 		bucket_free(zone, bucket, udata);
 		counter_u64_add(zone->uz_fails, 1);
 		bucket = NULL;
 	}
 out:
 	/* Return the part of the limit reservation we did not consume. */
 	if (zone->uz_max_items > 0 && cnt < maxbucket)
 		zone_free_limit(zone, maxbucket - cnt);
 
 	return (bucket);
 }
 
 /*
  * Allocates a single item from a zone.
  *
  * Arguments
  *	zone   The zone to alloc for.
  *	udata  The data to be passed to the constructor.
  *	domain The domain to allocate from or UMA_ANYDOMAIN.
  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
  *
  * Returns
  *	NULL if there is no memory and M_NOWAIT is set
  *	An item if successful
  */
 
 static void *
 zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
 {
 	void *item;
 
 	/*
 	 * For limited zones, reserve one item against the limit first.
 	 * A zero return means nothing could be reserved, so fail.
 	 */
 	if (zone->uz_max_items > 0 && zone_alloc_limit(zone, 1, flags) == 0)
 		return (NULL);
 
 	/* Avoid allocs targeting empty domains. */
 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
 		domain = UMA_ANYDOMAIN;
 
 	/* Import exactly one item, bypassing the bucket caches. */
 	if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
 		goto fail_cnt;
 
 	/*
 	 * We have to call both the zone's init (not the keg's init)
 	 * and the zone's ctor.  This is because the item is going from
 	 * a keg slab directly to the user, and the user is expecting it
 	 * to be both zone-init'd as well as zone-ctor'd.
 	 */
 	if (zone->uz_init != NULL) {
 		if (zone->uz_init(item, zone->uz_size, flags) != 0) {
 			/*
 			 * SKIP_CNT keeps zone_free_item() from touching the
 			 * free counter and the limit; the limit reservation
 			 * is released at 'fail' below instead.
 			 */
 			zone_free_item(zone, item, udata, SKIP_FINI | SKIP_CNT);
 			goto fail_cnt;
 		}
 	}
 	item = item_ctor(zone, zone->uz_size, udata, flags, item);
 	/* A NULL return from item_ctor() skips the uz_fails bump. */
 	if (item == NULL)
 		goto fail;
 
 	counter_u64_add(zone->uz_allocs, 1);
 	CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", item,
 	    zone->uz_name, zone);
 
 	return (item);
 
 	/* fail_cnt records the failure and then falls through to fail. */
 fail_cnt:
 	counter_u64_add(zone->uz_fails, 1);
 fail:
 	/* Give back the limit reservation taken at the top. */
 	if (zone->uz_max_items > 0)
 		zone_free_limit(zone, 1);
 	CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
 	    zone->uz_name, zone);
 
 	return (NULL);
 }
 
 /*
  * See uma.h
  *
  * Free 'item' to 'zone', passing 'udata' to the destructor.  The item is
  * pushed into a per-CPU cache bucket when possible; otherwise it is
  * released through zone_free_item().  A NULL item is ignored.
  */
 void
 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
 {
 	uma_cache_t cache;
 	uma_cache_bucket_t bucket;
 	int domain, itemdomain, uz_flags;
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
 
 	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
 	    zone->uz_name);
 
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zfree_arg: called with spinlock or critical section held"));
 
         /* uma_zfree(..., NULL) does nothing, to match free(9). */
         if (item == NULL)
                 return;
 #ifdef DEBUG_MEMGUARD
 	/* Memguard-owned items bypass the caches entirely. */
 	if (is_memguard_addr(item)) {
 		if (zone->uz_dtor != NULL)
 			zone->uz_dtor(item, zone->uz_size, udata);
 		if (zone->uz_fini != NULL)
 			zone->uz_fini(item, zone->uz_size);
 		memguard_free(item);
 		return;
 	}
 #endif
 
 	/*
 	 * We are accessing the per-cpu cache without a critical section to
 	 * fetch size and flags.  This is acceptable, if we are preempted we
 	 * will simply read another cpu's line.
 	 */
 	cache = &zone->uz_cpu[curcpu];
 	uz_flags = cache_uz_flags(cache);
 	if (__predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0 ||
 	    UMA_ALWAYS_CTORDTOR))
 		item_dtor(zone, item, cache_uz_size(cache), udata, SKIP_NONE);
 
 	/*
 	 * The race here is acceptable.  If we miss it we'll just have to wait
 	 * a little longer for the limits to be reset.
 	 */
 	if (__predict_false(uz_flags & UMA_ZFLAG_LIMIT)) {
 		/*
 		 * With sleepers waiting on the limit, skip the caches so the
 		 * free goes through zone_free_item().
 		 */
 		if (zone->uz_sleepers > 0)
 			goto zfree_item;
 	}
 
 	/*
 	 * If possible, free to the per-CPU cache.  There are two
 	 * requirements for safe access to the per-CPU cache: (1) the thread
 	 * accessing the cache must not be preempted or yield during access,
 	 * and (2) the thread must not migrate CPUs without switching which
 	 * cache it accesses.  We rely on a critical section to prevent
 	 * preemption and migration.  We release the critical section in
 	 * order to acquire the zone mutex if we are unable to free to the
 	 * current cache; when we re-acquire the critical section, we must
 	 * detect and handle migration if it has occurred.
 	 */
 	domain = itemdomain = 0;
 #ifdef NUMA
 	/* Look up the item's domain once, outside the critical section. */
 	if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
 		itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
 #endif
 	critical_enter();
 	do {
 		/* Re-read the cache: cache_free() may have changed our CPU. */
 		cache = &zone->uz_cpu[curcpu];
 #ifdef NUMA
 		domain = PCPU_GET(domain);
 		if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 &&
 		    domain != itemdomain) {
 			bucket = &cache->uc_crossbucket;
 		} else
 #endif
 		{
 			/*
 			 * Try to free into the allocbucket first to give LIFO
 			 * ordering for cache-hot datastructures.  Spill over
 			 * into the freebucket if necessary.  Alloc will swap
 			 * them if one runs dry.
 			 */
 			bucket = &cache->uc_allocbucket;
 			if (__predict_false(bucket->ucb_cnt >=
 			    bucket->ucb_entries))
 				bucket = &cache->uc_freebucket;
 		}
 		if (__predict_true(bucket->ucb_cnt < bucket->ucb_entries)) {
 			cache_bucket_push(cache, bucket, item);
 			critical_exit();
 			return;
 		}
 		/* No room: cache_free() returns true when a retry may succeed. */
 	} while (cache_free(zone, cache, udata, item, itemdomain));
 	critical_exit();
 
 	/*
 	 * If nothing else caught this, we'll just do an internal free.
 	 * The destructor already ran above, hence SKIP_DTOR.
 	 */
 zfree_item:
 	zone_free_item(zone, item, udata, SKIP_DTOR);
 }
 
 #ifdef NUMA
 /*
  * Sort the items of a cross-domain free bucket into per-domain buckets
  * (zdom->uzd_cross) and cache the ones that become full.  Items that
  * cannot be sorted because no bucket could be allocated are drained, and
  * the source bucket itself is always freed.
  */
 static void
 zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata)
 {
 	struct uma_bucketlist fullbuckets;
 	uma_zone_domain_t zdom;
 	uma_bucket_t b;
 	void *item;
 	int domain;
 
 	CTR3(KTR_UMA,
 	    "uma_zfree: zone %s(%p) draining cross bucket %p",
 	    zone->uz_name, zone, bucket);
 
 	/* Local list of buckets filled during sorting. */
 	TAILQ_INIT(&fullbuckets);
 
 	/*
 	 * To avoid having ndomain * ndomain buckets for sorting we have a
 	 * lock on the current crossfree bucket.  A full matrix with
 	 * per-domain locking could be used if necessary.
 	 */
 	ZONE_CROSS_LOCK(zone);
 	while (bucket->ub_cnt > 0) {
 		/* Take items from the tail of the source bucket. */
 		item = bucket->ub_bucket[bucket->ub_cnt - 1];
 		domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
 		zdom = &zone->uz_domain[domain];
 		if (zdom->uzd_cross == NULL) {
 			zdom->uzd_cross = bucket_alloc(zone, udata, M_NOWAIT);
 			/* Out of buckets: leave the rest to be drained below. */
 			if (zdom->uzd_cross == NULL)
 				break;
 		}
 		zdom->uzd_cross->ub_bucket[zdom->uzd_cross->ub_cnt++] = item;
 		if (zdom->uzd_cross->ub_cnt == zdom->uzd_cross->ub_entries) {
 			TAILQ_INSERT_HEAD(&fullbuckets, zdom->uzd_cross,
 			    ub_link);
 			zdom->uzd_cross = NULL;
 		}
 		bucket->ub_cnt--;
 	}
 	ZONE_CROSS_UNLOCK(zone);
 	if (!TAILQ_EMPTY(&fullbuckets)) {
 		ZONE_LOCK(zone);
 		while ((b = TAILQ_FIRST(&fullbuckets)) != NULL) {
 			TAILQ_REMOVE(&fullbuckets, b, ub_link);
 			if (zone->uz_bkt_count >= zone->uz_bkt_max) {
 				/*
 				 * Bucket cache is at its cap: drop the zone
 				 * lock around the drain and free, then
 				 * retake it for the next iteration.
 				 */
 				ZONE_UNLOCK(zone);
 				bucket_drain(zone, b);
 				bucket_free(zone, b, udata);
 				ZONE_LOCK(zone);
 			} else {
 				/*
 				 * The domain is derived from the bucket's
 				 * first item.
 				 */
 				domain = _vm_phys_domain(
 				    pmap_kextract(
 				    (vm_offset_t)b->ub_bucket[0]));
 				zdom = &zone->uz_domain[domain];
 				zone_put_bucket(zone, zdom, b, true);
 			}
 		}
 		ZONE_UNLOCK(zone);
 	}
 	/* Anything left unsorted is drained rather than cached. */
 	if (bucket->ub_cnt != 0)
 		bucket_drain(zone, bucket);
 	bucket_free(zone, bucket, udata);
 }
 #endif
 
 /*
  * Dispose of a full bucket unloaded from a per-CPU cache: either cache it
  * in the zone's per-domain bucket cache or, when that cache is at its
  * cap, drain and free it.  'domain' is the freeing CPU's domain and
  * 'itemdomain' the domain the items belong to.
  */
 static void
 zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
     int domain, int itemdomain)
 {
 	uma_zone_domain_t zdom;
 
 #ifdef NUMA
 	/*
 	 * Buckets coming from the wrong domain will be entirely for the
 	 * only other domain on two domain systems.  In this case we can
 	 * simply cache them.  Otherwise we need to sort them back to
 	 * correct domains.
 	 */
 	if (domain != itemdomain && vm_ndomains > 2) {
 		zone_free_cross(zone, bucket, udata);
 		return;
 	}
 #endif
 
 	/*
 	 * Attempt to save the bucket in the zone's domain bucket cache.
 	 *
 	 * We bump the uz count when the cache size is insufficient to
 	 * handle the working set.
 	 */
 	/*
 	 * NOTE(review): a successful ZONE_TRYLOCK() presumably leaves the
 	 * zone lock held, as both branches below unlock it.
 	 */
 	if (ZONE_TRYLOCK(zone) == 0) {
 		/* Record contention to size the buckets. */
 		ZONE_LOCK(zone);
 		if (zone->uz_bucket_size < zone->uz_bucket_size_max)
 			zone->uz_bucket_size++;
 	}
 
 	CTR3(KTR_UMA,
 	    "uma_zfree: zone %s(%p) putting bucket %p on free list",
 	    zone->uz_name, zone, bucket);
 	/* ub_cnt is pointing to the last free item */
 	KASSERT(bucket->ub_cnt == bucket->ub_entries,
 	    ("uma_zfree: Attempting to insert partial  bucket onto the full list.\n"));
 	if (zone->uz_bkt_count >= zone->uz_bkt_max) {
 		/* Cache is at its cap: drop the lock before draining. */
 		ZONE_UNLOCK(zone);
 		bucket_drain(zone, bucket);
 		bucket_free(zone, bucket, udata);
 	} else {
 		zdom = &zone->uz_domain[itemdomain];
 		zone_put_bucket(zone, zdom, bucket, true);
 		ZONE_UNLOCK(zone);
 	}
 }
 
 /*
  * Populate a free or cross bucket for the current cpu cache.  Free any
  * existing full bucket either to the zone cache or back to the slab layer.
  *
  * Enters and returns in a critical section.  false return indicates that
  * we can not satisfy this free in the cache layer.  true indicates that
  * the caller should retry.
  */
 static __noinline bool
 cache_free(uma_zone_t zone, uma_cache_t cache, void *udata, void *item,
     int itemdomain)
 {
 	uma_cache_bucket_t cbucket;
 	uma_bucket_t bucket;
 	int domain;
 
 	CRITICAL_ASSERT(curthread);
 
 	/* Per-CPU caching is unavailable: let the caller free directly. */
 	if (zone->uz_bucket_size == 0 || bucketdisable)
 		return (false);
 
 	cache = &zone->uz_cpu[curcpu];
 
 	/*
 	 * FIRSTTOUCH domains need to free to the correct zdom.  When
 	 * enabled this is the zdom of the item.   The bucket is the
 	 * cross bucket if the current domain and itemdomain do not match.
 	 */
 	cbucket = &cache->uc_freebucket;
 #ifdef NUMA
 	if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) {
 		domain = PCPU_GET(domain);
 		if (domain != itemdomain) {
 			cbucket = &cache->uc_crossbucket;
 			/* Account the cross-domain frees being unloaded. */
 			if (cbucket->ucb_cnt != 0)
 				atomic_add_64(&zone->uz_xdomain,
 				    cbucket->ucb_cnt);
 		}
 	} else
 #endif
 		itemdomain = domain = 0;
 	bucket = cache_bucket_unload(cbucket);
 
 	/* We are no longer associated with this CPU. */
 	critical_exit();
 
 	/* Hand the unloaded bucket (if any) to the zone layer. */
 	if (bucket != NULL)
 		zone_free_bucket(zone, bucket, udata, domain, itemdomain);
 
 	/* Get a replacement bucket while outside the critical section. */
 	bucket = bucket_alloc(zone, udata, M_NOWAIT);
 	CTR3(KTR_UMA, "uma_zfree: zone %s(%p) allocated bucket %p",
 	    zone->uz_name, zone, bucket);
 	critical_enter();
 	if (bucket == NULL)
 		return (false);
 	/* We may be on a different CPU now; look the cache up again. */
 	cache = &zone->uz_cpu[curcpu];
 #ifdef NUMA
 	/*
 	 * Check to see if we should be populating the cross bucket.  If it
 	 * is already populated we will fall through and attempt to populate
 	 * the free bucket.
 	 */
 	if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) {
 		domain = PCPU_GET(domain);
 		if (domain != itemdomain &&
 		    cache->uc_crossbucket.ucb_bucket == NULL) {
 			cache_bucket_load_cross(cache, bucket);
 			return (true);
 		}
 	}
 #endif
 	/*
 	 * We may have lost the race to fill the bucket or switched CPUs.
 	 */
 	if (cache->uc_freebucket.ucb_bucket != NULL) {
 		/* Slot already filled: discard ours outside the section. */
 		critical_exit();
 		bucket_free(zone, bucket, udata);
 		critical_enter();
 	} else
 		cache_bucket_load_free(cache, bucket);
 
 	return (true);
 }
 
 /*
  * Free 'item' to 'zone' without using the per-CPU caches; the item is
  * handed to zone_free_item() with no stages skipped.  A NULL item is
  * ignored.
  */
 void
 uma_zfree_domain(uma_zone_t zone, void *item, void *udata)
 {
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
 
 	CTR2(KTR_UMA, "uma_zfree_domain thread %x zone %s", curthread,
 	    zone->uz_name);
 
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zfree_domain: called with spinlock or critical section held"));
 
 	/* Like free(9), freeing NULL is a no-op. */
 	if (item != NULL)
 		zone_free_item(zone, item, udata, SKIP_NONE);
 }
 
 /*
  * Return 'item' to 'slab': move the slab between the keg domain's slab
  * lists if its fill state changes, mark the item free in the slab bitmap
  * and update the free counters.  The keg lock for the slab's domain must
  * be held.
  */
 static void
 slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item)
 {
 	uma_domain_t kdom;
 	uma_keg_t keg;
 	uint8_t idx;
 
 	keg = zone->uz_keg;
 	KEG_LOCK_ASSERT(keg, slab->us_domain);
 	kdom = &keg->uk_domain[slab->us_domain];
 
 	/*
 	 * Relink the slab when this free changes its category: it
 	 * becomes completely free, or it stops being completely full.
 	 */
 	if (slab->us_freecount + 1 == keg->uk_ipers) {
 		LIST_REMOVE(slab, us_link);
 		LIST_INSERT_HEAD(&kdom->ud_free_slab, slab, us_link);
 	} else if (slab->us_freecount == 0) {
 		LIST_REMOVE(slab, us_link);
 		LIST_INSERT_HEAD(&kdom->ud_part_slab, slab, us_link);
 	}
 
 	/* Mark the item's slot as free in the slab. */
 	idx = slab_item_index(slab, keg, item);
 	BIT_SET(keg->uk_ipers, idx, &slab->us_free);
 	slab->us_freecount += 1;
 
 	/* Per-domain keg accounting. */
 	kdom->ud_free += 1;
 }
 
 /*
  * Release 'cnt' items from the array 'bucket' back to their slabs.
  * 'arg' is the zone.  The slab of each item is looked up according to
  * the zone flags, and the keg lock is switched only when consecutive
  * items belong to different domains.
  */
 static void
 zone_release(void *arg, void **bucket, int cnt)
 {
 	struct mtx *lock;
 	uma_zone_t zone;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	uint8_t *mem;
 	void *item;
 	int i;
 
 	zone = arg;
 	keg = zone->uz_keg;
 	lock = NULL;
 	/* Hash zones take the domain 0 keg lock before any slab lookup. */
-	if (__predict_false((zone->uz_flags & UMA_ZONE_HASH) != 0))
+	if (__predict_false((zone->uz_flags & UMA_ZFLAG_HASH) != 0))
 		lock = KEG_LOCK(keg, 0);
 	for (i = 0; i < cnt; i++) {
 		item = bucket[i];
 		/* Find the slab: by vtoslab(), hash lookup, or page offset. */
-		if (__predict_true((zone->uz_flags & UMA_ZONE_VTOSLAB) != 0)) {
+		if (__predict_true((zone->uz_flags & UMA_ZFLAG_VTOSLAB) != 0)) {
 			slab = vtoslab((vm_offset_t)item);
 		} else {
 			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
-			if ((zone->uz_flags & UMA_ZONE_HASH) != 0)
+			if ((zone->uz_flags & UMA_ZFLAG_HASH) != 0)
 				slab = hash_sfind(&keg->uk_hash, mem);
 			else
 				slab = (uma_slab_t)(mem + keg->uk_pgoff);
 		}
 		/* Swap locks only when the slab's domain lock differs. */
 		if (lock != KEG_LOCKPTR(keg, slab->us_domain)) {
 			if (lock != NULL)
 				mtx_unlock(lock);
 			lock = KEG_LOCK(keg, slab->us_domain);
 		}
 		slab_free_item(zone, slab, item);
 	}
 	if (lock != NULL)
 		mtx_unlock(lock);
 }
 
 /*
  * Frees a single item to any zone.
  *
  * Arguments:
  *	zone   The zone to free to
  *	item   The item we're freeing
  *	udata  User supplied data for the dtor
  *	skip   Skip dtors and finis
  */
 static void
 zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
 {
 
 	/* Tear the item down; 'skip' tells which stages to leave out. */
 	item_dtor(zone, item, zone->uz_size, udata, skip);
 	if (skip < SKIP_FINI && zone->uz_fini != NULL)
 		zone->uz_fini(item, zone->uz_size);
 
 	/* Give the memory back through the zone's release hook. */
 	zone->uz_release(zone->uz_arg, &item, 1);
 
 	/* Update the free counter and limit unless SKIP_CNT is set. */
 	if ((skip & SKIP_CNT) == 0) {
 		counter_u64_add(zone->uz_frees, 1);
 		if (zone->uz_max_items > 0)
 			zone_free_limit(zone, 1);
 	}
 }
 
 /*
  * See uma.h
  *
  * Impose an item limit of 'nitems' on the zone, size the buckets to
  * match, and wake any threads waiting on the limit.  Returns 'nitems'.
  */
 int
 uma_zone_set_max(uma_zone_t zone, int nitems)
 {
 	struct uma_bucket_zone *bz;
 	int entries;
 
 	/*
 	 * XXX This can misbehave if the zone has any allocations with
 	 * no limit and a limit is imposed.  There is currently no
 	 * way to clear a limit.
 	 */
 	ZONE_LOCK(zone);
 	bz = bucket_zone_max(zone, nitems);
 	if (bz == NULL)
 		entries = 0;
 	else
 		entries = bz->ubz_entries;
 	zone->uz_bucket_size = entries;
 	zone->uz_bucket_size_max = zone->uz_bucket_size;
 	if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
 		zone->uz_bucket_size_min = zone->uz_bucket_size_max;
 	zone->uz_max_items = nitems;
 	zone->uz_flags |= UMA_ZFLAG_LIMIT;
 	zone_update_caches(zone);
 	/* The new limit may leave room for threads already sleeping. */
 	wakeup(&zone->uz_max_items);
 	ZONE_UNLOCK(zone);
 
 	return (nitems);
 }
 
 /*
  * See uma.h
  *
  * Cap the zone's bucket cache (uz_bkt_max) at 'nitems', less the items
  * the per-CPU buckets can hold.
  */
 void
 uma_zone_set_maxcache(uma_zone_t zone, int nitems)
 {
 	struct uma_bucket_zone *bz;
 	int percpu;
 
 	ZONE_LOCK(zone);
 	bz = bucket_zone_max(zone, nitems);
 	if (bz == NULL) {
 		/* No bucket size fits: turn bucket caching off. */
 		zone->uz_bucket_size = 0;
 		zone->uz_bucket_size_max = zone->uz_bucket_size;
 	} else {
 		/*
 		 * Two buckets per CPU, plus the cross-domain bucket for
 		 * first-touch zones.
 		 */
 		percpu = (zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 ? 3 : 2;
 		nitems -= bz->ubz_entries * percpu * mp_ncpus;
 		zone->uz_bucket_size_max = bz->ubz_entries;
 	}
 	if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
 		zone->uz_bucket_size_min = zone->uz_bucket_size_max;
 	zone->uz_bkt_max = nitems;
 	ZONE_UNLOCK(zone);
 }
 
 /*
  * See uma.h
  *
  * Return the zone's current item limit, narrowed to int.
  */
 int
 uma_zone_get_max(uma_zone_t zone)
 {
 
 	return ((int)atomic_load_64(&zone->uz_max_items));
 }
 
 /*
  * See uma.h
  *
  * Record the warning string for the zone.  The pointer is stored as
  * is, not copied.
  */
 void
 uma_zone_set_warning(uma_zone_t zone, const char *warning)
 {
 
 	/* Guarded by ZONE_ASSERT_COLD(): presumably set-up time only. */
 	ZONE_ASSERT_COLD(zone);
 	zone->uz_warning = warning;
 }
 
 /*
  * See uma.h
  *
  * Install 'maxaction' as the function of the zone's uz_maxaction task,
  * with the zone itself as the task context.
  */
 void
 uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
 {
 	task_fn_t *handler;
 
 	ZONE_ASSERT_COLD(zone);
 	handler = (task_fn_t *)maxaction;
 	TASK_INIT(&zone->uz_maxaction, 0, handler, zone);
 }
 
 /*
  * See uma.h
  *
  * Estimate the number of items currently allocated from the zone as
  * allocations minus frees, summed over the zone counters and every
  * per-CPU cache.  A negative intermediate result is reported as 0.
  */
 int
 uma_zone_get_cur(uma_zone_t zone)
 {
 	int64_t total;
 	u_int cpu;
 
 	total = 0;
 	/* The zone-wide counters are skipped while they are EARLY_COUNTER. */
 	if (zone->uz_allocs != EARLY_COUNTER &&
 	    zone->uz_frees != EARLY_COUNTER) {
 		total = counter_u64_fetch(zone->uz_allocs) -
 		    counter_u64_fetch(zone->uz_frees);
 	}
 	CPU_FOREACH(cpu) {
 		total += atomic_load_64(&zone->uz_cpu[cpu].uc_allocs) -
 		    atomic_load_64(&zone->uz_cpu[cpu].uc_frees);
 	}
 	if (total < 0)
 		total = 0;
 
 	return (total);
 }
 
 /*
  * Total allocations from the zone: the zone-wide counter (unless it is
  * still EARLY_COUNTER) plus every per-CPU cache's count.
  */
 static uint64_t
 uma_zone_get_allocs(uma_zone_t zone)
 {
 	uint64_t total;
 	u_int cpu;
 
 	if (zone->uz_allocs != EARLY_COUNTER)
 		total = counter_u64_fetch(zone->uz_allocs);
 	else
 		total = 0;
 	CPU_FOREACH(cpu) {
 		total += atomic_load_64(&zone->uz_cpu[cpu].uc_allocs);
 	}
 
 	return (total);
 }
 
 /*
  * Total frees to the zone: the zone-wide counter (unless it is still
  * EARLY_COUNTER) plus every per-CPU cache's count.
  */
 static uint64_t
 uma_zone_get_frees(uma_zone_t zone)
 {
 	uint64_t total;
 	u_int cpu;
 
 	if (zone->uz_frees != EARLY_COUNTER)
 		total = counter_u64_fetch(zone->uz_frees);
 	else
 		total = 0;
 	CPU_FOREACH(cpu) {
 		total += atomic_load_64(&zone->uz_cpu[cpu].uc_frees);
 	}
 
 	return (total);
 }
 
 #ifdef INVARIANTS
 /*
  * Sum the allocation counts of every zone attached to the keg.
  * Used only for KEG_ASSERT_COLD().
  */
 static uint64_t
 uma_keg_get_allocs(uma_keg_t keg)
 {
 	uma_zone_t zone;
 	uint64_t total;
 
 	total = 0;
 	LIST_FOREACH(zone, &keg->uk_zones, uz_link) {
 		total += uma_zone_get_allocs(zone);
 	}
 
 	return (total);
 }
 #endif
 
 /*
  * See uma.h
  *
  * Install 'uminit' as the init routine of the zone's keg.
  */
 void
 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
 {
 	uma_keg_t kegp;
 
 	KEG_GET(zone, kegp);
 	/* Guarded by KEG_ASSERT_COLD(): presumably set-up time only. */
 	KEG_ASSERT_COLD(kegp);
 	kegp->uk_init = uminit;
 }
 
 /*
  * See uma.h
  *
  * Install 'fini' as the fini routine of the zone's keg.
  */
 void
 uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
 {
 	uma_keg_t kegp;
 
 	KEG_GET(zone, kegp);
 	/* Guarded by KEG_ASSERT_COLD(): presumably set-up time only. */
 	KEG_ASSERT_COLD(kegp);
 	kegp->uk_fini = fini;
 }
 
 /*
  * See uma.h
  *
  * Install 'zinit' as the zone-level (not keg-level) init routine.
  */
 void
 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
 {
 
 	/* Guarded by ZONE_ASSERT_COLD(): presumably set-up time only. */
 	ZONE_ASSERT_COLD(zone);
 	zone->uz_init = zinit;
 }
 
 /*
  * See uma.h
  *
  * Install 'zfini' as the zone-level (not keg-level) fini routine.
  */
 void
 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
 {
 
 	/* Guarded by ZONE_ASSERT_COLD(): presumably set-up time only. */
 	ZONE_ASSERT_COLD(zone);
 	zone->uz_fini = zfini;
 }
 
 /*
  * See uma.h
  *
  * Install 'freef' as the free hook (uk_freef) of the zone's keg.
  */
 void
 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
 {
 	uma_keg_t kegp;
 
 	KEG_GET(zone, kegp);
 	/* Guarded by KEG_ASSERT_COLD(): presumably set-up time only. */
 	KEG_ASSERT_COLD(kegp);
 	kegp->uk_freef = freef;
 }
 
 /*
  * See uma.h
  *
  * Install 'allocf' as the allocation hook (uk_allocf) of the zone's keg.
  */
 void
 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
 {
 	uma_keg_t kegp;
 
 	KEG_GET(zone, kegp);
 	/* Guarded by KEG_ASSERT_COLD(): presumably set-up time only. */
 	KEG_ASSERT_COLD(kegp);
 	kegp->uk_allocf = allocf;
 }
 
 /*
  * See uma.h
  *
  * Set the reserve (uk_reserve) of the zone's keg to 'items'.
  */
 void
 uma_zone_reserve(uma_zone_t zone, int items)
 {
 	uma_keg_t kegp;
 
 	KEG_GET(zone, kegp);
 	/* Guarded by KEG_ASSERT_COLD(): presumably set-up time only. */
 	KEG_ASSERT_COLD(kegp);
 	kegp->uk_reserve = items;
 }
 
 /*
  * See uma.h
  *
  * Reserve kernel virtual address space for 'count' items and cap the
  * zone at the number of items that fit in the reserved pages.  Returns 1
  * on success and 0 if the KVA could not be allocated.
  */
 int
 uma_zone_reserve_kva(uma_zone_t zone, int count)
 {
 	uma_keg_t keg;
 	vm_offset_t kva;
 	u_int pages;
 
 	KEG_GET(zone, keg);
 	KEG_ASSERT_COLD(keg);
 	ZONE_ASSERT_COLD(zone);
 
 	/* Round the item count up to whole slabs, then convert to pages. */
 	pages = howmany(count, keg->uk_ipers) * keg->uk_ppera;
 
 	/*
 	 * With UMA_MD_SMALL_ALLOC, single-page slabs need no KVA
 	 * reservation and kva is left at 0; otherwise KVA is always
 	 * reserved.
 	 */
 #ifdef UMA_MD_SMALL_ALLOC
 	if (keg->uk_ppera > 1) {
 #else
 	if (1) {
 #endif
 		kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
 		if (kva == 0)
 			return (0);
 	} else
 		kva = 0;
 
 	ZONE_LOCK(zone);
 	MPASS(keg->uk_kva == 0);
 	keg->uk_kva = kva;
 	keg->uk_offset = 0;
 	/* The limit is the capacity of the reserved pages. */
 	zone->uz_max_items = pages * keg->uk_ipers;
 #ifdef UMA_MD_SMALL_ALLOC
 	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
 #else
 	keg->uk_allocf = noobj_alloc;
 #endif
 	keg->uk_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
 	zone->uz_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
 	zone_update_caches(zone);
 	ZONE_UNLOCK(zone);
 
 	return (1);
 }
 
 /*
  * See uma.h
  *
  * Pre-allocate enough slabs to hold 'items' items and place each new
  * slab on its domain's free-slab list.  Each slab allocation is retried
  * until it succeeds.
  */
 void
 uma_prealloc(uma_zone_t zone, int items)
 {
 	struct vm_domainset_iter di;
 	uma_domain_t dom;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	int aflags, domain, slabs;
 
 	KEG_GET(zone, keg);
 	/* Number of slabs needed, rounding up. */
 	slabs = howmany(items, keg->uk_ipers);
 	while (slabs-- > 0) {
 		aflags = M_NOWAIT;
 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
 		    &aflags);
 		for (;;) {
 			slab = keg_alloc_slab(keg, zone, domain, M_WAITOK,
 			    aflags);
 			if (slab != NULL) {
 				/*
 				 * NOTE(review): keg_alloc_slab() presumably
 				 * returns with the keg lock for the slab's
 				 * domain held, given the unlock below.
 				 */
 				dom = &keg->uk_domain[slab->us_domain];
 				LIST_REMOVE(slab, us_link);
 				LIST_INSERT_HEAD(&dom->ud_free_slab, slab,
 				    us_link);
 				KEG_UNLOCK(keg, slab->us_domain);
 				break;
 			}
 			/*
 			 * Advance to the next domain; on a non-zero return
 			 * from the iterator, wait on the policy's domain
 			 * mask before retrying.
 			 */
 			if (vm_domainset_iter_policy(&di, &domain) != 0)
 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
 		}
 	}
 }
 
 /*
  * See uma.h
  *
  * Reclaim cached memory from every zone.  'req' selects how aggressive
  * the reclamation is; an unknown request panics.
  */
 void
 uma_reclaim(int req)
 {
 
 	CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
 	sx_xlock(&uma_reclaim_lock);
 	bucket_enable();
 
 	switch (req) {
 	case UMA_RECLAIM_TRIM:
 		zone_foreach(zone_trim, NULL);
 		break;
 	case UMA_RECLAIM_DRAIN:
 		zone_foreach(zone_drain, NULL);
 		break;
 	case UMA_RECLAIM_DRAIN_CPU:
 		/* Drain the zones, then the per-CPU caches, then again. */
 		zone_foreach(zone_drain, NULL);
 		pcpu_cache_drain_safe(NULL);
 		zone_foreach(zone_drain, NULL);
 		break;
 	default:
 		panic("unhandled reclamation request %d", req);
 	}
 
 	/*
 	 * The slab zone is visited early by the pass above, before the
 	 * other zones have released their slabs.  Visit it again so that
 	 * pages emptied in the meantime can be freed, and do the same for
 	 * the bucket zones.
 	 */
 	zone_drain(slabzone, NULL);
 	bucket_zone_drain();
 	sx_xunlock(&uma_reclaim_lock);
 }
 
 /* Non-zero while a reclaim request is pending for the worker. */
 static volatile int uma_reclaim_needed;
 
 /*
  * Request an asynchronous reclaim.  Only the request that moves the
  * counter from zero issues the wakeup.
  */
 void
 uma_reclaim_wakeup(void)
 {
 
 	if (atomic_fetchadd_int(&uma_reclaim_needed, 1) != 0)
 		return;
 	wakeup(uma_reclaim);
 }
 
 /*
  * Body of the reclaim worker: loops forever, waiting until
  * uma_reclaim_needed is non-zero, then invokes the vm_lowmem handlers and
  * runs a full reclaim including the per-CPU caches.
  */
 void
 uma_reclaim_worker(void *arg __unused)
 {
 
 	for (;;) {
 		sx_xlock(&uma_reclaim_lock);
 		/*
 		 * Sleep on the uma_reclaim channel, rechecking at least every
 		 * hz ticks, until a request is pending.
 		 */
 		while (atomic_load_int(&uma_reclaim_needed) == 0)
 			sx_sleep(uma_reclaim, &uma_reclaim_lock, PVM, "umarcl",
 			    hz);
 		/* uma_reclaim() takes the lock itself, so drop it first. */
 		sx_xunlock(&uma_reclaim_lock);
 		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
 		uma_reclaim(UMA_RECLAIM_DRAIN_CPU);
 		/* Clear the request only after the reclaim has finished. */
 		atomic_store_int(&uma_reclaim_needed, 0);
 		/* Don't fire more than once per-second. */
 		pause("umarclslp", hz);
 	}
 }
 
 /*
  * See uma.h
  *
  * Reclaim cached memory from a single zone.  'req' selects how
  * aggressive the reclamation is; an unknown request panics.
  */
 void
 uma_zone_reclaim(uma_zone_t zone, int req)
 {
 
 	if (req == UMA_RECLAIM_TRIM) {
 		zone_trim(zone, NULL);
 	} else if (req == UMA_RECLAIM_DRAIN) {
 		zone_drain(zone, NULL);
 	} else if (req == UMA_RECLAIM_DRAIN_CPU) {
 		/* Drain the per-CPU caches first, then the zone. */
 		pcpu_cache_drain_safe(zone);
 		zone_drain(zone, NULL);
 	} else {
 		panic("unhandled reclamation request %d", req);
 	}
 }
 
 /*
  * See uma.h
  *
  * Returns non-zero while the zone's sleeper count (uz_sleepers) is
  * above zero.
  */
 int
 uma_zone_exhausted(uma_zone_t zone)
 {
 	uint32_t sleepers;
 
 	sleepers = atomic_load_32(&zone->uz_sleepers);
 	return (sleepers > 0);
 }
 
 /* Return the configured limit, uma_kmem_limit. */
 unsigned long
 uma_limit(void)
 {
 	unsigned long limit;
 
 	limit = uma_kmem_limit;
 	return (limit);
 }
 
 /* Set the limit, uma_kmem_limit, to 'limit'. */
 void
 uma_set_limit(unsigned long limit)
 {
 
 	uma_kmem_limit = limit;
 }
 
 /* Return the current value of the uma_kmem_total counter. */
 unsigned long
 uma_size(void)
 {
 	unsigned long total;
 
 	total = atomic_load_long(&uma_kmem_total);
 	return (total);
 }
 
 /*
  * Return the headroom left under the limit: uma_kmem_limit minus the
  * current total.  The difference is computed in unsigned long and
  * returned as long.
  */
 long
 uma_avail(void)
 {
 	unsigned long headroom;
 
 	headroom = uma_kmem_limit - uma_size();
 	return (headroom);
 }
 
 #ifdef DDB
 /*
  * Generate statistics across both the zone and its per-CPU caches.  Each
  * statistic is stored only if the pointer supplied for it is non-NULL.
  *
  * Note: does not update the zone statistics, as it can't safely clear the
  * per-CPU cache statistic.
  */
 static void
 uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
     uint64_t *freesp, uint64_t *sleepsp, uint64_t *xdomainp)
 {
 	uma_cache_t pcpu;
 	uint64_t nallocs, nfrees, nsleeps, nxdomain;
 	int ncached, cpu;
 
 	nallocs = 0;
 	nfrees = 0;
 	nsleeps = 0;
 	nxdomain = 0;
 	ncached = 0;
 
 	/* Fold in every CPU's cache. */
 	CPU_FOREACH(cpu) {
 		pcpu = &z->uz_cpu[cpu];
 		ncached += pcpu->uc_allocbucket.ucb_cnt;
 		ncached += pcpu->uc_freebucket.ucb_cnt;
 		/* Cross-bucket items count as both cached and cross-domain. */
 		nxdomain += pcpu->uc_crossbucket.ucb_cnt;
 		ncached += pcpu->uc_crossbucket.ucb_cnt;
 		nallocs += pcpu->uc_allocs;
 		nfrees += pcpu->uc_frees;
 	}
 
 	/* Then the zone-wide counters. */
 	nallocs += counter_u64_fetch(z->uz_allocs);
 	nfrees += counter_u64_fetch(z->uz_frees);
 	nsleeps += z->uz_sleeps;
 	nxdomain += z->uz_xdomain;
 
 	if (cachefreep != NULL) {
 		*cachefreep = ncached;
 	}
 	if (allocsp != NULL) {
 		*allocsp = nallocs;
 	}
 	if (freesp != NULL) {
 		*freesp = nfrees;
 	}
 	if (sleepsp != NULL) {
 		*sleepsp = nsleeps;
 	}
 	if (xdomainp != NULL) {
 		*xdomainp = nxdomain;
 	}
 }
 #endif /* DDB */
 
 /*
  * Sysctl handler reporting the number of zones: every zone of every keg
  * plus the cache zones, counted under the read lock on uma_rwlock.
  */
 static int
 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
 {
 	uma_keg_t keg;
 	uma_zone_t zone;
 	int nzones;
 
 	nzones = 0;
 	rw_rlock(&uma_rwlock);
 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
 		LIST_FOREACH(zone, &keg->uk_zones, uz_link) {
 			nzones++;
 		}
 	}
 	LIST_FOREACH(zone, &uma_cachezones, uz_link) {
 		nzones++;
 	}
 	rw_runlock(&uma_rwlock);
 
 	return (sysctl_handle_int(oidp, &nzones, 0, req));
 }
 
 /*
  * Fill in the zone-level fields of 'uth' and the per-CPU array 'ups'
  * (mp_maxid + 1 entries) for zone 'z'.  When 'internal' is true, or a
  * CPU is absent, that CPU's entry is left zeroed.
  */
 static void
 uma_vm_zone_stats(struct uma_type_header *uth, uma_zone_t z, struct sbuf *sbuf,
     struct uma_percpu_stat *ups, bool internal)
 {
 	struct uma_percpu_stat *stat;
 	uma_cache_t pcpu;
 	int idx;
 
 	/* Items cached in the zone's per-domain bucket caches. */
 	for (idx = 0; idx < vm_ndomains; idx++)
 		uth->uth_zone_free += z->uz_domain[idx].uzd_nitems;
 
 	uth->uth_allocs = counter_u64_fetch(z->uz_allocs);
 	uth->uth_frees = counter_u64_fetch(z->uz_frees);
 	uth->uth_fails = counter_u64_fetch(z->uz_fails);
 	uth->uth_sleeps = z->uz_sleeps;
 	uth->uth_xdomain = z->uz_xdomain;
 
 	/*
 	 * While it is not normally safe to access the cache buckets
 	 * while not on the CPU that owns the cache, we accept the
 	 * possible race with bucket exchange during monitoring; the
 	 * counts read here are a snapshot only.
 	 */
 	for (idx = 0; idx < mp_maxid + 1; idx++) {
 		stat = &ups[idx];
 		bzero(stat, sizeof(*ups));
 		if (!internal && !CPU_ABSENT(idx)) {
 			pcpu = &z->uz_cpu[idx];
 			stat->ups_cache_free += pcpu->uc_allocbucket.ucb_cnt;
 			stat->ups_cache_free += pcpu->uc_freebucket.ucb_cnt;
 			stat->ups_cache_free += pcpu->uc_crossbucket.ucb_cnt;
 			stat->ups_allocs = pcpu->uc_allocs;
 			stat->ups_frees = pcpu->uc_frees;
 		}
 	}
 }
 
 /*
  * Sysctl handler that exports statistics for every zone as a binary
  * stream: one uma_stream_header, then for each zone a uma_type_header
  * followed by mp_maxid + 1 uma_percpu_stat records.
  */
 static int
 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct uma_stream_header ush;
 	struct uma_type_header uth;
 	struct uma_percpu_stat *ups;
 	struct sbuf sbuf;
 	uma_keg_t kz;
 	uma_zone_t z;
 	uint64_t items;
 	uint32_t kfree, pages;
 	int count, error, i;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
 	/* Scratch array for per-CPU records, reused for every zone. */
 	ups = malloc((mp_maxid + 1) * sizeof(*ups), M_TEMP, M_WAITOK);
 
 	/* Count the zones first so the header can announce the total. */
 	count = 0;
 	rw_rlock(&uma_rwlock);
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
 			count++;
 	}
 
 	LIST_FOREACH(z, &uma_cachezones, uz_link)
 		count++;
 
 	/*
 	 * Insert stream header.
 	 */
 	bzero(&ush, sizeof(ush));
 	ush.ush_version = UMA_STREAM_VERSION;
 	ush.ush_maxcpus = (mp_maxid + 1);
 	ush.ush_count = count;
 	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
 
 	/* Keg-backed zones. */
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		/* Total the keg's free items and pages over all domains. */
 		kfree = pages = 0;
 		for (i = 0; i < vm_ndomains; i++) {
 			kfree += kz->uk_domain[i].ud_free;
 			pages += kz->uk_domain[i].ud_pages;
 		}
 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 			bzero(&uth, sizeof(uth));
 			ZONE_LOCK(z);
 			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
 			uth.uth_align = kz->uk_align;
 			uth.uth_size = kz->uk_size;
 			uth.uth_rsize = kz->uk_rsize;
 			/*
 			 * Limited zones derive the page count from their own
 			 * item count; others report the keg's page total.
 			 */
 			if (z->uz_max_items > 0) {
 				items = UZ_ITEMS_COUNT(z->uz_items);
 				uth.uth_pages = (items / kz->uk_ipers) *
 					kz->uk_ppera;
 			} else
 				uth.uth_pages = pages;
 			uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) *
 			    kz->uk_ppera;
 			uth.uth_limit = z->uz_max_items;
 			uth.uth_keg_free = kfree;
 
 			/*
 			 * A zone is secondary if it is not the first entry
 			 * on the keg's zone list.
 			 */
 			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
 			    (LIST_FIRST(&kz->uk_zones) != z))
 				uth.uth_zone_flags = UTH_ZONE_SECONDARY;
 			uma_vm_zone_stats(&uth, z, &sbuf, ups,
 			    kz->uk_flags & UMA_ZFLAG_INTERNAL);
 			ZONE_UNLOCK(z);
 			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
 			for (i = 0; i < mp_maxid + 1; i++)
 				(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
 		}
 	}
 	/* Cache zones: only the name and size header fields are set here. */
 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
 		bzero(&uth, sizeof(uth));
 		ZONE_LOCK(z);
 		strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
 		uth.uth_size = z->uz_size;
 		uma_vm_zone_stats(&uth, z, &sbuf, ups, false);
 		ZONE_UNLOCK(z);
 		(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
 		for (i = 0; i < mp_maxid + 1; i++)
 			(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
 	}
 
 	rw_runlock(&uma_rwlock);
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	free(ups, M_TEMP);
 	return (error);
 }
 
 int
 sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
 {
 	uma_zone_t zone = *(uma_zone_t *)arg1;
 	int error, max;
 
 	max = uma_zone_get_max(zone);
 	error = sysctl_handle_int(oidp, &max, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	uma_zone_set_max(zone, max);
 
 	return (0);
 }
 
 /*
  * Sysctl handler reporting uma_zone_get_cur() for a zone.  If arg2 is zero,
  * arg1 is the address of a uma_zone_t variable; otherwise arg1 is the zone
  * itself.
  */
 int
 sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
 {
 	uma_zone_t zone;
 	int nitems;
 
 	/*
 	 * Sysctls may be registered for global zones before those zones
 	 * exist, so such callers pass a pointer to the zone pointer and
 	 * leave arg2 at zero.
 	 */
 	zone = (arg2 != 0) ? arg1 : *(uma_zone_t *)arg1;
 	nitems = uma_zone_get_cur(zone);
 	return (sysctl_handle_int(oidp, &nitems, 0, req));
 }
 
 /*
  * Sysctl handler reporting uma_zone_get_allocs() for the zone in arg1 as a
  * 64-bit value.
  */
 static int
 sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS)
 {
 	uma_zone_t zone;
 	uint64_t nallocs;
 
 	zone = arg1;
 	nallocs = uma_zone_get_allocs(zone);
 	return (sysctl_handle_64(oidp, &nallocs, 0, req));
 }
 
 /*
  * Sysctl handler reporting uma_zone_get_frees() for the zone in arg1 as a
  * 64-bit value.
  */
 static int
 sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS)
 {
 	uma_zone_t zone;
 	uint64_t nfrees;
 
 	zone = arg1;
 	nfrees = uma_zone_get_frees(zone);
 	return (sysctl_handle_64(oidp, &nfrees, 0, req));
 }
 
 static int
 sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
 	uma_zone_t zone = arg1;
 	int error;
 
 	sbuf_new_for_sysctl(&sbuf, NULL, 0, req);
 	if (zone->uz_flags != 0)
 		sbuf_printf(&sbuf, "0x%b", zone->uz_flags, PRINT_UMA_ZFLAGS);
 	else
 		sbuf_printf(&sbuf, "0");
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 
 	return (error);
 }
 
 /*
  * Sysctl handler reporting a keg's slab efficiency as an integer percentage:
  * the bytes of client-requested item space in one slab divided by the bytes
  * the slab occupies.  arg1 is the keg.
  */
 static int
 sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS)
 {
 	uma_keg_t keg = arg1;
 	int avail, effpct, total;
 
 	/* Bytes obtained from the backend for one slab. */
 	total = keg->uk_ppera * PAGE_SIZE;
 	/* An off-page slab header is charged at its maximum bitset size. */
-	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) != 0)
+	if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0)
 		total += slab_sizeof(SLAB_MAX_SETSIZE);
 	/*
 	 * We consider the client's requested size and alignment here, not the
 	 * real size determination uk_rsize, because we also adjust the real
 	 * size for internal implementation reasons (max bitset size).
 	 */
 	avail = keg->uk_ipers * roundup2(keg->uk_size, keg->uk_align + 1);
 	/* Per-CPU kegs scale the usable space by the number of CPU slots. */
 	if ((keg->uk_flags & UMA_ZONE_PCPU) != 0)
 		avail *= mp_maxid + 1;
 	/* Integer division: the percentage is truncated, not rounded. */
 	effpct = 100 * avail / total;
 	return (sysctl_handle_int(oidp, &effpct, 0, req));
 }
 
 /*
  * Sysctl handler reporting the item-count portion of uz_items for the zone
  * in arg1.  The field is loaded once atomically and reduced with
  * UZ_ITEMS_COUNT().
  */
 static int
 sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS)
 {
 	uma_zone_t zone;
 	uint64_t raw, nitems;
 
 	zone = arg1;
 	raw = atomic_load_64(&zone->uz_items);
 	nitems = UZ_ITEMS_COUNT(raw);
 	return (sysctl_handle_64(oidp, &nitems, 0, req));
 }
 
 #ifdef INVARIANTS
 /*
  * Find the slab that holds 'item' for the INVARIANTS debugging checks.
  *
  * Returns NULL for cache zones (UMA_ZFLAG_CACHE).  Otherwise the slab is
  * found, in order of preference, through vtoslab(), at offset uk_pgoff from
  * the item's slab-aligned base address, or through the keg's hash table.
  * The hash lookup may also return NULL.
  */
 static uma_slab_t
 uma_dbg_getslab(uma_zone_t zone, void *item)
 {
 	uma_slab_t slab;
 	uma_keg_t keg;
 	uint8_t *mem;
 
 	/*
 	 * It is safe to return the slab here even though the
 	 * zone is unlocked because the item's allocation state
 	 * essentially holds a reference.
 	 */
 	/* Round the item address down to a UMA_SLAB_SIZE boundary. */
 	mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0)
 		return (NULL);
-	if (zone->uz_flags & UMA_ZONE_VTOSLAB)
+	if (zone->uz_flags & UMA_ZFLAG_VTOSLAB)
 		return (vtoslab((vm_offset_t)mem));
 	keg = zone->uz_keg;
 	/* Without the hash flag the slab header sits at uk_pgoff from mem. */
-	if ((keg->uk_flags & UMA_ZONE_HASH) == 0)
+	if ((keg->uk_flags & UMA_ZFLAG_HASH) == 0)
 		return ((uma_slab_t)(mem + keg->uk_pgoff));
 	/* The hash lookup is done under the keg lock for domain index 0. */
 	KEG_LOCK(keg, 0);
 	slab = hash_sfind(&keg->uk_hash, mem);
 	KEG_UNLOCK(keg, 0);
 
 	return (slab);
 }
 
 /*
  * Decide whether debug tracking is skipped for 'mem' in 'zone'.  Cache zones
  * are always skipped; all other zones defer to uma_dbg_kskip() on their keg.
  */
 static bool
 uma_dbg_zskip(uma_zone_t zone, void *mem)
 {
 
 	/* Short-circuit keeps the keg untouched for cache zones. */
 	return ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0 ||
 	    uma_dbg_kskip(zone->uz_keg, mem));
 }
 
 /*
  * Decide whether debug tracking is skipped for 'mem' in 'keg', based on
  * dbg_divisor: 0 skips everything, 1 skips nothing, and any other value
  * keeps only items whose computed index is a multiple of the divisor.
  * Every decision made by index is tallied in uma_skip_cnt or uma_dbg_cnt.
  */
 static bool
 uma_dbg_kskip(uma_keg_t keg, void *mem)
 {
 	uintptr_t addr, idx;
 
 	if (dbg_divisor == 0)
 		return (true);
 	if (dbg_divisor == 1)
 		return (false);
 
 	/*
 	 * The index is the page number, refined by the item's position
 	 * within the page when a slab holds more than one item.
 	 */
 	addr = (uintptr_t)mem;
 	idx = addr >> PAGE_SHIFT;
 	if (keg->uk_ipers > 1)
 		idx = idx * keg->uk_ipers +
 		    (addr & PAGE_MASK) / keg->uk_rsize;
 
 	if (idx % dbg_divisor == 0) {
 		counter_u64_add(uma_dbg_cnt, 1);
 		return (false);
 	}
 	counter_u64_add(uma_skip_cnt, 1);
 
 	return (true);
 }
 
 /*
  * Record an allocation in the slab's debug bitset so that uma_dbg_free can
  * later verify the matching free.  Panics if no slab can be found for the
  * item or if the item's debug bit is already set.
  */
 static void
 uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
 {
 	uma_keg_t keg;
 	int idx;
 
 	/* A NULL slab means the caller wants it looked up from the item. */
 	if (slab == NULL)
 		slab = uma_dbg_getslab(zone, item);
 	if (slab == NULL)
 		panic("uma: item %p did not belong to zone %s\n",
 		    item, zone->uz_name);
 
 	keg = zone->uz_keg;
 	idx = slab_item_index(slab, keg, item);
 
 	/* A bit that is already set means the item was handed out twice. */
 	if (BIT_ISSET(keg->uk_ipers, idx, slab_dbg_bits(slab, keg)))
 		panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, idx);
 	BIT_SET_ATOMIC(keg->uk_ipers, idx, slab_dbg_bits(slab, keg));
 }
 
 /*
  * Validate a free against the slab's debug bitset and clear the item's bit.
  * Panics when no slab can be found for the item, when the item's index is
  * not below the keg's items-per-slab, when the address is not the start of
  * an item, or when the item's debug bit is already clear (duplicate free).
  */
 static void
 uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
 {
 	uma_keg_t keg;
 	int idx;
 
 	/* A NULL slab means the caller wants it looked up from the item. */
 	if (slab == NULL)
 		slab = uma_dbg_getslab(zone, item);
 	if (slab == NULL)
 		panic("uma: Freed item %p did not belong to zone %s\n",
 		    item, zone->uz_name);
 
 	keg = zone->uz_keg;
 	idx = slab_item_index(slab, keg, item);
 
 	/* Index beyond the slab's item count. */
 	if (idx >= keg->uk_ipers)
 		panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, idx);
 
 	/* Address that does not coincide with the start of item 'idx'. */
 	if (slab_item(slab, keg, idx) != item)
 		panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, idx);
 
 	/* Bit already clear: the item is not currently allocated. */
 	if (!BIT_ISSET(keg->uk_ipers, idx, slab_dbg_bits(slab, keg)))
 		panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, idx);
 
 	BIT_CLR_ATOMIC(keg->uk_ipers, idx, slab_dbg_bits(slab, keg));
 }
 #endif /* INVARIANTS */
 
 #ifdef DDB
 /*
  * Gather the statistics for one zone of a keg for the DDB "show uma" output.
  *
  * Fills *allocs, *used (allocs minus frees), *sleeps, *cachefree and
  * *xdomain.  *cachefree also includes each domain's uzd_nitems and, unless
  * the zone is a secondary zone that is not first on the keg's list, the
  * keg's per-domain ud_free counts.
  *
  * Returns (*used + *cachefree) * uk_size, which the caller sorts by.
  */
 static int64_t
 get_uma_stats(uma_keg_t kz, uma_zone_t z, uint64_t *allocs, uint64_t *used,
     uint64_t *sleeps, long *cachefree, uint64_t *xdomain)
 {
 	uint64_t nfrees;
 	bool secondary;
 	int dom;
 
 	if ((kz->uk_flags & UMA_ZFLAG_INTERNAL) == 0) {
 		uma_zone_sumstat(z, cachefree, allocs, &nfrees, sleeps,
 		    xdomain);
 	} else {
 		/* Internal kegs are read from the zone counters directly. */
 		*allocs = counter_u64_fetch(z->uz_allocs);
 		nfrees = counter_u64_fetch(z->uz_frees);
 		*sleeps = z->uz_sleeps;
 		*cachefree = 0;
 		*xdomain = 0;
 	}
 
 	/* Only non-secondary zones are credited with the keg's free items. */
 	secondary = (z->uz_flags & UMA_ZONE_SECONDARY) &&
 	    (LIST_FIRST(&kz->uk_zones) != z);
 	for (dom = 0; dom < vm_ndomains; dom++) {
 		*cachefree += z->uz_domain[dom].uzd_nitems;
 		if (!secondary)
 			*cachefree += kz->uk_domain[dom].ud_free;
 	}
 	*used = *allocs - nfrees;
 	return (((int64_t)*used + *cachefree) * kz->uk_size);
 }
 
 /*
  * DDB "show uma" command: print one row of statistics for every zone on
  * every keg in uma_kegs, ordered by the size returned from get_uma_stats(),
  * largest first.  The order is produced by rescanning all zones once per
  * output row and picking the largest zone that has not been printed yet.
  */
 DB_SHOW_COMMAND(uma, db_show_uma)
 {
 	const char *fmt_hdr, *fmt_entry;
 	uma_keg_t kz;
 	uma_zone_t z;
 	uint64_t allocs, used, sleeps, xdomain;
 	long cachefree;
 	/* variables for sorting */
 	uma_keg_t cur_keg;
 	uma_zone_t cur_zone, last_zone;
 	int64_t cur_size, last_size, size;
 	int ties;
 
 	/* /i option produces machine-parseable CSV output */
 	if (modif[0] == 'i') {
 		fmt_hdr = "%s,%s,%s,%s,%s,%s,%s,%s,%s\n";
 		fmt_entry = "\"%s\",%ju,%jd,%ld,%ju,%ju,%u,%jd,%ju\n";
 	} else {
 		fmt_hdr = "%18s %6s %7s %7s %11s %7s %7s %10s %8s\n";
 		fmt_entry = "%18s %6ju %7jd %7ld %11ju %7ju %7u %10jd %8ju\n";
 	}
 
 	db_printf(fmt_hdr, "Zone", "Size", "Used", "Free", "Requests",
 	    "Sleeps", "Bucket", "Total Mem", "XFree");
 
 	/* Sort the zones with largest size first. */
 	last_zone = NULL;
 	last_size = INT64_MAX;
 	for (;;) {
 		/* Each pass selects the next row; -1 is below any candidate. */
 		cur_zone = NULL;
 		cur_size = -1;
 		ties = 0;
 		LIST_FOREACH(kz, &uma_kegs, uk_link) {
 			LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 				/*
 				 * In the case of size ties, print out zones
 				 * in the order they are encountered.  That is,
 				 * when we encounter the most recently output
 				 * zone, we have already printed all preceding
 				 * ties, and we must print all following ties.
 				 */
 				if (z == last_zone) {
 					ties = 1;
 					continue;
 				}
 				size = get_uma_stats(kz, z, &allocs, &used,
 				    &sleeps, &cachefree, &xdomain);
 				/*
 				 * Accept sizes strictly below the last printed
 				 * one, or equal to it once last_zone has been
 				 * passed (ties == 1).  The strict '>' keeps
 				 * the first of several equal candidates.
 				 */
 				if (size > cur_size && size < last_size + ties)
 				{
 					cur_size = size;
 					cur_zone = z;
 					cur_keg = kz;
 				}
 			}
 		}
 		/* No candidate left: every zone has been printed. */
 		if (cur_zone == NULL)
 			break;
 
 		/*
 		 * The scan overwrote the stat variables with the last zone
 		 * visited, so fetch them again for the selected zone.
 		 */
 		size = get_uma_stats(cur_keg, cur_zone, &allocs, &used,
 		    &sleeps, &cachefree, &xdomain);
 		db_printf(fmt_entry, cur_zone->uz_name,
 		    (uintmax_t)cur_keg->uk_size, (intmax_t)used, cachefree,
 		    (uintmax_t)allocs, (uintmax_t)sleeps,
 		    (unsigned)cur_zone->uz_bucket_size, (intmax_t)size,
 		    xdomain);
 
 		/* Stop once db_pager_quit is set. */
 		if (db_pager_quit)
 			return;
 		last_zone = cur_zone;
 		last_size = cur_size;
 	}
 }
 
 /*
  * DDB "show umacache" command: print one row for every zone on the
  * uma_cachezones list, giving its item size, items in use (allocs minus
  * frees), cached free items, total requests and bucket size.
  */
 DB_SHOW_COMMAND(umacache, db_show_umacache)
 {
 	uma_zone_t z;
 	uint64_t allocs, frees;
 	long cachefree;
 	int i;
 
 	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
 	    "Requests", "Bucket");
 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
 		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL);
 		/* Add the items counted in each domain's uzd_nitems. */
 		for (i = 0; i < vm_ndomains; i++)
 			cachefree += z->uz_domain[i].uzd_nitems;
 		/*
 		 * uz_bucket_size is a uint16_t and would be promoted to int;
 		 * cast it so the argument matches %u, as db_show_uma does.
 		 */
 		db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
 		    z->uz_name, (uintmax_t)z->uz_size,
 		    (intmax_t)(allocs - frees), cachefree,
 		    (uintmax_t)allocs, (unsigned)z->uz_bucket_size);
 		/* Stop once db_pager_quit is set. */
 		if (db_pager_quit)
 			return;
 	}
 }
 #endif	/* DDB */
Index: head/sys/vm/uma_int.h
===================================================================
--- head/sys/vm/uma_int.h	(revision 356533)
+++ head/sys/vm/uma_int.h	(revision 356534)
@@ -1,678 +1,692 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2019 Jeffrey Roberson <jeff@FreeBSD.org>
  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  *
  */
 
 #include <sys/counter.h>
 #include <sys/_bitset.h>
 #include <sys/_domainset.h>
 #include <sys/_task.h>
 
 /* 
  * This file includes definitions, structures, prototypes, and inlines that
  * should not be used outside of the actual implementation of UMA.
  */
 
 /* 
  * The brief summary;  Zones describe unique allocation types.  Zones are
  * organized into per-CPU caches which are filled by buckets.  Buckets are
  * organized according to memory domains.  Buckets are filled from kegs which
  * are also organized according to memory domains.  Kegs describe a unique
  * allocation type, backend memory provider, and layout.  Kegs are associated
  * with one or more zones and zones reference one or more kegs.  Kegs provide
  * slabs which are virtually contiguous collections of pages.  Each slab is
  * broken down into one or more items that will satisfy an individual allocation.
  *
  * Allocation is satisfied in the following order:
  * 1) Per-CPU cache
  * 2) Per-domain cache of buckets
  * 3) Slab from any of N kegs
  * 4) Backend page provider
  *
  * More detail on individual objects is contained below:
  *
  * Kegs contain lists of slabs which are stored in either the full bin, empty
  * bin, or partially allocated bin, to reduce fragmentation.  They also contain
  * the user supplied value for size, which is adjusted for alignment purposes
  * and rsize is the result of that.  The Keg also stores information for
  * managing a hash of page addresses that maps pages to uma_slab_t structures
  * for pages that don't have embedded uma_slab_t's.
  *
  * Keg slab lists are organized by memory domain to support NUMA allocation
  * policies.  By default allocations are spread across domains to reduce the
  * potential for hotspots.  Special keg creation flags may be specified to
  * prefer local allocation.  However there is no strict enforcement as frees
  * may happen on any CPU and these are returned to the CPU-local cache
  * regardless of the originating domain.
  *  
  * The uma_slab_t may be embedded in a UMA_SLAB_SIZE chunk of memory or it may
  * be allocated off the page from a special slab zone.  The free list within a
  * slab is managed with a bitmask.  For item sizes that would yield more than
  * 10% memory waste we potentially allocate a separate uma_slab_t if this will
  * improve the number of items per slab that will fit.  
  *
  * The only really gross cases, with regards to memory waste, are for those
  * items that are just over half the page size.   You can get nearly 50% waste,
  * so you fall back to the memory footprint of the power of two allocator. I
  * have looked at memory allocation sizes on many of the machines available to
  * me, and there does not seem to be an abundance of allocations at this range
  * so at this time it may not make sense to optimize for it.  This can, of 
  * course, be solved with dynamic slab sizes.
  *
  * Kegs may serve multiple Zones but by far most of the time they only serve
  * one.  When a Zone is created, a Keg is allocated and setup for it.  While
  * the backing Keg stores slabs, the Zone caches Buckets of items allocated
  * from the slabs.  Each Zone is equipped with an init/fini and ctor/dtor
  * pair, as well as with its own set of small per-CPU caches, layered above
  * the Zone's general Bucket cache.
  *
  * The PCPU caches are protected by critical sections, and may be accessed
  * safely only from their associated CPU, while the Zones backed by the same
  * Keg all share a common Keg lock (to coalesce contention on the backing
  * slabs).  The backing Keg typically only serves one Zone but in the case of
  * multiple Zones, one of the Zones is considered the Master Zone and all
  * Zone-related stats from the Keg are done in the Master Zone.  For an
  * example of a Multi-Zone setup, refer to the Mbuf allocation code.
  */
 
 /*
  *	This is the representation for normal (Non OFFPAGE slab)
  *
  *	i == item
  *	s == slab pointer
  *
  *	<----------------  Page (UMA_SLAB_SIZE) ------------------>
  *	___________________________________________________________
  *     | _  _  _  _  _  _  _  _  _  _  _  _  _  _  _   ___________ |
  *     ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i| |slab header||
  *     ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_| |___________|| 
  *     |___________________________________________________________|
  *
  *
  *	This is an OFFPAGE slab. These can be larger than UMA_SLAB_SIZE.
  *
  *	___________________________________________________________
  *     | _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _   |
  *     ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i|  |
  *     ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_|  |
  *     |___________________________________________________________|
  *       ___________    ^
  *	|slab header|   |
  *	|___________|---*
  *
  */
 
 #ifndef VM_UMA_INT_H
 #define VM_UMA_INT_H
 
 #define UMA_SLAB_SIZE	PAGE_SIZE	/* How big are our slabs? */
 #define UMA_SLAB_MASK	(PAGE_SIZE - 1)	/* Mask to get back to the page */
 #define UMA_SLAB_SHIFT	PAGE_SHIFT	/* Number of bits PAGE_MASK */
 
 /* Max waste percentage before going to off page slab management */
 #define UMA_MAX_WASTE	10
 
+/*
+ * These flags must not overlap with the UMA_ZONE flags specified in uma.h.
+ */
+#define	UMA_ZFLAG_OFFPAGE	0x00200000	/*
+						 * Force the slab structure
+						 * allocation off of the real
+						 * memory.
+						 */
+#define	UMA_ZFLAG_HASH		0x00400000	/*
+						 * Use a hash table instead of
+						 * caching information in the
+						 * vm_page.
+						 */
+#define	UMA_ZFLAG_VTOSLAB	0x00800000	/*
+						 * Zone uses vtoslab for
+						 * lookup.
+						 */
+#define	UMA_ZFLAG_CTORDTOR	0x01000000	/* Zone has ctor/dtor set. */
+#define	UMA_ZFLAG_LIMIT		0x02000000	/* Zone has limit set. */
+#define	UMA_ZFLAG_CACHE		0x04000000	/* uma_zcache_create()d it */
+#define	UMA_ZFLAG_RECLAIMING	0x08000000	/* Running zone_reclaim(). */
+#define	UMA_ZFLAG_BUCKET	0x10000000	/* Bucket zone. */
+#define	UMA_ZFLAG_INTERNAL	0x20000000	/* No offpage no PCPU. */
+#define	UMA_ZFLAG_TRASH		0x40000000	/* Add trash ctor/dtor. */
+#define	UMA_ZFLAG_CACHEONLY	0x80000000	/* Don't ask VM for buckets. */
 
+#define	UMA_ZFLAG_INHERIT						\
+    (UMA_ZFLAG_OFFPAGE | UMA_ZFLAG_HASH | UMA_ZFLAG_VTOSLAB |		\
+     UMA_ZFLAG_BUCKET | UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY)
+
+#define	PRINT_UMA_ZFLAGS	"\20"	\
+    "\40CACHEONLY"			\
+    "\37TRASH"				\
+    "\36INTERNAL"			\
+    "\35BUCKET"				\
+    "\34RECLAIMING"			\
+    "\33CACHE"				\
+    "\32LIMIT"				\
+    "\31CTORDTOR"			\
+    "\30VTOSLAB"			\
+    "\27HASH"				\
+    "\26OFFPAGE"			\
+    "\22ROUNDROBIN"			\
+    "\21FIRSTTOUCH"			\
+    "\20PCPU"				\
+    "\17NODUMP"				\
+    "\16CACHESPREAD"			\
+    "\15MINBUCKET"			\
+    "\14MAXBUCKET"			\
+    "\13NOBUCKET"			\
+    "\12SECONDARY"			\
+    "\11NOTPAGE"			\
+    "\10VM"				\
+    "\7MTXCLASS"			\
+    "\6NOFREE"				\
+    "\5MALLOC"				\
+    "\4NOTOUCH"				\
+    "\2ZINIT"
+
 /*
  * Hash table for freed address -> slab translation.
  *
  * Only zones with memory not touchable by the allocator use the
  * hash table.  Otherwise slabs are found with vtoslab().
  */
 #define UMA_HASH_SIZE_INIT	32		
 
 #define UMA_HASH(h, s) ((((uintptr_t)s) >> UMA_SLAB_SHIFT) & (h)->uh_hashmask)
 
 #define UMA_HASH_INSERT(h, s, mem)					\
 	LIST_INSERT_HEAD(&(h)->uh_slab_hash[UMA_HASH((h),		\
 	    (mem))], (uma_hash_slab_t)(s), uhs_hlink)
 
 #define UMA_HASH_REMOVE(h, s)						\
 	LIST_REMOVE((uma_hash_slab_t)(s), uhs_hlink)
 
 LIST_HEAD(slabhashhead, uma_hash_slab);
 
 struct uma_hash {
 	struct slabhashhead	*uh_slab_hash;	/* Hash table for slabs */
 	u_int		uh_hashsize;	/* Current size of the hash table */
 	u_int		uh_hashmask;	/* Mask used during hashing */
 };
 
 /*
  * Align field or structure to cache 'sector' in intel terminology.  This
  * is more efficient with adjacent line prefetch.
  */
 #if defined(__amd64__) || defined(__powerpc64__)
 #define UMA_SUPER_ALIGN	(CACHE_LINE_SIZE * 2)
 #else
 #define UMA_SUPER_ALIGN	CACHE_LINE_SIZE
 #endif
 
 #define	UMA_ALIGN	__aligned(UMA_SUPER_ALIGN)
 
 /*
  * The uma_bucket structure is used to queue and manage buckets divorced
  * from per-cpu caches.  They are loaded into uma_cache_bucket structures
  * for use.
  */
 struct uma_bucket {
 	TAILQ_ENTRY(uma_bucket)	ub_link;	/* Link into the zone */
 	int16_t	ub_cnt;				/* Count of items in bucket. */
 	int16_t	ub_entries;			/* Max items. */
 	void	*ub_bucket[];			/* actual allocation storage */
 };
 
 typedef struct uma_bucket * uma_bucket_t;
 
 /*
  * The uma_cache_bucket structure is statically allocated on each per-cpu
  * cache.  Its use reduces branches and cache misses in the fast path.
  */
 struct uma_cache_bucket {
 	uma_bucket_t	ucb_bucket;	/* Bucket loaded into this slot. */
 	int16_t		ucb_cnt;	/* Presumably mirrors ub_cnt; confirm. */
 	int16_t		ucb_entries;	/* Presumably mirrors ub_entries; confirm. */
 	uint32_t	ucb_spare;	/* Spare; see cache_set_uz_*(). */
 };
 
 typedef struct uma_cache_bucket * uma_cache_bucket_t;
 
 /*
  * The uma_cache structure is allocated for each cpu for every zone
  * type.  This optimizes synchronization out of the allocator fast path.
  */
 struct uma_cache {
 	struct uma_cache_bucket	uc_freebucket;	/* Bucket we're freeing to */
 	struct uma_cache_bucket	uc_allocbucket;	/* Bucket to allocate from */
 	struct uma_cache_bucket	uc_crossbucket;	/* cross domain bucket */
 	uint64_t		uc_allocs;	/* Count of allocations */
 	uint64_t		uc_frees;	/* Count of frees */
 } UMA_ALIGN;
 
 typedef struct uma_cache * uma_cache_t;
 
 LIST_HEAD(slabhead, uma_slab);
 
 /*
  * The per-cpu cache structure has spare 32-bit fields in its embedded cache
  * buckets.  They hold copies of zone information so that the fast path only
  * has to touch the per-cpu structure.
  *
  * cache_set_uz_flags() stores the zone flags in the free bucket's spare
  * field.
  */
 static inline void
 cache_set_uz_flags(uma_cache_t cache, uint32_t flags)
 {
 	uma_cache_bucket_t slot;
 
 	slot = &cache->uc_freebucket;
 	slot->ucb_spare = flags;
 }
 
 /* Store the zone item size in the alloc bucket's spare field. */
 static inline void
 cache_set_uz_size(uma_cache_t cache, uint32_t size)
 {
 	uma_cache_bucket_t slot;
 
 	slot = &cache->uc_allocbucket;
 	slot->ucb_spare = size;
 }
 
 /* Return the value stored by cache_set_uz_flags(). */
 static inline uint32_t
 cache_uz_flags(uma_cache_t cache)
 {
 	uint32_t flags;
 
 	flags = cache->uc_freebucket.ucb_spare;
 	return (flags);
 }
  
 /* Return the value stored by cache_set_uz_size(). */
 static inline uint32_t
 cache_uz_size(uma_cache_t cache)
 {
 	uint32_t size;
 
 	size = cache->uc_allocbucket.ucb_spare;
 	return (size);
 }
  
 /*
  * Per-domain slab lists.  Embedded in the kegs.
  */
 struct uma_domain {
 	struct mtx_padalign ud_lock;	/* Lock for the domain lists. */
 	struct slabhead	ud_part_slab;	/* partially allocated slabs */
 	struct slabhead	ud_free_slab;	/* completely unallocated slabs */
 	struct slabhead ud_full_slab;	/* fully allocated slabs */
 	uint32_t	ud_pages;	/* Total page count */
 	uint32_t	ud_free;	/* Count of items free in slabs */
 } __aligned(CACHE_LINE_SIZE);
 
 typedef struct uma_domain * uma_domain_t;
 
 /*
  * Keg management structure
  *
  * TODO: Optimize for cache line size
  *
  */
 struct uma_keg {
 	struct uma_hash	uk_hash;
 	LIST_HEAD(,uma_zone)	uk_zones;	/* Keg's zones */
 
 	struct domainset_ref uk_dr;	/* Domain selection policy. */
 	uint32_t	uk_align;	/* Alignment mask */
 	uint32_t	uk_reserve;	/* Number of reserved items. */
 	uint32_t	uk_size;	/* Requested size of each item */
 	uint32_t	uk_rsize;	/* Real size of each item */
 
 	uma_init	uk_init;	/* Keg's init routine */
 	uma_fini	uk_fini;	/* Keg's fini routine */
 	uma_alloc	uk_allocf;	/* Allocation function */
 	uma_free	uk_freef;	/* Free routine */
 
 	u_long		uk_offset;	/* Next free offset from base KVA */
 	vm_offset_t	uk_kva;		/* Zone base KVA */
 	uma_zone_t	uk_slabzone;	/* Slab zone backing us, if OFFPAGE */
 
 	uint32_t	uk_pgoff;	/* Offset to uma_slab struct */
 	uint16_t	uk_ppera;	/* pages per allocation from backend */
 	uint16_t	uk_ipers;	/* Items per slab */
 	uint32_t	uk_flags;	/* Internal flags */
 
 	/* Least used fields go to the last cache line. */
 	const char	*uk_name;		/* Name of creating zone. */
 	LIST_ENTRY(uma_keg)	uk_link;	/* List of all kegs */
 
 	/* Must be last, variable sized. */
 	struct uma_domain	uk_domain[];	/* Keg's slab lists. */
 };
 typedef struct uma_keg	* uma_keg_t;
 
 #ifdef _KERNEL
 #define	KEG_ASSERT_COLD(k)						\
 	KASSERT(uma_keg_get_allocs((k)) == 0,				\
 	    ("keg %s initialization after use.", (k)->uk_name))
 
 /*
  * Free bits per-slab.
  */
 #define	SLAB_MAX_SETSIZE	(PAGE_SIZE / UMA_SMALLEST_UNIT)
 #define	SLAB_MIN_SETSIZE	_BITSET_BITS
 BITSET_DEFINE(slabbits, SLAB_MAX_SETSIZE);
 BITSET_DEFINE(noslabbits, 0);
 
 /*
  * The slab structure manages a single contiguous allocation from backing
  * store and subdivides it into individually allocatable items.
  */
 struct uma_slab {
 	LIST_ENTRY(uma_slab)	us_link;	/* slabs in zone */
 	uint16_t	us_freecount;		/* How many are free? */
 	uint8_t		us_flags;		/* Page flags see uma.h */
 	uint8_t		us_domain;		/* Backing NUMA domain. */
 	struct noslabbits us_free;		/* Free bitmask, flexible. */
 };
 _Static_assert(sizeof(struct uma_slab) == offsetof(struct uma_slab, us_free),
     "us_free field must be last");
 #if MAXMEMDOM >= 255
 #error "Slab domain type insufficient"
 #endif
 
 typedef struct uma_slab * uma_slab_t;
 
 /*
  * On INVARIANTS builds, the slab contains a second bitset of the same size,
  * "dbg_bits", which is laid out immediately after us_free.
  */
 #ifdef INVARIANTS
 #define	SLAB_BITSETS	2
 #else
 #define	SLAB_BITSETS	1
 #endif
 
 /* These three functions are for embedded (!OFFPAGE) use only. */
 size_t slab_sizeof(int nitems);
 size_t slab_space(int nitems);
 int slab_ipers(size_t size, int align);
 
 /*
  * Slab structure with a full sized bitset and hash link for both
  * HASH and OFFPAGE zones.
  */
 struct uma_hash_slab {
 	struct uma_slab		uhs_slab;	/* Must be first. */
 	struct slabbits		uhs_bits1;	/* Must be second. */
 #ifdef INVARIANTS
 	struct slabbits		uhs_bits2;	/* Must be third. */
 #endif
 	LIST_ENTRY(uma_hash_slab) uhs_hlink;	/* Link for hash table */
 	uint8_t			*uhs_data;	/* First item */
 };
 
 typedef struct uma_hash_slab * uma_hash_slab_t;
 
 /*
  * Return the address of the first item of a slab.  When the off-page flag
  * is clear the slab header lies uk_pgoff bytes past that address, so it is
  * recovered by subtraction; otherwise the slab is a uma_hash_slab and the
  * address is read from its uhs_data field.
  */
 static inline void *
 slab_data(uma_slab_t slab, uma_keg_t keg)
 {
 
-	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0)
+	if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) == 0)
 		return ((void *)((uintptr_t)slab - keg->uk_pgoff));
 	else
 		return (((uma_hash_slab_t)slab)->uhs_data);
 }
 
 /*
  * Return the address of item number 'index' within a slab: the slab's data
  * address plus index times the keg's real item size (uk_rsize).
  */
 static inline void *
 slab_item(uma_slab_t slab, uma_keg_t keg, int index)
 {
 	uintptr_t base;
 
 	base = (uintptr_t)slab_data(slab, keg);
 	base += keg->uk_rsize * index;
 	return ((void *)base);
 }
 
 /*
  * Return the index of 'item' within a slab: its byte offset from the slab's
  * data address divided by the keg's real item size (uk_rsize).  The
  * division truncates, so an address inside an item maps to that item.
  */
 static inline int
 slab_item_index(uma_slab_t slab, uma_keg_t keg, void *item)
 {
 	uintptr_t offset;
 
 	offset = (uintptr_t)item - (uintptr_t)slab_data(slab, keg);
 	return (offset / keg->uk_rsize);
 }
 #endif /* _KERNEL */
 
 TAILQ_HEAD(uma_bucketlist, uma_bucket);
 
 struct uma_zone_domain {
 	struct uma_bucketlist uzd_buckets; /* full buckets */
 	uma_bucket_t	uzd_cross;	/* Fills from cross buckets. */
 	long		uzd_nitems;	/* total item count */
 	long		uzd_imax;	/* maximum item count this period */
 	long		uzd_imin;	/* minimum item count this period */
 	long		uzd_wss;	/* working set size estimate */
 } __aligned(CACHE_LINE_SIZE);
 
 typedef struct uma_zone_domain * uma_zone_domain_t;
 
 /*
  * Zone structure - per memory type.
  */
 struct uma_zone {
 	/* Offset 0, used in alloc/free fast/medium fast path and const. */
 	uma_keg_t	uz_keg;		/* This zone's keg if !CACHE */
 	struct uma_zone_domain	*uz_domain;	/* per-domain buckets */
 	uint32_t	uz_flags;	/* Flags inherited from kegs */
 	uint32_t	uz_size;	/* Size inherited from kegs */
 	uma_ctor	uz_ctor;	/* Constructor for each allocation */
 	uma_dtor	uz_dtor;	/* Destructor */
 	uint64_t	uz_spare0;
 	uint64_t	uz_max_items;	/* Maximum number of items to alloc */
 	uint32_t	uz_sleepers;	/* Threads sleeping on limit */
 	uint16_t	uz_bucket_size;	/* Number of items in full bucket */
 	uint16_t	uz_bucket_size_max; /* Maximum number of bucket items */
 
 	/* Offset 64, used in bucket replenish. */
 	uma_import	uz_import;	/* Import new memory to cache. */
 	uma_release	uz_release;	/* Release memory from cache. */
 	void		*uz_arg;	/* Import/release argument. */
 	uma_init	uz_init;	/* Initializer for each item */
 	uma_fini	uz_fini;	/* Finalizer for each item. */
 	void		*uz_spare1;
 	uint64_t	uz_bkt_count;    /* Items in bucket cache */
 	uint64_t	uz_bkt_max;	/* Maximum bucket cache size */
 
 	/* Offset 128 Rare. */
 	/*
 	 * The lock is placed here to avoid adjacent line prefetcher
 	 * in fast paths and to take up space near infrequently accessed
 	 * members to reduce alignment overhead.
 	 */
 	struct mtx	uz_lock;	/* Lock for the zone */
 	LIST_ENTRY(uma_zone) uz_link;	/* List of all zones in keg */
 	const char	*uz_name;	/* Text name of the zone */
 	/* The next two fields are used to print rate-limited warnings. */
 	const char	*uz_warning;	/* Warning to print on failure */
 	struct timeval	uz_ratecheck;	/* Warnings rate-limiting */
 	struct task	uz_maxaction;	/* Task to run when at limit */
 	uint16_t	uz_bucket_size_min; /* Min number of items in bucket */
 
 	struct mtx_padalign	uz_cross_lock;	/* Cross domain free lock */
 
 	/* Offset 256+, stats and misc. */
 	counter_u64_t	uz_allocs;	/* Total number of allocations */
 	counter_u64_t	uz_frees;	/* Total number of frees */
 	counter_u64_t	uz_fails;	/* Total number of alloc failures */
 	uint64_t	uz_sleeps;	/* Total number of alloc sleeps */
 	uint64_t	uz_xdomain;	/* Total number of cross-domain frees */
 	volatile uint64_t uz_items;	/* Total items count & sleepers */
 
 	char		*uz_ctlname;	/* sysctl safe name string. */
 	struct sysctl_oid *uz_oid;	/* sysctl oid pointer. */
 	int		uz_namecnt;	/* duplicate name count. */
 
 	/*
 	 * This HAS to be the last item because we adjust the zone size
 	 * based on NCPU and then allocate the space for the zones.
 	 */
 	struct uma_cache	uz_cpu[]; /* Per cpu caches */
 
 	/* uz_domain follows here. */
 };
-
-/*
- * These flags must not overlap with the UMA_ZONE flags specified in uma.h.
- */
-#define	UMA_ZFLAG_CTORDTOR	0x01000000	/* Zone has ctor/dtor set. */
-#define	UMA_ZFLAG_LIMIT		0x02000000	/* Zone has limit set. */
-#define	UMA_ZFLAG_CACHE		0x04000000	/* uma_zcache_create()d it */
-#define	UMA_ZFLAG_RECLAIMING	0x08000000	/* Running zone_reclaim(). */
-#define	UMA_ZFLAG_BUCKET	0x10000000	/* Bucket zone. */
-#define UMA_ZFLAG_INTERNAL	0x20000000	/* No offpage no PCPU. */
-#define UMA_ZFLAG_TRASH		0x40000000	/* Add trash ctor/dtor. */
-#define UMA_ZFLAG_CACHEONLY	0x80000000	/* Don't ask VM for buckets. */
-
-#define	UMA_ZFLAG_INHERIT						\
-    (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | UMA_ZFLAG_BUCKET)
-
-#define	PRINT_UMA_ZFLAGS	"\20"	\
-    "\40CACHEONLY"			\
-    "\37TRASH"				\
-    "\36INTERNAL"			\
-    "\35BUCKET"				\
-    "\34RECLAIMING"			\
-    "\33CACHE"				\
-    "\32LIMIT"				\
-    "\31CTORDTOR"			\
-    "\23ROUNDROBIN"			\
-    "\22FIRSTTOUCH"			\
-    "\21MINBUCKET"			\
-    "\20PCPU"				\
-    "\17NODUMP"				\
-    "\16VTOSLAB"			\
-    "\15CACHESPREAD"			\
-    "\14MAXBUCKET"			\
-    "\13NOBUCKET"			\
-    "\12SECONDARY"			\
-    "\11HASH"				\
-    "\10VM"				\
-    "\7MTXCLASS"			\
-    "\6NOFREE"				\
-    "\5MALLOC"				\
-    "\4OFFPAGE"				\
-    "\3STATIC"				\
-    "\2ZINIT"				\
-    "\1PAGEABLE"
 
 /*
  * Macros for interpreting the uz_items field.  20 bits of sleeper count
  * and 44 bits of item count.
  */
 #define	UZ_ITEMS_SLEEPER_SHIFT	44LL
 #define	UZ_ITEMS_SLEEPERS_MAX	((1 << (64 - UZ_ITEMS_SLEEPER_SHIFT)) - 1)
 #define	UZ_ITEMS_COUNT_MASK	((1LL << UZ_ITEMS_SLEEPER_SHIFT) - 1)
 #define	UZ_ITEMS_COUNT(x)	((x) & UZ_ITEMS_COUNT_MASK)
 #define	UZ_ITEMS_SLEEPERS(x)	((x) >> UZ_ITEMS_SLEEPER_SHIFT)
 #define	UZ_ITEMS_SLEEPER	(1LL << UZ_ITEMS_SLEEPER_SHIFT)
 
 #define	ZONE_ASSERT_COLD(z)						\
 	KASSERT(uma_zone_get_allocs((z)) == 0,				\
 	    ("zone %s initialization after use.", (z)->uz_name))
 
 #undef	UMA_ALIGN
 
 #ifdef _KERNEL
 /* Internal prototypes */
 static __inline uma_slab_t hash_sfind(struct uma_hash *hash, uint8_t *data);
 
 /* Lock Macros */
 
 #define	KEG_LOCKPTR(k, d)	(struct mtx *)&(k)->uk_domain[(d)].ud_lock
 #define	KEG_LOCK_INIT(k, d, lc)						\
 	do {								\
 		if ((lc))						\
 			mtx_init(KEG_LOCKPTR(k, d), (k)->uk_name,	\
 			    (k)->uk_name, MTX_DEF | MTX_DUPOK);		\
 		else							\
 			mtx_init(KEG_LOCKPTR(k, d), (k)->uk_name,	\
 			    "UMA zone", MTX_DEF | MTX_DUPOK);		\
 	} while (0)
 
 #define	KEG_LOCK_FINI(k, d)	mtx_destroy(KEG_LOCKPTR(k, d))
 #define	KEG_LOCK(k, d)							\
 	({ mtx_lock(KEG_LOCKPTR(k, d)); KEG_LOCKPTR(k, d); })
 #define	KEG_UNLOCK(k, d)	mtx_unlock(KEG_LOCKPTR(k, d))
 #define	KEG_LOCK_ASSERT(k, d)	mtx_assert(KEG_LOCKPTR(k, d), MA_OWNED)
 
 /*
  * Load the zone's uz_keg pointer into keg, then assert that the loaded
  * pointer is not the address of the zone's own uz_lock.
  * NOTE(review): presumably this catches zones whose uz_keg slot does not
  * hold a real keg -- confirm against the struct uma_zone definition.
  */
 #define	KEG_GET(zone, keg) do {					\
 	(keg) = (zone)->uz_keg;					\
 	KASSERT((void *)(keg) != (void *)&(zone)->uz_lock,	\
 	    ("%s: Invalid zone %p type", __func__, (zone)));	\
 	} while (0)
 
 /*
  * Initialize the zone's uz_lock.  When lc is non-zero the zone's own name is
  * passed as the third mtx_init() argument; otherwise the shared string
  * "UMA zone" is used.  Both paths use MTX_DEF | MTX_DUPOK.
  */
 #define	ZONE_LOCK_INIT(z, lc)					\
 	do {							\
 		if ((lc))					\
 			mtx_init(&(z)->uz_lock, (z)->uz_name,	\
 			    (z)->uz_name, MTX_DEF | MTX_DUPOK);	\
 		else						\
 			mtx_init(&(z)->uz_lock, (z)->uz_name,	\
 			    "UMA zone", MTX_DEF | MTX_DUPOK);	\
 	} while (0)
 
 /* Acquire, try-acquire, release, destroy and assert ownership of uz_lock. */
 #define	ZONE_LOCK(z)	mtx_lock(&(z)->uz_lock)
 #define	ZONE_TRYLOCK(z)	mtx_trylock(&(z)->uz_lock)
 #define	ZONE_UNLOCK(z)	mtx_unlock(&(z)->uz_lock)
 #define	ZONE_LOCK_FINI(z)	mtx_destroy(&(z)->uz_lock)
 #define	ZONE_LOCK_ASSERT(z)	mtx_assert(&(z)->uz_lock, MA_OWNED)
 
 /*
  * Wrappers around the zone's uz_cross_lock mutex.  It is initialized as an
  * MTX_DEF mutex named "UMA Cross" with a NULL type argument.
  */
 #define	ZONE_CROSS_LOCK_INIT(z)					\
 	mtx_init(&(z)->uz_cross_lock, "UMA Cross", NULL, MTX_DEF)
 #define	ZONE_CROSS_LOCK(z)	mtx_lock(&(z)->uz_cross_lock)
 #define	ZONE_CROSS_UNLOCK(z)	mtx_unlock(&(z)->uz_cross_lock)
 #define	ZONE_CROSS_LOCK_FINI(z)	mtx_destroy(&(z)->uz_cross_lock)
 
 /*
  * Find a slab within a hash table.  This is used for OFFPAGE zones to look
  * up the slab structure.  The bucket is selected with UMA_HASH() and its
  * chain is walked until an entry whose uhs_data equals data is found.
  *
  * Arguments:
  *	hash  The hash table to search.
  *	data  The base page of the item.
  *
  * Returns:
  *	A pointer to the matching slab, or NULL when no entry matches.
  */
 static __inline uma_slab_t
 hash_sfind(struct uma_hash *hash, uint8_t *data)
 {
 	uma_hash_slab_t entry;
 	u_int bucket;
 
 	bucket = UMA_HASH(hash, data);
 	LIST_FOREACH(entry, &hash->uh_slab_hash[bucket], uhs_hlink) {
 		/* Skip chain entries that track a different page. */
 		if ((uint8_t *)entry->uhs_data != data)
 			continue;
 		return (&entry->uhs_slab);
 	}
 	return (NULL);
 }
 
 /*
  * Return the slab pointer stored in the vm_page that backs virtual address
  * va.  The page is located via pmap_kextract() and PHYS_TO_VM_PAGE().
  */
 static __inline uma_slab_t
 vtoslab(vm_offset_t va)
 {
 	vm_page_t pg = PHYS_TO_VM_PAGE(pmap_kextract(va));
 
 	return (pg->plinks.uma.slab);
 }
 
 /*
  * Fetch both the slab and the zone recorded in the vm_page that backs
  * virtual address va, storing them through the slab and zone out-pointers.
  */
 static __inline void
 vtozoneslab(vm_offset_t va, uma_zone_t *zone, uma_slab_t *slab)
 {
 	vm_page_t pg = PHYS_TO_VM_PAGE(pmap_kextract(va));
 
 	*slab = pg->plinks.uma.slab;
 	*zone = pg->plinks.uma.zone;
 }
 
 /*
  * Record a slab and a zone in the vm_page that backs virtual address va,
  * overwriting whatever plinks.uma previously held for that page.
  */
 static __inline void
 vsetzoneslab(vm_offset_t va, uma_zone_t zone, uma_slab_t slab)
 {
 	vm_page_t pg = PHYS_TO_VM_PAGE(pmap_kextract(va));
 
 	pg->plinks.uma.slab = slab;
 	pg->plinks.uma.zone = zone;
 }
 
 extern unsigned long uma_kmem_limit;
 extern unsigned long uma_kmem_total;
 
 /*
  * Adjust bytes under management by UMA: atomically subtract size from the
  * global uma_kmem_total counter.
  */
 static inline void
 uma_total_dec(unsigned long size)
 {
 
 	atomic_subtract_long(&uma_kmem_total, size);
 }
 
 /*
  * Atomically add size to the global uma_kmem_total counter.  The value
  * returned by atomic_fetchadd_long() is compared against uma_kmem_limit,
  * and uma_reclaim_wakeup() is called when it exceeds that limit.
  */
 static inline void
 uma_total_inc(unsigned long size)
 {
 	unsigned long prev;
 
 	prev = atomic_fetchadd_long(&uma_kmem_total, size);
 	if (prev > uma_kmem_limit)
 		uma_reclaim_wakeup();
 }
 
 /*
  * The following two functions may be defined by architecture specific code
  * if they can provide more efficient allocation functions.  This is useful
  * for using direct mapped addresses.
  */
 void *uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain,
     uint8_t *pflag, int wait);
 void uma_small_free(void *mem, vm_size_t size, uint8_t flags);
 
 /* Set a global soft limit on UMA managed memory. */
 void uma_set_limit(unsigned long limit);
 #endif /* _KERNEL */
 
 #endif /* VM_UMA_INT_H */