Index: sys/amd64/amd64/machdep.c =================================================================== --- sys/amd64/amd64/machdep.c +++ sys/amd64/amd64/machdep.c @@ -279,7 +279,7 @@ memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } - if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) + if (memsize < ptoa((uintmax_t)vm_free_count())) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); @@ -306,8 +306,8 @@ vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", - ptoa((uintmax_t)vm_cnt.v_free_count), - ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); + ptoa((uintmax_t)vm_free_count()), + ptoa((uintmax_t)vm_free_count()) / 1048576); /* * Set up buffers, so they can be used to read disk labels. Index: sys/arm/arm/machdep.c =================================================================== --- sys/arm/arm/machdep.c +++ sys/arm/arm/machdep.c @@ -228,8 +228,8 @@ (uintmax_t)arm32_ptob(realmem), (uintmax_t)arm32_ptob(realmem) / mbyte); printf("avail memory = %ju (%ju MB)\n", - (uintmax_t)arm32_ptob(vm_cnt.v_free_count), - (uintmax_t)arm32_ptob(vm_cnt.v_free_count) / mbyte); + (uintmax_t)arm32_ptob(vm_free_count()), + (uintmax_t)arm32_ptob(vm_free_count()) / mbyte); if (bootverbose) { arm_physmem_print_tables(); devmap_print_table(); Index: sys/arm/arm/pmap-v4.c =================================================================== --- sys/arm/arm/pmap-v4.c +++ sys/arm/arm/pmap-v4.c @@ -3817,7 +3817,7 @@ pv_entry_count++; if (pv_entry_count > pv_entry_high_water) - pagedaemon_wakeup(); + pagedaemon_wakeup(0); /* XXX ARM NUMA */ ret_value = uma_zalloc(pvzone, M_NOWAIT); return ret_value; } Index: sys/cddl/compat/opensolaris/sys/kmem.h =================================================================== --- sys/cddl/compat/opensolaris/sys/kmem.h +++ sys/cddl/compat/opensolaris/sys/kmem.h @@ -78,7 +78,7 @@ int kmem_debugging(void); void *calloc(size_t n, size_t s); -#define freemem vm_cnt.v_free_count +#define freemem vm_free_count() #define minfree vm_cnt.v_free_min #define heap_arena kernel_arena #define zio_arena NULL Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -379,7 +379,7 @@ arc_free_target_init(void *unused __unused) { - zfs_arc_free_target = vm_pageout_wakeup_thresh; + zfs_arc_free_target = (vm_cnt.v_free_min / 10) * 11; } SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, arc_free_target_init, NULL); Index: sys/compat/linprocfs/linprocfs.c =================================================================== --- sys/compat/linprocfs/linprocfs.c +++ sys/compat/linprocfs/linprocfs.c @@ -156,7 +156,7 @@ /* * The correct thing here would be: * - memfree = vm_cnt.v_free_count * PAGE_SIZE; + memfree = vm_free_count() * PAGE_SIZE; memused = memtotal - memfree; * * but it might mislead linux binaries into thinking there @@ -178,7 +178,7 @@ * like unstaticizing it just for linprocfs's sake. 
*/ buffers = 0; - cached = vm_cnt.v_inactive_count * PAGE_SIZE; + cached = vm_inactive_count() * PAGE_SIZE; sbuf_printf(sb, "MemTotal: %9lu kB\n" Index: sys/fs/tmpfs/tmpfs_subr.c =================================================================== --- sys/fs/tmpfs/tmpfs_subr.c +++ sys/fs/tmpfs/tmpfs_subr.c @@ -106,7 +106,8 @@ { vm_ooffset_t avail; - avail = swap_pager_avail + vm_cnt.v_free_count - tmpfs_pages_reserved; + /* XXX */ + avail = swap_pager_avail + vm_free_count() - tmpfs_pages_reserved; if (__predict_false(avail < 0)) avail = 0; return (avail); Index: sys/i386/i386/machdep.c =================================================================== --- sys/i386/i386/machdep.c +++ sys/i386/i386/machdep.c @@ -271,7 +271,7 @@ memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } - if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) + if (memsize < ptoa((uintmax_t)vm_free_count())) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); @@ -298,8 +298,8 @@ vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", - ptoa((uintmax_t)vm_cnt.v_free_count), - ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); + ptoa((uintmax_t)vm_free_count()), + ptoa((uintmax_t)vm_free_count()) / 1048576); /* * Set up buffers, so they can be used to read disk labels. Index: sys/kern/init_main.c =================================================================== --- sys/kern/init_main.c +++ sys/kern/init_main.c @@ -87,6 +87,7 @@ #include #include +#include #include #include #include @@ -555,7 +556,7 @@ p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; /* Cast to avoid overflow on i386/PAE. */ - pageablemem = ptoa((vm_paddr_t)vm_cnt.v_free_count); + pageablemem = ptoa((vm_paddr_t)vm_free_count()); p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; Index: sys/kern/subr_vmem.c =================================================================== --- sys/kern/subr_vmem.c +++ sys/kern/subr_vmem.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "opt_vm.h" @@ -72,6 +73,8 @@ #include #include #include +#include +#include #define VMEM_OPTORDER 5 #define VMEM_OPTVALUE (1 << VMEM_OPTORDER) @@ -641,7 +644,7 @@ * possible due to M_USE_RESERVE page allocation. 
*/ if (wait & M_WAITOK) - VM_WAIT; + vm_wait_domain(domain); return (NULL); } mtx_unlock(&vmem_bt_lock); Index: sys/kern/subr_witness.c =================================================================== --- sys/kern/subr_witness.c +++ sys/kern/subr_witness.c @@ -139,7 +139,7 @@ #define WITNESS_COUNT 1536 #endif #define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */ -#define WITNESS_PENDLIST (2048 + MAXCPU) +#define WITNESS_PENDLIST (2048 + (MAXCPU * 4)) /* Allocate 256 KB of stack data space */ #define WITNESS_LO_DATA_COUNT 2048 Index: sys/mips/mips/machdep.c =================================================================== --- sys/mips/mips/machdep.c +++ sys/mips/mips/machdep.c @@ -210,8 +210,8 @@ vm_ksubmap_init(&kmi); printf("avail memory = %ju (%juMB)\n", - ptoa((uintmax_t)vm_cnt.v_free_count), - ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); + ptoa((uintmax_t)vm_free_count()), + ptoa((uintmax_t)vm_free_count()) / 1048576); cpu_init_interrupts(); /* Index: sys/powerpc/booke/pmap.c =================================================================== --- sys/powerpc/booke/pmap.c +++ sys/powerpc/booke/pmap.c @@ -1183,7 +1183,7 @@ pv_entry_count++; if (pv_entry_count > pv_entry_high_water) - pagedaemon_wakeup(); + pagedaemon_wakeup(0); /* XXX powerpc NUMA */ pv = uma_zalloc(pvzone, M_NOWAIT); return (pv); Index: sys/powerpc/powerpc/machdep.c =================================================================== --- sys/powerpc/powerpc/machdep.c +++ sys/powerpc/powerpc/machdep.c @@ -213,8 +213,8 @@ vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", - ptoa((uintmax_t)vm_cnt.v_free_count), - ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); + ptoa((uintmax_t)vm_free_count()), + ptoa((uintmax_t)vm_free_count()) / 1048576); /* * Set up buffers, so they can be used to read disk labels. 
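The hunks above all follow one conversion pattern: direct reads of the global vm_cnt.v_free_count become calls to the new vm_free_count() accessor (defined in the vm_meter.c hunk later in this diff as a sum of the per-domain vmd_free_count fields), and pagedaemon_wakeup()/VM_WAIT callers now name a domain explicitly. The userspace sketch below is illustrative only and not part of the patch: the mock_domain structure, the two-domain numbers, and the per-domain wakeup threshold are invented for the example; only the summation loop mirrors the vm_free_count() added by this diff.

#include <stdio.h>

#define MOCK_MAXMEMDOM	2

struct mock_domain {
	unsigned int vmd_free_count;		/* free pages in this domain */
	unsigned int vmd_pageout_wakeup_thresh;	/* per-domain wakeup point */
};

static struct mock_domain mock_dom[MOCK_MAXMEMDOM];
static int mock_ndomains = MOCK_MAXMEMDOM;

/* Mirrors the shape of vm_free_count(): sum the per-domain counters. */
static unsigned int
mock_free_count(void)
{
	unsigned int v;
	int i;

	v = 0;
	for (i = 0; i < mock_ndomains; i++)
		v += mock_dom[i].vmd_free_count;
	return (v);
}

int
main(void)
{

	mock_dom[0].vmd_free_count = 100000;
	mock_dom[0].vmd_pageout_wakeup_thresh = 1100;
	mock_dom[1].vmd_free_count = 250;
	mock_dom[1].vmd_pageout_wakeup_thresh = 1100;

	/* Consumers such as the "avail memory" printfs see only the sum. */
	printf("global free pages: %u\n", mock_free_count());

	/*
	 * Wakeups become per-domain decisions: domain 1 is short even
	 * though the global total looks healthy.
	 */
	if (mock_dom[1].vmd_free_count < mock_dom[1].vmd_pageout_wakeup_thresh)
		printf("domain 1 would wake its page daemon\n");
	return (0);
}

The point of the mock values is that a single depleted domain can sit below its own wakeup threshold while the summed count still looks healthy, which is why the later hunks give each domain its own vmd_pageout_wakeup_thresh and its own page daemon wakeups.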
Index: sys/sparc64/sparc64/machdep.c =================================================================== --- sys/sparc64/sparc64/machdep.c +++ sys/sparc64/sparc64/machdep.c @@ -190,8 +190,8 @@ EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL, SHUTDOWN_PRI_LAST); - printf("avail memory = %lu (%lu MB)\n", vm_cnt.v_free_count * PAGE_SIZE, - vm_cnt.v_free_count / ((1024 * 1024) / PAGE_SIZE)); + printf("avail memory = %lu (%lu MB)\n", vm_free_count() * PAGE_SIZE, + vm_free_count() / ((1024 * 1024) / PAGE_SIZE)); if (bootverbose) printf("machine: %s\n", sparc64_model); Index: sys/sys/vmmeter.h =================================================================== --- sys/sys/vmmeter.h +++ sys/sys/vmmeter.h @@ -141,23 +141,23 @@ u_int v_interrupt_free_min; /* (c) reserved pages for int code */ u_int v_free_severe; /* (c) severe page depletion point */ u_int v_wire_count VMMETER_ALIGNED; /* (a) pages wired down */ - u_int v_active_count VMMETER_ALIGNED; /* (a) pages active */ - u_int v_inactive_count VMMETER_ALIGNED; /* (a) pages inactive */ - u_int v_laundry_count VMMETER_ALIGNED; /* (a) pages eligible for - laundering */ - u_int v_free_count VMMETER_ALIGNED; /* (f) pages free */ }; #endif /* _KERNEL || _WANT_VMMETER */ #ifdef _KERNEL +#include + extern struct vmmeter vm_cnt; -extern u_int vm_pageout_wakeup_thresh; +extern domainset_t vm_min_domains; +extern domainset_t vm_severe_domains; #define VM_CNT_ADD(var, x) counter_u64_add(vm_cnt.var, x) #define VM_CNT_INC(var) VM_CNT_ADD(var, 1) #define VM_CNT_FETCH(var) counter_u64_fetch(vm_cnt.var) +u_int vm_free_count(void); + /* * Return TRUE if we are under our severe low-free-pages threshold * @@ -168,7 +168,7 @@ vm_page_count_severe(void) { - return (vm_cnt.v_free_severe > vm_cnt.v_free_count); + return (!DOMAINSET_EMPTY(&vm_severe_domains)); } /* @@ -184,50 +184,8 @@ vm_page_count_min(void) { - return (vm_cnt.v_free_min > vm_cnt.v_free_count); + return (!DOMAINSET_EMPTY(&vm_min_domains)); } -/* - * Return TRUE if we have not reached our free page target during - * free page recovery operations. - */ -static inline int -vm_page_count_target(void) -{ - - return (vm_cnt.v_free_target > vm_cnt.v_free_count); -} - -/* - * Return the number of pages we need to free-up or cache - * A positive number indicates that we do not have enough free pages. - */ -static inline int -vm_paging_target(void) -{ - - return (vm_cnt.v_free_target - vm_cnt.v_free_count); -} - -/* - * Returns TRUE if the pagedaemon needs to be woken up. - */ -static inline int -vm_paging_needed(u_int free_count) -{ - - return (free_count < vm_pageout_wakeup_thresh); -} - -/* - * Return the number of pages we need to launder. - * A positive number indicates that we have a shortfall of clean pages. - */ -static inline int -vm_laundry_target(void) -{ - - return (vm_paging_target()); -} #endif /* _KERNEL */ #endif /* _SYS_VMMETER_H_ */ Index: sys/vm/swap_pager.c =================================================================== --- sys/vm/swap_pager.c +++ sys/vm/swap_pager.c @@ -2327,7 +2327,7 @@ * of data we will have to page back in, plus an epsilon so * the system doesn't become critically low on swap space. 
*/ - if (vm_cnt.v_free_count + swap_pager_avail < nblks + nswap_lowat) + if (vm_free_count() + swap_pager_avail < nblks + nswap_lowat) return (ENOMEM); /* Index: sys/vm/uma.h =================================================================== --- sys/vm/uma.h +++ sys/vm/uma.h @@ -47,6 +47,7 @@ /* Types and type defs */ struct uma_zone; +struct vm_domain_iterator; /* Opaque type used as a handle to the zone */ typedef struct uma_zone * uma_zone_t; Index: sys/vm/uma_core.c =================================================================== --- sys/vm/uma_core.c +++ sys/vm/uma_core.c @@ -3409,7 +3409,7 @@ slab->us_data = (void *)addr; slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC; slab->us_size = size; - slab->us_domain = vm_phys_domidx(PHYS_TO_VM_PAGE( + slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE( pmap_kextract(addr))); uma_total_inc(size); } else { Index: sys/vm/vm_extern.h =================================================================== --- sys/vm/vm_extern.h +++ sys/vm/vm_extern.h @@ -122,5 +122,9 @@ void vm_imgact_unmap_page(struct sf_buf *sf); void vm_thread_dispose(struct thread *td); int vm_thread_new(struct thread *td, int pages); +u_int vm_active_count(void); +u_int vm_inactive_count(void); +u_int vm_laundry_count(void); +u_int vm_wait_count(void); #endif /* _KERNEL */ #endif /* !_VM_EXTERN_H_ */ Index: sys/vm/vm_glue.c =================================================================== --- sys/vm/vm_glue.c +++ sys/vm/vm_glue.c @@ -552,7 +552,7 @@ } while (vm_page_count_severe()) { - VM_WAIT; + vm_wait_severe(); } if ((flags & RFMEM) == 0) { Index: sys/vm/vm_init.c =================================================================== --- sys/vm/vm_init.c +++ sys/vm/vm_init.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include Index: sys/vm/vm_kern.c =================================================================== --- sys/vm/vm_kern.c +++ sys/vm/vm_kern.c @@ -92,6 +92,7 @@ #include #include #include +#include #include #include #include @@ -196,7 +197,7 @@ if (!vm_page_reclaim_contig_domain(domain, pflags, 1, low, high, PAGE_SIZE, 0) && (flags & M_WAITOK) != 0) - VM_WAIT; + vm_wait_domain(domain); VM_OBJECT_WLOCK(object); tries++; goto retry; @@ -205,9 +206,9 @@ vmem_free(vmem, addr, size); return (0); } - KASSERT(vm_phys_domidx(m) == domain, + KASSERT(vm_phys_domain(m) == domain, ("kmem_alloc_attr_domain: Domain mismatch %d != %d", - vm_phys_domidx(m), domain)); + vm_phys_domain(m), domain)); if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); m->valid = VM_PAGE_BITS_ALL; @@ -280,7 +281,7 @@ if (!vm_page_reclaim_contig_domain(domain, pflags, npages, low, high, alignment, boundary) && (flags & M_WAITOK) != 0) - VM_WAIT; + vm_wait_domain(domain); VM_OBJECT_WLOCK(object); tries++; goto retry; @@ -288,9 +289,9 @@ vmem_free(vmem, addr, size); return (0); } - KASSERT(vm_phys_domidx(m) == domain, + KASSERT(vm_phys_domain(m) == domain, ("kmem_alloc_contig_domain: Domain mismatch %d != %d", - vm_phys_domidx(m), domain)); + vm_phys_domain(m), domain)); end_m = m + npages; tmp = addr; for (; m < end_m; m++) { @@ -452,9 +453,9 @@ kmem_unback(object, addr, i); return (KERN_NO_SPACE); } - KASSERT(vm_phys_domidx(m) == domain, + KASSERT(vm_phys_domain(m) == domain, ("kmem_back_domain: Domain mismatch %d != %d", - vm_phys_domidx(m), domain)); + vm_phys_domain(m), domain)); if (flags & M_ZERO && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); KASSERT((m->oflags & VPO_UNMANAGED) != 0, @@ -514,7 +515,7 @@ end = offset + size; 
VM_OBJECT_WLOCK(object); m = vm_page_lookup(object, atop(offset)); - domain = vm_phys_domidx(m); + domain = vm_phys_domain(m); for (; offset < end; offset += PAGE_SIZE, m = next) { next = vm_page_next(m); vm_page_unwire(m, PQ_NONE); Index: sys/vm/vm_map.c =================================================================== --- sys/vm/vm_map.c +++ sys/vm/vm_map.c @@ -2016,7 +2016,7 @@ * free pages allocating pv entries. */ if (((flags & MAP_PREFAULT_MADVISE) != 0 && - vm_cnt.v_free_count < vm_cnt.v_free_reserved) || + vm_page_count_severe()) || ((flags & MAP_PREFAULT_PARTIAL) != 0 && tmpidx >= threshold)) { psize = tmpidx; Index: sys/vm/vm_meter.c =================================================================== --- sys/vm/vm_meter.c +++ sys/vm/vm_meter.c @@ -53,6 +53,8 @@ #include #include #include +#include +#include #include #include #include @@ -213,9 +215,6 @@ total.t_dw++; else total.t_sl++; - if (td->td_wchan == - &vm_cnt.v_free_count) - total.t_pw++; } break; case TDS_CAN_RUN: @@ -283,7 +282,8 @@ } } mtx_unlock(&vm_object_list_mtx); - total.t_free = vm_cnt.v_free_count; + total.t_pw = vm_wait_count(); + total.t_free = vm_free_count(); #if defined(COMPAT_FREEBSD11) /* sysctl(8) allocates twice as much memory as reported by sysctl(3) */ if (curproc->p_osrel < P_OSREL_VMTOTAL64 && (req->oldlen == @@ -339,7 +339,7 @@ #define VM_STATS(parent, var, descr) \ SYSCTL_OID(parent, OID_AUTO, var, CTLTYPE_U64 | CTLFLAG_MPSAFE | \ - CTLFLAG_RD, &vm_cnt.var, 0, sysctl_handle_vmstat, "QU", descr); + CTLFLAG_RD, &vm_cnt.var, 0, sysctl_handle_vmstat, "QU", descr) #define VM_STATS_VM(var, descr) VM_STATS(_vm_stats_vm, var, descr) #define VM_STATS_SYS(var, descr) VM_STATS(_vm_stats_sys, var, descr) @@ -379,19 +379,36 @@ VM_STATS_VM(v_rforkpages, "VM pages affected by rfork()"); VM_STATS_VM(v_kthreadpages, "VM pages affected by fork() by kernel"); +static int +sysctl_handle_vmstat_proc(SYSCTL_HANDLER_ARGS) +{ + u_int (*fn)(void); + uint32_t val; + + fn = arg1; + val = fn(); + return (SYSCTL_OUT(req, &val, sizeof(val))); +} + +#define VM_STATS_PROC(var, descr, fn) \ + SYSCTL_OID(_vm_stats_vm, OID_AUTO, var, CTLTYPE_U32 | CTLFLAG_MPSAFE | \ + CTLFLAG_RD, fn, 0, sysctl_handle_vmstat_proc, "IU", descr) + #define VM_STATS_UINT(var, descr) \ SYSCTL_UINT(_vm_stats_vm, OID_AUTO, var, CTLFLAG_RD, &vm_cnt.var, 0, descr) + VM_STATS_UINT(v_page_size, "Page size in bytes"); VM_STATS_UINT(v_page_count, "Total number of pages in system"); VM_STATS_UINT(v_free_reserved, "Pages reserved for deadlock"); VM_STATS_UINT(v_free_target, "Pages desired free"); VM_STATS_UINT(v_free_min, "Minimum low-free-pages threshold"); -VM_STATS_UINT(v_free_count, "Free pages"); +VM_STATS_PROC(v_free_count, "Free pages", vm_free_count); VM_STATS_UINT(v_wire_count, "Wired pages"); -VM_STATS_UINT(v_active_count, "Active pages"); +VM_STATS_PROC(v_active_count, "Active pages", vm_active_count); VM_STATS_UINT(v_inactive_target, "Desired inactive pages"); -VM_STATS_UINT(v_inactive_count, "Inactive pages"); -VM_STATS_UINT(v_laundry_count, "Pages eligible for laundering"); +VM_STATS_PROC(v_inactive_count, "Inactive pages", vm_inactive_count); +VM_STATS_PROC(v_laundry_count, "Pages eligible for laundering", + vm_laundry_count); VM_STATS_UINT(v_pageout_free_min, "Min pages reserved for kernel"); VM_STATS_UINT(v_interrupt_free_min, "Reserved pages for interrupt code"); VM_STATS_UINT(v_free_severe, "Severe page depletion point"); @@ -406,3 +423,52 @@ SYSCTL_UINT(_vm_stats_vm, OID_AUTO, v_tcached, CTLFLAG_RD, SYSCTL_NULL_UINT_PTR, 0, "Dummy for 
compatibility"); #endif + +u_int +vm_free_count(void) +{ + u_int v; + int i; + + v = 0; + for (i = 0; i < vm_ndomains; i++) + v += vm_dom[i].vmd_free_count; + + return (v); +} + +static +u_int +vm_pagequeue_count(int pq) +{ + u_int v; + int i; + + v = 0; + for (i = 0; i < vm_ndomains; i++) + v += vm_dom[i].vmd_pagequeues[pq].pq_cnt; + + return (v); +} + +u_int +vm_active_count(void) +{ + + return vm_pagequeue_count(PQ_ACTIVE); +} + +u_int +vm_inactive_count(void) +{ + + return vm_pagequeue_count(PQ_INACTIVE); +} + +u_int +vm_laundry_count(void) +{ + + return vm_pagequeue_count(PQ_LAUNDRY); +} + Index: sys/vm/vm_object.h =================================================================== --- sys/vm/vm_object.h +++ sys/vm/vm_object.h @@ -297,6 +297,17 @@ } } +static __inline bool +vm_object_reserv(vm_object_t object) +{ + + if (object != NULL && + (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) == OBJ_COLORED) { + return (true); + } + return (false); +} + void vm_object_clear_flag(vm_object_t object, u_short bits); void vm_object_pip_add(vm_object_t object, short i); void vm_object_pip_subtract(vm_object_t object, short i); Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c +++ sys/vm/vm_object.c @@ -96,6 +96,8 @@ #include #include #include +#include +#include #include #include #include Index: sys/vm/vm_page.h =================================================================== --- sys/vm/vm_page.h +++ sys/vm/vm_page.h @@ -218,54 +218,10 @@ #endif SLIST_HEAD(spglist, vm_page); -struct vm_pagequeue { - struct mtx pq_mutex; - struct pglist pq_pl; - int pq_cnt; - u_int * const pq_vcnt; - const char * const pq_name; -} __aligned(CACHE_LINE_SIZE); - - -struct vm_domain { - struct vm_pagequeue vmd_pagequeues[PQ_COUNT]; - struct vmem *vmd_kernel_arena; - u_int vmd_page_count; - u_int vmd_free_count; - long vmd_segs; /* bitmask of the segments */ - boolean_t vmd_oom; - int vmd_oom_seq; - int vmd_last_active_scan; - struct vm_page vmd_laundry_marker; - struct vm_page vmd_marker; /* marker for pagedaemon private use */ - struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */ -}; - -extern struct vm_domain vm_dom[MAXMEMDOM]; - -#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED) -#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex) -#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex) -#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex) - #ifdef _KERNEL extern vm_page_t bogus_page; - -static __inline void -vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend) -{ - -#ifdef notyet - vm_pagequeue_assert_locked(pq); -#endif - pq->pq_cnt += addend; - atomic_add_int(pq->pq_vcnt, addend); -} -#define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1) -#define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1) #endif /* _KERNEL */ -extern struct mtx_padalign vm_page_queue_free_mtx; extern struct mtx_padalign pa_lock[]; #if defined(__arm__) Index: sys/vm/vm_page.c =================================================================== --- sys/vm/vm_page.c +++ sys/vm/vm_page.c @@ -115,8 +115,9 @@ #include #include #include -#include #include +#include +#include #include #include #include @@ -131,10 +132,16 @@ */ struct vm_domain vm_dom[MAXMEMDOM]; -struct mtx_padalign __exclusive_cache_line vm_page_queue_free_mtx; struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT]; +struct mtx_padalign __exclusive_cache_line vm_domainset_lock; +domainset_t __exclusive_cache_line 
vm_min_domains; +domainset_t __exclusive_cache_line vm_severe_domains; +static int vm_min_waiters; +static int vm_severe_waiters; +static int vm_pageproc_waiters; + /* * bogus page -- for I/O to/from partially complete buffers, * or for paging into sparsely invalid regions. @@ -159,24 +166,22 @@ SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages"); -/* Is the page daemon waiting for free pages? */ -static int vm_pageout_pages_needed; - static uma_zone_t fakepg_zone; static void vm_page_alloc_check(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(uint8_t queue, vm_page_t m); static void vm_page_free_phys(vm_page_t m); -static void vm_page_free_wakeup(void); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); -static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, - vm_paddr_t high); -static int vm_page_alloc_fail(vm_object_t object, int req); +static int vm_page_reclaim_run(int req_class, int domain, u_long npages, + vm_page_t m_run, vm_paddr_t high); +static void vm_domain_free_wakeup(struct vm_domain *); +static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, + int req); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL); @@ -313,6 +318,7 @@ static void vm_page_blacklist_check(char *list, char *end) { + struct vm_domain *vmd; vm_paddr_t pa; vm_page_t m; char *next; @@ -325,9 +331,10 @@ m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) continue; - mtx_lock(&vm_page_queue_free_mtx); + vmd = vm_pagequeue_domain(m); + vm_domain_free_lock(vmd); ret = vm_phys_unfree_page(m); - mtx_unlock(&vm_page_queue_free_mtx); + vm_domain_free_unlock(vmd); if (ret == TRUE) { TAILQ_INSERT_TAIL(&blacklist_head, m, listq); if (bootverbose) @@ -390,28 +397,23 @@ } static void -vm_page_domain_init(struct vm_domain *vmd) +vm_page_domain_init(int domain) { + struct vm_domain *vmd; struct vm_pagequeue *pq; int i; + vmd = VM_DOMAIN(domain); + bzero(vmd, sizeof(*vmd)); *__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) = "vm inactive pagequeue"; - *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) = - &vm_cnt.v_inactive_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) = "vm active pagequeue"; - *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) = - &vm_cnt.v_active_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) = "vm laundry pagequeue"; - *__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) = - &vm_cnt.v_laundry_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) = "vm unswappable pagequeue"; - /* Unswappable dirty pages are counted as being in the laundry. 
*/ - *__DECONST(int **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_vcnt) = - &vm_cnt.v_laundry_count; + vmd->vmd_domain = domain; vmd->vmd_page_count = 0; vmd->vmd_free_count = 0; vmd->vmd_segs = 0; @@ -422,6 +424,7 @@ mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue", MTX_DEF | MTX_DUPOK); } + mtx_init(&vmd->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF); } /* @@ -458,7 +461,6 @@ vm_offset_t vm_page_startup(vm_offset_t vaddr) { - struct vm_domain *vmd; struct vm_phys_seg *seg; vm_page_t m; char *list, *listend; @@ -489,11 +491,11 @@ /* * Initialize the page and queue locks. */ - mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF); + mtx_init(&vm_domainset_lock, "vm domainset lock", NULL, MTX_DEF); for (i = 0; i < PA_LOCK_COUNT; i++) mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF); for (i = 0; i < vm_ndomains; i++) - vm_page_domain_init(&vm_dom[i]); + vm_page_domain_init(i); /* * Almost all of the pages needed for bootstrapping UMA are used @@ -691,7 +693,6 @@ * physical memory allocator's free lists. */ vm_cnt.v_page_count = 0; - vm_cnt.v_free_count = 0; for (segind = 0; segind < vm_phys_nsegs; segind++) { seg = &vm_phys_segs[segind]; for (m = seg->first_page, pa = seg->start; pa < seg->end; @@ -706,6 +707,8 @@ * or doesn't overlap any of them. */ for (i = 0; phys_avail[i + 1] != 0; i += 2) { + struct vm_domain *vmd; + if (seg->start < phys_avail[i] || seg->end > phys_avail[i + 1]) continue; @@ -713,13 +716,14 @@ m = seg->first_page; pagecount = (u_long)atop(seg->end - seg->start); - mtx_lock(&vm_page_queue_free_mtx); + vmd = VM_DOMAIN(seg->domain); + vm_domain_free_lock(vmd); vm_phys_free_contig(m, pagecount); - vm_phys_freecnt_adj(m, (int)pagecount); - mtx_unlock(&vm_page_queue_free_mtx); + vm_domain_freecnt_adj(vmd, (int)pagecount); + vm_domain_free_unlock(vmd); vm_cnt.v_page_count += (u_int)pagecount; - vmd = &vm_dom[seg->domain]; + vmd = VM_DOMAIN(seg->domain);; vmd->vmd_page_count += (u_int)pagecount; vmd->vmd_segs |= 1UL << m->segind; break; @@ -1644,12 +1648,40 @@ return (m); } +/* + * Returns true if the number of free pages exceeds the minimum + * for the request class and false otherwise. + */ +int +vm_domain_available(struct vm_domain *vmd, int req, int npages) +{ + + vm_domain_free_assert_locked(vmd); + req = req & VM_ALLOC_CLASS_MASK; + + /* + * The page daemon is allowed to dig deeper into the free page list. + */ + if (curproc == pageproc && req != VM_ALLOC_INTERRUPT) + req = VM_ALLOC_SYSTEM; + + if (vmd->vmd_free_count >= npages + vmd->vmd_free_reserved || + (req == VM_ALLOC_SYSTEM && + vmd->vmd_free_count >= npages + vmd->vmd_interrupt_free_min) || + (req == VM_ALLOC_INTERRUPT && + vmd->vmd_free_count >= npages)) + return (1); + + return (0); +} + vm_page_t vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain, int req, vm_page_t mpred) { + struct vm_domain *vmd; vm_page_t m; - int flags, req_class; + int flags; u_int free_count; KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && @@ -1665,34 +1697,27 @@ if (object != NULL) VM_OBJECT_ASSERT_WLOCKED(object); - req_class = req & VM_ALLOC_CLASS_MASK; - - /* - * The page daemon is allowed to dig deeper into the free page list. - */ - if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) - req_class = VM_ALLOC_SYSTEM; - - /* - * Allocate a page if the number of free pages exceeds the minimum - * for the request class. 
- */ again: m = NULL; - mtx_lock(&vm_page_queue_free_mtx); - if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || - (req_class == VM_ALLOC_SYSTEM && - vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || - (req_class == VM_ALLOC_INTERRUPT && - vm_cnt.v_free_count > 0)) { +#if VM_NRESERVLEVEL > 0 + if (vm_object_reserv(object) && + (m = vm_reserv_extend(req, object, pindex, domain, mpred)) + != NULL) { + domain = vm_phys_domain(m); + vmd = VM_DOMAIN(domain); + goto found; + } +#endif + vmd = VM_DOMAIN(domain); + vm_domain_free_lock(vmd); + if (vm_domain_available(vmd, req, 1)) { /* * Can we allocate the page from a reservation? */ #if VM_NRESERVLEVEL > 0 - if (object == NULL || (object->flags & (OBJ_COLORED | - OBJ_FICTITIOUS)) != OBJ_COLORED || (m = - vm_reserv_alloc_page(object, pindex, domain, - mpred)) == NULL) + if (!vm_object_reserv(object) || + (m = vm_reserv_alloc_page(object, pindex, + domain, mpred)) == NULL) #endif { /* @@ -1714,7 +1739,7 @@ /* * Not allocatable, give up. */ - if (vm_page_alloc_fail(object, req)) + if (vm_domain_alloc_fail(vmd, object, req)) goto again; return (NULL); } @@ -1723,8 +1748,18 @@ * At this point we had better have found a good page. */ KASSERT(m != NULL, ("missing page")); - free_count = vm_phys_freecnt_adj(m, -1); - mtx_unlock(&vm_page_queue_free_mtx); + free_count = vm_domain_freecnt_adj(vmd, -1); + vm_domain_free_unlock(vmd); + + /* + * Don't wakeup too often - wakeup the pageout daemon when + * we would be nearly out of memory. + */ + if (vm_paging_needed(vmd, free_count)) + pagedaemon_wakeup(vmd->vmd_domain); +#if VM_NRESERVLEVEL > 0 +found: +#endif vm_page_alloc_check(m); /* @@ -1757,7 +1792,7 @@ if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { - pagedaemon_wakeup(); + pagedaemon_wakeup(domain); if (req & VM_ALLOC_WIRED) { atomic_subtract_int(&vm_cnt.v_wire_count, 1); m->wire_count = 0; @@ -1782,13 +1817,6 @@ } else m->pindex = pindex; - /* - * Don't wakeup too often - wakeup the pageout daemon when - * we would be nearly out of memory. - */ - if (vm_paging_needed(free_count)) - pagedaemon_wakeup(); - return (m); } @@ -1856,9 +1884,9 @@ int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { + struct vm_domain *vmd; vm_page_t m, m_ret, mpred; u_int busy_lock, flags, oflags; - int req_class; mpred = NULL; /* XXX: pacify gcc */ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && @@ -1876,14 +1904,7 @@ object)); } KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero")); - req_class = req & VM_ALLOC_CLASS_MASK; - /* - * The page daemon is allowed to dig deeper into the free page list. - */ - if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) - req_class = VM_ALLOC_SYSTEM; - if (object != NULL) { mpred = vm_radix_lookup_le(&object->rtree, pindex); KASSERT(mpred == NULL || mpred->pindex != pindex, @@ -1895,19 +1916,25 @@ * below the lower bound for the allocation class? 
*/ again: +#if VM_NRESERVLEVEL > 0 + if (vm_object_reserv(object) && + (m_ret = vm_reserv_extend_contig(req, object, pindex, domain, + npages, low, high, alignment, boundary, mpred)) != NULL) { + domain = vm_phys_domain(m_ret); + vmd = VM_DOMAIN(domain); + goto found; + } +#endif m_ret = NULL; - mtx_lock(&vm_page_queue_free_mtx); - if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved || - (req_class == VM_ALLOC_SYSTEM && - vm_cnt.v_free_count >= npages + vm_cnt.v_interrupt_free_min) || - (req_class == VM_ALLOC_INTERRUPT && - vm_cnt.v_free_count >= npages)) { + vmd = VM_DOMAIN(domain); + vm_domain_free_lock(vmd); + if (vm_domain_available(vmd, req, npages)) { /* * Can we allocate the pages from a reservation? */ #if VM_NRESERVLEVEL > 0 retry: - if (object == NULL || (object->flags & OBJ_COLORED) == 0 || + if (!vm_object_reserv(object) || (m_ret = vm_reserv_alloc_contig(object, pindex, domain, npages, low, high, alignment, boundary, mpred)) == NULL) #endif @@ -1923,12 +1950,15 @@ #endif } if (m_ret == NULL) { - if (vm_page_alloc_fail(object, req)) + if (vm_domain_alloc_fail(vmd, object, req)) goto again; return (NULL); } - vm_phys_freecnt_adj(m_ret, -npages); - mtx_unlock(&vm_page_queue_free_mtx); + vm_domain_freecnt_adj(vmd, -npages); + vm_domain_free_unlock(vmd); +#if VM_NRESERVLEVEL > 0 +found: +#endif for (m = m_ret; m < &m_ret[npages]; m++) vm_page_alloc_check(m); @@ -1964,7 +1994,7 @@ m->oflags = oflags; if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { - pagedaemon_wakeup(); + pagedaemon_wakeup(domain); if ((req & VM_ALLOC_WIRED) != 0) atomic_subtract_int( &vm_cnt.v_wire_count, npages); @@ -1994,8 +2024,9 @@ pmap_page_set_memattr(m, memattr); pindex++; } - if (vm_paging_needed(vm_cnt.v_free_count)) - pagedaemon_wakeup(); + vmd = VM_DOMAIN(domain); + if (vm_paging_needed(vmd, vmd->vmd_free_count)) + pagedaemon_wakeup(domain); return (m_ret); } @@ -2057,37 +2088,26 @@ vm_page_t vm_page_alloc_freelist_domain(int domain, int freelist, int req) { + struct vm_domain *vmd; vm_page_t m; u_int flags, free_count; - int req_class; - req_class = req & VM_ALLOC_CLASS_MASK; - /* - * The page daemon is allowed to dig deeper into the free page list. - */ - if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) - req_class = VM_ALLOC_SYSTEM; - - /* * Do not allocate reserved pages unless the req has asked for it. */ + vmd = VM_DOMAIN(domain); again: - mtx_lock(&vm_page_queue_free_mtx); - if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || - (req_class == VM_ALLOC_SYSTEM && - vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || - (req_class == VM_ALLOC_INTERRUPT && - vm_cnt.v_free_count > 0)) + vm_domain_free_lock(vmd); + if (vm_domain_available(vmd, req, 1)) m = vm_phys_alloc_freelist_pages(domain, freelist, VM_FREEPOOL_DIRECT, 0); if (m == NULL) { - if (vm_page_alloc_fail(NULL, req)) + if (vm_domain_alloc_fail(vmd, NULL, req)) goto again; return (NULL); } - free_count = vm_phys_freecnt_adj(m, -1); - mtx_unlock(&vm_page_queue_free_mtx); + free_count = vm_domain_freecnt_adj(vmd, -1); + vm_domain_free_unlock(vmd); vm_page_alloc_check(m); /* @@ -2108,8 +2128,8 @@ } /* Unmanaged pages don't use "act_count". */ m->oflags = VPO_UNMANAGED; - if (vm_paging_needed(free_count)) - pagedaemon_wakeup(); + if (vm_paging_needed(vmd, free_count)) + pagedaemon_wakeup(domain); return (m); } @@ -2331,9 +2351,10 @@ * "req_class" must be an allocation class. 
*/ static int -vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, +vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, vm_paddr_t high) { + struct vm_domain *vmd; struct mtx *m_mtx; struct spglist free; vm_object_t object; @@ -2483,7 +2504,9 @@ unlock: VM_OBJECT_WUNLOCK(object); } else { - mtx_lock(&vm_page_queue_free_mtx); + MPASS(vm_phys_domain(m) == domain); + vmd = VM_DOMAIN(domain); + vm_domain_free_lock(vmd); order = m->order; if (order < VM_NFREEORDER) { /* @@ -2500,7 +2523,7 @@ else if (vm_reserv_is_page_free(m)) order = 0; #endif - mtx_unlock(&vm_page_queue_free_mtx); + vm_domain_free_unlock(vmd); if (order == VM_NFREEORDER) error = EINVAL; } @@ -2508,13 +2531,15 @@ if (m_mtx != NULL) mtx_unlock(m_mtx); if ((m = SLIST_FIRST(&free)) != NULL) { - mtx_lock(&vm_page_queue_free_mtx); + vmd = VM_DOMAIN(domain); + vm_domain_free_lock(vmd); do { + MPASS(vm_phys_domain(m) == domain); SLIST_REMOVE_HEAD(&free, plinks.s.ss); vm_page_free_phys(m); } while ((m = SLIST_FIRST(&free)) != NULL); - vm_page_free_wakeup(); - mtx_unlock(&vm_page_queue_free_mtx); + vm_domain_free_wakeup(vmd); + vm_domain_free_unlock(vmd); } return (error); } @@ -2554,6 +2579,7 @@ vm_page_reclaim_contig_domain(int domain, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) { + struct vm_domain *vmd; vm_paddr_t curr_low; vm_page_t m_run, m_runs[NRUNS]; u_long count, reclaimed; @@ -2574,9 +2600,10 @@ * Return if the number of free pages cannot satisfy the requested * allocation. */ - count = vm_cnt.v_free_count; - if (count < npages + vm_cnt.v_free_reserved || (count < npages + - vm_cnt.v_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) || + vmd = VM_DOMAIN(domain); + count = vmd->vmd_free_count; + if (count < npages + vmd->vmd_free_reserved || (count < npages + + vmd->vmd_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) || (count < npages && req_class == VM_ALLOC_INTERRUPT)) return (false); @@ -2612,8 +2639,8 @@ for (i = 0; count > 0 && i < NRUNS; i++) { count--; m_run = m_runs[RUN_INDEX(count)]; - error = vm_page_reclaim_run(req_class, npages, m_run, - high); + error = vm_page_reclaim_run(req_class, domain, npages, + m_run, high); if (error == 0) { reclaimed += npages; if (reclaimed >= MIN_RECLAIM) @@ -2653,66 +2680,190 @@ return (ret); } +/* + * Set the domain in the appropriate page level domainset. + */ +void +vm_domain_set(struct vm_domain *vmd) +{ + mtx_lock(&vm_domainset_lock); + if (!vmd->vmd_minset && vm_paging_min(vmd)) { + vmd->vmd_minset = 1; + DOMAINSET_SET(vmd->vmd_domain, &vm_min_domains); + } + if (!vmd->vmd_severeset && vm_paging_severe(vmd)) { + vmd->vmd_severeset = 1; + DOMAINSET_CLR(vmd->vmd_domain, &vm_severe_domains); + } + mtx_unlock(&vm_domainset_lock); +} + /* - * vm_wait: (also see VM_WAIT macro) + * Clear the domain from the appropriate page level domainset. + */ +static void +vm_domain_clear(struct vm_domain *vmd) +{ + + mtx_lock(&vm_domainset_lock); + if (vmd->vmd_minset && !vm_paging_min(vmd)) { + vmd->vmd_minset = 0; + DOMAINSET_CLR(vmd->vmd_domain, &vm_min_domains); + if (vm_min_waiters != 0) { + vm_min_waiters = 0; + wakeup(&vm_min_domains); + } + } + if (vmd->vmd_severeset && !vm_paging_severe(vmd)) { + vmd->vmd_severeset = 0; + DOMAINSET_CLR(vmd->vmd_domain, &vm_severe_domains); + if (vm_severe_waiters != 0) { + vm_severe_waiters = 0; + wakeup(&vm_severe_domains); + } + } + mtx_unlock(&vm_domainset_lock); +} + +/* + * Wait for free pages to exceed the min threshold globally. 
+ */ +void +vm_wait_min(void) +{ + + mtx_lock(&vm_domainset_lock); + while (vm_page_count_min()) { + vm_min_waiters++; + msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", 0); + } + mtx_unlock(&vm_domainset_lock); +} + +/* + * Wait for free pages to exceed the severe threshold globally. + */ +void +vm_wait_severe(void) +{ + + mtx_lock(&vm_domainset_lock); + while (vm_page_count_severe()) { + vm_severe_waiters++; + msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", 0); + } + mtx_unlock(&vm_domainset_lock); +} + +u_int +vm_wait_count(void) +{ + u_int cnt; + int i; + + cnt = 0; + for (i = 0; i < vm_ndomains; i++) + cnt += VM_DOMAIN(i)->vmd_waiters; + cnt += vm_severe_waiters + vm_min_waiters; + + return (cnt); +} + +/* + * vm_wait_domain: * * Sleep until free pages are available for allocation. - * - Called in various places before memory allocations. + * - Called in various places after failed memory allocations. */ -static void -_vm_wait(void) +void +vm_wait_domain(int domain) { + struct vm_domain *vmd; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vmd = VM_DOMAIN(domain); + vm_domain_free_assert_locked(vmd); + if (curproc == pageproc) { - vm_pageout_pages_needed = 1; - msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx, - PDROP | PSWP, "VMWait", 0); + vmd->vmd_pageout_pages_needed = 1; + msleep(&vmd->vmd_pageout_pages_needed, + vm_domain_free_lockptr(vmd), PDROP | PSWP, "VMWait", 0); } else { if (pageproc == NULL) panic("vm_wait in early boot"); - pagedaemon_wait(PVM, "vmwait"); + pagedaemon_wait(domain, PVM, "vmwait"); } } +/* + * vm_wait: (also see VM_WAIT macro) + * + * Sleep until free pages are available for allocation. + * - Called in various places after failed memory allocations. + */ void vm_wait(void) { - mtx_lock(&vm_page_queue_free_mtx); - _vm_wait(); + /* + * We use racey wakeup synchronization to avoid expensive global + * locking for the pageproc when sleeping with a non-specific vm_wait. + * To handle this, we only sleep for one tick in this instance. It + * is expected that most allocations for the pageproc will come from + * kmem or vm_page_grab* which will use the more specific and + * race-free vm_wait_domain(). + */ + if (curproc == pageproc) { + mtx_lock(&vm_domainset_lock); + vm_pageproc_waiters++; + msleep(&vm_pageproc_waiters, &vm_domainset_lock, PVM, + "pageprocwait", 1); + mtx_unlock(&vm_domainset_lock); + } else { + /* + * XXX Ideally we would wait only until the allocation could + * be satisfied. This condition can cause new allocators to + * consume all freed pages while old allocators wait. + */ + mtx_lock(&vm_domainset_lock); + if (vm_page_count_min()) { + vm_min_waiters++; + msleep(&vm_min_domains, &vm_domainset_lock, PVM, + "vmwait", 0); + } + mtx_unlock(&vm_domainset_lock); + } } /* - * vm_page_alloc_fail: + * vm_domain_alloc_fail: * * Called when a page allocation function fails. Informs the * pagedaemon and performs the requested wait. Requires the - * page_queue_free and object lock on entry. Returns with the + * domain_free and object lock on entry. Returns with the * object lock held and free lock released. Returns an error when * retry is necessary. 
* */ static int -vm_page_alloc_fail(vm_object_t object, int req) +vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req) { - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(vmd); - atomic_add_int(&vm_pageout_deficit, + atomic_add_int(&vmd->vmd_pageout_deficit, max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) { if (object != NULL) VM_OBJECT_WUNLOCK(object); - _vm_wait(); + vm_wait_domain(vmd->vmd_domain); if (object != NULL) VM_OBJECT_WLOCK(object); if (req & VM_ALLOC_WAITOK) return (EAGAIN); } else { - mtx_unlock(&vm_page_queue_free_mtx); - pagedaemon_wakeup(); + vm_domain_free_unlock(vmd); + pagedaemon_wakeup(vmd->vmd_domain); } return (0); } @@ -2731,18 +2882,19 @@ vm_waitpfault(void) { - mtx_lock(&vm_page_queue_free_mtx); - pagedaemon_wait(PUSER, "pfault"); + mtx_lock(&vm_domainset_lock); + if (vm_page_count_min()) { + vm_min_waiters++; + msleep(&vm_min_domains, &vm_domainset_lock, PUSER, "pfault", 0); + } + mtx_unlock(&vm_domainset_lock); } struct vm_pagequeue * vm_page_pagequeue(vm_page_t m) { - if (vm_page_in_laundry(m)) - return (&vm_dom[0].vmd_pagequeues[m->queue]); - else - return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]); + return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]); } /* @@ -2804,10 +2956,7 @@ KASSERT(queue < PQ_COUNT, ("vm_page_enqueue: invalid queue %u request for page %p", queue, m)); - if (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE) - pq = &vm_dom[0].vmd_pagequeues[queue]; - else - pq = &vm_phys_domain(m)->vmd_pagequeues[queue]; + pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue]; vm_pagequeue_lock(pq); m->queue = queue; TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); @@ -2889,7 +3038,7 @@ } /* - * vm_page_free_wakeup: + * vm_domain_free_wakeup: * * Helper routine for vm_page_free_toq(). This routine is called * when a page is added to the free queues. @@ -2897,28 +3046,39 @@ * The page queues must be locked. */ static void -vm_page_free_wakeup(void) +vm_domain_free_wakeup(struct vm_domain *vmd) { - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(vmd); + /* * if pageout daemon needs pages, then tell it that there are * some free. */ - if (vm_pageout_pages_needed && - vm_cnt.v_free_count >= vm_cnt.v_pageout_free_min) { - wakeup(&vm_pageout_pages_needed); - vm_pageout_pages_needed = 0; + if (vmd->vmd_pageout_pages_needed && + vmd->vmd_free_count >= vmd->vmd_pageout_free_min) { + wakeup(&vmd->vmd_pageout_pages_needed); + vmd->vmd_pageout_pages_needed = 0; } /* * wakeup processes that are waiting on memory if we hit a * high water mark. And wakeup scheduler process if we have * lots of memory. this process will swapin processes. 
*/ - if (vm_pages_needed && !vm_page_count_min()) { - vm_pages_needed = false; - wakeup(&vm_cnt.v_free_count); + if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) { + vmd->vmd_pages_needed = false; + wakeup(&vmd->vmd_free_count); } + if ((vmd->vmd_minset && !vm_paging_min(vmd)) || + (vmd->vmd_severeset && !vm_paging_severe(vmd))) + vm_domain_clear(vmd); + + /* See comments in vm_wait(); */ + if (vm_pageproc_waiters) { + vm_pageproc_waiters = 0; + wakeup(&vm_pageproc_waiters); + } + } /* @@ -3008,9 +3168,9 @@ vm_page_free_phys(vm_page_t m) { - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(vm_pagequeue_domain(m)); - vm_phys_freecnt_adj(m, 1); + vm_domain_freecnt_adj(vm_pagequeue_domain(m), 1); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) #endif @@ -3020,15 +3180,27 @@ void vm_page_free_phys_pglist(struct pglist *tq) { + struct vm_domain *vmd; vm_page_t m; if (TAILQ_EMPTY(tq)) return; - mtx_lock(&vm_page_queue_free_mtx); - TAILQ_FOREACH(m, tq, listq) + vmd = NULL; + TAILQ_FOREACH(m, tq, listq) { + if (vmd != vm_pagequeue_domain(m)) { + if (vmd != NULL) { + vm_domain_free_wakeup(vmd); + vm_domain_free_unlock(vmd); + } + vmd = vm_pagequeue_domain(m); + vm_domain_free_lock(vmd); + } vm_page_free_phys(m); - vm_page_free_wakeup(); - mtx_unlock(&vm_page_queue_free_mtx); + } + if (vmd != NULL) { + vm_domain_free_wakeup(vmd); + vm_domain_free_unlock(vmd); + } } /* @@ -3043,13 +3215,15 @@ void vm_page_free_toq(vm_page_t m) { + struct vm_domain *vmd; if (!vm_page_free_prep(m, false)) return; - mtx_lock(&vm_page_queue_free_mtx); + vmd = vm_pagequeue_domain(m); + vm_domain_free_lock(vmd); vm_page_free_phys(m); - vm_page_free_wakeup(); - mtx_unlock(&vm_page_queue_free_mtx); + vm_domain_free_wakeup(vmd); + vm_domain_free_unlock(vmd); } /* @@ -3160,7 +3334,7 @@ if ((queue = m->queue) == PQ_INACTIVE && !noreuse) return; if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { - pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE]; + pq = &vm_pagequeue_domain(m)->vmd_pagequeues[PQ_INACTIVE]; /* Avoid multiple acquisitions of the inactive queue lock. 
*/ if (queue == PQ_INACTIVE) { vm_pagequeue_lock(pq); @@ -3172,8 +3346,9 @@ } m->queue = PQ_INACTIVE; if (noreuse) - TAILQ_INSERT_BEFORE(&vm_phys_domain(m)->vmd_inacthead, - m, plinks.q); + TAILQ_INSERT_BEFORE( + &vm_pagequeue_domain(m)->vmd_inacthead, m, + plinks.q); else TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_inc(pq); @@ -3950,10 +4125,10 @@ DB_SHOW_COMMAND(page, vm_page_print_page_info) { - db_printf("vm_cnt.v_free_count: %d\n", vm_cnt.v_free_count); - db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count); - db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count); - db_printf("vm_cnt.v_laundry_count: %d\n", vm_cnt.v_laundry_count); + db_printf("vm_cnt.v_free_count: %d\n", vm_free_count()); + db_printf("vm_cnt.v_inactive_count: %d\n", vm_inactive_count()); + db_printf("vm_cnt.v_active_count: %d\n", vm_active_count()); + db_printf("vm_cnt.v_laundry_count: %d\n", vm_laundry_count()); db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count); db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved); db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min); @@ -3965,7 +4140,7 @@ { int dom; - db_printf("pq_free %d\n", vm_cnt.v_free_count); + db_printf("pq_free %d\n", vm_free_count()); for (dom = 0; dom < vm_ndomains; dom++) { db_printf( "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n", Index: sys/vm/vm_pageout.h =================================================================== --- sys/vm/vm_pageout.h +++ sys/vm/vm_pageout.h @@ -74,9 +74,7 @@ */ extern int vm_page_max_wired; -extern int vm_pageout_deficit; extern int vm_pageout_page_count; -extern bool vm_pages_needed; #define VM_OOM_MEM 1 #define VM_OOM_SWAPZ 2 @@ -95,12 +93,15 @@ * Signal pageout-daemon and wait for it. */ -void pagedaemon_wait(int pri, const char *wmesg); -void pagedaemon_wakeup(void); +void pagedaemon_wait(int domain, int pri, const char *wmesg); +void pagedaemon_wakeup(int domain); #define VM_WAIT vm_wait() #define VM_WAITPFAULT vm_waitpfault() void vm_wait(void); void vm_waitpfault(void); +void vm_wait_domain(int domain); +void vm_wait_min(void); +void vm_wait_severe(void); #ifdef _KERNEL int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *); Index: sys/vm/vm_pageout.c =================================================================== --- sys/vm/vm_pageout.c +++ sys/vm/vm_pageout.c @@ -110,6 +110,7 @@ #include #include #include +#include #include #include #include @@ -147,20 +148,8 @@ #define VM_LAUNDER_RATE 10 #define VM_INACT_SCAN_RATE 2 -int vm_pageout_deficit; /* Estimated number of pages deficit */ -u_int vm_pageout_wakeup_thresh; static int vm_pageout_oom_seq = 12; -static bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */ -bool vm_pages_needed; /* Are threads waiting for free pages? */ -/* Pending request for dirty page laundering. 
*/ -static enum { - VM_LAUNDRY_IDLE, - VM_LAUNDRY_BACKGROUND, - VM_LAUNDRY_SHORTFALL -} vm_laundry_request = VM_LAUNDRY_IDLE; -static int vm_inactq_scans; - static int vm_pageout_update_period; static int disable_swap_pageouts; static int lowmem_period = 10; @@ -173,10 +162,6 @@ CTLFLAG_RWTUN, &vm_panic_on_oom, 0, "panic on out of memory instead of killing the largest process"); -SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, - CTLFLAG_RWTUN, &vm_pageout_wakeup_thresh, 0, - "free page threshold for waking up the pageout daemon"); - SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, CTLFLAG_RWTUN, &vm_pageout_update_period, 0, "Maximum active LRU update period"); @@ -200,11 +185,6 @@ &act_scan_laundry_weight, 0, "weight given to clean vs. dirty pages in active queue scans"); -static u_int vm_background_launder_target; -SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RWTUN, - &vm_background_launder_target, 0, - "background laundering target, in pages"); - static u_int vm_background_launder_rate = 4096; SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN, &vm_background_launder_rate, 0, @@ -959,18 +939,18 @@ static void vm_pageout_laundry_worker(void *arg) { - struct vm_domain *domain; + struct vm_domain *vmd; struct vm_pagequeue *pq; uint64_t nclean, ndirty; u_int inactq_scans, last_launder; - int domidx, last_target, launder, shortfall, shortfall_cycle, target; + int domain, last_target, launder, shortfall, shortfall_cycle, target; bool in_shortfall; - domidx = (uintptr_t)arg; - domain = &vm_dom[domidx]; - pq = &domain->vmd_pagequeues[PQ_LAUNDRY]; - KASSERT(domain->vmd_segs != 0, ("domain without segments")); - vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY); + domain = (uintptr_t)arg; + vmd = VM_DOMAIN(domain); + pq = &vmd->vmd_pagequeues[PQ_LAUNDRY]; + KASSERT(vmd->vmd_segs != 0, ("domain without segments")); + vm_pageout_init_marker(&vmd->vmd_laundry_marker, PQ_LAUNDRY); shortfall = 0; in_shortfall = false; @@ -982,9 +962,9 @@ /* * Calls to these handlers are serialized by the swap syscall lock. */ - (void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, domain, + (void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, vmd, EVENTHANDLER_PRI_ANY); - (void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, domain, + (void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, vmd, EVENTHANDLER_PRI_ANY); /* @@ -1006,7 +986,7 @@ target = shortfall; } else if (!in_shortfall) goto trybackground; - else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) { + else if (shortfall_cycle == 0 || vm_laundry_target(vmd) <= 0) { /* * We recently entered shortfall and began laundering * pages. If we have completed that laundering run @@ -1040,11 +1020,12 @@ * memory pressure required to trigger laundering decreases. */ trybackground: - nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count; - ndirty = vm_cnt.v_laundry_count; + nclean = vmd->vmd_free_count + + vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt; + ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt; if (target == 0 && inactq_scans != last_launder && ndirty * isqrt(inactq_scans - last_launder) >= nclean) { - target = vm_background_launder_target; + target = vmd->vmd_background_launder_target; } /* @@ -1076,7 +1057,7 @@ * pages could exceed "target" by the maximum size of * a cluster minus one. */ - target -= min(vm_pageout_launder(domain, launder, + target -= min(vm_pageout_launder(vmd, launder, in_shortfall), target); pause("laundp", hz / VM_LAUNDER_RATE); } @@ -1087,8 +1068,8 @@ * kicks us. 
*/ vm_pagequeue_lock(pq); - if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE) - (void)mtx_sleep(&vm_laundry_request, + if (target == 0 && vmd->vmd_laundry_request == VM_LAUNDRY_IDLE) + (void)mtx_sleep(&vmd->vmd_laundry_request, vm_pagequeue_lockptr(pq), PVM, "launds", 0); /* @@ -1096,16 +1077,17 @@ * a shortfall laundering unless we're already in the middle of * one. This may preempt a background laundering. */ - if (vm_laundry_request == VM_LAUNDRY_SHORTFALL && + if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL && (!in_shortfall || shortfall_cycle == 0)) { - shortfall = vm_laundry_target() + vm_pageout_deficit; + shortfall = vm_laundry_target(vmd) + + vmd->vmd_pageout_deficit; target = 0; } else shortfall = 0; if (target == 0) - vm_laundry_request = VM_LAUNDRY_IDLE; - inactq_scans = vm_inactq_scans; + vmd->vmd_laundry_request = VM_LAUNDRY_IDLE; + inactq_scans = vmd->vmd_inactq_scans; vm_pagequeue_unlock(pq); } } @@ -1134,7 +1116,7 @@ * If we need to reclaim memory ask kernel caches to return * some. We rate limit to avoid thrashing. */ - if (vmd == &vm_dom[0] && pass > 0 && + if (vmd == VM_DOMAIN(0) && pass > 0 && (time_uptime - lowmem_uptime) >= lowmem_period) { /* * Decrease registered cache sizes. @@ -1163,8 +1145,8 @@ * the page daemon and this calculation. */ if (pass > 0) { - deficit = atomic_readandclear_int(&vm_pageout_deficit); - page_shortage = vm_paging_target() + deficit; + deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit); + page_shortage = vm_paging_target(vmd) + deficit; } else page_shortage = deficit = 0; starting_page_shortage = page_shortage; @@ -1357,18 +1339,20 @@ * keep count. */ if (starting_page_shortage > 0) { - pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY]; + pq = &vmd->vmd_pagequeues[PQ_LAUNDRY]; vm_pagequeue_lock(pq); - if (vm_laundry_request == VM_LAUNDRY_IDLE && + if (vmd->vmd_laundry_request == VM_LAUNDRY_IDLE && (pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) { if (page_shortage > 0) { - vm_laundry_request = VM_LAUNDRY_SHORTFALL; + vmd->vmd_laundry_request = VM_LAUNDRY_SHORTFALL; VM_CNT_INC(v_pdshortfalls); - } else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL) - vm_laundry_request = VM_LAUNDRY_BACKGROUND; - wakeup(&vm_laundry_request); + } else if (vmd->vmd_laundry_request != + VM_LAUNDRY_SHORTFALL) + vmd->vmd_laundry_request = + VM_LAUNDRY_BACKGROUND; + wakeup(&vmd->vmd_laundry_request); } - vm_inactq_scans++; + vmd->vmd_inactq_scans++; vm_pagequeue_unlock(pq); } @@ -1397,9 +1381,9 @@ * more aggressively, improving the effectiveness of clustering and * ensuring that they can eventually be reused. 
*/ - inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count + - vm_cnt.v_laundry_count / act_scan_laundry_weight) + - vm_paging_target() + deficit + addl_page_shortage; + inactq_shortage = vmd->vmd_inactive_target - (pq->pq_cnt + + vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight) + + vm_paging_target(vmd) + deficit + addl_page_shortage; inactq_shortage *= act_scan_laundry_weight; pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; @@ -1742,6 +1726,8 @@ } sx_sunlock(&allproc_lock); if (bigproc != NULL) { + int i; + if (vm_panic_on_oom != 0) panic("out of swap space"); PROC_LOCK(bigproc); @@ -1749,19 +1735,20 @@ sched_nice(bigproc, PRIO_MIN); _PRELE(bigproc); PROC_UNLOCK(bigproc); - wakeup(&vm_cnt.v_free_count); + for (i = 0; i < vm_ndomains; i++) + wakeup(&VM_DOMAIN(i)->vmd_free_count); } } static void vm_pageout_worker(void *arg) { - struct vm_domain *domain; - int domidx, pass; + struct vm_domain *vmd; + int domain, pass; bool target_met; - domidx = (uintptr_t)arg; - domain = &vm_dom[domidx]; + domain = (uintptr_t)arg; + vmd = VM_DOMAIN(domain); pass = 0; target_met = true; @@ -1771,18 +1758,18 @@ * is allocated. */ - KASSERT(domain->vmd_segs != 0, ("domain without segments")); - domain->vmd_last_active_scan = ticks; - vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE); - vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE); - TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl, - &domain->vmd_inacthead, plinks.q); + KASSERT(vmd->vmd_segs != 0, ("domain without segments")); + vmd->vmd_last_active_scan = ticks; + vm_pageout_init_marker(&vmd->vmd_marker, PQ_INACTIVE); + vm_pageout_init_marker(&vmd->vmd_inacthead, PQ_INACTIVE); + TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_INACTIVE].pq_pl, + &vmd->vmd_inacthead, plinks.q); /* * The pageout daemon worker is never done, so loop forever. */ while (TRUE) { - mtx_lock(&vm_page_queue_free_mtx); + vm_domain_free_lock(vmd); /* * Generally, after a level >= 1 scan, if there are enough @@ -1796,34 +1783,34 @@ * thread will, nonetheless, wait until another page is freed * or this wakeup is performed. */ - if (vm_pages_needed && !vm_page_count_min()) { - vm_pages_needed = false; - wakeup(&vm_cnt.v_free_count); + if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) { + vmd->vmd_pages_needed = false; + wakeup(&vmd->vmd_free_count); } /* - * Do not clear vm_pageout_wanted until we reach our free page + * Do not clear vmd_pageout_wanted until we reach our free page * target. Otherwise, we may be awakened over and over again, * wasting CPU time. */ - if (vm_pageout_wanted && target_met) - vm_pageout_wanted = false; + if (vmd->vmd_pageout_wanted && target_met) + vmd->vmd_pageout_wanted = false; /* * Might the page daemon receive a wakeup call? */ - if (vm_pageout_wanted) { + if (vmd->vmd_pageout_wanted) { /* - * No. Either vm_pageout_wanted was set by another + * No. Either vmd_pageout_wanted was set by another * thread during the previous scan, which must have - * been a level 0 scan, or vm_pageout_wanted was + * been a level 0 scan, or vmd_pageout_wanted was * already set and the scan failed to free enough * pages. If we haven't yet performed a level >= 1 * (page reclamation) scan, then increase the level * and scan again now. Otherwise, sleep a bit and * try again later. */ - mtx_unlock(&vm_page_queue_free_mtx); + vm_domain_free_unlock(vmd); if (pass >= 1) pause("pwait", hz / VM_INACT_SCAN_RATE); pass++; @@ -1834,20 +1821,20 @@ * sleep until the next wakeup or until pages need to * have their reference stats updated. 
*/ - if (vm_pages_needed) { - mtx_unlock(&vm_page_queue_free_mtx); + if (vmd->vmd_pages_needed) { + vm_domain_free_unlock(vmd); if (pass == 0) pass++; - } else if (mtx_sleep(&vm_pageout_wanted, - &vm_page_queue_free_mtx, PDROP | PVM, "psleep", - hz) == 0) { + } else if (mtx_sleep(&vmd->vmd_pageout_wanted, + vm_domain_free_lockptr(vmd), PDROP | PVM, + "psleep", hz) == 0) { VM_CNT_INC(v_pdwakeups); pass = 1; } else pass = 0; } - target_met = vm_pageout_scan(domain, pass); + target_met = vm_pageout_scan(vmd, pass); } } @@ -1855,43 +1842,78 @@ * vm_pageout_init initialises basic pageout daemon settings. */ static void -vm_pageout_init(void) +vm_pageout_init_domain(int domain) { - /* - * Initialize some paging parameters. - */ - vm_cnt.v_interrupt_free_min = 2; - if (vm_cnt.v_page_count < 2000) - vm_pageout_page_count = 8; + struct vm_domain *vmd; + vmd = VM_DOMAIN(domain); + vmd->vmd_interrupt_free_min = 2; + /* * v_free_reserved needs to include enough for the largest * swap pager structures plus enough for any pv_entry structs * when paging. */ - if (vm_cnt.v_page_count > 1024) - vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200; + if (vmd->vmd_page_count > 1024) + vmd->vmd_free_min = 4 + (vmd->vmd_page_count - 1024) / 200; else - vm_cnt.v_free_min = 4; - vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + - vm_cnt.v_interrupt_free_min; - vm_cnt.v_free_reserved = vm_pageout_page_count + - vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768); - vm_cnt.v_free_severe = vm_cnt.v_free_min / 2; - vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved; - vm_cnt.v_free_min += vm_cnt.v_free_reserved; - vm_cnt.v_free_severe += vm_cnt.v_free_reserved; - vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2; - if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3) - vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3; + vmd->vmd_free_min = 4; + vmd->vmd_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + + vmd->vmd_interrupt_free_min; + vmd->vmd_free_reserved = vm_pageout_page_count + + vmd->vmd_pageout_free_min + (vmd->vmd_page_count / 768); + vmd->vmd_free_severe = vmd->vmd_free_min / 2; + vmd->vmd_free_target = 4 * vmd->vmd_free_min + vmd->vmd_free_reserved; + vmd->vmd_free_min += vmd->vmd_free_reserved; + vmd->vmd_free_severe += vmd->vmd_free_reserved; + vmd->vmd_inactive_target = (3 * vmd->vmd_free_target) / 2; + if (vmd->vmd_inactive_target > vmd->vmd_free_count / 3) + vmd->vmd_inactive_target = vmd->vmd_free_count / 3; /* * Set the default wakeup threshold to be 10% above the minimum * page limit. This keeps the steady state out of shortfall. */ - vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11; + vmd->vmd_pageout_wakeup_thresh = (vmd->vmd_free_min / 10) * 11; /* + * Target amount of memory to move out of the laundry queue during a + * background laundering. This is proportional to the amount of system + * memory. + */ + vmd->vmd_background_launder_target = (vmd->vmd_free_target - + vmd->vmd_free_min) / 10; +} + +static void +vm_pageout_init(void) +{ + u_int freecount; + int i; + + /* + * Initialize some paging parameters. 
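The per-domain thresholds computed by vm_pageout_init_domain() above follow a fixed chain of formulas, and vm_pageout_init() just below sums them back into the global vm_cnt fields. The sketch reproduces that chain for one hypothetical domain; the 4 KB page size, the 64 KB MAXBSIZE, the vm_pageout_page_count default of 32, and the one-million-page domain are all assumptions made for illustration.

#include <stdio.h>

#define PAGE_SIZE	4096		/* assumption */
#define MAXBSIZE	65536		/* assumption */

int
main(void)
{
	unsigned page_count = 1048576;		/* hypothetical 4 GB domain */
	unsigned free_count = page_count;	/* assume all pages start free */
	unsigned vm_pageout_page_count = 32;	/* assumed default */
	unsigned interrupt_free_min, free_min, pageout_free_min, free_reserved;
	unsigned free_severe, free_target, inactive_target, wakeup_thresh;
	unsigned background_launder_target;

	interrupt_free_min = 2;
	free_min = page_count > 1024 ? 4 + (page_count - 1024) / 200 : 4;
	pageout_free_min = (2 * MAXBSIZE) / PAGE_SIZE + interrupt_free_min;
	free_reserved = vm_pageout_page_count + pageout_free_min +
	    page_count / 768;
	free_severe = free_min / 2;
	free_target = 4 * free_min + free_reserved;
	free_min += free_reserved;
	free_severe += free_reserved;
	inactive_target = (3 * free_target) / 2;
	if (inactive_target > free_count / 3)
		inactive_target = free_count / 3;
	wakeup_thresh = (free_min / 10) * 11;
	background_launder_target = (free_target - free_min) / 10;

	printf("free_min %u free_target %u free_severe %u\n",
	    free_min, free_target, free_severe);
	printf("inactive_target %u wakeup_thresh %u launder_target %u\n",
	    inactive_target, wakeup_thresh, background_launder_target);
	return (0);
}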
+ */ + if (vm_cnt.v_page_count < 2000) + vm_pageout_page_count = 8; + + freecount = 0; + for (i = 0; i < vm_ndomains; i++) { + struct vm_domain *vmd; + + vm_pageout_init_domain(i); + vmd = VM_DOMAIN(i); + vm_cnt.v_free_reserved += vmd->vmd_free_reserved; + vm_cnt.v_free_target += vmd->vmd_free_target; + vm_cnt.v_free_min += vmd->vmd_free_min; + vm_cnt.v_inactive_target += vmd->vmd_inactive_target; + vm_cnt.v_pageout_free_min += vmd->vmd_pageout_free_min; + vm_cnt.v_interrupt_free_min += vmd->vmd_interrupt_free_min; + vm_cnt.v_free_severe += vmd->vmd_free_severe; + freecount += vmd->vmd_free_count; + } + + /* * Set interval in seconds for active scan. We want to visit each * page at least once every ten minutes. This is to prevent worst * case paging behaviors with stale active LRU. @@ -1899,17 +1921,8 @@ if (vm_pageout_update_period == 0) vm_pageout_update_period = 600; - /* XXX does not really belong here */ if (vm_page_max_wired == 0) - vm_page_max_wired = vm_cnt.v_free_count / 3; - - /* - * Target amount of memory to move out of the laundry queue during a - * background laundering. This is proportional to the amount of system - * memory. - */ - vm_background_launder_target = (vm_cnt.v_free_target - - vm_cnt.v_free_min) / 10; + vm_page_max_wired = freecount / 3; } /* @@ -1933,6 +1946,12 @@ panic("starting pageout for domain %d, error %d\n", i, error); } + error = kthread_add(vm_pageout_laundry_worker, + (void *)(uintptr_t)i, curproc, NULL, 0, 0, + "laundry: dom%d", i); + if (error != 0) + panic("starting laundry for domain %d, error %d", + i, error); } error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL, 0, 0, "uma"); @@ -1945,14 +1964,16 @@ * Perform an advisory wakeup of the page daemon. */ void -pagedaemon_wakeup(void) +pagedaemon_wakeup(int domain) { + struct vm_domain *vmd; - mtx_assert(&vm_page_queue_free_mtx, MA_NOTOWNED); + vmd = VM_DOMAIN(domain); + vm_domain_free_assert_unlocked(vmd); - if (!vm_pageout_wanted && curthread->td_proc != pageproc) { - vm_pageout_wanted = true; - wakeup(&vm_pageout_wanted); + if (!vmd->vmd_pageout_wanted && curthread->td_proc != pageproc) { + vmd->vmd_pageout_wanted = true; + wakeup(&vmd->vmd_pageout_wanted); } } @@ -1962,22 +1983,26 @@ * This function returns with the free queues mutex unlocked. */ void -pagedaemon_wait(int pri, const char *wmesg) +pagedaemon_wait(int domain, int pri, const char *wmesg) { + struct vm_domain *vmd; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vmd = VM_DOMAIN(domain); + vm_domain_free_assert_locked(vmd); /* - * vm_pageout_wanted may have been set by an advisory wakeup, but if the - * page daemon is running on a CPU, the wakeup will have been lost. + * vmd_pageout_wanted may have been set by an advisory wakeup, but if + * the page daemon is running on a CPU, the wakeup will have been lost. * Thus, deliver a potentially spurious wakeup to ensure that the page * daemon has been notified of the shortage. 
*/ - if (!vm_pageout_wanted || !vm_pages_needed) { - vm_pageout_wanted = true; - wakeup(&vm_pageout_wanted); + if (!vmd->vmd_pageout_wanted || !vmd->vmd_pages_needed) { + vmd->vmd_pageout_wanted = true; + wakeup(&vmd->vmd_pageout_wanted); } - vm_pages_needed = true; - msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | pri, + vmd->vmd_pages_needed = true; + vmd->vmd_waiters++; + msleep(&vmd->vmd_free_count, vm_domain_free_lockptr(vmd), PDROP | pri, wmesg, 0); + vmd->vmd_waiters--; } Index: sys/vm/vm_pagequeue.h =================================================================== --- sys/vm/vm_pagequeue.h +++ sys/vm/vm_pagequeue.h @@ -0,0 +1,235 @@ +/*- + * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) + * + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93 + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Authors: Avadis Tevanian, Jr., Michael Wayne Young + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + * + * $FreeBSD$ + */ + +#ifndef _VM_PAGEQUEUE_ +#define _VM_PAGEQUEUE_ + +#ifdef _KERNEL +struct vm_pagequeue { + struct mtx pq_mutex; + struct pglist pq_pl; + int pq_cnt; + const char * const pq_name; +} __aligned(CACHE_LINE_SIZE); + + +struct vm_domain { + struct vm_pagequeue vmd_pagequeues[PQ_COUNT]; + struct mtx_padalign vmd_free_mtx; + struct vmem *vmd_kernel_arena; + u_int vmd_domain; /* Domain number. */ + u_int vmd_page_count; + long vmd_segs; /* bitmask of the segments */ + + /* Paging control variables, locked by domain_free_mtx. */ + u_int vmd_free_count; + boolean_t vmd_oom; + int vmd_oom_seq; + int vmd_last_active_scan; + struct vm_page vmd_laundry_marker; + struct vm_page vmd_marker; /* marker for pagedaemon private use */ + struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */ + + int vmd_pageout_pages_needed; /* page daemon waiting for pages? */ + int vmd_pageout_deficit; /* Estimated number of pages deficit */ + int vmd_waiters; /* Pageout waiters. */ + bool vmd_pages_needed; /* Are threads waiting for free pages? */ + bool vmd_pageout_wanted; /* pageout daemon wait channel */ + bool vmd_minset; /* Are we in vm_min_domains? */ + bool vmd_severeset; /* Are we in vm_severe_domains? */ + int vmd_inactq_scans; + enum { + VM_LAUNDRY_IDLE = 0, + VM_LAUNDRY_BACKGROUND, + VM_LAUNDRY_SHORTFALL + } vmd_laundry_request; + + /* Paging thresholds. */ + u_int vmd_background_launder_target; + u_int vmd_free_reserved; /* (c) pages reserved for deadlock */ + u_int vmd_free_target; /* (c) pages desired free */ + u_int vmd_free_min; /* (c) pages desired free */ + u_int vmd_inactive_target; /* (c) pages desired inactive */ + u_int vmd_pageout_free_min; /* (c) min pages reserved for kernel */ + u_int vmd_pageout_wakeup_thresh;/* (c) min pages to wake pagedaemon */ + u_int vmd_interrupt_free_min; /* (c) reserved pages for int code */ + u_int vmd_free_severe; /* (c) severe page depletion point */ +} __aligned(CACHE_LINE_SIZE); + +extern struct vm_domain vm_dom[MAXMEMDOM]; + +#define VM_DOMAIN(n) (&vm_dom[(n)]) + +#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED) +#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex) +#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex) +#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex) + +#define vm_domain_free_assert_locked(n) \ + mtx_assert(vm_domain_free_lockptr((n)), MA_OWNED) +#define vm_domain_free_assert_unlocked(n) \ + mtx_assert(vm_domain_free_lockptr((n)), MA_NOTOWNED) +#define vm_domain_free_lock(d) \ + mtx_lock(vm_domain_free_lockptr((d))) +#define vm_domain_free_lockptr(d) \ + (&(d)->vmd_free_mtx) +#define vm_domain_free_unlock(d) \ + mtx_unlock(vm_domain_free_lockptr((d))) + +static __inline void +vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend) +{ + +#ifdef notyet + vm_pagequeue_assert_locked(pq); +#endif + pq->pq_cnt += addend; +} +#define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1) +#define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1) + +void vm_domain_set(struct vm_domain *vmd); +int vm_domain_available(struct vm_domain *vmd, int req, int npages); + +/* + * vm_pagequeue_domain: + * + * Return the memory 
domain the page belongs to. + */ +static inline struct vm_domain * +vm_pagequeue_domain(vm_page_t m) +{ + + return (VM_DOMAIN(vm_phys_domain(m))); +} + +/* + * Return the number of pages we need to free-up or cache + * A positive number indicates that we do not have enough free pages. + */ +static inline int +vm_paging_target(struct vm_domain *vmd) +{ + + return (vmd->vmd_free_target - vmd->vmd_free_count); +} + +/* + * Returns TRUE if the pagedaemon needs to be woken up. + */ +static inline int +vm_paging_needed(struct vm_domain *vmd, u_int free_count) +{ + + return (free_count < vmd->vmd_pageout_wakeup_thresh); +} + +/* + * Returns TRUE if the domain is below the min paging target. + */ +static inline int +vm_paging_min(struct vm_domain *vmd) +{ + + return (vmd->vmd_free_min > vmd->vmd_free_count); +} + +/* + * Returns TRUE if the domain is below the severe paging target. + */ +static inline int +vm_paging_severe(struct vm_domain *vmd) +{ + + return (vmd->vmd_free_severe > vmd->vmd_free_count); +} + +/* + * Return the number of pages we need to launder. + * A positive number indicates that we have a shortfall of clean pages. + */ +static inline int +vm_laundry_target(struct vm_domain *vmd) +{ + + return (vm_paging_target(vmd)); +} + +static inline u_int +vm_domain_freecnt_adj(struct vm_domain *vmd, int adj) +{ + u_int ret; + + vm_domain_free_assert_locked(vmd); + ret = vmd->vmd_free_count += adj; + if ((!vmd->vmd_minset && vm_paging_min(vmd)) || + (!vmd->vmd_severeset && vm_paging_severe(vmd))) + vm_domain_set(vmd); + + return (ret); +} + + +#endif /* _KERNEL */ +#endif /* !_VM_PAGEQUEUE_ */ Index: sys/vm/vm_phys.h =================================================================== --- sys/vm/vm_phys.h +++ sys/vm/vm_phys.h @@ -96,12 +96,12 @@ /* * - * vm_phys_domidx: + * vm_phys_domain: * * Return the index of the domain the page belongs to. */ static inline int -vm_phys_domidx(vm_page_t m) +vm_phys_domain(vm_page_t m) { #ifdef NUMA int domn, segind; @@ -115,27 +115,6 @@ #else return (0); #endif -} - -/* - * vm_phys_domain: - * - * Return the memory domain the page belongs to. - */ -static inline struct vm_domain * -vm_phys_domain(vm_page_t m) -{ - - return (&vm_dom[vm_phys_domidx(m)]); -} - -static inline u_int -vm_phys_freecnt_adj(vm_page_t m, int adj) -{ - - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - vm_phys_domain(m)->vmd_free_count += adj; - return (vm_cnt.v_free_count += adj); } #endif /* _KERNEL */ Index: sys/vm/vm_phys.c =================================================================== --- sys/vm/vm_phys.c +++ sys/vm/vm_phys.c @@ -67,6 +67,7 @@ #include #include #include +#include _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX, "Too many physsegs."); @@ -653,7 +654,7 @@ if (flind < 0) return (NULL); - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(VM_DOMAIN(domain)); fl = &vm_phys_free_queues[domain][flind][pool][0]; for (oind = order; oind < VM_NFREEORDER; oind++) { m = TAILQ_FIRST(&fl[oind].pl); @@ -906,8 +907,8 @@ m, m->pool)); KASSERT(order < VM_NFREEORDER, ("vm_phys_free_pages: order %d is out of range", order)); - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); seg = &vm_phys_segs[m->segind]; + vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); if (order < VM_NFREEORDER - 1) { pa = VM_PAGE_TO_PHYS(m); do { @@ -945,7 +946,7 @@ * Avoid unnecessary coalescing by freeing the pages in the largest * possible power-of-two-sized subsets. 
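The inline helpers above are meant to be used with the free count adjusted under vmd_free_mtx and any page-daemon wakeup issued after the lock is dropped, which is the pattern vm_reserv_extend() follows later in this patch. A userland model of that pattern, with a pthread mutex standing in for the domain free lock; every name in it is invented for illustration.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct model_domain {
	pthread_mutex_t	free_mtx;		/* stands in for vmd_free_mtx */
	unsigned	free_count;		/* vmd_free_count */
	unsigned	pageout_wakeup_thresh;	/* vmd_pageout_wakeup_thresh */
};

static bool
model_paging_needed(struct model_domain *vmd, unsigned free_count)
{

	return (free_count < vmd->pageout_wakeup_thresh);
}

static unsigned
model_freecnt_adj(struct model_domain *vmd, int adj)
{

	/* The kernel asserts the free lock here; our caller holds free_mtx. */
	vmd->free_count += adj;
	return (vmd->free_count);
}

int
main(void)
{
	struct model_domain dom;
	unsigned free_count;

	pthread_mutex_init(&dom.free_mtx, NULL);
	dom.free_count = 1000;
	dom.pageout_wakeup_thresh = 995;

	pthread_mutex_lock(&dom.free_mtx);
	free_count = model_freecnt_adj(&dom, -8);	/* "allocate" 8 pages */
	pthread_mutex_unlock(&dom.free_mtx);
	if (model_paging_needed(&dom, free_count))
		printf("would call pagedaemon_wakeup(domain)\n");
	return (0);
}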
*/ - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(vm_pagequeue_domain(m)); for (;; npages -= n) { /* * Unsigned "min" is used here so that "order" is assigned @@ -1051,14 +1052,13 @@ vm_page_t m_set, m_tmp; int order; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - /* * First, find the contiguous, power of two-sized set of free * physical pages containing the given physical page "m" and * assign it to "m_set". */ seg = &vm_phys_segs[m->segind]; + vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && order < VM_NFREEORDER - 1; ) { order++; @@ -1122,7 +1122,7 @@ KASSERT(npages > 0, ("npages is 0")); KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2")); - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(VM_DOMAIN(domain)); if (low >= high) return (NULL); m_run = NULL; @@ -1167,7 +1167,7 @@ KASSERT(npages > 0, ("npages is 0")); KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2")); - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); /* Compute the queue that is the best fit for npages. */ for (order = 0; (1 << order) < npages; order++); /* Search for a run satisfying the specified conditions. */ Index: sys/vm/vm_reserv.h =================================================================== --- sys/vm/vm_reserv.h +++ sys/vm/vm_reserv.h @@ -50,8 +50,14 @@ vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_page_t mpred); +vm_page_t vm_reserv_extend_contig(int req, vm_object_t object, + vm_pindex_t pindex, int domain, u_long npages, + vm_paddr_t low, vm_paddr_t high, u_long alignment, + vm_paddr_t boundary, vm_page_t mpred); vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain, vm_page_t mpred); +vm_page_t vm_reserv_extend(int req, vm_object_t object, + vm_pindex_t pindex, int domain, vm_page_t mpred); void vm_reserv_break_all(vm_object_t object); boolean_t vm_reserv_free_page(vm_page_t m); void vm_reserv_init(void); Index: sys/vm/vm_reserv.c =================================================================== --- sys/vm/vm_reserv.c +++ sys/vm/vm_reserv.c @@ -59,7 +59,9 @@ #include #include #include +#include #include +#include #include #include @@ -163,17 +165,21 @@ * object's list of reservations. * * A partially populated reservation can be broken and reclaimed at any time. + * + * f - vm_domain_free_lock + * o - vm_reserv_object_lock + * c - constant after boot */ struct vm_reserv { - TAILQ_ENTRY(vm_reserv) partpopq; - LIST_ENTRY(vm_reserv) objq; - vm_object_t object; /* containing object */ - vm_pindex_t pindex; /* offset within object */ - vm_page_t pages; /* first page of a superpage */ - int domain; /* NUMA domain */ - int popcnt; /* # of pages in use */ - char inpartpopq; - popmap_t popmap[NPOPMAP]; /* bit vector of used pages */ + TAILQ_ENTRY(vm_reserv) partpopq; /* (f) per-domain queue. */ + LIST_ENTRY(vm_reserv) objq; /* (o, f) object queue */ + vm_object_t object; /* (o, f) containing object */ + vm_pindex_t pindex; /* (o, f) offset in object */ + vm_page_t pages; /* (c) first page */ + int domain; /* (c) NUMA domain. 
*/ + int popcnt; /* (f) # of pages in use */ + char inpartpopq; /* (f) */ + popmap_t popmap[NPOPMAP]; /* (f) bit vector, used pages */ }; /* @@ -234,6 +240,25 @@ SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD, &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations"); +/* + * The object lock pool is used to synchronize the rvq. We can not use a + * pool mutex because it is required before malloc works. + * + * The "hash" function could be made faster without divide and modulo. + */ +#define VM_RESERV_OBJ_LOCK_COUNT MAXCPU + +struct mtx_padalign vm_reserv_object_mtx[VM_RESERV_OBJ_LOCK_COUNT]; + +#define vm_reserv_object_lock_idx(object) \ + (((uintptr_t)object / sizeof(*object)) % VM_RESERV_OBJ_LOCK_COUNT) +#define vm_reserv_object_lock_ptr(object) \ + &vm_reserv_object_mtx[vm_reserv_object_lock_idx((object))] +#define vm_reserv_object_lock(object) \ + mtx_lock(vm_reserv_object_lock_ptr((object))) +#define vm_reserv_object_unlock(object) \ + mtx_unlock(vm_reserv_object_lock_ptr((object))) + static void vm_reserv_break(vm_reserv_t rv, vm_page_t m); static void vm_reserv_depopulate(vm_reserv_t rv, int index); static vm_reserv_t vm_reserv_from_page(vm_page_t m); @@ -288,12 +313,12 @@ for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) { counter = 0; unused_pages = 0; - mtx_lock(&vm_page_queue_free_mtx); + vm_domain_free_lock(VM_DOMAIN(domain)); TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) { counter++; unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt; } - mtx_unlock(&vm_page_queue_free_mtx); + vm_domain_free_unlock(VM_DOMAIN(domain)); sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n", domain, level, unused_pages * ((int)PAGE_SIZE / 1024), counter); @@ -305,6 +330,49 @@ } /* + * Remove a reservation from the object's objq. + */ +static void +vm_reserv_remove(vm_reserv_t rv) +{ + vm_object_t object; + + KASSERT(rv->object != NULL, + ("vm_reserv_remove: reserv %p is free", rv)); + KASSERT(!rv->inpartpopq, + ("vm_reserv_remove: reserv %p's inpartpopq is TRUE", rv)); + object = rv->object; + vm_reserv_object_lock(object); + LIST_REMOVE(rv, objq); + rv->object = NULL; + vm_reserv_object_unlock(object); +} + +/* + * Insert a new reservation into the object's objq. + */ +static void +vm_reserv_insert(vm_reserv_t rv, vm_object_t object, vm_pindex_t pindex) +{ + int i; + + KASSERT(rv->object == NULL, + ("vm_reserv_insert: reserv %p isn't free", rv)); + KASSERT(rv->popcnt == 0, + ("vm_reserv_insert: reserv %p's popcnt is corrupted", rv)); + KASSERT(!rv->inpartpopq, + ("vm_reserv_insert: reserv %p's inpartpopq is TRUE", rv)); + for (i = 0; i < NPOPMAP; i++) + KASSERT(rv->popmap[i] == 0, + ("vm_reserv_insert: reserv %p's popmap is corrupted", rv)); + vm_reserv_object_lock(object); + rv->pindex = pindex; + rv->object = object; + LIST_INSERT_HEAD(&object->rvq, rv, objq); + vm_reserv_object_unlock(object); +} + +/* * Reduces the given reservation's population count. If the population count * becomes zero, the reservation is destroyed. 
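The comment above notes that the object-lock hash could avoid the divide and modulo. One possible shape for that, assuming the lock table were sized up to a power of two; the table size and shift amount below are guesses for illustration, not values from the patch.

#include <stdint.h>
#include <stdio.h>

#define OBJ_LOCK_COUNT	64	/* assumption: power of two >= MAXCPU */
#define OBJ_LOCK_SHIFT	9	/* assumption: roughly log2(sizeof(struct vm_object)) */

static inline unsigned
obj_lock_idx(const void *object)
{

	return (((uintptr_t)object >> OBJ_LOCK_SHIFT) & (OBJ_LOCK_COUNT - 1));
}

int
main(void)
{
	int dummy;

	printf("lock index for %p: %u\n", (void *)&dummy, obj_lock_idx(&dummy));
	return (0);
}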
Additionally, moves the * reservation to the tail of the partially populated reservation queue if the @@ -316,7 +384,7 @@ vm_reserv_depopulate(vm_reserv_t rv, int index) { - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); KASSERT(rv->object != NULL, ("vm_reserv_depopulate: reserv %p is free", rv)); KASSERT(popmap_is_set(rv->popmap, index), @@ -339,9 +407,7 @@ popmap_clear(rv->popmap, index); rv->popcnt--; if (rv->popcnt == 0) { - LIST_REMOVE(rv, objq); - rv->object = NULL; - rv->domain = -1; + vm_reserv_remove(rv); vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER); vm_reserv_freed++; } else { @@ -361,6 +427,43 @@ } /* + * Returns an existing reservation or NULL and initialized successor pointer. + */ +static vm_reserv_t +vm_reserv_from_object(vm_object_t object, vm_pindex_t pindex, + vm_page_t mpred, vm_page_t *msuccp) +{ + vm_reserv_t rv; + vm_page_t msucc; + + msucc = NULL; + if (mpred != NULL) { + KASSERT(mpred->object == object, + ("vm_reserv_from_object: object doesn't contain mpred")); + KASSERT(mpred->pindex < pindex, + ("vm_reserv_from_object: mpred doesn't precede pindex")); + rv = vm_reserv_from_page(mpred); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) + goto found; + msucc = TAILQ_NEXT(mpred, listq); + } else + msucc = TAILQ_FIRST(&object->memq); + if (msucc != NULL) { + KASSERT(msucc->pindex > pindex, + ("vm_reserv_from_object: msucc doesn't succeed pindex")); + rv = vm_reserv_from_page(msucc); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) + goto found; + } + rv = NULL; + +found: + *msuccp = msucc; + + return (rv); +} + +/* * Returns TRUE if the given reservation contains the given page index and * FALSE otherwise. */ @@ -381,7 +484,7 @@ vm_reserv_populate(vm_reserv_t rv, int index) { - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); KASSERT(rv->object != NULL, ("vm_reserv_populate: reserv %p is free", rv)); KASSERT(popmap_is_clear(rv->popmap, index), @@ -423,6 +526,100 @@ * The object and free page queue must be locked. */ vm_page_t +vm_reserv_extend_contig(int req, vm_object_t object, vm_pindex_t pindex, + int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, + u_long alignment, vm_paddr_t boundary, vm_page_t mpred) +{ + struct vm_domain *vmd; + vm_paddr_t pa, size; + vm_page_t m, msucc; + vm_reserv_t rv; + int i, index; + + VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0")); + + /* + * Is a reservation fundamentally impossible? + */ + if (pindex < VM_RESERV_INDEX(object, pindex) || + pindex + npages > object->size || object->resident_page_count == 0) + return (NULL); + + /* + * All reservations of a particular size have the same alignment. + * Assuming that the first page is allocated from a reservation, the + * least significant bits of its physical address can be determined + * from its offset from the beginning of the reservation and the size + * of the reservation. + * + * Could the specified index within a reservation of the smallest + * possible size satisfy the alignment and boundary requirements? + */ + pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT; + if ((pa & (alignment - 1)) != 0) + return (NULL); + size = npages << PAGE_SHIFT; + if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) + return (NULL); + + /* + * Look for an existing reservation. 
+ */ + rv = vm_reserv_from_object(object, pindex, mpred, &msucc); + if (rv == NULL) + return (NULL); + KASSERT(object != kernel_object || rv->domain == domain, + ("vm_reserv_extend_contig: Domain mismatch from reservation.")); + index = VM_RESERV_INDEX(object, pindex); + /* Does the allocation fit within the reservation? */ + if (index + npages > VM_LEVEL_0_NPAGES) + return (NULL); + domain = rv->domain; + vmd = VM_DOMAIN(domain); + vm_domain_free_lock(vmd); + if (rv->object != object || !vm_domain_available(vmd, req, npages)) { + m = NULL; + goto out; + } + m = &rv->pages[index]; + pa = VM_PAGE_TO_PHYS(m); + if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 || + ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) { + m = NULL; + goto out; + } + /* Handle vm_page_rename(m, new_object, ...). */ + for (i = 0; i < npages; i++) { + if (popmap_is_set(rv->popmap, index + i)) { + m = NULL; + goto out; + } + } + for (i = 0; i < npages; i++) + vm_reserv_populate(rv, index + i); + vm_domain_freecnt_adj(vmd, -npages); +out: + vm_domain_free_unlock(vmd); + return (m); +} + +/* + * Allocates a contiguous set of physical pages of the given size "npages" + * from existing or newly created reservations. All of the physical pages + * must be at or above the given physical address "low" and below the given + * physical address "high". The given value "alignment" determines the + * alignment of the first physical page in the set. If the given value + * "boundary" is non-zero, then the set of physical pages cannot cross any + * physical address boundary that is a multiple of that value. Both + * "alignment" and "boundary" must be a power of two. + * + * The page "mpred" must immediately precede the offset "pindex" within the + * specified object. + * + * The object and free page queue must be locked. + */ +vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_page_t mpred) @@ -434,7 +631,7 @@ u_long allocpages, maxpages, minpages; int i, index, n; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(VM_DOMAIN(domain)); VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0")); @@ -463,52 +660,48 @@ return (NULL); /* - * Look for an existing reservation. + * Callers should've extended an existing reservation prior to + * calling this function. If a reservation exists it is + * incompatible with the allocation. */ - if (mpred != NULL) { - KASSERT(mpred->object == object, - ("vm_reserv_alloc_contig: object doesn't contain mpred")); - KASSERT(mpred->pindex < pindex, - ("vm_reserv_alloc_contig: mpred doesn't precede pindex")); - rv = vm_reserv_from_page(mpred); - if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) - goto found; - msucc = TAILQ_NEXT(mpred, listq); - } else - msucc = TAILQ_FIRST(&object->memq); - if (msucc != NULL) { - KASSERT(msucc->pindex > pindex, - ("vm_reserv_alloc_contig: msucc doesn't succeed pindex")); - rv = vm_reserv_from_page(msucc); - if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) - goto found; - } + rv = vm_reserv_from_object(object, pindex, mpred, &msucc); + if (rv != NULL) + return (NULL); /* * Could at least one reservation fit between the first index to the * left that can be used ("leftcap") and the first index to the right * that cannot be used ("rightcap")? 
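The alignment and boundary screening in vm_reserv_extend_contig() above reduces to two bit tests on the candidate physical address. A small stand-alone version of those tests, with made-up addresses and sizes:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t vm_paddr_t;	/* local stand-in */

static bool
run_is_ok(vm_paddr_t pa, vm_paddr_t size, vm_paddr_t alignment,
    vm_paddr_t boundary)
{

	if ((pa & (alignment - 1)) != 0)
		return (false);		/* start is not aligned */
	if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
		return (false);		/* first and last byte straddle a boundary */
	return (true);
}

int
main(void)
{

	/* 64 KB run at 0x1f0000, 64 KB aligned, stays below the 2 MB line. */
	printf("fits: %d\n", run_is_ok(0x1f0000, 0x10000, 0x10000, 0x200000));
	/* 64 KB run at 0x1f8000 crosses the 2 MB boundary at 0x200000. */
	printf("fits: %d\n", run_is_ok(0x1f8000, 0x10000, 0x8000, 0x200000));
	return (0);
}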
+ * + * We must synchronize with the reserv object lock to protect the + * pindex/object of the resulting reservations against rename while + * we are inspecting. */ first = pindex - VM_RESERV_INDEX(object, pindex); + minpages = VM_RESERV_INDEX(object, pindex) + npages; + maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES); + allocpages = maxpages; + vm_reserv_object_lock(object); if (mpred != NULL) { if ((rv = vm_reserv_from_page(mpred))->object != object) leftcap = mpred->pindex + 1; else leftcap = rv->pindex + VM_LEVEL_0_NPAGES; - if (leftcap > first) + if (leftcap > first) { + vm_reserv_object_unlock(object); return (NULL); + } } - minpages = VM_RESERV_INDEX(object, pindex) + npages; - maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES); - allocpages = maxpages; if (msucc != NULL) { if ((rv = vm_reserv_from_page(msucc))->object != object) rightcap = msucc->pindex; else rightcap = rv->pindex; if (first + maxpages > rightcap) { - if (maxpages == VM_LEVEL_0_NPAGES) + if (maxpages == VM_LEVEL_0_NPAGES) { + vm_reserv_object_unlock(object); return (NULL); + } /* * At least one reservation will fit between "leftcap" @@ -519,6 +712,7 @@ allocpages = minpages; } } + vm_reserv_object_unlock(object); /* * Would the last new reservation extend past the end of the object? @@ -549,7 +743,7 @@ VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0); if (m == NULL) return (NULL); - KASSERT(vm_phys_domidx(m) == domain, + KASSERT(vm_phys_domain(m) == domain, ("vm_reserv_alloc_contig: Page domain does not match requested.")); /* @@ -565,22 +759,7 @@ KASSERT(rv->pages == m, ("vm_reserv_alloc_contig: reserv %p's pages is corrupted", rv)); - KASSERT(rv->object == NULL, - ("vm_reserv_alloc_contig: reserv %p isn't free", rv)); - LIST_INSERT_HEAD(&object->rvq, rv, objq); - rv->object = object; - rv->pindex = first; - rv->domain = domain; - KASSERT(rv->popcnt == 0, - ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted", - rv)); - KASSERT(!rv->inpartpopq, - ("vm_reserv_alloc_contig: reserv %p's inpartpopq is TRUE", - rv)); - for (i = 0; i < NPOPMAP; i++) - KASSERT(rv->popmap[i] == 0, - ("vm_reserv_alloc_contig: reserv %p's popmap is corrupted", - rv)); + vm_reserv_insert(rv, object, first); n = ulmin(VM_LEVEL_0_NPAGES - index, npages); for (i = 0; i < n; i++) vm_reserv_populate(rv, index + i); @@ -594,31 +773,68 @@ allocpages -= VM_LEVEL_0_NPAGES; } while (allocpages >= VM_LEVEL_0_NPAGES); return (m_ret); +} +/* + * Attempts to extend an existing reservation and allocate the page to the + * object. + * + * The page "mpred" must immediately precede the offset "pindex" within the + * specified object. + * + * The object must be locked. + */ +vm_page_t +vm_reserv_extend(int req, vm_object_t object, vm_pindex_t pindex, int domain, + vm_page_t mpred) +{ + struct vm_domain *vmd; + vm_page_t m, msucc; + vm_reserv_t rv; + int index, free_count; + + VM_OBJECT_ASSERT_WLOCKED(object); + /* - * Found a matching reservation. + * Could a reservation currently exist? */ -found: - index = VM_RESERV_INDEX(object, pindex); - /* Does the allocation fit within the reservation? */ - if (index + npages > VM_LEVEL_0_NPAGES) + if (pindex < VM_RESERV_INDEX(object, pindex) || + pindex >= object->size || object->resident_page_count == 0) return (NULL); - m = &rv->pages[index]; - pa = VM_PAGE_TO_PHYS(m); - if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 || - ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) + + /* + * Look for an existing reservation. 
+ */ + rv = vm_reserv_from_object(object, pindex, mpred, &msucc); + if (rv == NULL) return (NULL); - /* Handle vm_page_rename(m, new_object, ...). */ - for (i = 0; i < npages; i++) - if (popmap_is_set(rv->popmap, index + i)) - return (NULL); - for (i = 0; i < npages; i++) - vm_reserv_populate(rv, index + i); + + KASSERT(object != kernel_object || rv->domain == domain, + ("vm_reserv_extend: Domain mismatch from reservation.")); + domain = rv->domain; + vmd = VM_DOMAIN(domain); + index = VM_RESERV_INDEX(object, pindex); + m = &rv->pages[index]; + vm_domain_free_lock(vmd); + if (vm_domain_available(vmd, req, 1) == 0 || + /* Handle reclaim race. */ + rv->object != object || + /* Handle vm_page_rename(m, new_object, ...). */ + popmap_is_set(rv->popmap, index)) + m = NULL; + if (m != NULL) + vm_reserv_populate(rv, index); + free_count = vm_domain_freecnt_adj(vmd, -1); + vm_domain_free_unlock(vmd); + + if (vm_paging_needed(vmd, free_count)) + pagedaemon_wakeup(domain); + return (m); } /* - * Allocates a page from an existing or newly created reservation. + * Allocates a page from an existing reservation. * * The page "mpred" must immediately precede the offset "pindex" within the * specified object. @@ -632,9 +848,9 @@ vm_page_t m, msucc; vm_pindex_t first, leftcap, rightcap; vm_reserv_t rv; - int i, index; + int index; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(VM_DOMAIN(domain)); VM_OBJECT_ASSERT_WLOCKED(object); /* @@ -645,48 +861,45 @@ return (NULL); /* - * Look for an existing reservation. + * Callers should've extended an existing reservation prior to + * calling this function. If a reservation exists it is + * incompatible with the allocation. */ - if (mpred != NULL) { - KASSERT(mpred->object == object, - ("vm_reserv_alloc_page: object doesn't contain mpred")); - KASSERT(mpred->pindex < pindex, - ("vm_reserv_alloc_page: mpred doesn't precede pindex")); - rv = vm_reserv_from_page(mpred); - if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) - goto found; - msucc = TAILQ_NEXT(mpred, listq); - } else - msucc = TAILQ_FIRST(&object->memq); - if (msucc != NULL) { - KASSERT(msucc->pindex > pindex, - ("vm_reserv_alloc_page: msucc doesn't succeed pindex")); - rv = vm_reserv_from_page(msucc); - if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) - goto found; - } + rv = vm_reserv_from_object(object, pindex, mpred, &msucc); + if (rv != NULL) + return (NULL); /* * Could a reservation fit between the first index to the left that * can be used and the first index to the right that cannot be used? + * + * We must synchronize with the reserv object lock to protect the + * pindex/object of the resulting reservations against rename while + * we are inspecting. */ first = pindex - VM_RESERV_INDEX(object, pindex); + vm_reserv_object_lock(object); if (mpred != NULL) { if ((rv = vm_reserv_from_page(mpred))->object != object) leftcap = mpred->pindex + 1; else leftcap = rv->pindex + VM_LEVEL_0_NPAGES; - if (leftcap > first) + if (leftcap > first) { + vm_reserv_object_unlock(object); return (NULL); + } } if (msucc != NULL) { if ((rv = vm_reserv_from_page(msucc))->object != object) rightcap = msucc->pindex; else rightcap = rv->pindex; - if (first + VM_LEVEL_0_NPAGES > rightcap) + if (first + VM_LEVEL_0_NPAGES > rightcap) { + vm_reserv_object_unlock(object); return (NULL); + } } + vm_reserv_object_unlock(object); /* * Would a new reservation extend past the end of the object? 
@@ -712,37 +925,10 @@ rv = vm_reserv_from_page(m); KASSERT(rv->pages == m, ("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv)); - KASSERT(rv->object == NULL, - ("vm_reserv_alloc_page: reserv %p isn't free", rv)); - LIST_INSERT_HEAD(&object->rvq, rv, objq); - rv->object = object; - rv->pindex = first; - rv->domain = domain; - KASSERT(rv->popcnt == 0, - ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv)); - KASSERT(!rv->inpartpopq, - ("vm_reserv_alloc_page: reserv %p's inpartpopq is TRUE", rv)); - for (i = 0; i < NPOPMAP; i++) - KASSERT(rv->popmap[i] == 0, - ("vm_reserv_alloc_page: reserv %p's popmap is corrupted", - rv)); + vm_reserv_insert(rv, object, first); index = VM_RESERV_INDEX(object, pindex); vm_reserv_populate(rv, index); return (&rv->pages[index]); - - /* - * Found a matching reservation. - */ -found: - index = VM_RESERV_INDEX(object, pindex); - m = &rv->pages[index]; - KASSERT(object != kernel_object || vm_phys_domidx(m) == domain, - ("vm_reserv_alloc_page: Domain mismatch from reservation.")); - /* Handle vm_page_rename(m, new_object, ...). */ - if (popmap_is_set(rv->popmap, index)) - return (NULL); - vm_reserv_populate(rv, index); - return (m); } /* @@ -759,14 +945,8 @@ { int begin_zeroes, hi, i, lo; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - KASSERT(rv->object != NULL, - ("vm_reserv_break: reserv %p is free", rv)); - KASSERT(!rv->inpartpopq, - ("vm_reserv_break: reserv %p's inpartpopq is TRUE", rv)); - LIST_REMOVE(rv, objq); - rv->object = NULL; - rv->domain = -1; + vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); + vm_reserv_remove(rv); if (m != NULL) { /* * Since the reservation is being broken, there is no harm in @@ -830,9 +1010,26 @@ vm_reserv_break_all(vm_object_t object) { vm_reserv_t rv; + struct vm_domain *vmd; - mtx_lock(&vm_page_queue_free_mtx); + /* + * This access of object->rvq is unsynchronized so that the + * object rvq lock can nest after the domain_free lock. We + * must check for races in the results. However, the object + * lock prevents new additions, so we are guaranteed that when + * it returns NULL the object is properly empty. + */ + vmd = NULL; while ((rv = LIST_FIRST(&object->rvq)) != NULL) { + if (vmd != VM_DOMAIN(rv->domain)) { + if (vmd != NULL) + vm_domain_free_unlock(vmd); + vmd = VM_DOMAIN(rv->domain); + vm_domain_free_lock(vmd); + } + /* Reclaim race. 
*/ + if (rv->object != object) + continue; KASSERT(rv->object == object, ("vm_reserv_break_all: reserv %p is corrupted", rv)); if (rv->inpartpopq) { @@ -841,7 +1038,8 @@ } vm_reserv_break(rv, NULL); } - mtx_unlock(&vm_page_queue_free_mtx); + if (vmd != NULL) + vm_domain_free_unlock(vmd); } /* @@ -855,8 +1053,8 @@ { vm_reserv_t rv; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); rv = vm_reserv_from_page(m); + vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); if (rv->object == NULL) return (FALSE); vm_reserv_depopulate(rv, m - rv->pages); @@ -886,6 +1084,8 @@ while (paddr + VM_LEVEL_0_SIZE <= seg->end) { vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages = PHYS_TO_VM_PAGE(paddr); + vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].domain = + seg->domain; paddr += VM_LEVEL_0_SIZE; } } @@ -902,8 +1102,8 @@ { vm_reserv_t rv; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); rv = vm_reserv_from_page(m); + vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); if (rv->object == NULL) return (false); return (popmap_is_clear(rv->popmap, m - rv->pages)); @@ -945,7 +1145,7 @@ vm_reserv_reclaim(vm_reserv_t rv) { - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); KASSERT(rv->inpartpopq, ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv)); KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains, @@ -969,7 +1169,7 @@ { vm_reserv_t rv; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(VM_DOMAIN(domain)); if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) { vm_reserv_reclaim(rv); return (TRUE); @@ -993,7 +1193,7 @@ vm_reserv_t rv; int hi, i, lo, low_index, next_free; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_domain_free_assert_locked(VM_DOMAIN(domain)); if (npages > VM_LEVEL_0_NPAGES - 1) return (FALSE); size = npages << PAGE_SHIFT; @@ -1084,14 +1284,19 @@ VM_OBJECT_ASSERT_WLOCKED(new_object); rv = vm_reserv_from_page(m); if (rv->object == old_object) { - mtx_lock(&vm_page_queue_free_mtx); + vm_domain_free_lock(VM_DOMAIN(rv->domain)); if (rv->object == old_object) { + vm_reserv_object_lock(old_object); + rv->object = NULL; LIST_REMOVE(rv, objq); - LIST_INSERT_HEAD(&new_object->rvq, rv, objq); + vm_reserv_object_unlock(old_object); + vm_reserv_object_lock(new_object); rv->object = new_object; rv->pindex -= old_object_offset; + LIST_INSERT_HEAD(&new_object->rvq, rv, objq); + vm_reserv_object_unlock(new_object); } - mtx_unlock(&vm_page_queue_free_mtx); + vm_domain_free_unlock(VM_DOMAIN(rv->domain)); } } @@ -1121,6 +1326,7 @@ { vm_paddr_t new_end; size_t size; + int i; /* * Calculate the size (in bytes) of the reservation array. Round up @@ -1139,6 +1345,10 @@ vm_reserv_array = (void *)(uintptr_t)pmap_map(vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); bzero(vm_reserv_array, size); + + for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++) + mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL, + MTX_DEF); /* * Return the next available physical address. Index: sys/vm/vm_swapout.c =================================================================== --- sys/vm/vm_swapout.c +++ sys/vm/vm_swapout.c @@ -650,7 +650,7 @@ loop: if (vm_page_count_min()) { - VM_WAIT; + vm_wait_min(); goto loop; } Index: sys/vm/vnode_pager.c =================================================================== --- sys/vm/vnode_pager.c +++ sys/vm/vnode_pager.c @@ -1167,7 +1167,7 @@ * daemon up. This should be probably be addressed XXX. 
*/ - if (vm_cnt.v_free_count < vm_cnt.v_pageout_free_min) + if (vm_page_count_min()) flags |= VM_PAGER_PUT_SYNC; /*