D14000.id.diff
Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c
+++ sys/amd64/amd64/machdep.c
@@ -279,7 +279,7 @@
memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
freeenv(sysenv);
}
- if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count))
+ if (memsize < ptoa((uintmax_t)vm_free_count()))
memsize = ptoa((uintmax_t)Maxmem);
printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20);
realmem = atop(memsize);
@@ -306,8 +306,8 @@
vm_ksubmap_init(&kmi);
printf("avail memory = %ju (%ju MB)\n",
- ptoa((uintmax_t)vm_cnt.v_free_count),
- ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ ptoa((uintmax_t)vm_free_count()),
+ ptoa((uintmax_t)vm_free_count()) / 1048576);
/*
* Set up buffers, so they can be used to read disk labels.
Index: sys/arm/arm/machdep.c
===================================================================
--- sys/arm/arm/machdep.c
+++ sys/arm/arm/machdep.c
@@ -228,8 +228,8 @@
(uintmax_t)arm32_ptob(realmem),
(uintmax_t)arm32_ptob(realmem) / mbyte);
printf("avail memory = %ju (%ju MB)\n",
- (uintmax_t)arm32_ptob(vm_cnt.v_free_count),
- (uintmax_t)arm32_ptob(vm_cnt.v_free_count) / mbyte);
+ (uintmax_t)arm32_ptob(vm_free_count()),
+ (uintmax_t)arm32_ptob(vm_free_count()) / mbyte);
if (bootverbose) {
arm_physmem_print_tables();
devmap_print_table();
Index: sys/arm/arm/pmap-v4.c
===================================================================
--- sys/arm/arm/pmap-v4.c
+++ sys/arm/arm/pmap-v4.c
@@ -3817,7 +3817,7 @@
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
- pagedaemon_wakeup();
+ pagedaemon_wakeup(0); /* XXX ARM NUMA */
ret_value = uma_zalloc(pvzone, M_NOWAIT);
return ret_value;
}
Index: sys/cddl/compat/opensolaris/sys/kmem.h
===================================================================
--- sys/cddl/compat/opensolaris/sys/kmem.h
+++ sys/cddl/compat/opensolaris/sys/kmem.h
@@ -78,7 +78,7 @@
int kmem_debugging(void);
void *calloc(size_t n, size_t s);
-#define freemem vm_cnt.v_free_count
+#define freemem vm_free_count()
#define minfree vm_cnt.v_free_min
#define heap_arena kernel_arena
#define zio_arena NULL
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -379,7 +379,7 @@
arc_free_target_init(void *unused __unused)
{
- zfs_arc_free_target = vm_pageout_wakeup_thresh;
+ zfs_arc_free_target = (vm_cnt.v_free_min / 10) * 11;
}
SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
arc_free_target_init, NULL);
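The new initializer above is not arbitrary: vm_pageout_wakeup_thresh, which this diff retires (see sys/vm/vm_pageout.c below), was computed as (v_free_min / 10) * 11, i.e. 110% of the free-page minimum, so inlining the same expression keeps the ARC's default free target unchanged while the global goes away. A worked illustration with a hypothetical v_free_min:

/*
 * Illustration only; 12768 is a made-up v_free_min value.
 * Integer division truncates before the multiply.
 */
u_int v_free_min = 12768;
u_int target = (v_free_min / 10) * 11;	/* 1276 * 11 = 14036 pages */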
Index: sys/compat/linprocfs/linprocfs.c
===================================================================
--- sys/compat/linprocfs/linprocfs.c
+++ sys/compat/linprocfs/linprocfs.c
@@ -156,7 +156,7 @@
/*
* The correct thing here would be:
*
- memfree = vm_cnt.v_free_count * PAGE_SIZE;
+ memfree = vm_free_count() * PAGE_SIZE;
memused = memtotal - memfree;
*
* but it might mislead linux binaries into thinking there
@@ -178,7 +178,7 @@
* like unstaticizing it just for linprocfs's sake.
*/
buffers = 0;
- cached = vm_cnt.v_inactive_count * PAGE_SIZE;
+ cached = vm_inactive_count() * PAGE_SIZE;
sbuf_printf(sb,
"MemTotal: %9lu kB\n"
Index: sys/fs/tmpfs/tmpfs_subr.c
===================================================================
--- sys/fs/tmpfs/tmpfs_subr.c
+++ sys/fs/tmpfs/tmpfs_subr.c
@@ -106,7 +106,8 @@
{
vm_ooffset_t avail;
- avail = swap_pager_avail + vm_cnt.v_free_count - tmpfs_pages_reserved;
+ /* XXX */
+ avail = swap_pager_avail + vm_free_count() - tmpfs_pages_reserved;
if (__predict_false(avail < 0))
avail = 0;
return (avail);
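The clamp above works because vm_ooffset_t is a signed 64-bit type: when tmpfs_pages_reserved transiently exceeds free pages plus available swap, the expression goes negative instead of wrapping. A sketch with hypothetical counts:

/* Hypothetical values, for illustration only. */
vm_ooffset_t avail;

avail = 100 /* swap */ + 50 /* free */ - 200 /* reserved */;	/* -50 */
if (__predict_false(avail < 0))
	avail = 0;	/* report no space rather than a huge unsigned value */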
Index: sys/i386/i386/machdep.c
===================================================================
--- sys/i386/i386/machdep.c
+++ sys/i386/i386/machdep.c
@@ -271,7 +271,7 @@
memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
freeenv(sysenv);
}
- if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count))
+ if (memsize < ptoa((uintmax_t)vm_free_count()))
memsize = ptoa((uintmax_t)Maxmem);
printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20);
realmem = atop(memsize);
@@ -298,8 +298,8 @@
vm_ksubmap_init(&kmi);
printf("avail memory = %ju (%ju MB)\n",
- ptoa((uintmax_t)vm_cnt.v_free_count),
- ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ ptoa((uintmax_t)vm_free_count()),
+ ptoa((uintmax_t)vm_free_count()) / 1048576);
/*
* Set up buffers, so they can be used to read disk labels.
Index: sys/kern/init_main.c
===================================================================
--- sys/kern/init_main.c
+++ sys/kern/init_main.c
@@ -87,6 +87,7 @@
#include <vm/vm.h>
#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/copyright.h>
@@ -555,7 +556,7 @@
p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
/* Cast to avoid overflow on i386/PAE. */
- pageablemem = ptoa((vm_paddr_t)vm_cnt.v_free_count);
+ pageablemem = ptoa((vm_paddr_t)vm_free_count());
p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur =
p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem;
p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3;
Index: sys/kern/subr_vmem.c
===================================================================
--- sys/kern/subr_vmem.c
+++ sys/kern/subr_vmem.c
@@ -59,6 +59,7 @@
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/vmem.h>
+#include <sys/vmmeter.h>
#include "opt_vm.h"
@@ -72,6 +73,8 @@
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#define VMEM_OPTORDER 5
#define VMEM_OPTVALUE (1 << VMEM_OPTORDER)
@@ -641,7 +644,7 @@
* possible due to M_USE_RESERVE page allocation.
*/
if (wait & M_WAITOK)
- VM_WAIT;
+ vm_wait_domain(domain);
return (NULL);
}
mtx_unlock(&vmem_bt_lock);
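This hunk shows the conversion pattern used throughout the diff: a failed allocation no longer sleeps on the single global free-page channel (VM_WAIT) but waits on the specific domain it tried, so a wakeup in one domain does not rouse waiters on every other. A minimal sketch of the retry shape, mirroring the call site above; alloc_page_domain() is a hypothetical stand-in for the caller's allocator:

/* Sketch only; alloc_page_domain() is hypothetical. */
for (;;) {
	if ((m = alloc_page_domain(domain, flags)) != NULL)
		break;
	if ((flags & M_WAITOK) == 0)
		return (NULL);		/* caller cannot sleep */
	vm_wait_domain(domain);		/* sleep for pages in this domain */
}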
Index: sys/kern/subr_witness.c
===================================================================
--- sys/kern/subr_witness.c
+++ sys/kern/subr_witness.c
@@ -139,7 +139,7 @@
#define WITNESS_COUNT 1536
#endif
#define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */
-#define WITNESS_PENDLIST (2048 + MAXCPU)
+#define WITNESS_PENDLIST (2048 + (MAXCPU * 4))
/* Allocate 256 KB of stack data space */
#define WITNESS_LO_DATA_COUNT 2048
Index: sys/mips/mips/machdep.c
===================================================================
--- sys/mips/mips/machdep.c
+++ sys/mips/mips/machdep.c
@@ -210,8 +210,8 @@
vm_ksubmap_init(&kmi);
printf("avail memory = %ju (%juMB)\n",
- ptoa((uintmax_t)vm_cnt.v_free_count),
- ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ ptoa((uintmax_t)vm_free_count()),
+ ptoa((uintmax_t)vm_free_count()) / 1048576);
cpu_init_interrupts();
/*
Index: sys/powerpc/booke/pmap.c
===================================================================
--- sys/powerpc/booke/pmap.c
+++ sys/powerpc/booke/pmap.c
@@ -1183,7 +1183,7 @@
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
- pagedaemon_wakeup();
+ pagedaemon_wakeup(0); /* XXX powerpc NUMA */
pv = uma_zalloc(pvzone, M_NOWAIT);
return (pv);
Index: sys/powerpc/powerpc/machdep.c
===================================================================
--- sys/powerpc/powerpc/machdep.c
+++ sys/powerpc/powerpc/machdep.c
@@ -213,8 +213,8 @@
vm_ksubmap_init(&kmi);
printf("avail memory = %ju (%ju MB)\n",
- ptoa((uintmax_t)vm_cnt.v_free_count),
- ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ ptoa((uintmax_t)vm_free_count()),
+ ptoa((uintmax_t)vm_free_count()) / 1048576);
/*
* Set up buffers, so they can be used to read disk labels.
Index: sys/sparc64/sparc64/machdep.c
===================================================================
--- sys/sparc64/sparc64/machdep.c
+++ sys/sparc64/sparc64/machdep.c
@@ -190,8 +190,8 @@
EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL,
SHUTDOWN_PRI_LAST);
- printf("avail memory = %lu (%lu MB)\n", vm_cnt.v_free_count * PAGE_SIZE,
- vm_cnt.v_free_count / ((1024 * 1024) / PAGE_SIZE));
+ printf("avail memory = %lu (%lu MB)\n", vm_free_count() * PAGE_SIZE,
+ vm_free_count() / ((1024 * 1024) / PAGE_SIZE));
if (bootverbose)
printf("machine: %s\n", sparc64_model);
Index: sys/sys/vmmeter.h
===================================================================
--- sys/sys/vmmeter.h
+++ sys/sys/vmmeter.h
@@ -141,23 +141,23 @@
u_int v_interrupt_free_min; /* (c) reserved pages for int code */
u_int v_free_severe; /* (c) severe page depletion point */
u_int v_wire_count VMMETER_ALIGNED; /* (a) pages wired down */
- u_int v_active_count VMMETER_ALIGNED; /* (a) pages active */
- u_int v_inactive_count VMMETER_ALIGNED; /* (a) pages inactive */
- u_int v_laundry_count VMMETER_ALIGNED; /* (a) pages eligible for
- laundering */
- u_int v_free_count VMMETER_ALIGNED; /* (f) pages free */
};
#endif /* _KERNEL || _WANT_VMMETER */
#ifdef _KERNEL
+#include <sys/domainset.h>
+
extern struct vmmeter vm_cnt;
-extern u_int vm_pageout_wakeup_thresh;
+extern domainset_t vm_min_domains;
+extern domainset_t vm_severe_domains;
#define VM_CNT_ADD(var, x) counter_u64_add(vm_cnt.var, x)
#define VM_CNT_INC(var) VM_CNT_ADD(var, 1)
#define VM_CNT_FETCH(var) counter_u64_fetch(vm_cnt.var)
+u_int vm_free_count(void);
+
/*
* Return TRUE if we are under our severe low-free-pages threshold
*
@@ -168,7 +168,7 @@
vm_page_count_severe(void)
{
- return (vm_cnt.v_free_severe > vm_cnt.v_free_count);
+ return (!DOMAINSET_EMPTY(&vm_severe_domains));
}
/*
@@ -184,50 +184,8 @@
vm_page_count_min(void)
{
- return (vm_cnt.v_free_min > vm_cnt.v_free_count);
+ return (!DOMAINSET_EMPTY(&vm_min_domains));
}
-/*
- * Return TRUE if we have not reached our free page target during
- * free page recovery operations.
- */
-static inline int
-vm_page_count_target(void)
-{
-
- return (vm_cnt.v_free_target > vm_cnt.v_free_count);
-}
-
-/*
- * Return the number of pages we need to free-up or cache
- * A positive number indicates that we do not have enough free pages.
- */
-static inline int
-vm_paging_target(void)
-{
-
- return (vm_cnt.v_free_target - vm_cnt.v_free_count);
-}
-
-/*
- * Returns TRUE if the pagedaemon needs to be woken up.
- */
-static inline int
-vm_paging_needed(u_int free_count)
-{
-
- return (free_count < vm_pageout_wakeup_thresh);
-}
-
-/*
- * Return the number of pages we need to launder.
- * A positive number indicates that we have a shortfall of clean pages.
- */
-static inline int
-vm_laundry_target(void)
-{
-
- return (vm_paging_target());
-}
#endif /* _KERNEL */
#endif /* _SYS_VMMETER_H_ */
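With the counters split per domain, the low-memory predicates above become set-membership tests: each domain registers itself in vm_min_domains or vm_severe_domains when it crosses its local threshold (see vm_domain_set() in sys/vm/vm_page.c below), and the system is considered low when any domain is. Callers keep the same shape, as in this sketch mirroring the sys/vm/vm_glue.c hunk later in the diff:

/* Back off while any domain is below its severe threshold. */
while (vm_page_count_severe())
	vm_wait_severe();	/* sleeps until no domain is in vm_severe_domains */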
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -2327,7 +2327,7 @@
* of data we will have to page back in, plus an epsilon so
* the system doesn't become critically low on swap space.
*/
- if (vm_cnt.v_free_count + swap_pager_avail < nblks + nswap_lowat)
+ if (vm_free_count() + swap_pager_avail < nblks + nswap_lowat)
return (ENOMEM);
/*
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h
+++ sys/vm/uma.h
@@ -47,6 +47,7 @@
/* Types and type defs */
struct uma_zone;
+struct vm_domain_iterator;
/* Opaque type used as a handle to the zone */
typedef struct uma_zone * uma_zone_t;
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -3409,7 +3409,7 @@
slab->us_data = (void *)addr;
slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC;
slab->us_size = size;
- slab->us_domain = vm_phys_domidx(PHYS_TO_VM_PAGE(
+ slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE(
pmap_kextract(addr)));
uma_total_inc(size);
} else {
Index: sys/vm/vm_extern.h
===================================================================
--- sys/vm/vm_extern.h
+++ sys/vm/vm_extern.h
@@ -122,5 +122,9 @@
void vm_imgact_unmap_page(struct sf_buf *sf);
void vm_thread_dispose(struct thread *td);
int vm_thread_new(struct thread *td, int pages);
+u_int vm_active_count(void);
+u_int vm_inactive_count(void);
+u_int vm_laundry_count(void);
+u_int vm_wait_count(void);
#endif /* _KERNEL */
#endif /* !_VM_EXTERN_H_ */
Index: sys/vm/vm_glue.c
===================================================================
--- sys/vm/vm_glue.c
+++ sys/vm/vm_glue.c
@@ -552,7 +552,7 @@
}
while (vm_page_count_severe()) {
- VM_WAIT;
+ vm_wait_severe();
}
if ((flags & RFMEM) == 0) {
Index: sys/vm/vm_init.c
===================================================================
--- sys/vm/vm_init.c
+++ sys/vm/vm_init.c
@@ -89,6 +89,7 @@
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
Index: sys/vm/vm_kern.c
===================================================================
--- sys/vm/vm_kern.c
+++ sys/vm/vm_kern.c
@@ -92,6 +92,7 @@
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/vm_radix.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
@@ -196,7 +197,7 @@
if (!vm_page_reclaim_contig_domain(domain,
pflags, 1, low, high, PAGE_SIZE, 0) &&
(flags & M_WAITOK) != 0)
- VM_WAIT;
+ vm_wait_domain(domain);
VM_OBJECT_WLOCK(object);
tries++;
goto retry;
@@ -205,9 +206,9 @@
vmem_free(vmem, addr, size);
return (0);
}
- KASSERT(vm_phys_domidx(m) == domain,
+ KASSERT(vm_phys_domain(m) == domain,
("kmem_alloc_attr_domain: Domain mismatch %d != %d",
- vm_phys_domidx(m), domain));
+ vm_phys_domain(m), domain));
if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
m->valid = VM_PAGE_BITS_ALL;
@@ -280,7 +281,7 @@
if (!vm_page_reclaim_contig_domain(domain, pflags,
npages, low, high, alignment, boundary) &&
(flags & M_WAITOK) != 0)
- VM_WAIT;
+ vm_wait_domain(domain);
VM_OBJECT_WLOCK(object);
tries++;
goto retry;
@@ -288,9 +289,9 @@
vmem_free(vmem, addr, size);
return (0);
}
- KASSERT(vm_phys_domidx(m) == domain,
+ KASSERT(vm_phys_domain(m) == domain,
("kmem_alloc_contig_domain: Domain mismatch %d != %d",
- vm_phys_domidx(m), domain));
+ vm_phys_domain(m), domain));
end_m = m + npages;
tmp = addr;
for (; m < end_m; m++) {
@@ -452,9 +453,9 @@
kmem_unback(object, addr, i);
return (KERN_NO_SPACE);
}
- KASSERT(vm_phys_domidx(m) == domain,
+ KASSERT(vm_phys_domain(m) == domain,
("kmem_back_domain: Domain mismatch %d != %d",
- vm_phys_domidx(m), domain));
+ vm_phys_domain(m), domain));
if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
KASSERT((m->oflags & VPO_UNMANAGED) != 0,
@@ -514,7 +515,7 @@
end = offset + size;
VM_OBJECT_WLOCK(object);
m = vm_page_lookup(object, atop(offset));
- domain = vm_phys_domidx(m);
+ domain = vm_phys_domain(m);
for (; offset < end; offset += PAGE_SIZE, m = next) {
next = vm_page_next(m);
vm_page_unwire(m, PQ_NONE);
Index: sys/vm/vm_map.c
===================================================================
--- sys/vm/vm_map.c
+++ sys/vm/vm_map.c
@@ -2016,7 +2016,7 @@
* free pages allocating pv entries.
*/
if (((flags & MAP_PREFAULT_MADVISE) != 0 &&
- vm_cnt.v_free_count < vm_cnt.v_free_reserved) ||
+ vm_page_count_severe()) ||
((flags & MAP_PREFAULT_PARTIAL) != 0 &&
tmpidx >= threshold)) {
psize = tmpidx;
Index: sys/vm/vm_meter.c
===================================================================
--- sys/vm/vm_meter.c
+++ sys/vm/vm_meter.c
@@ -53,6 +53,8 @@
#include <vm/vm_page.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
@@ -213,9 +215,6 @@
total.t_dw++;
else
total.t_sl++;
- if (td->td_wchan ==
- &vm_cnt.v_free_count)
- total.t_pw++;
}
break;
case TDS_CAN_RUN:
@@ -283,7 +282,8 @@
}
}
mtx_unlock(&vm_object_list_mtx);
- total.t_free = vm_cnt.v_free_count;
+ total.t_pw = vm_wait_count();
+ total.t_free = vm_free_count();
#if defined(COMPAT_FREEBSD11)
/* sysctl(8) allocates twice as much memory as reported by sysctl(3) */
if (curproc->p_osrel < P_OSREL_VMTOTAL64 && (req->oldlen ==
@@ -339,7 +339,7 @@
#define VM_STATS(parent, var, descr) \
SYSCTL_OID(parent, OID_AUTO, var, CTLTYPE_U64 | CTLFLAG_MPSAFE | \
- CTLFLAG_RD, &vm_cnt.var, 0, sysctl_handle_vmstat, "QU", descr);
+ CTLFLAG_RD, &vm_cnt.var, 0, sysctl_handle_vmstat, "QU", descr)
#define VM_STATS_VM(var, descr) VM_STATS(_vm_stats_vm, var, descr)
#define VM_STATS_SYS(var, descr) VM_STATS(_vm_stats_sys, var, descr)
@@ -379,19 +379,36 @@
VM_STATS_VM(v_rforkpages, "VM pages affected by rfork()");
VM_STATS_VM(v_kthreadpages, "VM pages affected by fork() by kernel");
+static int
+sysctl_handle_vmstat_proc(SYSCTL_HANDLER_ARGS)
+{
+ u_int (*fn)(void);
+ uint32_t val;
+
+ fn = arg1;
+ val = fn();
+ return (SYSCTL_OUT(req, &val, sizeof(val)));
+}
+
+#define VM_STATS_PROC(var, descr, fn) \
+ SYSCTL_OID(_vm_stats_vm, OID_AUTO, var, CTLTYPE_U32 | CTLFLAG_MPSAFE | \
+ CTLFLAG_RD, fn, 0, sysctl_handle_vmstat_proc, "IU", descr)
+
#define VM_STATS_UINT(var, descr) \
SYSCTL_UINT(_vm_stats_vm, OID_AUTO, var, CTLFLAG_RD, &vm_cnt.var, 0, descr)
+
VM_STATS_UINT(v_page_size, "Page size in bytes");
VM_STATS_UINT(v_page_count, "Total number of pages in system");
VM_STATS_UINT(v_free_reserved, "Pages reserved for deadlock");
VM_STATS_UINT(v_free_target, "Pages desired free");
VM_STATS_UINT(v_free_min, "Minimum low-free-pages threshold");
-VM_STATS_UINT(v_free_count, "Free pages");
+VM_STATS_PROC(v_free_count, "Free pages", vm_free_count);
VM_STATS_UINT(v_wire_count, "Wired pages");
-VM_STATS_UINT(v_active_count, "Active pages");
+VM_STATS_PROC(v_active_count, "Active pages", vm_active_count);
VM_STATS_UINT(v_inactive_target, "Desired inactive pages");
-VM_STATS_UINT(v_inactive_count, "Inactive pages");
-VM_STATS_UINT(v_laundry_count, "Pages eligible for laundering");
+VM_STATS_PROC(v_inactive_count, "Inactive pages", vm_inactive_count);
+VM_STATS_PROC(v_laundry_count, "Pages eligible for laundering",
+ vm_laundry_count);
VM_STATS_UINT(v_pageout_free_min, "Min pages reserved for kernel");
VM_STATS_UINT(v_interrupt_free_min, "Reserved pages for interrupt code");
VM_STATS_UINT(v_free_severe, "Severe page depletion point");
@@ -406,3 +423,52 @@
SYSCTL_UINT(_vm_stats_vm, OID_AUTO, v_tcached, CTLFLAG_RD,
SYSCTL_NULL_UINT_PTR, 0, "Dummy for compatibility");
#endif
+
+u_int
+vm_free_count(void)
+{
+ u_int v;
+ int i;
+
+ v = 0;
+ for (i = 0; i < vm_ndomains; i++)
+ v += vm_dom[i].vmd_free_count;
+
+ return (v);
+}
+
+static u_int
+vm_pagequeue_count(int pq)
+{
+ u_int v;
+ int i;
+
+ v = 0;
+ for (i = 0; i < vm_ndomains; i++)
+ v += vm_dom[i].vmd_pagequeues[pq].pq_cnt;
+
+ return (v);
+}
+
+u_int
+vm_active_count(void)
+{
+
+ return vm_pagequeue_count(PQ_ACTIVE);
+}
+
+u_int
+vm_inactive_count(void)
+{
+
+ return vm_pagequeue_count(PQ_INACTIVE);
+}
+
+u_int
+vm_laundry_count(void)
+{
+
+ return vm_pagequeue_count(PQ_LAUNDRY);
+}
+
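Because the free, active, inactive, and laundry counts are now computed on demand by summing per-domain state, the corresponding sysctls switch from exporting a plain variable to calling a handler; the MIB names and types are unchanged, so existing userland keeps working. A small userland sketch reading one of them:

/* Illustration: read vm.stats.vm.v_free_count from userland. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	u_int free_pages;
	size_t len = sizeof(free_pages);

	if (sysctlbyname("vm.stats.vm.v_free_count", &free_pages, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("free pages: %u\n", free_pages);
	return (0);
}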
Index: sys/vm/vm_object.h
===================================================================
--- sys/vm/vm_object.h
+++ sys/vm/vm_object.h
@@ -297,6 +297,17 @@
}
}
+static __inline bool
+vm_object_reserv(vm_object_t object)
+{
+
+ if (object != NULL &&
+ (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) == OBJ_COLORED) {
+ return (true);
+ }
+ return (false);
+}
+
void vm_object_clear_flag(vm_object_t object, u_short bits);
void vm_object_pip_add(vm_object_t object, short i);
void vm_object_pip_subtract(vm_object_t object, short i);
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -96,6 +96,8 @@
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -218,54 +218,10 @@
#endif
SLIST_HEAD(spglist, vm_page);
-struct vm_pagequeue {
- struct mtx pq_mutex;
- struct pglist pq_pl;
- int pq_cnt;
- u_int * const pq_vcnt;
- const char * const pq_name;
-} __aligned(CACHE_LINE_SIZE);
-
-
-struct vm_domain {
- struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
- struct vmem *vmd_kernel_arena;
- u_int vmd_page_count;
- u_int vmd_free_count;
- long vmd_segs; /* bitmask of the segments */
- boolean_t vmd_oom;
- int vmd_oom_seq;
- int vmd_last_active_scan;
- struct vm_page vmd_laundry_marker;
- struct vm_page vmd_marker; /* marker for pagedaemon private use */
- struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */
-};
-
-extern struct vm_domain vm_dom[MAXMEMDOM];
-
-#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
-#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
-#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
-#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
-
#ifdef _KERNEL
extern vm_page_t bogus_page;
-
-static __inline void
-vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
-{
-
-#ifdef notyet
- vm_pagequeue_assert_locked(pq);
-#endif
- pq->pq_cnt += addend;
- atomic_add_int(pq->pq_vcnt, addend);
-}
-#define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1)
-#define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1)
#endif /* _KERNEL */
-extern struct mtx_padalign vm_page_queue_free_mtx;
extern struct mtx_padalign pa_lock[];
#if defined(__arm__)
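The queue and domain definitions deleted here move to the new sys/vm/vm_pagequeue.h, which the diff includes in many files but does not itself show. The following is an abridged reconstruction inferred only from the uses visible in this diff, not the authoritative header; note that pq_vcnt disappears because queue counts no longer shadow the global vm_cnt fields:

/* Abridged sketch of sys/vm/vm_pagequeue.h, inferred from this diff. */
struct vm_pagequeue {
	struct mtx	pq_mutex;
	struct pglist	pq_pl;
	int		pq_cnt;
	const char * const pq_name;
} __aligned(CACHE_LINE_SIZE);

struct vm_domain {
	struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
	struct mtx_padalign vmd_free_mtx; /* replaces vm_page_queue_free_mtx */
	int		vmd_domain;	/* this domain's index */
	u_int		vmd_page_count;
	u_int		vmd_free_count;
	/* Per-domain copies of the old global thresholds. */
	u_int		vmd_free_reserved;
	u_int		vmd_free_target;
	u_int		vmd_free_min;
	u_int		vmd_free_severe;
	u_int		vmd_interrupt_free_min;
	u_int		vmd_pageout_free_min;
	u_int		vmd_inactive_target;
	u_int		vmd_background_launder_target;
	/* Per-domain pagedaemon state, formerly globals in vm_pageout.c. */
	int		vmd_pageout_deficit;
	int		vmd_pageout_pages_needed;
	bool		vmd_pages_needed;
	bool		vmd_pageout_wanted;
	bool		vmd_minset;	/* member of vm_min_domains */
	bool		vmd_severeset;	/* member of vm_severe_domains */
	/* Laundry, OOM, waiter, and marker fields omitted here. */
};

extern struct vm_domain vm_dom[MAXMEMDOM];

#define	VM_DOMAIN(n)		(&vm_dom[(n)])
#define	vm_domain_free_lock(d)		mtx_lock(&(d)->vmd_free_mtx)
#define	vm_domain_free_unlock(d)	mtx_unlock(&(d)->vmd_free_mtx)
#define	vm_domain_free_assert_locked(d)	\
	mtx_assert(&(d)->vmd_free_mtx, MA_OWNED)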
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -115,8 +115,9 @@
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
-#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
+#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/vm_extern.h>
@@ -131,10 +132,16 @@
*/
struct vm_domain vm_dom[MAXMEMDOM];
-struct mtx_padalign __exclusive_cache_line vm_page_queue_free_mtx;
struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT];
+struct mtx_padalign __exclusive_cache_line vm_domainset_lock;
+domainset_t __exclusive_cache_line vm_min_domains;
+domainset_t __exclusive_cache_line vm_severe_domains;
+static int vm_min_waiters;
+static int vm_severe_waiters;
+static int vm_pageproc_waiters;
+
/*
* bogus page -- for I/O to/from partially complete buffers,
* or for paging into sparsely invalid regions.
@@ -159,24 +166,22 @@
SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages");
-/* Is the page daemon waiting for free pages? */
-static int vm_pageout_pages_needed;
-
static uma_zone_t fakepg_zone;
static void vm_page_alloc_check(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
static void vm_page_enqueue(uint8_t queue, vm_page_t m);
static void vm_page_free_phys(vm_page_t m);
-static void vm_page_free_wakeup(void);
static void vm_page_init(void *dummy);
static int vm_page_insert_after(vm_page_t m, vm_object_t object,
vm_pindex_t pindex, vm_page_t mpred);
static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
vm_page_t mpred);
-static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
- vm_paddr_t high);
-static int vm_page_alloc_fail(vm_object_t object, int req);
+static int vm_page_reclaim_run(int req_class, int domain, u_long npages,
+ vm_page_t m_run, vm_paddr_t high);
+static void vm_domain_free_wakeup(struct vm_domain *);
+static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object,
+ int req);
SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL);
@@ -313,6 +318,7 @@
static void
vm_page_blacklist_check(char *list, char *end)
{
+ struct vm_domain *vmd;
vm_paddr_t pa;
vm_page_t m;
char *next;
@@ -325,9 +331,10 @@
m = vm_phys_paddr_to_vm_page(pa);
if (m == NULL)
continue;
- mtx_lock(&vm_page_queue_free_mtx);
+ vmd = vm_pagequeue_domain(m);
+ vm_domain_free_lock(vmd);
ret = vm_phys_unfree_page(m);
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_unlock(vmd);
if (ret == TRUE) {
TAILQ_INSERT_TAIL(&blacklist_head, m, listq);
if (bootverbose)
@@ -390,28 +397,23 @@
}
static void
-vm_page_domain_init(struct vm_domain *vmd)
+vm_page_domain_init(int domain)
{
+ struct vm_domain *vmd;
struct vm_pagequeue *pq;
int i;
+ vmd = VM_DOMAIN(domain);
+ bzero(vmd, sizeof(*vmd));
*__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) =
"vm inactive pagequeue";
- *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) =
- &vm_cnt.v_inactive_count;
*__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) =
"vm active pagequeue";
- *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) =
- &vm_cnt.v_active_count;
*__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) =
"vm laundry pagequeue";
- *__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) =
- &vm_cnt.v_laundry_count;
*__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) =
"vm unswappable pagequeue";
- /* Unswappable dirty pages are counted as being in the laundry. */
- *__DECONST(int **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_vcnt) =
- &vm_cnt.v_laundry_count;
+ vmd->vmd_domain = domain;
vmd->vmd_page_count = 0;
vmd->vmd_free_count = 0;
vmd->vmd_segs = 0;
@@ -422,6 +424,7 @@
mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
MTX_DEF | MTX_DUPOK);
}
+ mtx_init(&vmd->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF);
}
/*
@@ -458,7 +461,6 @@
vm_offset_t
vm_page_startup(vm_offset_t vaddr)
{
- struct vm_domain *vmd;
struct vm_phys_seg *seg;
vm_page_t m;
char *list, *listend;
@@ -489,11 +491,11 @@
/*
* Initialize the page and queue locks.
*/
- mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF);
+ mtx_init(&vm_domainset_lock, "vm domainset lock", NULL, MTX_DEF);
for (i = 0; i < PA_LOCK_COUNT; i++)
mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF);
for (i = 0; i < vm_ndomains; i++)
- vm_page_domain_init(&vm_dom[i]);
+ vm_page_domain_init(i);
/*
* Almost all of the pages needed for bootstrapping UMA are used
@@ -691,7 +693,6 @@
* physical memory allocator's free lists.
*/
vm_cnt.v_page_count = 0;
- vm_cnt.v_free_count = 0;
for (segind = 0; segind < vm_phys_nsegs; segind++) {
seg = &vm_phys_segs[segind];
for (m = seg->first_page, pa = seg->start; pa < seg->end;
@@ -706,6 +707,8 @@
* or doesn't overlap any of them.
*/
for (i = 0; phys_avail[i + 1] != 0; i += 2) {
+ struct vm_domain *vmd;
+
if (seg->start < phys_avail[i] ||
seg->end > phys_avail[i + 1])
continue;
@@ -713,13 +716,14 @@
m = seg->first_page;
pagecount = (u_long)atop(seg->end - seg->start);
- mtx_lock(&vm_page_queue_free_mtx);
+ vmd = VM_DOMAIN(seg->domain);
+ vm_domain_free_lock(vmd);
vm_phys_free_contig(m, pagecount);
- vm_phys_freecnt_adj(m, (int)pagecount);
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_freecnt_adj(vmd, (int)pagecount);
+ vm_domain_free_unlock(vmd);
vm_cnt.v_page_count += (u_int)pagecount;
- vmd = &vm_dom[seg->domain];
+	vmd = VM_DOMAIN(seg->domain);
vmd->vmd_page_count += (u_int)pagecount;
vmd->vmd_segs |= 1UL << m->segind;
break;
@@ -1644,12 +1648,40 @@
return (m);
}
+/*
+ * Returns true if the number of free pages exceeds the minimum
+ * for the request class and false otherwise.
+ */
+int
+vm_domain_available(struct vm_domain *vmd, int req, int npages)
+{
+
+ vm_domain_free_assert_locked(vmd);
+ req = req & VM_ALLOC_CLASS_MASK;
+
+ /*
+ * The page daemon is allowed to dig deeper into the free page list.
+ */
+ if (curproc == pageproc && req != VM_ALLOC_INTERRUPT)
+ req = VM_ALLOC_SYSTEM;
+
+ if (vmd->vmd_free_count >= npages + vmd->vmd_free_reserved ||
+ (req == VM_ALLOC_SYSTEM &&
+ vmd->vmd_free_count >= npages + vmd->vmd_interrupt_free_min) ||
+ (req == VM_ALLOC_INTERRUPT &&
+ vmd->vmd_free_count >= npages))
+ return (1);
+
+ return (0);
+}
+
vm_page_t
vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
int req, vm_page_t mpred)
{
+ struct vm_domain *vmd;
vm_page_t m;
- int flags, req_class;
+ int flags;
u_int free_count;
KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
@@ -1665,34 +1697,27 @@
if (object != NULL)
VM_OBJECT_ASSERT_WLOCKED(object);
- req_class = req & VM_ALLOC_CLASS_MASK;
-
- /*
- * The page daemon is allowed to dig deeper into the free page list.
- */
- if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
- req_class = VM_ALLOC_SYSTEM;
-
- /*
- * Allocate a page if the number of free pages exceeds the minimum
- * for the request class.
- */
again:
m = NULL;
- mtx_lock(&vm_page_queue_free_mtx);
- if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
- (req_class == VM_ALLOC_SYSTEM &&
- vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) ||
- (req_class == VM_ALLOC_INTERRUPT &&
- vm_cnt.v_free_count > 0)) {
+#if VM_NRESERVLEVEL > 0
+ if (vm_object_reserv(object) &&
+ (m = vm_reserv_extend(req, object, pindex, domain, mpred))
+ != NULL) {
+ domain = vm_phys_domain(m);
+ vmd = VM_DOMAIN(domain);
+ goto found;
+ }
+#endif
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
+ if (vm_domain_available(vmd, req, 1)) {
/*
* Can we allocate the page from a reservation?
*/
#if VM_NRESERVLEVEL > 0
- if (object == NULL || (object->flags & (OBJ_COLORED |
- OBJ_FICTITIOUS)) != OBJ_COLORED || (m =
- vm_reserv_alloc_page(object, pindex, domain,
- mpred)) == NULL)
+ if (!vm_object_reserv(object) ||
+ (m = vm_reserv_alloc_page(object, pindex,
+ domain, mpred)) == NULL)
#endif
{
/*
@@ -1714,7 +1739,7 @@
/*
* Not allocatable, give up.
*/
- if (vm_page_alloc_fail(object, req))
+ if (vm_domain_alloc_fail(vmd, object, req))
goto again;
return (NULL);
}
@@ -1723,8 +1748,18 @@
* At this point we had better have found a good page.
*/
KASSERT(m != NULL, ("missing page"));
- free_count = vm_phys_freecnt_adj(m, -1);
- mtx_unlock(&vm_page_queue_free_mtx);
+ free_count = vm_domain_freecnt_adj(vmd, -1);
+ vm_domain_free_unlock(vmd);
+
+ /*
+ * Don't wakeup too often - wakeup the pageout daemon when
+ * we would be nearly out of memory.
+ */
+ if (vm_paging_needed(vmd, free_count))
+ pagedaemon_wakeup(vmd->vmd_domain);
+#if VM_NRESERVLEVEL > 0
+found:
+#endif
vm_page_alloc_check(m);
/*
@@ -1757,7 +1792,7 @@
if (object != NULL) {
if (vm_page_insert_after(m, object, pindex, mpred)) {
- pagedaemon_wakeup();
+ pagedaemon_wakeup(domain);
if (req & VM_ALLOC_WIRED) {
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
m->wire_count = 0;
@@ -1782,13 +1817,6 @@
} else
m->pindex = pindex;
- /*
- * Don't wakeup too often - wakeup the pageout daemon when
- * we would be nearly out of memory.
- */
- if (vm_paging_needed(free_count))
- pagedaemon_wakeup();
-
return (m);
}
@@ -1856,9 +1884,9 @@
int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_memattr_t memattr)
{
+ struct vm_domain *vmd;
vm_page_t m, m_ret, mpred;
u_int busy_lock, flags, oflags;
- int req_class;
mpred = NULL; /* XXX: pacify gcc */
KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
@@ -1876,14 +1904,7 @@
object));
}
KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
- req_class = req & VM_ALLOC_CLASS_MASK;
- /*
- * The page daemon is allowed to dig deeper into the free page list.
- */
- if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
- req_class = VM_ALLOC_SYSTEM;
-
if (object != NULL) {
mpred = vm_radix_lookup_le(&object->rtree, pindex);
KASSERT(mpred == NULL || mpred->pindex != pindex,
@@ -1895,19 +1916,25 @@
* below the lower bound for the allocation class?
*/
again:
+#if VM_NRESERVLEVEL > 0
+ if (vm_object_reserv(object) &&
+ (m_ret = vm_reserv_extend_contig(req, object, pindex, domain,
+ npages, low, high, alignment, boundary, mpred)) != NULL) {
+ domain = vm_phys_domain(m_ret);
+ vmd = VM_DOMAIN(domain);
+ goto found;
+ }
+#endif
m_ret = NULL;
- mtx_lock(&vm_page_queue_free_mtx);
- if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved ||
- (req_class == VM_ALLOC_SYSTEM &&
- vm_cnt.v_free_count >= npages + vm_cnt.v_interrupt_free_min) ||
- (req_class == VM_ALLOC_INTERRUPT &&
- vm_cnt.v_free_count >= npages)) {
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
+ if (vm_domain_available(vmd, req, npages)) {
/*
* Can we allocate the pages from a reservation?
*/
#if VM_NRESERVLEVEL > 0
retry:
- if (object == NULL || (object->flags & OBJ_COLORED) == 0 ||
+ if (!vm_object_reserv(object) ||
(m_ret = vm_reserv_alloc_contig(object, pindex, domain,
npages, low, high, alignment, boundary, mpred)) == NULL)
#endif
@@ -1923,12 +1950,15 @@
#endif
}
if (m_ret == NULL) {
- if (vm_page_alloc_fail(object, req))
+ if (vm_domain_alloc_fail(vmd, object, req))
goto again;
return (NULL);
}
- vm_phys_freecnt_adj(m_ret, -npages);
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_freecnt_adj(vmd, -npages);
+ vm_domain_free_unlock(vmd);
+#if VM_NRESERVLEVEL > 0
+found:
+#endif
for (m = m_ret; m < &m_ret[npages]; m++)
vm_page_alloc_check(m);
@@ -1964,7 +1994,7 @@
m->oflags = oflags;
if (object != NULL) {
if (vm_page_insert_after(m, object, pindex, mpred)) {
- pagedaemon_wakeup();
+ pagedaemon_wakeup(domain);
if ((req & VM_ALLOC_WIRED) != 0)
atomic_subtract_int(
&vm_cnt.v_wire_count, npages);
@@ -1994,8 +2024,9 @@
pmap_page_set_memattr(m, memattr);
pindex++;
}
- if (vm_paging_needed(vm_cnt.v_free_count))
- pagedaemon_wakeup();
+ vmd = VM_DOMAIN(domain);
+ if (vm_paging_needed(vmd, vmd->vmd_free_count))
+ pagedaemon_wakeup(domain);
return (m_ret);
}
@@ -2057,37 +2088,26 @@
vm_page_t
vm_page_alloc_freelist_domain(int domain, int freelist, int req)
{
+ struct vm_domain *vmd;
vm_page_t m;
u_int flags, free_count;
- int req_class;
- req_class = req & VM_ALLOC_CLASS_MASK;
-
/*
- * The page daemon is allowed to dig deeper into the free page list.
- */
- if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
- req_class = VM_ALLOC_SYSTEM;
-
- /*
* Do not allocate reserved pages unless the req has asked for it.
*/
+ vmd = VM_DOMAIN(domain);
again:
- mtx_lock(&vm_page_queue_free_mtx);
- if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
- (req_class == VM_ALLOC_SYSTEM &&
- vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) ||
- (req_class == VM_ALLOC_INTERRUPT &&
- vm_cnt.v_free_count > 0))
+ vm_domain_free_lock(vmd);
+ if (vm_domain_available(vmd, req, 1))
m = vm_phys_alloc_freelist_pages(domain, freelist,
VM_FREEPOOL_DIRECT, 0);
if (m == NULL) {
- if (vm_page_alloc_fail(NULL, req))
+ if (vm_domain_alloc_fail(vmd, NULL, req))
goto again;
return (NULL);
}
- free_count = vm_phys_freecnt_adj(m, -1);
- mtx_unlock(&vm_page_queue_free_mtx);
+ free_count = vm_domain_freecnt_adj(vmd, -1);
+ vm_domain_free_unlock(vmd);
vm_page_alloc_check(m);
/*
@@ -2108,8 +2128,8 @@
}
/* Unmanaged pages don't use "act_count". */
m->oflags = VPO_UNMANAGED;
- if (vm_paging_needed(free_count))
- pagedaemon_wakeup();
+ if (vm_paging_needed(vmd, free_count))
+ pagedaemon_wakeup(domain);
return (m);
}
@@ -2331,9 +2351,10 @@
* "req_class" must be an allocation class.
*/
static int
-vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
+vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run,
vm_paddr_t high)
{
+ struct vm_domain *vmd;
struct mtx *m_mtx;
struct spglist free;
vm_object_t object;
@@ -2483,7 +2504,9 @@
unlock:
VM_OBJECT_WUNLOCK(object);
} else {
- mtx_lock(&vm_page_queue_free_mtx);
+ MPASS(vm_phys_domain(m) == domain);
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
order = m->order;
if (order < VM_NFREEORDER) {
/*
@@ -2500,7 +2523,7 @@
else if (vm_reserv_is_page_free(m))
order = 0;
#endif
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_unlock(vmd);
if (order == VM_NFREEORDER)
error = EINVAL;
}
@@ -2508,13 +2531,15 @@
if (m_mtx != NULL)
mtx_unlock(m_mtx);
if ((m = SLIST_FIRST(&free)) != NULL) {
- mtx_lock(&vm_page_queue_free_mtx);
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
do {
+ MPASS(vm_phys_domain(m) == domain);
SLIST_REMOVE_HEAD(&free, plinks.s.ss);
vm_page_free_phys(m);
} while ((m = SLIST_FIRST(&free)) != NULL);
- vm_page_free_wakeup();
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_wakeup(vmd);
+ vm_domain_free_unlock(vmd);
}
return (error);
}
@@ -2554,6 +2579,7 @@
vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
+ struct vm_domain *vmd;
vm_paddr_t curr_low;
vm_page_t m_run, m_runs[NRUNS];
u_long count, reclaimed;
@@ -2574,9 +2600,10 @@
* Return if the number of free pages cannot satisfy the requested
* allocation.
*/
- count = vm_cnt.v_free_count;
- if (count < npages + vm_cnt.v_free_reserved || (count < npages +
- vm_cnt.v_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) ||
+ vmd = VM_DOMAIN(domain);
+ count = vmd->vmd_free_count;
+ if (count < npages + vmd->vmd_free_reserved || (count < npages +
+ vmd->vmd_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) ||
(count < npages && req_class == VM_ALLOC_INTERRUPT))
return (false);
@@ -2612,8 +2639,8 @@
for (i = 0; count > 0 && i < NRUNS; i++) {
count--;
m_run = m_runs[RUN_INDEX(count)];
- error = vm_page_reclaim_run(req_class, npages, m_run,
- high);
+ error = vm_page_reclaim_run(req_class, domain, npages,
+ m_run, high);
if (error == 0) {
reclaimed += npages;
if (reclaimed >= MIN_RECLAIM)
@@ -2653,66 +2680,190 @@
return (ret);
}
+/*
+ * Set the domain in the appropriate page level domainset.
+ */
+void
+vm_domain_set(struct vm_domain *vmd)
+{
+
+ mtx_lock(&vm_domainset_lock);
+ if (!vmd->vmd_minset && vm_paging_min(vmd)) {
+ vmd->vmd_minset = 1;
+ DOMAINSET_SET(vmd->vmd_domain, &vm_min_domains);
+ }
+ if (!vmd->vmd_severeset && vm_paging_severe(vmd)) {
+ vmd->vmd_severeset = 1;
+		DOMAINSET_SET(vmd->vmd_domain, &vm_severe_domains);
+ }
+ mtx_unlock(&vm_domainset_lock);
+}
+
/*
- * vm_wait: (also see VM_WAIT macro)
+ * Clear the domain from the appropriate page level domainset.
+ */
+static void
+vm_domain_clear(struct vm_domain *vmd)
+{
+
+ mtx_lock(&vm_domainset_lock);
+ if (vmd->vmd_minset && !vm_paging_min(vmd)) {
+ vmd->vmd_minset = 0;
+ DOMAINSET_CLR(vmd->vmd_domain, &vm_min_domains);
+ if (vm_min_waiters != 0) {
+ vm_min_waiters = 0;
+ wakeup(&vm_min_domains);
+ }
+ }
+ if (vmd->vmd_severeset && !vm_paging_severe(vmd)) {
+ vmd->vmd_severeset = 0;
+ DOMAINSET_CLR(vmd->vmd_domain, &vm_severe_domains);
+ if (vm_severe_waiters != 0) {
+ vm_severe_waiters = 0;
+ wakeup(&vm_severe_domains);
+ }
+ }
+ mtx_unlock(&vm_domainset_lock);
+}
+
+/*
+ * Wait for free pages to exceed the min threshold globally.
+ */
+void
+vm_wait_min(void)
+{
+
+ mtx_lock(&vm_domainset_lock);
+ while (vm_page_count_min()) {
+ vm_min_waiters++;
+ msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", 0);
+ }
+ mtx_unlock(&vm_domainset_lock);
+}
+
+/*
+ * Wait for free pages to exceed the severe threshold globally.
+ */
+void
+vm_wait_severe(void)
+{
+
+ mtx_lock(&vm_domainset_lock);
+ while (vm_page_count_severe()) {
+ vm_severe_waiters++;
+		msleep(&vm_severe_domains, &vm_domainset_lock, PVM, "vmwait", 0);
+ }
+ mtx_unlock(&vm_domainset_lock);
+}
+
+u_int
+vm_wait_count(void)
+{
+ u_int cnt;
+ int i;
+
+ cnt = 0;
+ for (i = 0; i < vm_ndomains; i++)
+ cnt += VM_DOMAIN(i)->vmd_waiters;
+ cnt += vm_severe_waiters + vm_min_waiters;
+
+ return (cnt);
+}
+
+/*
+ * vm_wait_domain:
*
* Sleep until free pages are available for allocation.
- * - Called in various places before memory allocations.
+ * - Called in various places after failed memory allocations.
*/
-static void
-_vm_wait(void)
+void
+vm_wait_domain(int domain)
{
+ struct vm_domain *vmd;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_assert_locked(vmd);
+
if (curproc == pageproc) {
- vm_pageout_pages_needed = 1;
- msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx,
- PDROP | PSWP, "VMWait", 0);
+ vmd->vmd_pageout_pages_needed = 1;
+ msleep(&vmd->vmd_pageout_pages_needed,
+ vm_domain_free_lockptr(vmd), PDROP | PSWP, "VMWait", 0);
} else {
if (pageproc == NULL)
panic("vm_wait in early boot");
- pagedaemon_wait(PVM, "vmwait");
+ pagedaemon_wait(domain, PVM, "vmwait");
}
}
+/*
+ * vm_wait: (also see VM_WAIT macro)
+ *
+ * Sleep until free pages are available for allocation.
+ * - Called in various places after failed memory allocations.
+ */
void
vm_wait(void)
{
- mtx_lock(&vm_page_queue_free_mtx);
- _vm_wait();
+ /*
+	 * We use racy wakeup synchronization to avoid expensive global
+ * locking for the pageproc when sleeping with a non-specific vm_wait.
+ * To handle this, we only sleep for one tick in this instance. It
+ * is expected that most allocations for the pageproc will come from
+ * kmem or vm_page_grab* which will use the more specific and
+ * race-free vm_wait_domain().
+ */
+ if (curproc == pageproc) {
+ mtx_lock(&vm_domainset_lock);
+ vm_pageproc_waiters++;
+ msleep(&vm_pageproc_waiters, &vm_domainset_lock, PVM,
+ "pageprocwait", 1);
+ mtx_unlock(&vm_domainset_lock);
+ } else {
+ /*
+ * XXX Ideally we would wait only until the allocation could
+ * be satisfied. This condition can cause new allocators to
+ * consume all freed pages while old allocators wait.
+ */
+ mtx_lock(&vm_domainset_lock);
+ if (vm_page_count_min()) {
+ vm_min_waiters++;
+ msleep(&vm_min_domains, &vm_domainset_lock, PVM,
+ "vmwait", 0);
+ }
+ mtx_unlock(&vm_domainset_lock);
+ }
}
/*
- * vm_page_alloc_fail:
+ * vm_domain_alloc_fail:
*
* Called when a page allocation function fails. Informs the
* pagedaemon and performs the requested wait. Requires the
- * page_queue_free and object lock on entry. Returns with the
+ * domain_free and object lock on entry. Returns with the
* object lock held and free lock released. Returns an error when
* retry is necessary.
*
*/
static int
-vm_page_alloc_fail(vm_object_t object, int req)
+vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(vmd);
- atomic_add_int(&vm_pageout_deficit,
+ atomic_add_int(&vmd->vmd_pageout_deficit,
max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) {
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
- _vm_wait();
+ vm_wait_domain(vmd->vmd_domain);
if (object != NULL)
VM_OBJECT_WLOCK(object);
if (req & VM_ALLOC_WAITOK)
return (EAGAIN);
} else {
- mtx_unlock(&vm_page_queue_free_mtx);
- pagedaemon_wakeup();
+ vm_domain_free_unlock(vmd);
+ pagedaemon_wakeup(vmd->vmd_domain);
}
return (0);
}
@@ -2731,18 +2882,19 @@
vm_waitpfault(void)
{
- mtx_lock(&vm_page_queue_free_mtx);
- pagedaemon_wait(PUSER, "pfault");
+ mtx_lock(&vm_domainset_lock);
+ if (vm_page_count_min()) {
+ vm_min_waiters++;
+ msleep(&vm_min_domains, &vm_domainset_lock, PUSER, "pfault", 0);
+ }
+ mtx_unlock(&vm_domainset_lock);
}
struct vm_pagequeue *
vm_page_pagequeue(vm_page_t m)
{
- if (vm_page_in_laundry(m))
- return (&vm_dom[0].vmd_pagequeues[m->queue]);
- else
- return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
+ return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]);
}
/*
@@ -2804,10 +2956,7 @@
KASSERT(queue < PQ_COUNT,
("vm_page_enqueue: invalid queue %u request for page %p",
queue, m));
- if (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE)
- pq = &vm_dom[0].vmd_pagequeues[queue];
- else
- pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
+ pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
vm_pagequeue_lock(pq);
m->queue = queue;
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
@@ -2889,7 +3038,7 @@
}
/*
- * vm_page_free_wakeup:
+ * vm_domain_free_wakeup:
*
* Helper routine for vm_page_free_toq(). This routine is called
* when a page is added to the free queues.
@@ -2897,28 +3046,39 @@
* The page queues must be locked.
*/
static void
-vm_page_free_wakeup(void)
+vm_domain_free_wakeup(struct vm_domain *vmd)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(vmd);
+
/*
* if pageout daemon needs pages, then tell it that there are
* some free.
*/
- if (vm_pageout_pages_needed &&
- vm_cnt.v_free_count >= vm_cnt.v_pageout_free_min) {
- wakeup(&vm_pageout_pages_needed);
- vm_pageout_pages_needed = 0;
+ if (vmd->vmd_pageout_pages_needed &&
+ vmd->vmd_free_count >= vmd->vmd_pageout_free_min) {
+ wakeup(&vmd->vmd_pageout_pages_needed);
+ vmd->vmd_pageout_pages_needed = 0;
}
/*
* wakeup processes that are waiting on memory if we hit a
* high water mark. And wakeup scheduler process if we have
* lots of memory. this process will swapin processes.
*/
- if (vm_pages_needed && !vm_page_count_min()) {
- vm_pages_needed = false;
- wakeup(&vm_cnt.v_free_count);
+ if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
+ vmd->vmd_pages_needed = false;
+ wakeup(&vmd->vmd_free_count);
}
+ if ((vmd->vmd_minset && !vm_paging_min(vmd)) ||
+ (vmd->vmd_severeset && !vm_paging_severe(vmd)))
+ vm_domain_clear(vmd);
+
+	/* See comments in vm_wait(). */
+ if (vm_pageproc_waiters) {
+ vm_pageproc_waiters = 0;
+ wakeup(&vm_pageproc_waiters);
+ }
+
}
/*
@@ -3008,9 +3168,9 @@
vm_page_free_phys(vm_page_t m)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(vm_pagequeue_domain(m));
- vm_phys_freecnt_adj(m, 1);
+ vm_domain_freecnt_adj(vm_pagequeue_domain(m), 1);
#if VM_NRESERVLEVEL > 0
if (!vm_reserv_free_page(m))
#endif
@@ -3020,15 +3180,27 @@
void
vm_page_free_phys_pglist(struct pglist *tq)
{
+ struct vm_domain *vmd;
vm_page_t m;
if (TAILQ_EMPTY(tq))
return;
- mtx_lock(&vm_page_queue_free_mtx);
- TAILQ_FOREACH(m, tq, listq)
+ vmd = NULL;
+ TAILQ_FOREACH(m, tq, listq) {
+ if (vmd != vm_pagequeue_domain(m)) {
+ if (vmd != NULL) {
+ vm_domain_free_wakeup(vmd);
+ vm_domain_free_unlock(vmd);
+ }
+ vmd = vm_pagequeue_domain(m);
+ vm_domain_free_lock(vmd);
+ }
vm_page_free_phys(m);
- vm_page_free_wakeup();
- mtx_unlock(&vm_page_queue_free_mtx);
+ }
+ if (vmd != NULL) {
+ vm_domain_free_wakeup(vmd);
+ vm_domain_free_unlock(vmd);
+ }
}
/*
@@ -3043,13 +3215,15 @@
void
vm_page_free_toq(vm_page_t m)
{
+ struct vm_domain *vmd;
if (!vm_page_free_prep(m, false))
return;
- mtx_lock(&vm_page_queue_free_mtx);
+ vmd = vm_pagequeue_domain(m);
+ vm_domain_free_lock(vmd);
vm_page_free_phys(m);
- vm_page_free_wakeup();
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_wakeup(vmd);
+ vm_domain_free_unlock(vmd);
}
/*
@@ -3160,7 +3334,7 @@
if ((queue = m->queue) == PQ_INACTIVE && !noreuse)
return;
if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
- pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE];
+ pq = &vm_pagequeue_domain(m)->vmd_pagequeues[PQ_INACTIVE];
/* Avoid multiple acquisitions of the inactive queue lock. */
if (queue == PQ_INACTIVE) {
vm_pagequeue_lock(pq);
@@ -3172,8 +3346,9 @@
}
m->queue = PQ_INACTIVE;
if (noreuse)
- TAILQ_INSERT_BEFORE(&vm_phys_domain(m)->vmd_inacthead,
- m, plinks.q);
+ TAILQ_INSERT_BEFORE(
+ &vm_pagequeue_domain(m)->vmd_inacthead, m,
+ plinks.q);
else
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
vm_pagequeue_cnt_inc(pq);
@@ -3950,10 +4125,10 @@
DB_SHOW_COMMAND(page, vm_page_print_page_info)
{
- db_printf("vm_cnt.v_free_count: %d\n", vm_cnt.v_free_count);
- db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count);
- db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count);
- db_printf("vm_cnt.v_laundry_count: %d\n", vm_cnt.v_laundry_count);
+ db_printf("vm_cnt.v_free_count: %d\n", vm_free_count());
+ db_printf("vm_cnt.v_inactive_count: %d\n", vm_inactive_count());
+ db_printf("vm_cnt.v_active_count: %d\n", vm_active_count());
+ db_printf("vm_cnt.v_laundry_count: %d\n", vm_laundry_count());
db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count);
db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved);
db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min);
@@ -3965,7 +4140,7 @@
{
int dom;
- db_printf("pq_free %d\n", vm_cnt.v_free_count);
+ db_printf("pq_free %d\n", vm_free_count());
for (dom = 0; dom < vm_ndomains; dom++) {
db_printf(
"dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n",
Index: sys/vm/vm_pageout.h
===================================================================
--- sys/vm/vm_pageout.h
+++ sys/vm/vm_pageout.h
@@ -74,9 +74,7 @@
*/
extern int vm_page_max_wired;
-extern int vm_pageout_deficit;
extern int vm_pageout_page_count;
-extern bool vm_pages_needed;
#define VM_OOM_MEM 1
#define VM_OOM_SWAPZ 2
@@ -95,12 +93,15 @@
* Signal pageout-daemon and wait for it.
*/
-void pagedaemon_wait(int pri, const char *wmesg);
-void pagedaemon_wakeup(void);
+void pagedaemon_wait(int domain, int pri, const char *wmesg);
+void pagedaemon_wakeup(int domain);
#define VM_WAIT vm_wait()
#define VM_WAITPFAULT vm_waitpfault()
void vm_wait(void);
void vm_waitpfault(void);
+void vm_wait_domain(int domain);
+void vm_wait_min(void);
+void vm_wait_severe(void);
#ifdef _KERNEL
int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *);
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -110,6 +110,7 @@
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
@@ -147,20 +148,8 @@
#define VM_LAUNDER_RATE 10
#define VM_INACT_SCAN_RATE 2
-int vm_pageout_deficit; /* Estimated number of pages deficit */
-u_int vm_pageout_wakeup_thresh;
static int vm_pageout_oom_seq = 12;
-static bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */
-bool vm_pages_needed; /* Are threads waiting for free pages? */
-/* Pending request for dirty page laundering. */
-static enum {
- VM_LAUNDRY_IDLE,
- VM_LAUNDRY_BACKGROUND,
- VM_LAUNDRY_SHORTFALL
-} vm_laundry_request = VM_LAUNDRY_IDLE;
-static int vm_inactq_scans;
-
static int vm_pageout_update_period;
static int disable_swap_pageouts;
static int lowmem_period = 10;
@@ -173,10 +162,6 @@
CTLFLAG_RWTUN, &vm_panic_on_oom, 0,
"panic on out of memory instead of killing the largest process");
-SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh,
- CTLFLAG_RWTUN, &vm_pageout_wakeup_thresh, 0,
- "free page threshold for waking up the pageout daemon");
-
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
CTLFLAG_RWTUN, &vm_pageout_update_period, 0,
"Maximum active LRU update period");
@@ -200,11 +185,6 @@
&act_scan_laundry_weight, 0,
"weight given to clean vs. dirty pages in active queue scans");
-static u_int vm_background_launder_target;
-SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RWTUN,
- &vm_background_launder_target, 0,
- "background laundering target, in pages");
-
static u_int vm_background_launder_rate = 4096;
SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN,
&vm_background_launder_rate, 0,
@@ -959,18 +939,18 @@
static void
vm_pageout_laundry_worker(void *arg)
{
- struct vm_domain *domain;
+ struct vm_domain *vmd;
struct vm_pagequeue *pq;
uint64_t nclean, ndirty;
u_int inactq_scans, last_launder;
- int domidx, last_target, launder, shortfall, shortfall_cycle, target;
+ int domain, last_target, launder, shortfall, shortfall_cycle, target;
bool in_shortfall;
- domidx = (uintptr_t)arg;
- domain = &vm_dom[domidx];
- pq = &domain->vmd_pagequeues[PQ_LAUNDRY];
- KASSERT(domain->vmd_segs != 0, ("domain without segments"));
- vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY);
+ domain = (uintptr_t)arg;
+ vmd = VM_DOMAIN(domain);
+ pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
+ KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
+ vm_pageout_init_marker(&vmd->vmd_laundry_marker, PQ_LAUNDRY);
shortfall = 0;
in_shortfall = false;
@@ -982,9 +962,9 @@
/*
* Calls to these handlers are serialized by the swap syscall lock.
*/
- (void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, domain,
+ (void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, vmd,
EVENTHANDLER_PRI_ANY);
- (void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, domain,
+ (void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, vmd,
EVENTHANDLER_PRI_ANY);
/*
@@ -1006,7 +986,7 @@
target = shortfall;
} else if (!in_shortfall)
goto trybackground;
- else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) {
+ else if (shortfall_cycle == 0 || vm_laundry_target(vmd) <= 0) {
/*
* We recently entered shortfall and began laundering
* pages. If we have completed that laundering run
@@ -1040,11 +1020,12 @@
* memory pressure required to trigger laundering decreases.
*/
trybackground:
- nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count;
- ndirty = vm_cnt.v_laundry_count;
+ nclean = vmd->vmd_free_count +
+ vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt;
+ ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
if (target == 0 && inactq_scans != last_launder &&
ndirty * isqrt(inactq_scans - last_launder) >= nclean) {
- target = vm_background_launder_target;
+ target = vmd->vmd_background_launder_target;
}
/*
@@ -1076,7 +1057,7 @@
* pages could exceed "target" by the maximum size of
* a cluster minus one.
*/
- target -= min(vm_pageout_launder(domain, launder,
+ target -= min(vm_pageout_launder(vmd, launder,
in_shortfall), target);
pause("laundp", hz / VM_LAUNDER_RATE);
}
@@ -1087,8 +1068,8 @@
* kicks us.
*/
vm_pagequeue_lock(pq);
- if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE)
- (void)mtx_sleep(&vm_laundry_request,
+ if (target == 0 && vmd->vmd_laundry_request == VM_LAUNDRY_IDLE)
+ (void)mtx_sleep(&vmd->vmd_laundry_request,
vm_pagequeue_lockptr(pq), PVM, "launds", 0);
/*
@@ -1096,16 +1077,17 @@
* a shortfall laundering unless we're already in the middle of
* one. This may preempt a background laundering.
*/
- if (vm_laundry_request == VM_LAUNDRY_SHORTFALL &&
+ if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL &&
(!in_shortfall || shortfall_cycle == 0)) {
- shortfall = vm_laundry_target() + vm_pageout_deficit;
+ shortfall = vm_laundry_target(vmd) +
+ vmd->vmd_pageout_deficit;
target = 0;
} else
shortfall = 0;
if (target == 0)
- vm_laundry_request = VM_LAUNDRY_IDLE;
- inactq_scans = vm_inactq_scans;
+ vmd->vmd_laundry_request = VM_LAUNDRY_IDLE;
+ inactq_scans = vmd->vmd_inactq_scans;
vm_pagequeue_unlock(pq);
}
}
@@ -1134,7 +1116,7 @@
* If we need to reclaim memory ask kernel caches to return
* some. We rate limit to avoid thrashing.
*/
- if (vmd == &vm_dom[0] && pass > 0 &&
+ if (vmd == VM_DOMAIN(0) && pass > 0 &&
(time_uptime - lowmem_uptime) >= lowmem_period) {
/*
* Decrease registered cache sizes.
@@ -1163,8 +1145,8 @@
* the page daemon and this calculation.
*/
if (pass > 0) {
- deficit = atomic_readandclear_int(&vm_pageout_deficit);
- page_shortage = vm_paging_target() + deficit;
+ deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
+ page_shortage = vm_paging_target(vmd) + deficit;
} else
page_shortage = deficit = 0;
starting_page_shortage = page_shortage;
@@ -1357,18 +1339,20 @@
* keep count.
*/
if (starting_page_shortage > 0) {
- pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY];
+ pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
vm_pagequeue_lock(pq);
- if (vm_laundry_request == VM_LAUNDRY_IDLE &&
+ if (vmd->vmd_laundry_request == VM_LAUNDRY_IDLE &&
(pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) {
if (page_shortage > 0) {
- vm_laundry_request = VM_LAUNDRY_SHORTFALL;
+ vmd->vmd_laundry_request = VM_LAUNDRY_SHORTFALL;
VM_CNT_INC(v_pdshortfalls);
- } else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL)
- vm_laundry_request = VM_LAUNDRY_BACKGROUND;
- wakeup(&vm_laundry_request);
+ } else if (vmd->vmd_laundry_request !=
+ VM_LAUNDRY_SHORTFALL)
+ vmd->vmd_laundry_request =
+ VM_LAUNDRY_BACKGROUND;
+ wakeup(&vmd->vmd_laundry_request);
}
- vm_inactq_scans++;
+ vmd->vmd_inactq_scans++;
vm_pagequeue_unlock(pq);
}
@@ -1397,9 +1381,9 @@
* more aggressively, improving the effectiveness of clustering and
* ensuring that they can eventually be reused.
*/
- inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count +
- vm_cnt.v_laundry_count / act_scan_laundry_weight) +
- vm_paging_target() + deficit + addl_page_shortage;
+ inactq_shortage = vmd->vmd_inactive_target - (pq->pq_cnt +
+ vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight) +
+ vm_paging_target(vmd) + deficit + addl_page_shortage;
inactq_shortage *= act_scan_laundry_weight;
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
@@ -1742,6 +1726,8 @@
}
sx_sunlock(&allproc_lock);
if (bigproc != NULL) {
+ int i;
+
if (vm_panic_on_oom != 0)
panic("out of swap space");
PROC_LOCK(bigproc);
@@ -1749,19 +1735,20 @@
sched_nice(bigproc, PRIO_MIN);
_PRELE(bigproc);
PROC_UNLOCK(bigproc);
- wakeup(&vm_cnt.v_free_count);
+ for (i = 0; i < vm_ndomains; i++)
+ wakeup(&VM_DOMAIN(i)->vmd_free_count);
}
}
static void
vm_pageout_worker(void *arg)
{
- struct vm_domain *domain;
- int domidx, pass;
+ struct vm_domain *vmd;
+ int domain, pass;
bool target_met;
- domidx = (uintptr_t)arg;
- domain = &vm_dom[domidx];
+ domain = (uintptr_t)arg;
+ vmd = VM_DOMAIN(domain);
pass = 0;
target_met = true;
@@ -1771,18 +1758,18 @@
* is allocated.
*/
- KASSERT(domain->vmd_segs != 0, ("domain without segments"));
- domain->vmd_last_active_scan = ticks;
- vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE);
- vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE);
- TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl,
- &domain->vmd_inacthead, plinks.q);
+ KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
+ vmd->vmd_last_active_scan = ticks;
+ vm_pageout_init_marker(&vmd->vmd_marker, PQ_INACTIVE);
+ vm_pageout_init_marker(&vmd->vmd_inacthead, PQ_INACTIVE);
+ TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_INACTIVE].pq_pl,
+ &vmd->vmd_inacthead, plinks.q);
/*
* The pageout daemon worker is never done, so loop forever.
*/
while (TRUE) {
- mtx_lock(&vm_page_queue_free_mtx);
+ vm_domain_free_lock(vmd);
/*
* Generally, after a level >= 1 scan, if there are enough
@@ -1796,34 +1783,34 @@
* thread will, nonetheless, wait until another page is freed
* or this wakeup is performed.
*/
- if (vm_pages_needed && !vm_page_count_min()) {
- vm_pages_needed = false;
- wakeup(&vm_cnt.v_free_count);
+ if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
+ vmd->vmd_pages_needed = false;
+ wakeup(&vmd->vmd_free_count);
}
/*
- * Do not clear vm_pageout_wanted until we reach our free page
+ * Do not clear vmd_pageout_wanted until we reach our free page
* target. Otherwise, we may be awakened over and over again,
* wasting CPU time.
*/
- if (vm_pageout_wanted && target_met)
- vm_pageout_wanted = false;
+ if (vmd->vmd_pageout_wanted && target_met)
+ vmd->vmd_pageout_wanted = false;
/*
* Might the page daemon receive a wakeup call?
*/
- if (vm_pageout_wanted) {
+ if (vmd->vmd_pageout_wanted) {
/*
- * No. Either vm_pageout_wanted was set by another
+ * No. Either vmd_pageout_wanted was set by another
* thread during the previous scan, which must have
- * been a level 0 scan, or vm_pageout_wanted was
+ * been a level 0 scan, or vmd_pageout_wanted was
* already set and the scan failed to free enough
* pages. If we haven't yet performed a level >= 1
* (page reclamation) scan, then increase the level
* and scan again now. Otherwise, sleep a bit and
* try again later.
*/
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_unlock(vmd);
if (pass >= 1)
pause("pwait", hz / VM_INACT_SCAN_RATE);
pass++;
@@ -1834,20 +1821,20 @@
* sleep until the next wakeup or until pages need to
* have their reference stats updated.
*/
- if (vm_pages_needed) {
- mtx_unlock(&vm_page_queue_free_mtx);
+ if (vmd->vmd_pages_needed) {
+ vm_domain_free_unlock(vmd);
if (pass == 0)
pass++;
- } else if (mtx_sleep(&vm_pageout_wanted,
- &vm_page_queue_free_mtx, PDROP | PVM, "psleep",
- hz) == 0) {
+ } else if (mtx_sleep(&vmd->vmd_pageout_wanted,
+ vm_domain_free_lockptr(vmd), PDROP | PVM,
+ "psleep", hz) == 0) {
VM_CNT_INC(v_pdwakeups);
pass = 1;
} else
pass = 0;
}
- target_met = vm_pageout_scan(domain, pass);
+ target_met = vm_pageout_scan(vmd, pass);
}
}
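
With the global free-page mutex split up, each worker sleeps on its own domain's vmd_pageout_wanted channel under that domain's free lock, so pressure in one domain no longer disturbs every pageout thread. A sketch of the advisory wakeup pattern used elsewhere in this diff (the helper name is hypothetical; compare the vm_reserv_extend() hunk below):

static void
maybe_wake_pagedaemon(int domain, u_int free_count)
{
	struct vm_domain *vmd;

	vmd = VM_DOMAIN(domain);
	/* Advisory: wakes only this domain's pageout worker. */
	if (vm_paging_needed(vmd, free_count))
		pagedaemon_wakeup(domain);
}
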
@@ -1855,43 +1842,78 @@
* vm_pageout_init initialises basic pageout daemon settings.
*/
static void
-vm_pageout_init(void)
+vm_pageout_init_domain(int domain)
{
- /*
- * Initialize some paging parameters.
- */
- vm_cnt.v_interrupt_free_min = 2;
- if (vm_cnt.v_page_count < 2000)
- vm_pageout_page_count = 8;
+ struct vm_domain *vmd;
+ vmd = VM_DOMAIN(domain);
+ vmd->vmd_interrupt_free_min = 2;
+
/*
* v_free_reserved needs to include enough for the largest
* swap pager structures plus enough for any pv_entry structs
* when paging.
*/
- if (vm_cnt.v_page_count > 1024)
- vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200;
+ if (vmd->vmd_page_count > 1024)
+ vmd->vmd_free_min = 4 + (vmd->vmd_page_count - 1024) / 200;
else
- vm_cnt.v_free_min = 4;
- vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
- vm_cnt.v_interrupt_free_min;
- vm_cnt.v_free_reserved = vm_pageout_page_count +
- vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768);
- vm_cnt.v_free_severe = vm_cnt.v_free_min / 2;
- vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved;
- vm_cnt.v_free_min += vm_cnt.v_free_reserved;
- vm_cnt.v_free_severe += vm_cnt.v_free_reserved;
- vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2;
- if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3)
- vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3;
+ vmd->vmd_free_min = 4;
+ vmd->vmd_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
+ vmd->vmd_interrupt_free_min;
+ vmd->vmd_free_reserved = vm_pageout_page_count +
+ vmd->vmd_pageout_free_min + (vmd->vmd_page_count / 768);
+ vmd->vmd_free_severe = vmd->vmd_free_min / 2;
+ vmd->vmd_free_target = 4 * vmd->vmd_free_min + vmd->vmd_free_reserved;
+ vmd->vmd_free_min += vmd->vmd_free_reserved;
+ vmd->vmd_free_severe += vmd->vmd_free_reserved;
+ vmd->vmd_inactive_target = (3 * vmd->vmd_free_target) / 2;
+ if (vmd->vmd_inactive_target > vmd->vmd_free_count / 3)
+ vmd->vmd_inactive_target = vmd->vmd_free_count / 3;
/*
* Set the default wakeup threshold to be 10% above the minimum
* page limit. This keeps the steady state out of shortfall.
*/
- vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11;
+ vmd->vmd_pageout_wakeup_thresh = (vmd->vmd_free_min / 10) * 11;
/*
+ * Target amount of memory to move out of the laundry queue during a
+ * background laundering. This is proportional to the amount of system
+ * memory.
+ */
+ vmd->vmd_background_launder_target = (vmd->vmd_free_target -
+ vmd->vmd_free_min) / 10;
+}
+
+static void
+vm_pageout_init(void)
+{
+ u_int freecount;
+ int i;
+
+ /*
+ * Initialize some paging parameters.
+ */
+ if (vm_cnt.v_page_count < 2000)
+ vm_pageout_page_count = 8;
+
+ freecount = 0;
+ for (i = 0; i < vm_ndomains; i++) {
+ struct vm_domain *vmd;
+
+ vm_pageout_init_domain(i);
+ vmd = VM_DOMAIN(i);
+ vm_cnt.v_free_reserved += vmd->vmd_free_reserved;
+ vm_cnt.v_free_target += vmd->vmd_free_target;
+ vm_cnt.v_free_min += vmd->vmd_free_min;
+ vm_cnt.v_inactive_target += vmd->vmd_inactive_target;
+ vm_cnt.v_pageout_free_min += vmd->vmd_pageout_free_min;
+ vm_cnt.v_interrupt_free_min += vmd->vmd_interrupt_free_min;
+ vm_cnt.v_free_severe += vmd->vmd_free_severe;
+ freecount += vmd->vmd_free_count;
+ }
+
+ /*
* Set interval in seconds for active scan. We want to visit each
* page at least once every ten minutes. This is to prevent worst
* case paging behaviors with stale active LRU.
@@ -1899,17 +1921,8 @@
if (vm_pageout_update_period == 0)
vm_pageout_update_period = 600;
- /* XXX does not really belong here */
if (vm_page_max_wired == 0)
- vm_page_max_wired = vm_cnt.v_free_count / 3;
-
- /*
- * Target amount of memory to move out of the laundry queue during a
- * background laundering. This is proportional to the amount of system
- * memory.
- */
- vm_background_launder_target = (vm_cnt.v_free_target -
- vm_cnt.v_free_min) / 10;
+ vm_page_max_wired = freecount / 3;
}
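
To make vm_pageout_init_domain() concrete, consider a hypothetical domain with vmd_page_count = 1048576 (4 GB of 4 KB pages), assuming MAXBSIZE is 65536 and vm_pageout_page_count keeps its usual value of 32 (it drops to 8 only on systems with fewer than 2000 pages, per the hunk above):

	vmd_free_min (initial)        = 4 + (1048576 - 1024) / 200 = 5241
	vmd_pageout_free_min          = (2 * 65536) / 4096 + 2     = 34
	vmd_free_reserved             = 32 + 34 + 1048576 / 768    = 1431
	vmd_free_target               = 4 * 5241 + 1431            = 22395
	vmd_free_min (final)          = 5241 + 1431                = 6672
	vmd_free_severe               = 5241 / 2 + 1431            = 4051
	vmd_pageout_wakeup_thresh     = (6672 / 10) * 11           = 7337
	vmd_background_launder_target = (22395 - 6672) / 10        = 1572

vm_pageout_init() then sums these per-domain values back into vm_cnt, so the legacy global counters remain meaningful.
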
/*
@@ -1933,6 +1946,12 @@
panic("starting pageout for domain %d, error %d\n",
i, error);
}
+ error = kthread_add(vm_pageout_laundry_worker,
+ (void *)(uintptr_t)i, curproc, NULL, 0, 0,
+ "laundry: dom%d", i);
+ if (error != 0)
+ panic("starting laundry for domain %d, error %d",
+ i, error);
}
error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL,
0, 0, "uma");
@@ -1945,14 +1964,16 @@
* Perform an advisory wakeup of the page daemon.
*/
void
-pagedaemon_wakeup(void)
+pagedaemon_wakeup(int domain)
{
+ struct vm_domain *vmd;
- mtx_assert(&vm_page_queue_free_mtx, MA_NOTOWNED);
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_assert_unlocked(vmd);
- if (!vm_pageout_wanted && curthread->td_proc != pageproc) {
- vm_pageout_wanted = true;
- wakeup(&vm_pageout_wanted);
+ if (!vmd->vmd_pageout_wanted && curthread->td_proc != pageproc) {
+ vmd->vmd_pageout_wanted = true;
+ wakeup(&vmd->vmd_pageout_wanted);
}
}
@@ -1962,22 +1983,26 @@
* This function returns with the free queues mutex unlocked.
*/
void
-pagedaemon_wait(int pri, const char *wmesg)
+pagedaemon_wait(int domain, int pri, const char *wmesg)
{
+ struct vm_domain *vmd;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_assert_locked(vmd);
/*
- * vm_pageout_wanted may have been set by an advisory wakeup, but if the
- * page daemon is running on a CPU, the wakeup will have been lost.
+ * vmd_pageout_wanted may have been set by an advisory wakeup, but if
+ * the page daemon is running on a CPU, the wakeup will have been lost.
* Thus, deliver a potentially spurious wakeup to ensure that the page
* daemon has been notified of the shortage.
*/
- if (!vm_pageout_wanted || !vm_pages_needed) {
- vm_pageout_wanted = true;
- wakeup(&vm_pageout_wanted);
+ if (!vmd->vmd_pageout_wanted || !vmd->vmd_pages_needed) {
+ vmd->vmd_pageout_wanted = true;
+ wakeup(&vmd->vmd_pageout_wanted);
}
- vm_pages_needed = true;
- msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | pri,
+ vmd->vmd_pages_needed = true;
+ vmd->vmd_waiters++;
+ msleep(&vmd->vmd_free_count, vm_domain_free_lockptr(vmd), PDROP | pri,
wmesg, 0);
+ vmd->vmd_waiters--;
}
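
pagedaemon_wakeup() and pagedaemon_wait() together give allocators a per-domain slow path: the waiter sleeps on the domain's vmd_free_count and is released by that domain's worker (or the OOM handler) alone. An illustrative caller, with alloc_retry() being a hypothetical name (the real allocation slow path lives in vm_page.c, outside this excerpt):

static void
alloc_retry(int domain)
{
	struct vm_domain *vmd;

	vmd = VM_DOMAIN(domain);
	vm_domain_free_lock(vmd);
	/*
	 * Wakes the domain's pageout worker if needed, then sleeps on
	 * vmd_free_count; PDROP releases the domain free lock.
	 */
	pagedaemon_wait(domain, PVM, "vmwait");
}
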
Index: sys/vm/vm_pagequeue.h
===================================================================
--- sys/vm/vm_pagequeue.h
+++ sys/vm/vm_pagequeue.h
@@ -0,0 +1,235 @@
+/*-
+ * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
+ *
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VM_PAGEQUEUE_
+#define _VM_PAGEQUEUE_
+
+#ifdef _KERNEL
+struct vm_pagequeue {
+ struct mtx pq_mutex;
+ struct pglist pq_pl;
+ int pq_cnt;
+ const char * const pq_name;
+} __aligned(CACHE_LINE_SIZE);
+
+struct vm_domain {
+ struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
+ struct mtx_padalign vmd_free_mtx;
+ struct vmem *vmd_kernel_arena;
+ u_int vmd_domain; /* Domain number. */
+ u_int vmd_page_count;
+ long vmd_segs; /* bitmask of the segments */
+
+ /* Paging control variables, locked by vmd_free_mtx. */
+ u_int vmd_free_count;
+ boolean_t vmd_oom;
+ int vmd_oom_seq;
+ int vmd_last_active_scan;
+ struct vm_page vmd_laundry_marker;
+ struct vm_page vmd_marker; /* marker for pagedaemon private use */
+ struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */
+
+ int vmd_pageout_pages_needed; /* page daemon waiting for pages? */
+ int vmd_pageout_deficit; /* Estimated number of pages deficit */
+ int vmd_waiters; /* Pageout waiters. */
+ bool vmd_pages_needed; /* Are threads waiting for free pages? */
+ bool vmd_pageout_wanted; /* pageout daemon wait channel */
+ bool vmd_minset; /* Are we in vm_min_domains? */
+ bool vmd_severeset; /* Are we in vm_severe_domains? */
+ int vmd_inactq_scans;
+ enum {
+ VM_LAUNDRY_IDLE = 0,
+ VM_LAUNDRY_BACKGROUND,
+ VM_LAUNDRY_SHORTFALL
+ } vmd_laundry_request;
+
+ /* Paging thresholds. */
+ u_int vmd_background_launder_target;
+ u_int vmd_free_reserved; /* (c) pages reserved for deadlock */
+ u_int vmd_free_target; /* (c) pages desired free */
+ u_int vmd_free_min; /* (c) minimum pages desired free */
+ u_int vmd_inactive_target; /* (c) pages desired inactive */
+ u_int vmd_pageout_free_min; /* (c) min pages reserved for kernel */
+ u_int vmd_pageout_wakeup_thresh;/* (c) min pages to wake pagedaemon */
+ u_int vmd_interrupt_free_min; /* (c) reserved pages for int code */
+ u_int vmd_free_severe; /* (c) severe page depletion point */
+} __aligned(CACHE_LINE_SIZE);
+
+extern struct vm_domain vm_dom[MAXMEMDOM];
+
+#define VM_DOMAIN(n) (&vm_dom[(n)])
+
+#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
+#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
+#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
+#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
+
+#define vm_domain_free_assert_locked(n) \
+ mtx_assert(vm_domain_free_lockptr((n)), MA_OWNED)
+#define vm_domain_free_assert_unlocked(n) \
+ mtx_assert(vm_domain_free_lockptr((n)), MA_NOTOWNED)
+#define vm_domain_free_lock(d) \
+ mtx_lock(vm_domain_free_lockptr((d)))
+#define vm_domain_free_lockptr(d) \
+ (&(d)->vmd_free_mtx)
+#define vm_domain_free_unlock(d) \
+ mtx_unlock(vm_domain_free_lockptr((d)))
+
+static __inline void
+vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
+{
+
+#ifdef notyet
+ vm_pagequeue_assert_locked(pq);
+#endif
+ pq->pq_cnt += addend;
+}
+#define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1)
+#define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1)
+
+void vm_domain_set(struct vm_domain *vmd);
+int vm_domain_available(struct vm_domain *vmd, int req, int npages);
+
+/*
+ * vm_pagequeue_domain:
+ *
+ * Return the memory domain the page belongs to.
+ */
+static inline struct vm_domain *
+vm_pagequeue_domain(vm_page_t m)
+{
+
+ return (VM_DOMAIN(vm_phys_domain(m)));
+}
+
+/*
+ * Return the number of pages we need to free to reach the free target.
+ * A positive number indicates that we do not have enough free pages.
+ */
+static inline int
+vm_paging_target(struct vm_domain *vmd)
+{
+
+ return (vmd->vmd_free_target - vmd->vmd_free_count);
+}
+
+/*
+ * Returns TRUE if the pagedaemon needs to be woken up.
+ */
+static inline int
+vm_paging_needed(struct vm_domain *vmd, u_int free_count)
+{
+
+ return (free_count < vmd->vmd_pageout_wakeup_thresh);
+}
+
+/*
+ * Returns TRUE if the domain is below the min paging target.
+ */
+static inline int
+vm_paging_min(struct vm_domain *vmd)
+{
+
+ return (vmd->vmd_free_min > vmd->vmd_free_count);
+}
+
+/*
+ * Returns TRUE if the domain is below the severe paging target.
+ */
+static inline int
+vm_paging_severe(struct vm_domain *vmd)
+{
+
+ return (vmd->vmd_free_severe > vmd->vmd_free_count);
+}
+
+/*
+ * Return the number of pages we need to launder.
+ * A positive number indicates that we have a shortfall of clean pages.
+ */
+static inline int
+vm_laundry_target(struct vm_domain *vmd)
+{
+
+ return (vm_paging_target(vmd));
+}
+
+static inline u_int
+vm_domain_freecnt_adj(struct vm_domain *vmd, int adj)
+{
+ u_int ret;
+
+ vm_domain_free_assert_locked(vmd);
+ ret = vmd->vmd_free_count += adj;
+ if ((!vmd->vmd_minset && vm_paging_min(vmd)) ||
+ (!vmd->vmd_severeset && vm_paging_severe(vmd)))
+ vm_domain_set(vmd);
+
+ return (ret);
+}
+
+#endif /* _KERNEL */
+#endif /* !_VM_PAGEQUEUE_ */
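
As an illustration of the locking discipline this header establishes, a sketch of a minimal single-page free path (free_one_page() is hypothetical; the real paths are in vm_page.c, not shown in this excerpt):

static void
free_one_page(vm_page_t m)
{
	struct vm_domain *vmd;

	vmd = vm_pagequeue_domain(m);
	vm_domain_free_lock(vmd);
	vm_phys_free_pages(m, 0);
	/* Adjusts vmd_free_count and updates the min/severe domain sets. */
	vm_domain_freecnt_adj(vmd, 1);
	/* Release threads sleeping in pagedaemon_wait() for this domain. */
	if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
		vmd->vmd_pages_needed = false;
		wakeup(&vmd->vmd_free_count);
	}
	vm_domain_free_unlock(vmd);
}
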
Index: sys/vm/vm_phys.h
===================================================================
--- sys/vm/vm_phys.h
+++ sys/vm/vm_phys.h
@@ -96,12 +96,12 @@
/*
*
- * vm_phys_domidx:
+ * vm_phys_domain:
*
* Return the index of the domain the page belongs to.
*/
static inline int
-vm_phys_domidx(vm_page_t m)
+vm_phys_domain(vm_page_t m)
{
#ifdef NUMA
int domn, segind;
@@ -115,27 +115,6 @@
#else
return (0);
#endif
-}
-
-/*
- * vm_phys_domain:
- *
- * Return the memory domain the page belongs to.
- */
-static inline struct vm_domain *
-vm_phys_domain(vm_page_t m)
-{
-
- return (&vm_dom[vm_phys_domidx(m)]);
-}
-
-static inline u_int
-vm_phys_freecnt_adj(vm_page_t m, int adj)
-{
-
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
- vm_phys_domain(m)->vmd_free_count += adj;
- return (vm_cnt.v_free_count += adj);
}
#endif /* _KERNEL */
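
Note the rename: vm_phys_domain() now returns the domain index (what vm_phys_domidx() used to return), and the structure lookup moves to vm_pagequeue_domain() in vm_pagequeue.h. Call sites that previously took a struct pointer now go through the index, e.g.:

	int domain;
	struct vm_domain *vmd;

	domain = vm_phys_domain(m);	/* an index, no longer a pointer */
	vmd = VM_DOMAIN(domain);	/* or, directly: vm_pagequeue_domain(m) */
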
Index: sys/vm/vm_phys.c
===================================================================
--- sys/vm/vm_phys.c
+++ sys/vm/vm_phys.c
@@ -67,6 +67,7 @@
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
"Too many physsegs.");
@@ -653,7 +654,7 @@
if (flind < 0)
return (NULL);
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
fl = &vm_phys_free_queues[domain][flind][pool][0];
for (oind = order; oind < VM_NFREEORDER; oind++) {
m = TAILQ_FIRST(&fl[oind].pl);
@@ -906,8 +907,8 @@
m, m->pool));
KASSERT(order < VM_NFREEORDER,
("vm_phys_free_pages: order %d is out of range", order));
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
seg = &vm_phys_segs[m->segind];
+ vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
if (order < VM_NFREEORDER - 1) {
pa = VM_PAGE_TO_PHYS(m);
do {
@@ -945,7 +946,7 @@
* Avoid unnecessary coalescing by freeing the pages in the largest
* possible power-of-two-sized subsets.
*/
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(vm_pagequeue_domain(m));
for (;; npages -= n) {
/*
* Unsigned "min" is used here so that "order" is assigned
@@ -1051,14 +1052,13 @@
vm_page_t m_set, m_tmp;
int order;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-
/*
* First, find the contiguous, power of two-sized set of free
* physical pages containing the given physical page "m" and
* assign it to "m_set".
*/
seg = &vm_phys_segs[m->segind];
+ vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
order < VM_NFREEORDER - 1; ) {
order++;
@@ -1122,7 +1122,7 @@
KASSERT(npages > 0, ("npages is 0"));
KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
if (low >= high)
return (NULL);
m_run = NULL;
@@ -1167,7 +1167,7 @@
KASSERT(npages > 0, ("npages is 0"));
KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
/* Compute the queue that is the best fit for npages. */
for (order = 0; (1 << order) < npages; order++);
/* Search for a run satisfying the specified conditions. */
Index: sys/vm/vm_reserv.h
===================================================================
--- sys/vm/vm_reserv.h
+++ sys/vm/vm_reserv.h
@@ -50,8 +50,14 @@
vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex,
int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary, vm_page_t mpred);
+vm_page_t vm_reserv_extend_contig(int req, vm_object_t object,
+ vm_pindex_t pindex, int domain, u_long npages,
+ vm_paddr_t low, vm_paddr_t high, u_long alignment,
+ vm_paddr_t boundary, vm_page_t mpred);
vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex,
int domain, vm_page_t mpred);
+vm_page_t vm_reserv_extend(int req, vm_object_t object,
+ vm_pindex_t pindex, int domain, vm_page_t mpred);
void vm_reserv_break_all(vm_object_t object);
boolean_t vm_reserv_free_page(vm_page_t m);
void vm_reserv_init(void);
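
The split into vm_reserv_extend()/vm_reserv_extend_contig() and the vm_reserv_alloc_*() functions lets the common case (growing an existing reservation) run without the caller holding the domain free lock; only the fallback that may create a new reservation takes it. A sketch of the expected call sequence, assuming the caller holds the object write lock throughout:

	vm_page_t m;

	/* Fast path: extend an existing reservation, if any. */
	m = vm_reserv_extend(req, object, pindex, domain, mpred);
	if (m == NULL) {
		/* Slow path: may create a new reservation. */
		vm_domain_free_lock(VM_DOMAIN(domain));
		m = vm_reserv_alloc_page(object, pindex, domain, mpred);
		vm_domain_free_unlock(VM_DOMAIN(domain));
	}
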
Index: sys/vm/vm_reserv.c
===================================================================
--- sys/vm/vm_reserv.c
+++ sys/vm/vm_reserv.c
@@ -59,7 +59,9 @@
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
@@ -163,17 +165,21 @@
* object's list of reservations.
*
* A partially populated reservation can be broken and reclaimed at any time.
+ *
+ * f - vm_domain_free_lock
+ * o - vm_reserv_object_lock
+ * c - constant after boot
*/
struct vm_reserv {
- TAILQ_ENTRY(vm_reserv) partpopq;
- LIST_ENTRY(vm_reserv) objq;
- vm_object_t object; /* containing object */
- vm_pindex_t pindex; /* offset within object */
- vm_page_t pages; /* first page of a superpage */
- int domain; /* NUMA domain */
- int popcnt; /* # of pages in use */
- char inpartpopq;
- popmap_t popmap[NPOPMAP]; /* bit vector of used pages */
+ TAILQ_ENTRY(vm_reserv) partpopq; /* (f) per-domain queue. */
+ LIST_ENTRY(vm_reserv) objq; /* (o, f) object queue */
+ vm_object_t object; /* (o, f) containing object */
+ vm_pindex_t pindex; /* (o, f) offset in object */
+ vm_page_t pages; /* (c) first page */
+ int domain; /* (c) NUMA domain. */
+ int popcnt; /* (f) # of pages in use */
+ char inpartpopq; /* (f) */
+ popmap_t popmap[NPOPMAP]; /* (f) bit vector, used pages */
};
/*
@@ -234,6 +240,25 @@
SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
&vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");
+/*
+ * The object lock pool is used to synchronize the rvq.  We cannot use a
+ * pool mutex because these locks are needed before malloc(9) works.
+ *
+ * The "hash" function could be made faster without divide and modulo.
+ */
+#define VM_RESERV_OBJ_LOCK_COUNT MAXCPU
+
+struct mtx_padalign vm_reserv_object_mtx[VM_RESERV_OBJ_LOCK_COUNT];
+
+#define vm_reserv_object_lock_idx(object) \
+ (((uintptr_t)object / sizeof(*object)) % VM_RESERV_OBJ_LOCK_COUNT)
+#define vm_reserv_object_lock_ptr(object) \
+ &vm_reserv_object_mtx[vm_reserv_object_lock_idx((object))]
+#define vm_reserv_object_lock(object) \
+ mtx_lock(vm_reserv_object_lock_ptr((object)))
+#define vm_reserv_object_unlock(object) \
+ mtx_unlock(vm_reserv_object_lock_ptr((object)))
+
static void vm_reserv_break(vm_reserv_t rv, vm_page_t m);
static void vm_reserv_depopulate(vm_reserv_t rv, int index);
static vm_reserv_t vm_reserv_from_page(vm_page_t m);
@@ -288,12 +313,12 @@
for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
counter = 0;
unused_pages = 0;
- mtx_lock(&vm_page_queue_free_mtx);
+ vm_domain_free_lock(VM_DOMAIN(domain));
TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
counter++;
unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
}
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_unlock(VM_DOMAIN(domain));
sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n",
domain, level,
unused_pages * ((int)PAGE_SIZE / 1024), counter);
@@ -305,6 +330,49 @@
}
/*
+ * Remove a reservation from the object's objq.
+ */
+static void
+vm_reserv_remove(vm_reserv_t rv)
+{
+ vm_object_t object;
+
+ KASSERT(rv->object != NULL,
+ ("vm_reserv_remove: reserv %p is free", rv));
+ KASSERT(!rv->inpartpopq,
+ ("vm_reserv_remove: reserv %p's inpartpopq is TRUE", rv));
+ object = rv->object;
+ vm_reserv_object_lock(object);
+ LIST_REMOVE(rv, objq);
+ rv->object = NULL;
+ vm_reserv_object_unlock(object);
+}
+
+/*
+ * Insert a new reservation into the object's objq.
+ */
+static void
+vm_reserv_insert(vm_reserv_t rv, vm_object_t object, vm_pindex_t pindex)
+{
+ int i;
+
+ KASSERT(rv->object == NULL,
+ ("vm_reserv_insert: reserv %p isn't free", rv));
+ KASSERT(rv->popcnt == 0,
+ ("vm_reserv_insert: reserv %p's popcnt is corrupted", rv));
+ KASSERT(!rv->inpartpopq,
+ ("vm_reserv_insert: reserv %p's inpartpopq is TRUE", rv));
+ for (i = 0; i < NPOPMAP; i++)
+ KASSERT(rv->popmap[i] == 0,
+ ("vm_reserv_insert: reserv %p's popmap is corrupted", rv));
+ vm_reserv_object_lock(object);
+ rv->pindex = pindex;
+ rv->object = object;
+ LIST_INSERT_HEAD(&object->rvq, rv, objq);
+ vm_reserv_object_unlock(object);
+}
+
+/*
* Reduces the given reservation's population count. If the population count
* becomes zero, the reservation is destroyed. Additionally, moves the
* reservation to the tail of the partially populated reservation queue if the
@@ -316,7 +384,7 @@
vm_reserv_depopulate(vm_reserv_t rv, int index)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
KASSERT(rv->object != NULL,
("vm_reserv_depopulate: reserv %p is free", rv));
KASSERT(popmap_is_set(rv->popmap, index),
@@ -339,9 +407,7 @@
popmap_clear(rv->popmap, index);
rv->popcnt--;
if (rv->popcnt == 0) {
- LIST_REMOVE(rv, objq);
- rv->object = NULL;
- rv->domain = -1;
+ vm_reserv_remove(rv);
vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
vm_reserv_freed++;
} else {
@@ -361,6 +427,43 @@
}
/*
+ * Returns an existing reservation or NULL, and initializes *msuccp.
+ */
+static vm_reserv_t
+vm_reserv_from_object(vm_object_t object, vm_pindex_t pindex,
+ vm_page_t mpred, vm_page_t *msuccp)
+{
+ vm_reserv_t rv;
+ vm_page_t msucc;
+
+ msucc = NULL;
+ if (mpred != NULL) {
+ KASSERT(mpred->object == object,
+ ("vm_reserv_from_object: object doesn't contain mpred"));
+ KASSERT(mpred->pindex < pindex,
+ ("vm_reserv_from_object: mpred doesn't precede pindex"));
+ rv = vm_reserv_from_page(mpred);
+ if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
+ goto found;
+ msucc = TAILQ_NEXT(mpred, listq);
+ } else
+ msucc = TAILQ_FIRST(&object->memq);
+ if (msucc != NULL) {
+ KASSERT(msucc->pindex > pindex,
+ ("vm_reserv_from_object: msucc doesn't succeed pindex"));
+ rv = vm_reserv_from_page(msucc);
+ if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
+ goto found;
+ }
+ rv = NULL;
+
+found:
+ *msuccp = msucc;
+
+ return (rv);
+}
+
+/*
* Returns TRUE if the given reservation contains the given page index and
* FALSE otherwise.
*/
@@ -381,7 +484,7 @@
vm_reserv_populate(vm_reserv_t rv, int index)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
KASSERT(rv->object != NULL,
("vm_reserv_populate: reserv %p is free", rv));
KASSERT(popmap_is_clear(rv->popmap, index),
@@ -423,6 +526,100 @@
* The object and free page queue must be locked.
*/
vm_page_t
+vm_reserv_extend_contig(int req, vm_object_t object, vm_pindex_t pindex,
+ int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
+ u_long alignment, vm_paddr_t boundary, vm_page_t mpred)
+{
+ struct vm_domain *vmd;
+ vm_paddr_t pa, size;
+ vm_page_t m, msucc;
+ vm_reserv_t rv;
+ int i, index;
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+ KASSERT(npages != 0, ("vm_reserv_extend_contig: npages is 0"));
+
+ /*
+ * Is a reservation fundamentally impossible?
+ */
+ if (pindex < VM_RESERV_INDEX(object, pindex) ||
+ pindex + npages > object->size || object->resident_page_count == 0)
+ return (NULL);
+
+ /*
+ * All reservations of a particular size have the same alignment.
+ * Assuming that the first page is allocated from a reservation, the
+ * least significant bits of its physical address can be determined
+ * from its offset from the beginning of the reservation and the size
+ * of the reservation.
+ *
+ * Could the specified index within a reservation of the smallest
+ * possible size satisfy the alignment and boundary requirements?
+ */
+ pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT;
+ if ((pa & (alignment - 1)) != 0)
+ return (NULL);
+ size = npages << PAGE_SHIFT;
+ if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
+ return (NULL);
+
+ /*
+ * Look for an existing reservation.
+ */
+ rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
+ if (rv == NULL)
+ return (NULL);
+ KASSERT(object != kernel_object || rv->domain == domain,
+ ("vm_reserv_extend_contig: Domain mismatch from reservation."));
+ index = VM_RESERV_INDEX(object, pindex);
+ /* Does the allocation fit within the reservation? */
+ if (index + npages > VM_LEVEL_0_NPAGES)
+ return (NULL);
+ domain = rv->domain;
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
+ if (rv->object != object || !vm_domain_available(vmd, req, npages)) {
+ m = NULL;
+ goto out;
+ }
+ m = &rv->pages[index];
+ pa = VM_PAGE_TO_PHYS(m);
+ if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 ||
+ ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) {
+ m = NULL;
+ goto out;
+ }
+ /* Handle vm_page_rename(m, new_object, ...). */
+ for (i = 0; i < npages; i++) {
+ if (popmap_is_set(rv->popmap, index + i)) {
+ m = NULL;
+ goto out;
+ }
+ }
+ for (i = 0; i < npages; i++)
+ vm_reserv_populate(rv, index + i);
+ vm_domain_freecnt_adj(vmd, -npages);
+out:
+ vm_domain_free_unlock(vmd);
+ return (m);
+}
+
+/*
+ * Allocates a contiguous set of physical pages of the given size "npages"
+ * from newly created reservations. All of the physical pages
+ * must be at or above the given physical address "low" and below the given
+ * physical address "high". The given value "alignment" determines the
+ * alignment of the first physical page in the set. If the given value
+ * "boundary" is non-zero, then the set of physical pages cannot cross any
+ * physical address boundary that is a multiple of that value. Both
+ * "alignment" and "boundary" must be a power of two.
+ *
+ * The page "mpred" must immediately precede the offset "pindex" within the
+ * specified object.
+ *
+ * The object and free page queue must be locked.
+ */
+vm_page_t
vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_page_t mpred)
@@ -434,7 +631,7 @@
u_long allocpages, maxpages, minpages;
int i, index, n;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0"));
@@ -463,52 +660,48 @@
return (NULL);
/*
- * Look for an existing reservation.
+ * Callers should've extended an existing reservation prior to
+ * calling this function. If a reservation exists it is
+ * incompatible with the allocation.
*/
- if (mpred != NULL) {
- KASSERT(mpred->object == object,
- ("vm_reserv_alloc_contig: object doesn't contain mpred"));
- KASSERT(mpred->pindex < pindex,
- ("vm_reserv_alloc_contig: mpred doesn't precede pindex"));
- rv = vm_reserv_from_page(mpred);
- if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
- goto found;
- msucc = TAILQ_NEXT(mpred, listq);
- } else
- msucc = TAILQ_FIRST(&object->memq);
- if (msucc != NULL) {
- KASSERT(msucc->pindex > pindex,
- ("vm_reserv_alloc_contig: msucc doesn't succeed pindex"));
- rv = vm_reserv_from_page(msucc);
- if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
- goto found;
- }
+ rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
+ if (rv != NULL)
+ return (NULL);
/*
* Could at least one reservation fit between the first index to the
* left that can be used ("leftcap") and the first index to the right
* that cannot be used ("rightcap")?
+ *
+ * We must synchronize with the reserv object lock to protect the
+ * pindex/object of the resulting reservations against rename while
+ * we are inspecting.
*/
first = pindex - VM_RESERV_INDEX(object, pindex);
+ minpages = VM_RESERV_INDEX(object, pindex) + npages;
+ maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
+ allocpages = maxpages;
+ vm_reserv_object_lock(object);
if (mpred != NULL) {
if ((rv = vm_reserv_from_page(mpred))->object != object)
leftcap = mpred->pindex + 1;
else
leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
- if (leftcap > first)
+ if (leftcap > first) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
}
- minpages = VM_RESERV_INDEX(object, pindex) + npages;
- maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
- allocpages = maxpages;
if (msucc != NULL) {
if ((rv = vm_reserv_from_page(msucc))->object != object)
rightcap = msucc->pindex;
else
rightcap = rv->pindex;
if (first + maxpages > rightcap) {
- if (maxpages == VM_LEVEL_0_NPAGES)
+ if (maxpages == VM_LEVEL_0_NPAGES) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
/*
* At least one reservation will fit between "leftcap"
@@ -519,6 +712,7 @@
allocpages = minpages;
}
}
+ vm_reserv_object_unlock(object);
/*
* Would the last new reservation extend past the end of the object?
@@ -549,7 +743,7 @@
VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
if (m == NULL)
return (NULL);
- KASSERT(vm_phys_domidx(m) == domain,
+ KASSERT(vm_phys_domain(m) == domain,
("vm_reserv_alloc_contig: Page domain does not match requested."));
/*
@@ -565,22 +759,7 @@
KASSERT(rv->pages == m,
("vm_reserv_alloc_contig: reserv %p's pages is corrupted",
rv));
- KASSERT(rv->object == NULL,
- ("vm_reserv_alloc_contig: reserv %p isn't free", rv));
- LIST_INSERT_HEAD(&object->rvq, rv, objq);
- rv->object = object;
- rv->pindex = first;
- rv->domain = domain;
- KASSERT(rv->popcnt == 0,
- ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted",
- rv));
- KASSERT(!rv->inpartpopq,
- ("vm_reserv_alloc_contig: reserv %p's inpartpopq is TRUE",
- rv));
- for (i = 0; i < NPOPMAP; i++)
- KASSERT(rv->popmap[i] == 0,
- ("vm_reserv_alloc_contig: reserv %p's popmap is corrupted",
- rv));
+ vm_reserv_insert(rv, object, first);
n = ulmin(VM_LEVEL_0_NPAGES - index, npages);
for (i = 0; i < n; i++)
vm_reserv_populate(rv, index + i);
@@ -594,31 +773,68 @@
allocpages -= VM_LEVEL_0_NPAGES;
} while (allocpages >= VM_LEVEL_0_NPAGES);
return (m_ret);
+}
+/*
+ * Attempts to extend an existing reservation and allocate the page to the
+ * object.
+ *
+ * The page "mpred" must immediately precede the offset "pindex" within the
+ * specified object.
+ *
+ * The object must be locked.
+ */
+vm_page_t
+vm_reserv_extend(int req, vm_object_t object, vm_pindex_t pindex, int domain,
+ vm_page_t mpred)
+{
+ struct vm_domain *vmd;
+ vm_page_t m, msucc;
+ vm_reserv_t rv;
+ int index, free_count;
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+
/*
- * Found a matching reservation.
+ * Could a reservation currently exist?
*/
-found:
- index = VM_RESERV_INDEX(object, pindex);
- /* Does the allocation fit within the reservation? */
- if (index + npages > VM_LEVEL_0_NPAGES)
+ if (pindex < VM_RESERV_INDEX(object, pindex) ||
+ pindex >= object->size || object->resident_page_count == 0)
return (NULL);
- m = &rv->pages[index];
- pa = VM_PAGE_TO_PHYS(m);
- if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 ||
- ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
+
+ /*
+ * Look for an existing reservation.
+ */
+ rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
+ if (rv == NULL)
return (NULL);
- /* Handle vm_page_rename(m, new_object, ...). */
- for (i = 0; i < npages; i++)
- if (popmap_is_set(rv->popmap, index + i))
- return (NULL);
- for (i = 0; i < npages; i++)
- vm_reserv_populate(rv, index + i);
+
+ KASSERT(object != kernel_object || rv->domain == domain,
+ ("vm_reserv_extend: Domain mismatch from reservation."));
+ domain = rv->domain;
+ vmd = VM_DOMAIN(domain);
+ index = VM_RESERV_INDEX(object, pindex);
+ m = &rv->pages[index];
+ vm_domain_free_lock(vmd);
+ if (vm_domain_available(vmd, req, 1) == 0 ||
+ /* Handle reclaim race. */
+ rv->object != object ||
+ /* Handle vm_page_rename(m, new_object, ...). */
+ popmap_is_set(rv->popmap, index))
+ m = NULL;
+ if (m != NULL)
+ vm_reserv_populate(rv, index);
+ free_count = vm_domain_freecnt_adj(vmd, -1);
+ vm_domain_free_unlock(vmd);
+
+ if (vm_paging_needed(vmd, free_count))
+ pagedaemon_wakeup(domain);
+
return (m);
}
/*
- * Allocates a page from an existing or newly created reservation.
+ * Allocates a page from a newly created reservation.
*
* The page "mpred" must immediately precede the offset "pindex" within the
* specified object.
@@ -632,9 +848,9 @@
vm_page_t m, msucc;
vm_pindex_t first, leftcap, rightcap;
vm_reserv_t rv;
- int i, index;
+ int index;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
VM_OBJECT_ASSERT_WLOCKED(object);
/*
@@ -645,48 +861,45 @@
return (NULL);
/*
- * Look for an existing reservation.
+ * Callers should've extended an existing reservation prior to
+ * calling this function. If a reservation exists it is
+ * incompatible with the allocation.
*/
- if (mpred != NULL) {
- KASSERT(mpred->object == object,
- ("vm_reserv_alloc_page: object doesn't contain mpred"));
- KASSERT(mpred->pindex < pindex,
- ("vm_reserv_alloc_page: mpred doesn't precede pindex"));
- rv = vm_reserv_from_page(mpred);
- if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
- goto found;
- msucc = TAILQ_NEXT(mpred, listq);
- } else
- msucc = TAILQ_FIRST(&object->memq);
- if (msucc != NULL) {
- KASSERT(msucc->pindex > pindex,
- ("vm_reserv_alloc_page: msucc doesn't succeed pindex"));
- rv = vm_reserv_from_page(msucc);
- if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
- goto found;
- }
+ rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
+ if (rv != NULL)
+ return (NULL);
/*
* Could a reservation fit between the first index to the left that
* can be used and the first index to the right that cannot be used?
+ *
+ * We must synchronize with the reserv object lock to protect the
+ * pindex/object of the resulting reservations against rename while
+ * we are inspecting.
*/
first = pindex - VM_RESERV_INDEX(object, pindex);
+ vm_reserv_object_lock(object);
if (mpred != NULL) {
if ((rv = vm_reserv_from_page(mpred))->object != object)
leftcap = mpred->pindex + 1;
else
leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
- if (leftcap > first)
+ if (leftcap > first) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
}
if (msucc != NULL) {
if ((rv = vm_reserv_from_page(msucc))->object != object)
rightcap = msucc->pindex;
else
rightcap = rv->pindex;
- if (first + VM_LEVEL_0_NPAGES > rightcap)
+ if (first + VM_LEVEL_0_NPAGES > rightcap) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
}
+ vm_reserv_object_unlock(object);
/*
* Would a new reservation extend past the end of the object?
@@ -712,37 +925,10 @@
rv = vm_reserv_from_page(m);
KASSERT(rv->pages == m,
("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv));
- KASSERT(rv->object == NULL,
- ("vm_reserv_alloc_page: reserv %p isn't free", rv));
- LIST_INSERT_HEAD(&object->rvq, rv, objq);
- rv->object = object;
- rv->pindex = first;
- rv->domain = domain;
- KASSERT(rv->popcnt == 0,
- ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
- KASSERT(!rv->inpartpopq,
- ("vm_reserv_alloc_page: reserv %p's inpartpopq is TRUE", rv));
- for (i = 0; i < NPOPMAP; i++)
- KASSERT(rv->popmap[i] == 0,
- ("vm_reserv_alloc_page: reserv %p's popmap is corrupted",
- rv));
+ vm_reserv_insert(rv, object, first);
index = VM_RESERV_INDEX(object, pindex);
vm_reserv_populate(rv, index);
return (&rv->pages[index]);
-
- /*
- * Found a matching reservation.
- */
-found:
- index = VM_RESERV_INDEX(object, pindex);
- m = &rv->pages[index];
- KASSERT(object != kernel_object || vm_phys_domidx(m) == domain,
- ("vm_reserv_alloc_page: Domain mismatch from reservation."));
- /* Handle vm_page_rename(m, new_object, ...). */
- if (popmap_is_set(rv->popmap, index))
- return (NULL);
- vm_reserv_populate(rv, index);
- return (m);
}
/*
@@ -759,14 +945,8 @@
{
int begin_zeroes, hi, i, lo;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
- KASSERT(rv->object != NULL,
- ("vm_reserv_break: reserv %p is free", rv));
- KASSERT(!rv->inpartpopq,
- ("vm_reserv_break: reserv %p's inpartpopq is TRUE", rv));
- LIST_REMOVE(rv, objq);
- rv->object = NULL;
- rv->domain = -1;
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+ vm_reserv_remove(rv);
if (m != NULL) {
/*
* Since the reservation is being broken, there is no harm in
@@ -830,9 +1010,26 @@
vm_reserv_break_all(vm_object_t object)
{
vm_reserv_t rv;
+ struct vm_domain *vmd;
- mtx_lock(&vm_page_queue_free_mtx);
+ /*
+ * This access of object->rvq is unsynchronized so that the
+ * object rvq lock can nest after the domain_free lock. We
+ * must check for races in the results. However, the object
+ * lock prevents new additions, so we are guaranteed that when
+ * it returns NULL the object is properly empty.
+ */
+ vmd = NULL;
while ((rv = LIST_FIRST(&object->rvq)) != NULL) {
+ if (vmd != VM_DOMAIN(rv->domain)) {
+ if (vmd != NULL)
+ vm_domain_free_unlock(vmd);
+ vmd = VM_DOMAIN(rv->domain);
+ vm_domain_free_lock(vmd);
+ }
+ /* Reclaim race. */
+ if (rv->object != object)
+ continue;
KASSERT(rv->object == object,
("vm_reserv_break_all: reserv %p is corrupted", rv));
if (rv->inpartpopq) {
@@ -841,7 +1038,8 @@
}
vm_reserv_break(rv, NULL);
}
- mtx_unlock(&vm_page_queue_free_mtx);
+ if (vmd != NULL)
+ vm_domain_free_unlock(vmd);
}
/*
@@ -855,8 +1053,8 @@
{
vm_reserv_t rv;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
rv = vm_reserv_from_page(m);
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
if (rv->object == NULL)
return (FALSE);
vm_reserv_depopulate(rv, m - rv->pages);
@@ -886,6 +1084,8 @@
while (paddr + VM_LEVEL_0_SIZE <= seg->end) {
vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages =
PHYS_TO_VM_PAGE(paddr);
+ vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].domain =
+ seg->domain;
paddr += VM_LEVEL_0_SIZE;
}
}
@@ -902,8 +1102,8 @@
{
vm_reserv_t rv;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
rv = vm_reserv_from_page(m);
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
if (rv->object == NULL)
return (false);
return (popmap_is_clear(rv->popmap, m - rv->pages));
@@ -945,7 +1145,7 @@
vm_reserv_reclaim(vm_reserv_t rv)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
KASSERT(rv->inpartpopq,
("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
@@ -969,7 +1169,7 @@
{
vm_reserv_t rv;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
vm_reserv_reclaim(rv);
return (TRUE);
@@ -993,7 +1193,7 @@
vm_reserv_t rv;
int hi, i, lo, low_index, next_free;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
if (npages > VM_LEVEL_0_NPAGES - 1)
return (FALSE);
size = npages << PAGE_SHIFT;
@@ -1084,14 +1284,19 @@
VM_OBJECT_ASSERT_WLOCKED(new_object);
rv = vm_reserv_from_page(m);
if (rv->object == old_object) {
- mtx_lock(&vm_page_queue_free_mtx);
+ vm_domain_free_lock(VM_DOMAIN(rv->domain));
if (rv->object == old_object) {
+ vm_reserv_object_lock(old_object);
+ rv->object = NULL;
LIST_REMOVE(rv, objq);
- LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
+ vm_reserv_object_unlock(old_object);
+ vm_reserv_object_lock(new_object);
rv->object = new_object;
rv->pindex -= old_object_offset;
+ LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
+ vm_reserv_object_unlock(new_object);
}
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_unlock(VM_DOMAIN(rv->domain));
}
}
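
The rename path shows the lock ordering this diff establishes: the domain free lock is taken first, and the per-object reservation pool lock nests inside it. rv->object only changes while the pool lock of the respective object is held, so lookups such as vm_reserv_from_object(), which hold only the object locks, never observe a half-moved reservation. In outline:

	vm_domain_free_lock(VM_DOMAIN(rv->domain));
	vm_reserv_object_lock(old_object);	/* nests after the free lock */
	rv->object = NULL;
	LIST_REMOVE(rv, objq);
	vm_reserv_object_unlock(old_object);
	vm_reserv_object_lock(new_object);
	rv->object = new_object;
	rv->pindex -= old_object_offset;
	LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
	vm_reserv_object_unlock(new_object);
	vm_domain_free_unlock(VM_DOMAIN(rv->domain));
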
@@ -1121,6 +1326,7 @@
{
vm_paddr_t new_end;
size_t size;
+ int i;
/*
* Calculate the size (in bytes) of the reservation array. Round up
@@ -1139,6 +1345,10 @@
vm_reserv_array = (void *)(uintptr_t)pmap_map(vaddr, new_end, end,
VM_PROT_READ | VM_PROT_WRITE);
bzero(vm_reserv_array, size);
+
+ for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++)
+ mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL,
+ MTX_DEF);
/*
* Return the next available physical address.
Index: sys/vm/vm_swapout.c
===================================================================
--- sys/vm/vm_swapout.c
+++ sys/vm/vm_swapout.c
@@ -650,7 +650,7 @@
loop:
if (vm_page_count_min()) {
- VM_WAIT;
+ vm_wait_min();
goto loop;
}
Index: sys/vm/vnode_pager.c
===================================================================
--- sys/vm/vnode_pager.c
+++ sys/vm/vnode_pager.c
@@ -1167,7 +1167,7 @@
* daemon up. This should probably be addressed XXX.
*/
- if (vm_cnt.v_free_count < vm_cnt.v_pageout_free_min)
+ if (vm_page_count_min())
flags |= VM_PAGER_PUT_SYNC;
/*