Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -2675,7 +2675,7 @@
 		RELEASE_PV_LIST_LOCK(lockp);
 		PMAP_UNLOCK(pmap);
 		PMAP_ASSERT_NOT_IN_DI();
-		VM_WAIT;
+		vm_wait(NULL);
 		PMAP_LOCK(pmap);
 	}
Index: sys/arm/arm/pmap-v4.c
===================================================================
--- sys/arm/arm/pmap-v4.c
+++ sys/arm/arm/pmap-v4.c
@@ -3248,7 +3248,7 @@
 		if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
 			PMAP_UNLOCK(pmap);
 			rw_wunlock(&pvh_global_lock);
-			VM_WAIT;
+			vm_wait(NULL);
 			rw_wlock(&pvh_global_lock);
 			PMAP_LOCK(pmap);
 			goto do_l2b_alloc;
Index: sys/arm/arm/pmap-v6.c
===================================================================
--- sys/arm/arm/pmap-v6.c
+++ sys/arm/arm/pmap-v6.c
@@ -2478,7 +2478,7 @@
 		if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
 			PMAP_UNLOCK(pmap);
 			rw_wunlock(&pvh_global_lock);
-			VM_WAIT;
+			vm_wait(NULL);
 			rw_wlock(&pvh_global_lock);
 			PMAP_LOCK(pmap);
 		}
Index: sys/arm/nvidia/drm2/tegra_bo.c
===================================================================
--- sys/arm/nvidia/drm2/tegra_bo.c
+++ sys/arm/nvidia/drm2/tegra_bo.c
@@ -114,7 +114,7 @@
 		if (tries < 3) {
 			if (!vm_page_reclaim_contig(pflags, npages, low,
 			    high, alignment, boundary))
-				VM_WAIT;
+				vm_wait(NULL);
 			tries++;
 			goto retry;
 		}
Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -1409,7 +1409,7 @@
 	 */
 	while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
-		VM_WAIT;
+		vm_wait(NULL);
 
 	l0phys = VM_PAGE_TO_PHYS(l0pt);
 	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);
@@ -1449,7 +1449,7 @@
 		if (lockp != NULL) {
 			RELEASE_PV_LIST_LOCK(lockp);
 			PMAP_UNLOCK(pmap);
-			VM_WAIT;
+			vm_wait(NULL);
 			PMAP_LOCK(pmap);
 		}
Index: sys/compat/linuxkpi/common/src/linux_page.c
===================================================================
--- sys/compat/linuxkpi/common/src/linux_page.c
+++ sys/compat/linuxkpi/common/src/linux_page.c
@@ -101,7 +101,7 @@
 		if (flags & M_WAITOK) {
 			if (!vm_page_reclaim_contig(req, npages, 0,
 			    pmax, PAGE_SIZE, 0)) {
-				VM_WAIT;
+				vm_wait(NULL);
 			}
 			flags &= ~M_WAITOK;
 			goto retry;
Index: sys/dev/drm2/i915/i915_gem.c
===================================================================
--- sys/dev/drm2/i915/i915_gem.c
+++ sys/dev/drm2/i915/i915_gem.c
@@ -1561,7 +1561,7 @@
 		i915_gem_object_unpin(obj);
 		DRM_UNLOCK(dev);
 		VM_OBJECT_WUNLOCK(vm_obj);
-		VM_WAIT;
+		vm_wait(vm_obj);
 		goto retry;
 	}
 	page->valid = VM_PAGE_BITS_ALL;
Index: sys/dev/drm2/i915/i915_gem_gtt.c
===================================================================
--- sys/dev/drm2/i915/i915_gem_gtt.c
+++ sys/dev/drm2/i915/i915_gem_gtt.c
@@ -589,7 +589,7 @@
 		if (tries < 1) {
 			if (!vm_page_reclaim_contig(req, 1, 0, 0xffffffff,
 			    PAGE_SIZE, 0))
-				VM_WAIT;
+				vm_wait(NULL);
 			tries++;
 			goto retry;
 		}
Index: sys/dev/drm2/ttm/ttm_bo_vm.c
===================================================================
--- sys/dev/drm2/ttm/ttm_bo_vm.c
+++ sys/dev/drm2/ttm/ttm_bo_vm.c
@@ -246,7 +246,7 @@
 	if (m1 == NULL) {
 		if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) {
 			VM_OBJECT_WUNLOCK(vm_obj);
-			VM_WAIT;
+			vm_wait(vm_obj);
 			VM_OBJECT_WLOCK(vm_obj);
 			ttm_mem_io_unlock(man);
 			ttm_bo_unreserve(bo);
Index: sys/dev/drm2/ttm/ttm_page_alloc.c
===================================================================
--- sys/dev/drm2/ttm/ttm_page_alloc.c
+++ sys/dev/drm2/ttm/ttm_page_alloc.c
@@ -168,7 +168,7 @@
 			return (p);
 		if (!vm_page_reclaim_contig(req, 1, 0, 0xffffffff,
 		    PAGE_SIZE, 0))
-			VM_WAIT;
+			vm_wait(NULL);
 	}
 }
@@ -181,7 +181,7 @@
 		p = vm_page_alloc(NULL, 0, req);
 		if (p != NULL)
 			break;
-		VM_WAIT;
+		vm_wait(NULL);
 	}
 	pmap_page_set_memattr(p, memattr);
 	return (p);
Index: sys/i386/i386/pmap.c
===================================================================
--- sys/i386/i386/pmap.c
+++ sys/i386/i386/pmap.c
@@ -1893,10 +1893,9 @@
 		m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 		if (m == NULL)
-			VM_WAIT;
-		else {
+			vm_wait(NULL);
+		else
 			ptdpg[i++] = m;
-		}
 	}
 
 	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
@@ -1945,7 +1944,7 @@
 		if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
 			PMAP_UNLOCK(pmap);
 			rw_wunlock(&pvh_global_lock);
-			VM_WAIT;
+			vm_wait(NULL);
 			rw_wlock(&pvh_global_lock);
 			PMAP_LOCK(pmap);
 		}
Index: sys/mips/mips/pmap.c
===================================================================
--- sys/mips/mips/pmap.c
+++ sys/mips/mips/pmap.c
@@ -1050,11 +1050,11 @@
 {
 
 #ifdef __mips_n64
-	VM_WAIT;
+	vm_wait(NULL);
 #else
 	if (!vm_page_reclaim_contig(req, 1, 0, MIPS_KSEG0_LARGEST_PHYS,
 	    PAGE_SIZE, 0))
-		VM_WAIT;
+		vm_wait(NULL);
 #endif
 }
Index: sys/mips/mips/uma_machdep.c
===================================================================
--- sys/mips/mips/uma_machdep.c
+++ sys/mips/mips/uma_machdep.c
@@ -67,13 +67,11 @@
 		    0, MIPS_KSEG0_LARGEST_PHYS, PAGE_SIZE, 0))
 			continue;
 #endif
-		if (m == NULL) {
-			if (wait & M_NOWAIT)
-				return (NULL);
-			else
-				VM_WAIT;
-		} else
+		if (m != NULL)
 			break;
+		if ((wait & M_NOWAIT) != 0)
+			return (NULL);
+		vm_wait(NULL);
 	}
 
 	pa = VM_PAGE_TO_PHYS(m);
Index: sys/powerpc/aim/mmu_oea.c
===================================================================
--- sys/powerpc/aim/mmu_oea.c
+++ sys/powerpc/aim/mmu_oea.c
@@ -1124,7 +1124,7 @@
 		if ((flags & PMAP_ENTER_NOSLEEP) != 0)
 			return (KERN_RESOURCE_SHORTAGE);
 		VM_OBJECT_ASSERT_UNLOCKED(m->object);
-		VM_WAIT;
+		vm_wait(NULL);
 	}
 }
Index: sys/powerpc/aim/mmu_oea64.c
===================================================================
--- sys/powerpc/aim/mmu_oea64.c
+++ sys/powerpc/aim/mmu_oea64.c
@@ -1383,7 +1383,7 @@
 		if ((flags & PMAP_ENTER_NOSLEEP) != 0)
 			return (KERN_RESOURCE_SHORTAGE);
 		VM_OBJECT_ASSERT_UNLOCKED(m->object);
-		VM_WAIT;
+		vm_wait(NULL);
 	}
 
 	/*
Index: sys/powerpc/booke/pmap.c
===================================================================
--- sys/powerpc/booke/pmap.c
+++ sys/powerpc/booke/pmap.c
@@ -789,7 +789,7 @@
 				vm_wire_sub(i);
 				return (NULL);
 			}
-			VM_WAIT;
+			vm_wait(NULL);
 			rw_wlock(&pvh_global_lock);
 			PMAP_LOCK(pmap);
 		}
@@ -1033,7 +1033,7 @@
 				vm_wire_sub(i);
 				return (NULL);
 			}
-			VM_WAIT;
+			vm_wait(NULL);
 			rw_wlock(&pvh_global_lock);
 			PMAP_LOCK(pmap);
 		}
@@ -1346,7 +1346,7 @@
 	req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
 	while ((m = vm_page_alloc(NULL, pidx, req)) == NULL) {
 		PMAP_UNLOCK(pmap);
-		VM_WAIT;
+		vm_wait(NULL);
 		PMAP_LOCK(pmap);
 	}
 	mtbl[i] = m;
Index: sys/riscv/riscv/pmap.c
===================================================================
--- sys/riscv/riscv/pmap.c
+++ sys/riscv/riscv/pmap.c
@@ -1203,7 +1203,7 @@
 	 */
 	while ((l1pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
-		VM_WAIT;
+		vm_wait(NULL);
 
 	l1phys = VM_PAGE_TO_PHYS(l1pt);
 	pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);
@@ -1252,7 +1252,7 @@
 		RELEASE_PV_LIST_LOCK(lockp);
 		PMAP_UNLOCK(pmap);
 		rw_runlock(&pvh_global_lock);
-		VM_WAIT;
+		vm_wait(NULL);
 		rw_rlock(&pvh_global_lock);
 		PMAP_LOCK(pmap);
 	}
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -787,7 +787,7 @@
 			}
 			if (fs.m == NULL) {
 				unlock_and_deallocate(&fs);
-				VM_WAITPFAULT;
+				vm_waitpfault();
 				goto RetryFault;
 			}
 		}
@@ -1685,7 +1685,7 @@
 		if (dst_m == NULL) {
 			VM_OBJECT_WUNLOCK(dst_object);
 			VM_OBJECT_RUNLOCK(object);
-			VM_WAIT;
+			vm_wait(dst_object);
 			VM_OBJECT_WLOCK(dst_object);
 			goto again;
 		}
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -2567,7 +2567,7 @@
  * Returns true if reclamation is successful and false otherwise.  Since
  * relocation requires the allocation of physical pages, reclamation may
  * fail due to a shortage of free pages.  When reclamation fails, callers
- * are expected to perform VM_WAIT before retrying a failed allocation
+ * are expected to perform vm_wait() before retrying a failed allocation
  * operation, e.g., vm_page_alloc_contig().
 *
 * The caller must always specify an allocation class through "req".
@@ -2767,15 +2767,42 @@
 u_int
 vm_wait_count(void)
 {
-	u_int cnt;
-	int i;
 
-	cnt = 0;
-	for (i = 0; i < vm_ndomains; i++)
-		cnt += VM_DOMAIN(i)->vmd_waiters;
-	cnt += vm_severe_waiters + vm_min_waiters;
+	return (vm_severe_waiters + vm_min_waiters);
+}
+
+static void
+vm_wait_doms(const domainset_t *wdoms)
+{
 
-	return (cnt);
+	/*
+	 * We use racy wakeup synchronization to avoid expensive global
+	 * locking for the pageproc when sleeping with a non-specific vm_wait.
+	 * To handle this, we only sleep for one tick in this instance.  It
+	 * is expected that most allocations for the pageproc will come from
+	 * kmem or vm_page_grab* which will use the more specific and
+	 * race-free vm_wait_domain().
+	 */
+	if (curproc == pageproc) {
+		mtx_lock(&vm_domainset_lock);
+		vm_pageproc_waiters++;
+		msleep(&vm_pageproc_waiters, &vm_domainset_lock, PVM,
+		    "pageprocwait", 1);
+		mtx_unlock(&vm_domainset_lock);
+	} else {
+		/*
+		 * XXX Ideally we would wait only until the allocation could
+		 * be satisfied.  This condition can cause new allocators to
+		 * consume all freed pages while old allocators wait.
+		 */
+		mtx_lock(&vm_domainset_lock);
+		if (DOMAINSET_SUBSET(&vm_min_domains, wdoms)) {
+			vm_min_waiters++;
+			msleep(&vm_min_domains, &vm_domainset_lock, PVM,
+			    "vmwait", 0);
+		}
+		mtx_unlock(&vm_domainset_lock);
+	}
 }
 
 /*
@@ -2788,6 +2815,7 @@
 vm_wait_domain(int domain)
 {
 	struct vm_domain *vmd;
+	domainset_t wdom;
 
 	vmd = VM_DOMAIN(domain);
 	vm_domain_free_assert_locked(vmd);
@@ -2799,48 +2827,37 @@
 	} else {
 		if (pageproc == NULL)
 			panic("vm_wait in early boot");
-		pagedaemon_wait(domain, PVM, "vmwait");
+		DOMAINSET_ZERO(&wdom);
+		DOMAINSET_SET(vmd->vmd_domain, &wdom);
+		vm_wait_doms(&wdom);
 	}
 }
 
 /*
- * vm_wait:	(also see VM_WAIT macro)
+ * vm_wait:
  *
- *	Sleep until free pages are available for allocation.
- *	- Called in various places after failed memory allocations.
+ *	Sleep until free pages are available for allocation in the
+ *	affinity domains of the obj.  If obj is NULL, the domain set
+ *	for the calling thread is used.
+ *	Called in various places after failed memory allocations.
  */
 void
-vm_wait(void)
+vm_wait(vm_object_t obj)
 {
+	struct domainset *d;
+
+	d = NULL;
 
 	/*
-	 * We use racey wakeup synchronization to avoid expensive global
-	 * locking for the pageproc when sleeping with a non-specific vm_wait.
-	 * To handle this, we only sleep for one tick in this instance.  It
It - * is expected that most allocations for the pageproc will come from - * kmem or vm_page_grab* which will use the more specific and - * race-free vm_wait_domain(). + * Carefully fetch pointers only once: the struct domainset + * itself is ummutable but the pointer might change. */ - if (curproc == pageproc) { - mtx_lock(&vm_domainset_lock); - vm_pageproc_waiters++; - msleep(&vm_pageproc_waiters, &vm_domainset_lock, PVM, - "pageprocwait", 1); - mtx_unlock(&vm_domainset_lock); - } else { - /* - * XXX Ideally we would wait only until the allocation could - * be satisfied. This condition can cause new allocators to - * consume all freed pages while old allocators wait. - */ - mtx_lock(&vm_domainset_lock); - if (vm_page_count_min()) { - vm_min_waiters++; - msleep(&vm_min_domains, &vm_domainset_lock, PVM, - "vmwait", 0); - } - mtx_unlock(&vm_domainset_lock); - } + if (obj != NULL) + d = obj->domain.dr_policy; + if (d == NULL) + d = curthread->td_domain.dr_policy; + + vm_wait_doms(&d->ds_mask); } /* @@ -2877,7 +2894,7 @@ } /* - * vm_waitpfault: (also see VM_WAITPFAULT macro) + * vm_waitpfault: * * Sleep until free pages are available for allocation. * - Called only in vm_fault so that processes page faulting @@ -3071,10 +3088,6 @@ * high water mark. And wakeup scheduler process if we have * lots of memory. this process will swapin processes. */ - if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) { - vmd->vmd_pages_needed = false; - wakeup(&vmd->vmd_free_count); - } if ((vmd->vmd_minset && !vm_paging_min(vmd)) || (vmd->vmd_severeset && !vm_paging_severe(vmd))) vm_domain_clear(vmd); Index: sys/vm/vm_pageout.h =================================================================== --- sys/vm/vm_pageout.h +++ sys/vm/vm_pageout.h @@ -93,11 +93,8 @@ * Signal pageout-daemon and wait for it. */ -void pagedaemon_wait(int domain, int pri, const char *wmesg); void pagedaemon_wakeup(int domain); -#define VM_WAIT vm_wait() -#define VM_WAITPFAULT vm_waitpfault() -void vm_wait(void); +void vm_wait(vm_object_t obj); void vm_waitpfault(void); void vm_wait_domain(int domain); void vm_wait_min(void); Index: sys/vm/vm_pageout.c =================================================================== --- sys/vm/vm_pageout.c +++ sys/vm/vm_pageout.c @@ -1750,8 +1750,6 @@ } sx_sunlock(&allproc_lock); if (bigproc != NULL) { - int i; - if (vm_panic_on_oom != 0) panic("out of swap space"); PROC_LOCK(bigproc); @@ -1759,8 +1757,6 @@ sched_nice(bigproc, PRIO_MIN); _PRELE(bigproc); PROC_UNLOCK(bigproc); - for (i = 0; i < vm_ndomains; i++) - wakeup(&VM_DOMAIN(i)->vmd_free_count); } } @@ -1795,23 +1791,6 @@ while (TRUE) { vm_domain_free_lock(vmd); - /* - * Generally, after a level >= 1 scan, if there are enough - * free pages to wakeup the waiters, then they are already - * awake. A call to vm_page_free() during the scan awakened - * them. However, in the following case, this wakeup serves - * to bound the amount of time that a thread might wait. - * Suppose a thread's call to vm_page_alloc() fails, but - * before that thread calls VM_WAIT, enough pages are freed by - * other threads to alleviate the free page shortage. The - * thread will, nonetheless, wait until another page is freed - * or this wakeup is performed. - */ - if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) { - vmd->vmd_pages_needed = false; - wakeup(&vmd->vmd_free_count); - } - /* * Do not clear vmd_pageout_wanted until we reach our free page * target. Otherwise, we may be awakened over and over again, @@ -1840,16 +1819,12 @@ pass++; } else { /* - * Yes. 
+			 * Yes.  If threads are still sleeping in vm_wait()
 			 * then we immediately start a new scan. Otherwise,
 			 * sleep until the next wakeup or until pages need to
 			 * have their reference stats updated.
 			 */
-			if (vmd->vmd_pages_needed) {
-				vm_domain_free_unlock(vmd);
-				if (pass == 0)
-					pass++;
-			} else if (mtx_sleep(&vmd->vmd_pageout_wanted,
+			if (mtx_sleep(&vmd->vmd_pageout_wanted,
 			    vm_domain_free_lockptr(vmd), PDROP | PVM,
 			    "psleep", hz) == 0) {
 				VM_CNT_INC(v_pdwakeups);
@@ -2000,33 +1975,3 @@
 		wakeup(&vmd->vmd_pageout_wanted);
 	}
 }
-
-/*
- * Wake up the page daemon and wait for it to reclaim free pages.
- *
- * This function returns with the free queues mutex unlocked.
- */
-void
-pagedaemon_wait(int domain, int pri, const char *wmesg)
-{
-	struct vm_domain *vmd;
-
-	vmd = VM_DOMAIN(domain);
-	vm_domain_free_assert_locked(vmd);
-
-	/*
-	 * vmd_pageout_wanted may have been set by an advisory wakeup, but if
-	 * the page daemon is running on a CPU, the wakeup will have been lost.
-	 * Thus, deliver a potentially spurious wakeup to ensure that the page
-	 * daemon has been notified of the shortage.
-	 */
-	if (!vmd->vmd_pageout_wanted || !vmd->vmd_pages_needed) {
-		vmd->vmd_pageout_wanted = true;
-		wakeup(&vmd->vmd_pageout_wanted);
-	}
-	vmd->vmd_pages_needed = true;
-	vmd->vmd_waiters++;
-	msleep(&vmd->vmd_free_count, vm_domain_free_lockptr(vmd), PDROP | pri,
-	    wmesg, 0);
-	vmd->vmd_waiters--;
-}
Index: sys/vm/vm_pagequeue.h
===================================================================
--- sys/vm/vm_pagequeue.h
+++ sys/vm/vm_pagequeue.h
@@ -93,8 +93,6 @@
 	int vmd_pageout_pages_needed;	/* page daemon waiting for pages? */
 	int vmd_pageout_deficit;	/* Estimated number of pages deficit */
-	int vmd_waiters;		/* Pageout waiters. */
-	bool vmd_pages_needed;		/* Are threads waiting for free pages? */
 	bool vmd_pageout_wanted;	/* pageout daemon wait channel */
 	bool vmd_minset;		/* Are we in vm_min_domains? */
 	bool vmd_severeset;		/* Are we in vm_severe_domains? */
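
Note (not part of the patch): the caller-side idiom this change standardizes is an allocate/sleep/retry loop around the new vm_wait() interface, mirroring the converted callers above — i915_gem.c and ttm_bo_vm.c pass the object so the sleep is scoped to that object's NUMA affinity domains, while the pmap code passes NULL to fall back to the calling thread's domain policy. The sketch below assumes only the post-patch API; the helper name alloc_obj_page_wait() is hypothetical.

/*
 * Minimal sketch of the post-patch wait-and-retry idiom.  vm_wait(obj)
 * sleeps until pages may become available in the affinity domains of
 * "obj"; vm_wait(NULL) uses the calling thread's domain policy instead.
 * alloc_obj_page_wait() is an illustrative name, not a kernel function.
 */
#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

static vm_page_t
alloc_obj_page_wait(vm_object_t obj, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_ASSERT_WLOCKED(obj);
	while ((m = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL)) == NULL) {
		/* Drop the object lock before sleeping, as the callers above do. */
		VM_OBJECT_WUNLOCK(obj);
		vm_wait(obj);
		VM_OBJECT_WLOCK(obj);
	}
	return (m);
}

Passing the object keeps a NUMA-affine allocator from sleeping on behalf of domains the object would never allocate from, which is the point of the vm_wait(obj)/vm_wait(NULL) distinction introduced here.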