Index: sys/amd64/amd64/pmap.c =================================================================== --- sys/amd64/amd64/pmap.c +++ sys/amd64/amd64/pmap.c @@ -2794,7 +2794,7 @@ RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); PMAP_ASSERT_NOT_IN_DI(); - vm_wait(NULL); + vm_wait(NULL, 0); PMAP_LOCK(pmap); } Index: sys/compat/linuxkpi/common/src/linux_page.c =================================================================== --- sys/compat/linuxkpi/common/src/linux_page.c +++ sys/compat/linuxkpi/common/src/linux_page.c @@ -101,7 +101,7 @@ if (flags & M_WAITOK) { if (!vm_page_reclaim_contig(req, npages, 0, pmax, PAGE_SIZE, 0)) { - vm_wait(NULL); + vm_wait(NULL, 0); } flags &= ~M_WAITOK; goto retry; Index: sys/dev/drm2/i915/i915_gem.c =================================================================== --- sys/dev/drm2/i915/i915_gem.c +++ sys/dev/drm2/i915/i915_gem.c @@ -1561,7 +1561,7 @@ i915_gem_object_unpin(obj); DRM_UNLOCK(dev); VM_OBJECT_WUNLOCK(vm_obj); - vm_wait(vm_obj); + vm_wait(vm_obj, 0); goto retry; } page->valid = VM_PAGE_BITS_ALL; Index: sys/dev/drm2/i915/i915_gem_gtt.c =================================================================== --- sys/dev/drm2/i915/i915_gem_gtt.c +++ sys/dev/drm2/i915/i915_gem_gtt.c @@ -589,7 +589,7 @@ if (tries < 1) { if (!vm_page_reclaim_contig(req, 1, 0, 0xffffffff, PAGE_SIZE, 0)) - vm_wait(NULL); + vm_wait(NULL, 0); tries++; goto retry; } Index: sys/dev/drm2/ttm/ttm_bo_vm.c =================================================================== --- sys/dev/drm2/ttm/ttm_bo_vm.c +++ sys/dev/drm2/ttm/ttm_bo_vm.c @@ -246,7 +246,7 @@ if (m1 == NULL) { if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) { VM_OBJECT_WUNLOCK(vm_obj); - vm_wait(vm_obj); + vm_wait(vm_obj, 0); VM_OBJECT_WLOCK(vm_obj); ttm_mem_io_unlock(man); ttm_bo_unreserve(bo); Index: sys/dev/drm2/ttm/ttm_page_alloc.c =================================================================== --- sys/dev/drm2/ttm/ttm_page_alloc.c +++ sys/dev/drm2/ttm/ttm_page_alloc.c @@ -168,7 +168,7 
+168,7 @@ return (p); if (!vm_page_reclaim_contig(req, 1, 0, 0xffffffff, PAGE_SIZE, 0)) - vm_wait(NULL); + vm_wait(NULL, 0); } } @@ -181,7 +181,7 @@ p = vm_page_alloc(NULL, 0, req); if (p != NULL) break; - vm_wait(NULL); + vm_wait(NULL, 0); } pmap_page_set_memattr(p, memattr); return (p); } Index: sys/i386/i386/pmap.c =================================================================== --- sys/i386/i386/pmap.c +++ sys/i386/i386/pmap.c @@ -2044,7 +2044,7 @@ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) { - vm_wait(NULL); + vm_wait(NULL, 0); } else { pmap->pm_ptdpg[i] = m; #if defined(PAE) || defined(PAE_TABLES) @@ -2088,7 +2088,7 @@ if ((flags & PMAP_ENTER_NOSLEEP) == 0) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); - vm_wait(NULL); + vm_wait(NULL, 0); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } Index: sys/vm/vm_fault.c =================================================================== --- sys/vm/vm_fault.c +++ sys/vm/vm_fault.c @@ -134,6 +134,16 @@ static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, int backward, int forward, bool obj_locked); +static int vm_pfault_oom_attempts = 3; +SYSCTL_INT(_vm, OID_AUTO, pfault_oom_attempts, CTLFLAG_RWTUN, + &vm_pfault_oom_attempts, 0, + ""); + +static int vm_pfault_oom_wait = 10; +SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN, + &vm_pfault_oom_wait, 0, + ""); + static inline void release_page(struct faultstate *fs) { @@ -552,7 +562,7 @@ vm_pindex_t retry_pindex; vm_prot_t prot, retry_prot; int ahead, alloc_req, behind, cluster_offset, error, era, faultcount; - int locked, nera, result, rv; + int locked, nera, oom, result, rv; u_char behavior; boolean_t wired; /* Passed by reference. 
*/ bool dead, hardfault, is_first_object_locked; @@ -563,7 +573,9 @@ nera = -1; hardfault = false; -RetryFault:; +RetryFault: + oom = 0; +RetryFault_oom: /* * Find the backing store object and offset into it to begin the @@ -805,7 +817,17 @@ } if (fs.m == NULL) { unlock_and_deallocate(&fs); - vm_waitpfault(); + if (vm_pfault_oom_attempts < 0 || + oom < vm_pfault_oom_attempts) { + oom++; + vm_waitpfault(vm_pfault_oom_wait * hz); + goto RetryFault_oom; + } + if (bootverbose) + printf( + "proc %d (%s) failed to alloc page on fault, starting OOM\n", + curproc->p_pid, curproc->p_comm); + vm_pageout_oom(VM_OOM_MEM_PF); goto RetryFault; } } @@ -1719,7 +1741,7 @@ if (dst_m == NULL) { VM_OBJECT_WUNLOCK(dst_object); VM_OBJECT_RUNLOCK(object); - vm_wait(dst_object); + vm_wait(dst_object, 0); VM_OBJECT_WLOCK(dst_object); goto again; } Index: sys/vm/vm_page.c =================================================================== --- sys/vm/vm_page.c +++ sys/vm/vm_page.c @@ -2899,13 +2899,13 @@ * Wait for free pages to exceed the min threshold globally. */ void -vm_wait_min(void) +vm_wait_min(int timo) { mtx_lock(&vm_domainset_lock); while (vm_page_count_min()) { vm_min_waiters++; - msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", 0); + msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", timo); } mtx_unlock(&vm_domainset_lock); } @@ -2934,7 +2934,7 @@ } static void -vm_wait_doms(const domainset_t *wdoms) +vm_wait_doms(const domainset_t *wdoms, int timo) { /* @@ -2960,7 +2960,7 @@ if (DOMAINSET_SUBSET(&vm_min_domains, wdoms)) { vm_min_waiters++; msleep(&vm_min_domains, &vm_domainset_lock, PVM, - "vmwait", 0); + "vmwait", timo); } mtx_unlock(&vm_domainset_lock); } @@ -2994,7 +2994,7 @@ panic("vm_wait in early boot"); DOMAINSET_ZERO(&wdom); DOMAINSET_SET(vmd->vmd_domain, &wdom); - vm_wait_doms(&wdom); + vm_wait_doms(&wdom, 0); } } @@ -3007,7 +3007,7 @@ * Called in various places after failed memory allocations. 
*/ void -vm_wait(vm_object_t obj) +vm_wait(vm_object_t obj, int timo) { struct domainset *d; @@ -3022,7 +3022,7 @@ if (d == NULL) d = curthread->td_domain.dr_policy; - vm_wait_doms(&d->ds_mask); + vm_wait_doms(&d->ds_mask, timo); } /* @@ -3067,13 +3067,14 @@ * this balance without careful testing first. */ void -vm_waitpfault(void) +vm_waitpfault(int timo) { mtx_lock(&vm_domainset_lock); if (vm_page_count_min()) { vm_min_waiters++; - msleep(&vm_min_domains, &vm_domainset_lock, PUSER, "pfault", 0); + msleep(&vm_min_domains, &vm_domainset_lock, PUSER, "pfault", + timo); } mtx_unlock(&vm_domainset_lock); } Index: sys/vm/vm_pageout.h =================================================================== --- sys/vm/vm_pageout.h +++ sys/vm/vm_pageout.h @@ -79,7 +79,8 @@ extern int vm_pageout_page_count; #define VM_OOM_MEM 1 -#define VM_OOM_SWAPZ 2 +#define VM_OOM_MEM_PF 2 +#define VM_OOM_SWAPZ 3 /* * vm_lowmem flags. @@ -95,10 +96,10 @@ * Signal pageout-daemon and wait for it. */ -void vm_wait(vm_object_t obj); -void vm_waitpfault(void); +void vm_wait(vm_object_t obj, int timo); +void vm_waitpfault(int timo); void vm_wait_domain(int domain); -void vm_wait_min(void); +void vm_wait_min(int timo); void vm_wait_severe(void); int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *); Index: sys/vm/vm_pageout.c =================================================================== --- sys/vm/vm_pageout.c +++ sys/vm/vm_pageout.c @@ -1752,6 +1752,13 @@ return (res); } +static int vm_oom_ratelim_count; +static int vm_oom_ratelim_last; +static int vm_oom_pf_secs = 10; +SYSCTL_INT(_vm, OID_AUTO, oom_pf_secs, CTLFLAG_RWTUN, &vm_oom_pf_secs, 0, + ""); +static struct mtx vm_oom_ratelim_mtx; + void vm_pageout_oom(int shortage) { @@ -1759,8 +1766,31 @@ vm_offset_t size, bigsize; struct thread *td; struct vmspace *vm; + int now; bool breakout; + /* + * For OOM requests originating from vm_fault(), there is a high + * chance that a single large process faults simultaneously in + * 
several threads. Also, on an active system running many + * processes of middle-size, like buildworld, all of them + * could fault almost simultaneously as well. + * + * To avoid killing too many processes, rate-limit OOMs + * initiated by vm_fault() time-outs on the waits for free + * pages. + */ + mtx_lock(&vm_oom_ratelim_mtx); + now = ticks; + if ((u_int)(now - vm_oom_ratelim_last) >= hz * vm_oom_pf_secs) { + vm_oom_ratelim_last = now; + vm_oom_ratelim_count = 0; + } else if (vm_oom_ratelim_count++ > 0 && shortage == VM_OOM_MEM_PF) { + mtx_unlock(&vm_oom_ratelim_mtx); + return; + } + mtx_unlock(&vm_oom_ratelim_mtx); + /* * We keep the process bigproc locked once we find it to keep anyone * from messing with it; however, there is a possibility of @@ -1825,7 +1855,7 @@ continue; } size = vmspace_swap_count(vm); - if (shortage == VM_OOM_MEM) + if (shortage == VM_OOM_MEM || shortage == VM_OOM_MEM_PF) size += vm_pageout_oom_pagecount(vm); vm_map_unlock_read(&vm->vm_map); vmspace_free(vm); @@ -2064,6 +2094,7 @@ int error; int i; + mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF); swap_pager_swap_init(); snprintf(curthread->td_name, sizeof(curthread->td_name), "dom0"); error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,