Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -134,6 +134,16 @@
 static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
	    int backward, int forward, bool obj_locked);
 
+static int vm_pfault_oom_attempts = 3;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_attempts, CTLFLAG_RWTUN,
+    &vm_pfault_oom_attempts, 0,
+    "Number of page allocation attempts in page fault handler before it triggers OOM handling");
+
+static int vm_pfault_oom_wait = 10;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN,
+    &vm_pfault_oom_wait, 0,
+    "Number of seconds to wait for free pages before retrying the page fault handler");
+
 static inline void
 release_page(struct faultstate *fs)
 {
@@ -554,7 +564,7 @@
 	vm_pindex_t retry_pindex;
 	vm_prot_t prot, retry_prot;
 	int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
-	int locked, nera, result, rv;
+	int locked, nera, oom, result, rv;
 	u_char behavior;
 	boolean_t wired;	/* Passed by reference. */
 	bool dead, hardfault, is_first_object_locked;
@@ -565,7 +575,9 @@
 	nera = -1;
 	hardfault = false;
 
-RetryFault:;
+RetryFault:
+	oom = 0;
+RetryFault_oom:
 
 	/*
 	 * Find the backing store object and offset into it to begin the
@@ -811,7 +823,18 @@
 		}
 		if (fs.m == NULL) {
 			unlock_and_deallocate(&fs);
-			vm_waitpfault(dset);
+			if (vm_pfault_oom_attempts < 0 ||
+			    oom < vm_pfault_oom_attempts) {
+				oom++;
+				vm_waitpfault(dset,
+				    vm_pfault_oom_wait * hz);
+				goto RetryFault_oom;
+			}
+			if (bootverbose)
+				printf(
+	"proc %d (%s) failed to alloc page on fault, starting OOM\n",
+				    curproc->p_pid, curproc->p_comm);
+			vm_pageout_oom(VM_OOM_MEM_PF);
 			goto RetryFault;
 		}
 	}
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -3069,7 +3069,7 @@
  * this balance without careful testing first.
  */
 void
-vm_waitpfault(struct domainset *dset)
+vm_waitpfault(struct domainset *dset, int timo)
 {
 
 	/*
@@ -3081,7 +3081,7 @@
 	if (vm_page_count_min_set(&dset->ds_mask)) {
 		vm_min_waiters++;
 		msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP,
-		    "pfault", 0);
+		    "pfault", timo);
 	} else
 		mtx_unlock(&vm_domainset_lock);
 }
Index: sys/vm/vm_pageout.h
===================================================================
--- sys/vm/vm_pageout.h
+++ sys/vm/vm_pageout.h
@@ -79,7 +79,8 @@
 extern int vm_pageout_page_count;
 
 #define	VM_OOM_MEM	1
-#define	VM_OOM_SWAPZ	2
+#define	VM_OOM_MEM_PF	2
+#define	VM_OOM_SWAPZ	3
 
 /*
  * vm_lowmem flags.
@@ -96,7 +97,7 @@
  */
 
 void vm_wait(vm_object_t obj);
-void vm_waitpfault(struct domainset *);
+void vm_waitpfault(struct domainset *, int timo);
 void vm_wait_domain(int domain);
 void vm_wait_min(void);
 void vm_wait_severe(void);
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -1751,6 +1751,13 @@
 	return (res);
 }
 
+static int vm_oom_ratelim_count;
+static int vm_oom_ratelim_last;
+static int vm_oom_pf_secs = 10;
+SYSCTL_INT(_vm, OID_AUTO, oom_pf_secs, CTLFLAG_RWTUN, &vm_oom_pf_secs, 0,
+    "Window in seconds for rate-limiting OOM kills initiated by page faults");
+static struct mtx vm_oom_ratelim_mtx;
+
 void
 vm_pageout_oom(int shortage)
 {
@@ -1758,8 +1765,31 @@
 	vm_offset_t size, bigsize;
 	struct thread *td;
 	struct vmspace *vm;
+	int now;
 	bool breakout;
 
+	/*
+	 * For OOM requests originating from vm_fault(), there is a high
+	 * chance that a single large process faults simultaneously in
+	 * several threads.  Also, on an active system running many
+	 * medium-sized processes, like buildworld, all of them could
+	 * fault almost simultaneously as well.
+	 *
+	 * To avoid killing too many processes, rate-limit OOMs
+	 * initiated by vm_fault() time-outs on the waits for free
+	 * pages.
+	 */
+	mtx_lock(&vm_oom_ratelim_mtx);
+	now = ticks;
+	if ((u_int)(now - vm_oom_ratelim_last) >= hz * vm_oom_pf_secs) {
+		vm_oom_ratelim_last = now;
+		vm_oom_ratelim_count = 0;
+	} else if (vm_oom_ratelim_count++ > 0 && shortage == VM_OOM_MEM_PF) {
+		mtx_unlock(&vm_oom_ratelim_mtx);
+		return;
+	}
+	mtx_unlock(&vm_oom_ratelim_mtx);
+
 	/*
 	 * We keep the process bigproc locked once we find it to keep anyone
 	 * from messing with it; however, there is a possibility of
@@ -1824,7 +1854,7 @@
 			continue;
 		}
 		size = vmspace_swap_count(vm);
-		if (shortage == VM_OOM_MEM)
+		if (shortage == VM_OOM_MEM || shortage == VM_OOM_MEM_PF)
 			size += vm_pageout_oom_pagecount(vm);
 		vm_map_unlock_read(&vm->vm_map);
 		vmspace_free(vm);
@@ -2075,6 +2105,7 @@
 	int error;
 	int i;
 
+	mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF);
 	swap_pager_swap_init();
 	snprintf(curthread->td_name, sizeof(curthread->td_name), "dom0");
 	error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
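
Note for reviewers: the new vm_fault() control flow can be traced outside the kernel. Below is a minimal userspace sketch (not part of the patch) of the bounded-retry policy the vm_fault.c hunk adds; try_alloc_page(), wait_for_free_pages() and start_oom() are hypothetical stand-ins for vm_page_alloc(), vm_waitpfault() and vm_pageout_oom(VM_OOM_MEM_PF), and only the retry accounting mirrors the patch.

/*
 * Userspace sketch of the bounded-retry policy in vm_fault(); the
 * helper functions are illustrative stand-ins, not kernel APIs.
 */
#include <stdbool.h>
#include <stdio.h>

static int pfault_oom_attempts = 3;	/* mirrors vm.pfault_oom_attempts */
static int pfault_oom_wait = 10;	/* mirrors vm.pfault_oom_wait */

static int fail_count = 2;

static bool
try_alloc_page(void)
{
	/* Fail the first two attempts to exercise the retry path. */
	return (fail_count-- <= 0);
}

static void
wait_for_free_pages(int secs)
{
	/* Stands in for the timed msleep() inside vm_waitpfault(). */
	printf("waiting up to %d s for free pages\n", secs);
}

static void
start_oom(void)
{
	printf("retries exhausted, starting OOM\n");
}

static void
handle_fault(void)
{
	int oom;

	for (oom = 0;;) {
		if (try_alloc_page())
			return;		/* fault serviced */
		/*
		 * A negative attempt limit means "wait forever", which
		 * keeps the pre-patch behavior.
		 */
		if (pfault_oom_attempts < 0 || oom < pfault_oom_attempts) {
			oom++;
			wait_for_free_pages(pfault_oom_wait);
			continue;	/* like goto RetryFault_oom */
		}
		start_oom();
		oom = 0;	/* like goto RetryFault: the counter resets */
	}
}

int
main(void)
{
	handle_fault();
	return (0);
}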
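
Similarly, the rate-limiting block added to vm_pageout_oom() can be read in isolation. The following self-contained userspace sketch shows the same scheme, assuming CLOCK_MONOTONIC in place of the kernel's ticks/hz and a pthread mutex in place of vm_oom_ratelim_mtx; oom_ratelimit_admit() is a hypothetical name, not a kernel function.

/*
 * Userspace sketch of the OOM rate limiter: the first request in each
 * window opens a new window and is admitted; later page-fault-initiated
 * requests inside the window are dropped, while pageout-daemon requests
 * (OOM_MEM) always pass.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define	OOM_MEM		1	/* request from the pageout daemon */
#define	OOM_MEM_PF	2	/* request from a page-fault time-out */

static pthread_mutex_t ratelim_mtx = PTHREAD_MUTEX_INITIALIZER;
static time_t ratelim_last;	/* start of the current window */
static int ratelim_count;	/* requests seen in this window */
static int oom_pf_secs = 10;	/* mirrors vm.oom_pf_secs */

static time_t
monotonic_seconds(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (ts.tv_sec);
}

/* Returns true if the OOM request may proceed to victim selection. */
static bool
oom_ratelimit_admit(int shortage)
{
	time_t now;
	bool admit;

	pthread_mutex_lock(&ratelim_mtx);
	now = monotonic_seconds();
	if (now - ratelim_last >= oom_pf_secs) {
		/* The window expired: open a new one and admit. */
		ratelim_last = now;
		ratelim_count = 0;
		admit = true;
	} else if (ratelim_count++ > 0 && shortage == OOM_MEM_PF) {
		/* A later page-fault OOM inside the window: drop it. */
		admit = false;
	} else
		admit = true;
	pthread_mutex_unlock(&ratelim_mtx);
	return (admit);
}

int
main(void)
{
	for (int i = 0; i < 3; i++)
		printf("PF request %d admitted: %d\n", i,
		    (int)oom_ratelimit_admit(OOM_MEM_PF));
	return (0);
}

One property worth noting, visible in both the sketch and the patch: the request that opens a window does not increment the counter, so the next request in the same window still sees a zero count and is admitted. Up to two page-fault-initiated OOMs can therefore run per window, and only subsequent ones are suppressed.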