D13671.id45920.diff
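
In brief, this revision makes the VM system recover from page shortages hit during page faults. vm_wait(), vm_waitpfault() and vm_wait_min() gain a timeout argument; vm_fault() retries a failed page allocation up to vm.pfault_oom_attempts times, sleeping at most vm.pfault_oom_wait seconds per attempt, and then calls the OOM killer with the new VM_OOM_MEM_PF reason, which vm_pageout_oom() rate-limits. To make the kills effective, SIGKILLed swapped-out processes are flagged with P_WKILLED, and the swapper faults their kernel stacks back in at elevated priority so they can exit and release their memory.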

Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -2765,7 +2765,7 @@
RELEASE_PV_LIST_LOCK(lockp);
PMAP_UNLOCK(pmap);
PMAP_ASSERT_NOT_IN_DI();
- vm_wait(NULL);
+ vm_wait(NULL, 0);
PMAP_LOCK(pmap);
}
Index: sys/compat/linuxkpi/common/src/linux_page.c
===================================================================
--- sys/compat/linuxkpi/common/src/linux_page.c
+++ sys/compat/linuxkpi/common/src/linux_page.c
@@ -101,7 +101,7 @@
if (flags & M_WAITOK) {
if (!vm_page_reclaim_contig(req,
npages, 0, pmax, PAGE_SIZE, 0)) {
- vm_wait(NULL);
+ vm_wait(NULL, 0);
}
flags &= ~M_WAITOK;
goto retry;
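
The linuxkpi hunk above and the i915/ttm hunks below all patch the same retry discipline around contiguous allocations: try the allocation, try vm_page_reclaim_contig(), and sleep in vm_wait() only when reclamation also fails. Below is a minimal userland sketch of that loop; alloc_contig(), reclaim_contig() and wait_for_free_pages() are hypothetical stand-ins for vm_page_alloc_contig(), vm_page_reclaim_contig() and vm_wait(NULL, 0), with canned failures to drive the retries.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Hypothetical stand-ins; the allocator and the reclaimer fail a
 * couple of times to exercise both arms of the retry loop.
 */
static int failures = 2;

static void *
alloc_contig(size_t npages)
{
        return (failures > 0 ? NULL : malloc(npages * 4096));
}

static bool
reclaim_contig(size_t npages)
{
        (void)npages;
        return (--failures <= 0);
}

static void
wait_for_free_pages(void)
{
        printf("sleeping until the page daemon frees memory\n");
}

/*
 * The retry discipline shared by the callers patched here: try the
 * allocation, try to reclaim a contiguous run, and block waiting
 * for free pages only when reclamation fails too.
 */
static void *
alloc_contig_waitok(size_t npages)
{
        void *p;

        for (;;) {
                if ((p = alloc_contig(npages)) != NULL)
                        return (p);
                if (!reclaim_contig(npages))
                        wait_for_free_pages();
        }
}

int
main(void)
{
        free(alloc_contig_waitok(1));
        return (0);
}

Only the shape is shared with the kernel code; the diff itself merely adds the explicit 0 timeout to each vm_wait() call in this loop.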
Index: sys/dev/drm2/i915/i915_gem.c
===================================================================
--- sys/dev/drm2/i915/i915_gem.c
+++ sys/dev/drm2/i915/i915_gem.c
@@ -1561,7 +1561,7 @@
i915_gem_object_unpin(obj);
DRM_UNLOCK(dev);
VM_OBJECT_WUNLOCK(vm_obj);
- vm_wait(vm_obj);
+ vm_wait(vm_obj, 0);
goto retry;
}
page->valid = VM_PAGE_BITS_ALL;
Index: sys/dev/drm2/i915/i915_gem_gtt.c
===================================================================
--- sys/dev/drm2/i915/i915_gem_gtt.c
+++ sys/dev/drm2/i915/i915_gem_gtt.c
@@ -589,7 +589,7 @@
if (tries < 1) {
if (!vm_page_reclaim_contig(req, 1, 0, 0xffffffff,
PAGE_SIZE, 0))
- vm_wait(NULL);
+ vm_wait(NULL, 0);
tries++;
goto retry;
}
Index: sys/dev/drm2/ttm/ttm_bo_vm.c
===================================================================
--- sys/dev/drm2/ttm/ttm_bo_vm.c
+++ sys/dev/drm2/ttm/ttm_bo_vm.c
@@ -246,7 +246,7 @@
if (m1 == NULL) {
if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) {
VM_OBJECT_WUNLOCK(vm_obj);
- vm_wait(vm_obj);
+ vm_wait(vm_obj, 0);
VM_OBJECT_WLOCK(vm_obj);
ttm_mem_io_unlock(man);
ttm_bo_unreserve(bo);
Index: sys/dev/drm2/ttm/ttm_page_alloc.c
===================================================================
--- sys/dev/drm2/ttm/ttm_page_alloc.c
+++ sys/dev/drm2/ttm/ttm_page_alloc.c
@@ -168,7 +168,7 @@
return (p);
if (!vm_page_reclaim_contig(req, 1, 0, 0xffffffff,
PAGE_SIZE, 0))
- vm_wait(NULL);
+ vm_wait(NULL, 0);
}
}
@@ -181,7 +181,7 @@
p = vm_page_alloc(NULL, 0, req);
if (p != NULL)
break;
- vm_wait(NULL);
+ vm_wait(NULL, 0);
}
pmap_page_set_memattr(p, memattr);
return (p);
Index: sys/i386/i386/pmap.c
===================================================================
--- sys/i386/i386/pmap.c
+++ sys/i386/i386/pmap.c
@@ -2026,7 +2026,7 @@
m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED | VM_ALLOC_ZERO);
if (m == NULL) {
- vm_wait(NULL);
+ vm_wait(NULL, 0);
} else {
pmap->pm_ptdpg[i] = m;
#if defined(PAE) || defined(PAE_TABLES)
@@ -2070,7 +2070,7 @@
if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
PMAP_UNLOCK(pmap);
rw_wunlock(&pvh_global_lock);
- vm_wait(NULL);
+ vm_wait(NULL, 0);
rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
}
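
The pmap hunks show the other common caller shape: a thread may not sleep in vm_wait() while holding the pmap or pv-list locks, so on failure it unlocks, sleeps, relocks and retries the whole operation. A compile-and-run sketch of that pattern under pthreads follows; the two mutexes, try_alloc_page() and wait_for_free_pages() are hypothetical stand-ins, not kernel interfaces.

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

/* Hypothetical locks standing in for the pv-list and pmap locks. */
static pthread_mutex_t pv_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t pmap_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Stand-ins for vm_page_alloc() and vm_wait(NULL, 0); the
 * allocator fails once to exercise the sleep-retry path.
 */
static int failures = 1;

static void *
try_alloc_page(void)
{
        return (failures-- > 0 ? NULL : malloc(4096));
}

static void
wait_for_free_pages(void)
{
}

/*
 * Sleep-retry shape of the pmap hunks above: both locks are
 * dropped before sleeping for memory, and the operation restarts
 * from scratch once memory may be available again.
 */
static void *
alloc_page_locked(bool can_sleep)
{
        void *m;

retry:
        pthread_mutex_lock(&pv_lock);
        pthread_mutex_lock(&pmap_lock);
        m = try_alloc_page();
        if (m == NULL && can_sleep) {
                pthread_mutex_unlock(&pmap_lock);
                pthread_mutex_unlock(&pv_lock);
                wait_for_free_pages();
                goto retry;
        }
        pthread_mutex_unlock(&pmap_lock);
        pthread_mutex_unlock(&pv_lock);
        return (m);
}

int
main(void)
{
        free(alloc_page_locked(true));
        return (0);
}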
Index: sys/kern/kern_sig.c
===================================================================
--- sys/kern/kern_sig.c
+++ sys/kern/kern_sig.c
@@ -3066,6 +3066,17 @@
return (1);
}
+void
+proc_wkilled(struct proc *p)
+{
+
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ if ((p->p_flag & P_WKILLED) == 0) {
+ p->p_flag |= P_WKILLED;
+ atomic_store_rel_int(&wkilled, 1);
+ }
+}
+
/*
* Kill the current process for stated reason.
*/
@@ -3078,7 +3089,7 @@
p->p_comm);
log(LOG_ERR, "pid %d (%s), uid %d, was killed: %s\n", p->p_pid,
p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, why);
- p->p_flag |= P_WKILLED;
+ proc_wkilled(p);
kern_psignal(p, SIGKILL);
}
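
proc_wkilled() centralizes the P_WKILLED marking and, new in this diff, publishes a global wkilled hint with a release store so that the swapper (see the vm_swapout.c hunks below) can pair it with an acquire fence. A minimal C11-atomics sketch of that publication pattern, assuming a simplified struct proc whose p_flag is protected by a plain mutex:

#include <pthread.h>
#include <stdatomic.h>

#define P_WKILLED       0x01

/* Simplified stand-ins for struct proc and the global hint. */
struct proc {
        pthread_mutex_t p_mtx;
        int p_flag;
};

static atomic_int wkilled;

/*
 * Mirror of proc_wkilled(): mark the process once, then publish
 * the hint with release semantics so a consumer's acquire fence
 * orders its reads of p_flag after it observes wkilled != 0.
 */
static void
proc_wkilled(struct proc *p)
{
        /* Caller holds p->p_mtx, as PROC_LOCK_ASSERT() checks above. */
        if ((p->p_flag & P_WKILLED) == 0) {
                p->p_flag |= P_WKILLED;
                atomic_store_explicit(&wkilled, 1, memory_order_release);
        }
}

int
main(void)
{
        struct proc p = { PTHREAD_MUTEX_INITIALIZER, 0 };

        pthread_mutex_lock(&p.p_mtx);
        proc_wkilled(&p);
        pthread_mutex_unlock(&p.p_mtx);
        return (atomic_load(&wkilled) == 1 ? 0 : 1);
}

Routing the sys_process.c caller through the same helper, as the next hunk does, guarantees that every path that sets P_WKILLED also wakes the swapper-side consumer.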
Index: sys/kern/sys_process.c
===================================================================
--- sys/kern/sys_process.c
+++ sys/kern/sys_process.c
@@ -1170,7 +1170,7 @@
* queue cannot accommodate any new signals.
*/
if (data == SIGKILL)
- p->p_flag |= P_WKILLED;
+ proc_wkilled(p);
/*
* Unsuspend all threads. To leave a thread
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -964,6 +964,7 @@
extern int nprocs, maxproc; /* Current and max number of procs. */
extern int maxprocperuid; /* Max procs per uid. */
extern u_long ps_arg_cache_limit;
+extern int wkilled;
LIST_HEAD(proclist, proc);
TAILQ_HEAD(procqueue, proc);
@@ -1050,6 +1051,7 @@
void proc_reap(struct thread *td, struct proc *p, int *status, int options);
void proc_reparent(struct proc *child, struct proc *newparent);
void proc_set_traced(struct proc *p, bool stop);
+void proc_wkilled(struct proc *p);
struct pstats *pstats_alloc(void);
void pstats_fork(struct pstats *src, struct pstats *dst);
void pstats_free(struct pstats *ps);
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -134,6 +134,16 @@
static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
int backward, int forward, bool obj_locked);
+static int vm_pfault_oom_attempts = 3;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_attempts, CTLFLAG_RWTUN,
+ &vm_pfault_oom_attempts, 0,
+ "");
+
+static int vm_pfault_oom_wait = 10;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN,
+ &vm_pfault_oom_wait, 0,
+ "");
+
static inline void
release_page(struct faultstate *fs)
{
@@ -552,7 +562,7 @@
vm_pindex_t retry_pindex;
vm_prot_t prot, retry_prot;
int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
- int locked, nera, result, rv;
+ int locked, nera, oom, result, rv;
u_char behavior;
boolean_t wired; /* Passed by reference. */
bool dead, hardfault, is_first_object_locked;
@@ -563,7 +573,9 @@
nera = -1;
hardfault = false;
-RetryFault:;
+RetryFault:
+ oom = 0;
+RetryFault_oom:
/*
* Find the backing store object and offset into it to begin the
@@ -805,7 +817,17 @@
}
if (fs.m == NULL) {
unlock_and_deallocate(&fs);
- vm_waitpfault();
+ if (vm_pfault_oom_attempts < 0 ||
+ oom < vm_pfault_oom_attempts) {
+ oom++;
+ vm_waitpfault(vm_pfault_oom_wait * hz);
+ goto RetryFault_oom;
+ }
+ if (bootverbose)
+ printf(
+ "proc %d (%s) failed to alloc page on fault, starting OOM\n",
+ curproc->p_pid, curproc->p_comm);
+ vm_pageout_oom(VM_OOM_MEM_PF);
goto RetryFault;
}
}
@@ -1719,7 +1741,7 @@
if (dst_m == NULL) {
VM_OBJECT_WUNLOCK(dst_object);
VM_OBJECT_RUNLOCK(object);
- vm_wait(dst_object);
+ vm_wait(dst_object, 0);
VM_OBJECT_WLOCK(dst_object);
goto again;
}
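
The vm_fault() change above bounds how long a fault waits for free pages: up to vm.pfault_oom_attempts timed waits of vm.pfault_oom_wait seconds each, after which vm_pageout_oom(VM_OOM_MEM_PF) is invoked and the fault restarts with a fresh budget; a negative attempt count disables the escalation entirely. A self-contained sketch of that control flow, where try_alloc(), timed_wait() and start_oom() are hypothetical stand-ins:

#include <stdbool.h>
#include <stdio.h>

/* Tunables mirroring vm.pfault_oom_attempts and vm.pfault_oom_wait. */
static int oom_attempts = 3;
static int oom_wait_secs = 10;

/*
 * Stand-ins for the page allocation, vm_waitpfault() and
 * vm_pageout_oom(VM_OOM_MEM_PF); try_alloc() fails a few times to
 * drive the loop.
 */
static int failures = 5;

static bool
try_alloc(void)
{
        return (failures-- <= 0);
}

static void
timed_wait(int secs)
{
        printf("waiting up to %d seconds for free pages\n", secs);
}

static void
start_oom(void)
{
        printf("starting OOM\n");
        failures = 0;           /* Pretend the kill released memory. */
}

/*
 * Control flow of the patched allocation path in vm_fault(): retry
 * with a bounded sleep, and escalate to the OOM killer only after
 * the configured number of timed-out attempts.  A negative
 * oom_attempts makes the first test always true, matching the
 * vm_pfault_oom_attempts < 0 check in the hunk above.
 */
static void
fault_alloc(void)
{
        int oom;

retry_fault:
        oom = 0;
retry_fault_oom:
        if (!try_alloc()) {
                if (oom_attempts < 0 || oom < oom_attempts) {
                        oom++;
                        timed_wait(oom_wait_secs);
                        goto retry_fault_oom;
                }
                start_oom();
                goto retry_fault;       /* Restart with a fresh budget. */
        }
}

int
main(void)
{
        fault_alloc();
        return (0);
}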
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -2899,13 +2899,13 @@
* Wait for free pages to exceed the min threshold globally.
*/
void
-vm_wait_min(void)
+vm_wait_min(int timo)
{
mtx_lock(&vm_domainset_lock);
while (vm_page_count_min()) {
vm_min_waiters++;
- msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", 0);
+ msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", timo);
}
mtx_unlock(&vm_domainset_lock);
}
@@ -2934,7 +2934,7 @@
}
static void
-vm_wait_doms(const domainset_t *wdoms)
+vm_wait_doms(const domainset_t *wdoms, int timo)
{
/*
@@ -2960,7 +2960,7 @@
if (DOMAINSET_SUBSET(&vm_min_domains, wdoms)) {
vm_min_waiters++;
msleep(&vm_min_domains, &vm_domainset_lock, PVM,
- "vmwait", 0);
+ "vmwait", timo);
}
mtx_unlock(&vm_domainset_lock);
}
@@ -2994,7 +2994,7 @@
panic("vm_wait in early boot");
DOMAINSET_ZERO(&wdom);
DOMAINSET_SET(vmd->vmd_domain, &wdom);
- vm_wait_doms(&wdom);
+ vm_wait_doms(&wdom, 0);
}
}
@@ -3007,7 +3007,7 @@
* Called in various places after failed memory allocations.
*/
void
-vm_wait(vm_object_t obj)
+vm_wait(vm_object_t obj, int timo)
{
struct domainset *d;
@@ -3022,7 +3022,7 @@
if (d == NULL)
d = curthread->td_domain.dr_policy;
- vm_wait_doms(&d->ds_mask);
+ vm_wait_doms(&d->ds_mask, timo);
}
/*
@@ -3067,13 +3067,14 @@
* this balance without careful testing first.
*/
void
-vm_waitpfault(void)
+vm_waitpfault(int timo)
{
mtx_lock(&vm_domainset_lock);
if (vm_page_count_min()) {
vm_min_waiters++;
- msleep(&vm_min_domains, &vm_domainset_lock, PUSER, "pfault", 0);
+ msleep(&vm_min_domains, &vm_domainset_lock, PUSER, "pfault",
+ timo);
}
mtx_unlock(&vm_domainset_lock);
}
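
The vm_page.c hunks simply plumb the new timo argument down to msleep(), where 0 keeps the historical sleep-until-wakeup behaviour and a positive value bounds the sleep in ticks. Notably, vm_waitpfault() guards the sleep with an if rather than a while, so a timed-out sleep returns to vm_fault(), which counts the failed attempt. A rough userland analogue of vm_waitpfault() using pthread_cond_timedwait(); page_count_min() and the condition variable are stand-ins for vm_page_count_min() and the vm_min_domains sleep channel:

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t domainset_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t min_domains_cv = PTHREAD_COND_INITIALIZER;

/* Hypothetical stand-in for vm_page_count_min(). */
static bool
page_count_min(void)
{
        return (false);
}

/*
 * Userland analogue of the patched vm_waitpfault(): timo_secs == 0
 * sleeps until a wakeup, like msleep(..., 0); a positive value
 * bounds the sleep, like msleep(..., timo).  Because the check is
 * an if, a timeout simply returns to the caller, which may act on
 * the failed wait.
 */
static void
wait_pfault(int timo_secs)
{
        struct timespec ts;

        pthread_mutex_lock(&domainset_lock);
        if (page_count_min()) {
                if (timo_secs == 0) {
                        pthread_cond_wait(&min_domains_cv, &domainset_lock);
                } else {
                        clock_gettime(CLOCK_REALTIME, &ts);
                        ts.tv_sec += timo_secs;
                        (void)pthread_cond_timedwait(&min_domains_cv,
                            &domainset_lock, &ts);
                }
        }
        pthread_mutex_unlock(&domainset_lock);
}

int
main(void)
{
        wait_pfault(0);
        wait_pfault(10);
        return (0);
}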
Index: sys/vm/vm_pageout.h
===================================================================
--- sys/vm/vm_pageout.h
+++ sys/vm/vm_pageout.h
@@ -79,7 +79,8 @@
extern int vm_pageout_page_count;
#define VM_OOM_MEM 1
-#define VM_OOM_SWAPZ 2
+#define VM_OOM_MEM_PF 2
+#define VM_OOM_SWAPZ 3
/*
* vm_lowmem flags.
@@ -95,10 +96,10 @@
* Signal pageout-daemon and wait for it.
*/
-void vm_wait(vm_object_t obj);
-void vm_waitpfault(void);
+void vm_wait(vm_object_t obj, int timo);
+void vm_waitpfault(int timo);
void vm_wait_domain(int domain);
-void vm_wait_min(void);
+void vm_wait_min(int timo);
void vm_wait_severe(void);
int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *);
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -1752,6 +1752,13 @@
return (res);
}
+static int vm_oom_ratelim_count;
+static int vm_oom_ratelim_last;
+static int vm_oom_pf_secs = 10;
+SYSCTL_INT(_vm, OID_AUTO, oom_pf_secs, CTLFLAG_RWTUN, &vm_oom_pf_secs, 0,
+ "");
+static struct mtx vm_oom_ratelim_mtx;
+
void
vm_pageout_oom(int shortage)
{
@@ -1759,8 +1766,31 @@
vm_offset_t size, bigsize;
struct thread *td;
struct vmspace *vm;
+ int now;
bool breakout;
+ /*
+ * For OOM requests originating from vm_fault(), there is a high
+ * chance that a single large process faults simultaneously in
+ * several threads. Also, on an active system running many
+ * processes of middle-size, like buildworld, all of them
+ * could fault almost simultaneously as well.
+ *
+ * To avoid killing too many processes, rate-limit OOMs
+ * initiated by vm_fault() time-outs on the waits for free
+ * pages.
+ */
+ mtx_lock(&vm_oom_ratelim_mtx);
+ now = ticks;
+ if ((u_int)(now - vm_oom_ratelim_last) >= hz * vm_oom_pf_secs) {
+ vm_oom_ratelim_last = now;
+ vm_oom_ratelim_count = 0;
+ } else if (vm_oom_ratelim_count++ > 0 && shortage == VM_OOM_MEM_PF) {
+ mtx_unlock(&vm_oom_ratelim_mtx);
+ return;
+ }
+ mtx_unlock(&vm_oom_ratelim_mtx);
+
/*
* We keep the process bigproc locked once we find it to keep anyone
* from messing with it; however, there is a possibility of
@@ -1825,7 +1855,7 @@
continue;
}
size = vmspace_swap_count(vm);
- if (shortage == VM_OOM_MEM)
+ if (shortage == VM_OOM_MEM || shortage == VM_OOM_MEM_PF)
size += vm_pageout_oom_pagecount(vm);
vm_map_unlock_read(&vm->vm_map);
vmspace_free(vm);
@@ -2064,6 +2094,7 @@
int error;
int i;
+ mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF);
swap_pager_swap_init();
snprintf(curthread->td_name, sizeof(curthread->td_name), "dom0");
error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
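
vm_pageout_oom() now rate-limits page-fault-initiated kills: within each vm.oom_pf_secs window, once the counter has been bumped past zero, further VM_OOM_MEM_PF requests return early, while pageout-daemon requests (VM_OOM_MEM) are never suppressed. A self-contained sketch mirroring that ticks-based limiter; ticks_now() is a hypothetical monotonic tick source standing in for the kernel's ticks variable:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define OOM_MEM         1       /* Pageout daemon: never rate-limited. */
#define OOM_MEM_PF      2       /* Page fault: rate-limited below. */

static pthread_mutex_t ratelim_mtx = PTHREAD_MUTEX_INITIALIZER;
static unsigned ratelim_last;
static int ratelim_count;
static int oom_pf_secs = 10;    /* Mirrors vm.oom_pf_secs. */
static int hz = 1000;           /* One tick per millisecond. */

/* Hypothetical monotonic tick source standing in for `ticks'. */
static unsigned
ticks_now(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ((unsigned)(ts.tv_sec * hz) +
            (unsigned)(ts.tv_nsec / 1000000));
}

/*
 * Mirror of the limiter added to vm_pageout_oom(): opening a new
 * window resets the counter without bumping it, so the post-
 * increment below lets one more request through before further
 * page-fault-initiated requests are suppressed until the window
 * expires.  Unsigned tick subtraction keeps wraparound benign.
 */
static bool
oom_should_run(int reason)
{
        unsigned now;
        bool run;

        pthread_mutex_lock(&ratelim_mtx);
        now = ticks_now();
        run = true;
        if (now - ratelim_last >= (unsigned)(hz * oom_pf_secs)) {
                ratelim_last = now;
                ratelim_count = 0;
        } else if (ratelim_count++ > 0 && reason == OOM_MEM_PF)
                run = false;
        pthread_mutex_unlock(&ratelim_mtx);
        return (run);
}

int
main(void)
{
        /* Force the first call to open a fresh window. */
        ratelim_last = ticks_now() - (unsigned)(hz * oom_pf_secs);

        printf("1st PF OOM: %d\n", oom_should_run(OOM_MEM_PF)); /* 1 */
        printf("2nd PF OOM: %d\n", oom_should_run(OOM_MEM_PF)); /* 1 */
        printf("3rd PF OOM: %d\n", oom_should_run(OOM_MEM_PF)); /* 0 */
        printf("daemon OOM: %d\n", oom_should_run(OOM_MEM));    /* 1 */
        return (0);
}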
Index: sys/vm/vm_swapout.c
===================================================================
--- sys/vm/vm_swapout.c
+++ sys/vm/vm_swapout.c
@@ -152,6 +152,11 @@
&swap_idle_threshold2, 0,
"Time before a process will be swapped out");
+static int swapper_swapin_oom_timeout = 1;
+SYSCTL_INT(_vm, OID_AUTO, swapper_swapin_oom_timeout, CTLFLAG_RW,
+ &swapper_swapin_oom_timeout, 0,
+ "Interval for swapper to try faultin killed processes on OOM");
+
static int vm_pageout_req_swapout; /* XXX */
static int vm_daemon_needed;
static struct mtx vm_daemon_mtx;
@@ -164,7 +169,7 @@
static void vm_swapout_object_deactivate_pages(pmap_t, vm_object_t, long);
static void swapout_procs(int action);
static void vm_req_vmdaemon(int req);
-static void vm_thread_swapin(struct thread *td);
+static void vm_thread_swapin(struct thread *td, bool oom_swapin);
static void vm_thread_swapout(struct thread *td);
/*
@@ -563,7 +568,7 @@
* Bring the kernel stack for a specified thread back in.
*/
static void
-vm_thread_swapin(struct thread *td)
+vm_thread_swapin(struct thread *td, bool oom_swapin)
{
vm_object_t ksobj;
vm_page_t ma[KSTACK_MAX_PAGES];
@@ -572,8 +577,8 @@
pages = td->td_kstack_pages;
ksobj = td->td_kstack_obj;
VM_OBJECT_WLOCK(ksobj);
- (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_WIRED, ma,
- pages);
+ (void)vm_page_grab_pages(ksobj, 0, (oom_swapin ? VM_ALLOC_SYSTEM :
+ VM_ALLOC_NORMAL) | VM_ALLOC_WIRED, ma, pages);
for (i = 0; i < pages;) {
vm_page_assert_xbusied(ma[i]);
if (ma[i]->valid == VM_PAGE_BITS_ALL) {
@@ -601,8 +606,8 @@
cpu_thread_swapin(td);
}
-void
-faultin(struct proc *p)
+static void
+faultin1(struct proc *p, bool oom_swapin)
{
struct thread *td;
@@ -631,7 +636,7 @@
* swapped out.
*/
FOREACH_THREAD_IN_PROC(p, td)
- vm_thread_swapin(td);
+ vm_thread_swapin(td, oom_swapin);
PROC_LOCK(p);
swapclear(p);
p->p_swtick = ticks;
@@ -643,6 +648,15 @@
}
}
+void
+faultin(struct proc *p)
+{
+
+ faultin1(p, false);
+}
+
+int wkilled;
+
/*
* This swapin algorithm attempts to swap-in processes only if there
* is enough space for them. Of course, if a process waits for a long
@@ -656,8 +670,36 @@
int ppri, pri, slptime, swtime;
loop:
- if (vm_page_count_min()) {
- vm_wait_min();
+ if (atomic_load_int(&wkilled) != 0) {
+ atomic_thread_fence_acq();
+ atomic_store_int(&wkilled, 0);
+
+ /*
+ * A swapped-out process might have mapped a large
+ * portion of the system's pages as anonymous memory.
+ * There is no other way to release the memory other
+ * than to kill the process, for which we need to swap
+ * it in.
+ */
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ /*
+ * Ensure that the pages for kernel stacks are
+ * allocated with higher priority.
+ */
+ if (p->p_state == PRS_NORMAL && (p->p_flag &
+ (P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM |
+ P_WKILLED)) == P_WKILLED) {
+ sx_sunlock(&allproc_lock);
+ faultin1(p, true);
+ PROC_UNLOCK(p);
+ goto loop;
+ }
+
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
goto loop;
}
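
The swapper side consumes the hint: an acquire-ordered check of wkilled (pairing with proc_wkilled()'s release store), then a scan of allproc for a killed, swapped-out process, dropping the list lock before the potentially sleeping faultin1() call and restarting the scan afterwards, since the list may have changed during the sleep. A simplified sketch of that drop-lock-and-restart scan; struct proc, allproc and faultin_oom() are toy stand-ins, and per-process locking is elided for brevity:

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

#define P_WKILLED       0x01
#define P_INMEM         0x02

struct proc {
        struct proc *p_next;
        int p_flag;
};

/* Stand-ins for allproc, allproc_lock and the wkilled hint. */
static struct proc *allproc;
static pthread_rwlock_t allproc_lock = PTHREAD_RWLOCK_INITIALIZER;
static atomic_int wkilled;

static void
faultin_oom(struct proc *p)
{
        p->p_flag |= P_INMEM;   /* Pretend the stacks were swapped in. */
}

/*
 * Shape of the patched swapper loop: consume the wkilled hint with
 * acquire ordering, then scan for a killed, swapped-out process.
 * The list lock is dropped before the blocking swap-in, and the
 * whole scan restarts from the top, re-checking the hint.
 */
static void
swapin_killed_procs(void)
{
        struct proc *p;

restart:
        if (atomic_load_explicit(&wkilled, memory_order_acquire) == 0)
                return;
        atomic_store_explicit(&wkilled, 0, memory_order_relaxed);
        pthread_rwlock_rdlock(&allproc_lock);
        for (p = allproc; p != NULL; p = p->p_next) {
                if ((p->p_flag & (P_WKILLED | P_INMEM)) == P_WKILLED) {
                        pthread_rwlock_unlock(&allproc_lock);
                        faultin_oom(p);         /* May sleep. */
                        goto restart;
                }
        }
        pthread_rwlock_unlock(&allproc_lock);
}

int
main(void)
{
        struct proc p = { NULL, P_WKILLED };

        allproc = &p;
        atomic_store_explicit(&wkilled, 1, memory_order_release);
        swapin_killed_procs();
        return ((p.p_flag & P_INMEM) != 0 ? 0 : 1);
}

In the kernel version, faultin1(p, true) additionally passes VM_ALLOC_SYSTEM to vm_page_grab_pages(), so the kernel-stack pages of the victim are allocated at higher priority than ordinary swap-ins.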
Index: sys/vm/vm_swapout_dummy.c
===================================================================
--- sys/vm/vm_swapout_dummy.c
+++ sys/vm/vm_swapout_dummy.c
@@ -115,10 +115,14 @@
panic("faultin: proc %p swapped out with NO_SWAPPING", p);
}
+int wkilled;
+
void
swapper(void)
{
- for (;;)
+ for (;;) {
+ wkilled = 0;
tsleep(&proc0, PVM, "swapin", MAXSLP * hz);
+ }
}
