D13671.id37194.diff

Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -134,6 +134,16 @@
static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
int backward, int forward);
+static int vm_pfault_oom_attempts = 3;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_attempts, CTLFLAG_RWTUN,
+ &vm_pfault_oom_attempts, 0,
+ "");
+
+static int vm_pfault_oom_wait = 10;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN,
+ &vm_pfault_oom_wait, 0,
+ "");
+
static inline void
release_page(struct faultstate *fs)
{
@@ -531,7 +541,7 @@
vm_pindex_t retry_pindex;
vm_prot_t prot, retry_prot;
int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
- int locked, nera, result, rv;
+ int locked, nera, oom, result, rv;
u_char behavior;
boolean_t wired; /* Passed by reference. */
bool dead, hardfault, is_first_object_locked;
@@ -542,7 +552,9 @@
nera = -1;
hardfault = false;
-RetryFault:;
+RetryFault:
+ oom = 0;
+RetryFault_oom:
/*
* Find the backing store object and offset into it to begin the
@@ -787,7 +799,17 @@
}
if (fs.m == NULL) {
unlock_and_deallocate(&fs);
- VM_WAITPFAULT;
+ if (vm_pfault_oom_attempts < 0 ||
+ oom < vm_pfault_oom_attempts) {
+ oom++;
+ vm_waitpfault(vm_pfault_oom_wait);
+ goto RetryFault_oom;
+ }
+ if (bootverbose)
+ printf(
+ "proc %d (%s) failed to alloc page on fault, starting OOM\n",
+ curproc->p_pid, curproc->p_comm);
+ vm_pageout_oom(VM_OOM_MEM_PF);
goto RetryFault;
}
}
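
Above, the page fault handler no longer sleeps unconditionally via VM_WAITPFAULT when a page allocation fails: it waits with a timeout (vm.pfault_oom_wait seconds), retries up to vm.pfault_oom_attempts times, and only then calls vm_pageout_oom(VM_OOM_MEM_PF); a negative attempt budget disables the escalation. The following is a minimal user-space sketch of that bounded wait-retry-escalate pattern; try_alloc() and the other helpers are hypothetical stand-ins, not kernel interfaces.

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static int oom_attempts = 3;    /* analogue of vm.pfault_oom_attempts */
static int oom_wait = 1;        /* analogue of vm.pfault_oom_wait, in seconds */
static int calls;

/* Hypothetical allocator that only succeeds on its fifth call. */
static bool
try_alloc(void)
{
        return (++calls >= 5);
}

int
main(void)
{
        int oom;

retry:
        oom = 0;
retry_oom:
        if (!try_alloc()) {
                /* A negative budget means "retry forever, never trigger OOM". */
                if (oom_attempts < 0 || oom < oom_attempts) {
                        oom++;
                        sleep(oom_wait);        /* timed wait for free pages */
                        goto retry_oom;
                }
                printf("attempt budget exhausted, escalating to OOM\n");
                /* the kernel calls vm_pageout_oom(VM_OOM_MEM_PF) here */
                goto retry;                     /* then restarts the whole fault */
        }
        printf("allocation succeeded after %d attempts\n", calls);
        return (0);
}
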
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -2652,7 +2652,7 @@
* - Called in various places before memory allocations.
*/
static void
-_vm_wait(void)
+_vm_wait(int timo)
{
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
@@ -2663,16 +2663,16 @@
} else {
if (pageproc == NULL)
panic("vm_wait in early boot");
- pagedaemon_wait(PVM, "vmwait");
+ pagedaemon_wait(PVM, "vmwait", timo);
}
}
void
-vm_wait(void)
+vm_wait(int timo)
{
mtx_lock(&vm_page_queue_free_mtx);
- _vm_wait();
+ _vm_wait(timo);
}
/*
@@ -2696,7 +2696,7 @@
if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) {
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
- _vm_wait();
+ _vm_wait(0);
if (object != NULL)
VM_OBJECT_WLOCK(object);
if (req & VM_ALLOC_WAITOK)
@@ -2719,11 +2719,11 @@
* this balance without careful testing first.
*/
void
-vm_waitpfault(void)
+vm_waitpfault(int timo)
{
mtx_lock(&vm_page_queue_free_mtx);
- pagedaemon_wait(PUSER, "pfault");
+ pagedaemon_wait(PUSER, "pfault", timo);
}
struct vm_pagequeue *
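
The vm_page.c changes above thread an integer timeout through _vm_wait()/vm_wait()/vm_waitpfault() down to pagedaemon_wait(); passing 0 keeps the old behaviour of sleeping until the page daemon wakes the thread, because msleep(9) treats a zero timo as no timeout. Below is a minimal user-space analogue of that optional-timeout wait, built on POSIX condition variables; the names are illustrative only, not part of the patch.

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t free_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  free_cv  = PTHREAD_COND_INITIALIZER;
static bool pages_available;

/* Analogue of pagedaemon_wait(): block until woken, or for at most timo seconds. */
static void
wait_for_pages(int timo)
{
        struct timespec deadline;

        pthread_mutex_lock(&free_mtx);
        if (timo == 0) {
                /* timo == 0: wait indefinitely, like msleep() with timo 0. */
                while (!pages_available)
                        pthread_cond_wait(&free_cv, &free_mtx);
        } else {
                clock_gettime(CLOCK_REALTIME, &deadline);
                deadline.tv_sec += timo;
                /* Give up either when pages appear or when the deadline passes. */
                while (!pages_available &&
                    pthread_cond_timedwait(&free_cv, &free_mtx, &deadline) == 0)
                        ;
        }
        pthread_mutex_unlock(&free_mtx);
}
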
Index: sys/vm/vm_pageout.h
===================================================================
--- sys/vm/vm_pageout.h
+++ sys/vm/vm_pageout.h
@@ -79,7 +79,8 @@
extern bool vm_pages_needed;
#define VM_OOM_MEM 1
-#define VM_OOM_SWAPZ 2
+#define VM_OOM_MEM_PF 2
+#define VM_OOM_SWAPZ 3
/*
* vm_lowmem flags.
@@ -95,12 +96,11 @@
* Signal pageout-daemon and wait for it.
*/
-void pagedaemon_wait(int pri, const char *wmesg);
+void pagedaemon_wait(int pri, const char *wmesg, int timo);
void pagedaemon_wakeup(void);
-#define VM_WAIT vm_wait()
-#define VM_WAITPFAULT vm_waitpfault()
-void vm_wait(void);
-void vm_waitpfault(void);
+#define VM_WAIT vm_wait(0)
+void vm_wait(int timo);
+void vm_waitpfault(int timo);
#ifdef _KERNEL
int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *);
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -1648,6 +1648,13 @@
return (res);
}
+static int vm_oom_ratelim_count;
+static int vm_oom_ratelim_last;
+static int vm_oom_pf_secs = 10;
+SYSCTL_INT(_vm, OID_AUTO, oom_pf_secs, CTLFLAG_RWTUN, &vm_oom_pf_secs, 0,
+ "");
+static struct mtx vm_oom_ratelim_mtx;
+
void
vm_pageout_oom(int shortage)
{
@@ -1655,9 +1662,32 @@
vm_offset_t size, bigsize;
struct thread *td;
struct vmspace *vm;
+ int now;
bool breakout;
/*
+ * For OOM requests originating from vm_fault(), there is a high
+ * chance that a single large process faults simultaneously in
+ * several threads. Also, on an active system running many
+ * middle-sized processes, as during a buildworld, all of them
+ * could fault almost simultaneously as well.
+ *
+ * To avoid killing too many processes, rate-limit OOMs
+ * initiated by vm_fault() time-outs on the waits for free
+ * pages.
+ */
+ mtx_lock(&vm_oom_ratelim_mtx);
+ now = ticks;
+ if ((u_int)(now - vm_oom_ratelim_last) >= hz * vm_oom_pf_secs) {
+ vm_oom_ratelim_last = now;
+ vm_oom_ratelim_count = 0;
+ } else if (vm_oom_ratelim_count++ > 0 && shortage == VM_OOM_MEM_PF) {
+ mtx_unlock(&vm_oom_ratelim_mtx);
+ return;
+ }
+ mtx_unlock(&vm_oom_ratelim_mtx);
+
+ /*
* We keep the process bigproc locked once we find it to keep anyone
* from messing with it; however, there is a possibility of
* deadlock if process B is bigproc and one of its child processes
@@ -1721,7 +1751,7 @@
continue;
}
size = vmspace_swap_count(vm);
- if (shortage == VM_OOM_MEM)
+ if (shortage == VM_OOM_MEM || shortage == VM_OOM_MEM_PF)
size += vm_pageout_oom_pagecount(vm);
vm_map_unlock_read(&vm->vm_map);
vmspace_free(vm);
@@ -1923,6 +1953,7 @@
int i;
#endif
+ mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF);
swap_pager_swap_init();
error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
0, 0, "laundry: dom0");
@@ -1966,7 +1997,7 @@
* This function returns with the free queues mutex unlocked.
*/
void
-pagedaemon_wait(int pri, const char *wmesg)
+pagedaemon_wait(int pri, const char *wmesg, int timo)
{
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
@@ -1983,5 +2014,5 @@
}
vm_pages_needed = true;
msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | pri,
- wmesg, 0);
+ wmesg, timo * hz);
}
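
The vm_pageout.c hunks above rate-limit OOM kills that originate from page-fault timeouts: the first request in each vm.oom_pf_secs window (default 10 seconds) resets the counter and proceeds, later VM_OOM_MEM_PF requests inside the same window are dropped, and OOM requests from other sources are never suppressed. A user-space sketch of that windowed rate limiter follows; allow_oom_pf() is a hypothetical name, not a kernel function.

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t ratelim_mtx = PTHREAD_MUTEX_INITIALIZER;
static time_t ratelim_last;             /* start of the current window */
static int ratelim_count;               /* requests seen in the window */
static int oom_pf_secs = 10;            /* analogue of vm.oom_pf_secs */

static bool
allow_oom_pf(void)
{
        struct timespec ts;
        bool allow;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        pthread_mutex_lock(&ratelim_mtx);
        if (ts.tv_sec - ratelim_last >= oom_pf_secs) {
                /* Window expired: start a new one and let this request through. */
                ratelim_last = ts.tv_sec;
                ratelim_count = 0;
                allow = true;
        } else {
                /* Inside the window: only the first extra request gets through. */
                allow = (ratelim_count++ == 0);
        }
        pthread_mutex_unlock(&ratelim_mtx);
        return (allow);
}
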
Index: sys/vm/vm_swapout.c
===================================================================
--- sys/vm/vm_swapout.c
+++ sys/vm/vm_swapout.c
@@ -152,6 +152,11 @@
&swap_idle_threshold2, 0,
"Time before a process will be swapped out");
+static int swapper_swapin_oom_timeout = 1;
+SYSCTL_INT(_vm, OID_AUTO, swapper_swapin_oom_timeout, CTLFLAG_RW,
+ &swapper_swapin_oom_timeout, 0,
+ "Interval for swapper to try faultin killed processes on OOM");
+
static int vm_pageout_req_swapout; /* XXX */
static int vm_daemon_needed;
static struct mtx vm_daemon_mtx;
@@ -164,7 +169,7 @@
static void vm_swapout_object_deactivate_pages(pmap_t, vm_object_t, long);
static void swapout_procs(int action);
static void vm_req_vmdaemon(int req);
-static void vm_thread_swapin(struct thread *td);
+static void vm_thread_swapin(struct thread *td, bool oom_swapin);
static void vm_thread_swapout(struct thread *td);
/*
@@ -203,6 +208,8 @@
TAILQ_FOREACH(p, &object->memq, listq) {
if (pmap_resident_count(pmap) <= desired)
goto unlock_return;
+ if (should_yield())
+ goto unlock_return;
if (vm_page_busied(p))
continue;
VM_CNT_INC(v_pdpages);
@@ -516,8 +523,10 @@
PRELE(p);
}
sx_sunlock(&allproc_lock);
- if (tryagain != 0 && attempts <= 10)
+ if (tryagain != 0 && attempts <= 10) {
+ maybe_yield();
goto again;
+ }
}
}
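
The two vm_swapout.c changes above make long scans cooperative: vm_swapout_object_deactivate_pages() bails out of its page walk once should_yield() reports the thread has run long enough, and swapout_procs() calls maybe_yield() between retries of its allproc scan. A trivial user-space sketch of the same idea, using sched_yield() as a stand-in for the kernel primitives, is shown below.

#include <sched.h>

#define YIELD_EVERY     1024            /* yield once per 1024 items */

static void
scan_items(int *items, int n)
{
        for (int i = 0; i < n; i++) {
                items[i] = 0;                   /* stand-in for real work */
                if ((i & (YIELD_EVERY - 1)) == 0)
                        sched_yield();          /* analogue of maybe_yield() */
        }
}
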
@@ -552,7 +561,7 @@
* Bring the kernel stack for a specified thread back in.
*/
static void
-vm_thread_swapin(struct thread *td)
+vm_thread_swapin(struct thread *td, bool oom_swapin)
{
vm_object_t ksobj;
vm_page_t ma[KSTACK_MAX_PAGES];
@@ -561,8 +570,8 @@
pages = td->td_kstack_pages;
ksobj = td->td_kstack_obj;
VM_OBJECT_WLOCK(ksobj);
- (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_WIRED, ma,
- pages);
+ (void)vm_page_grab_pages(ksobj, 0, (oom_swapin ? VM_ALLOC_SYSTEM :
+ VM_ALLOC_NORMAL) | VM_ALLOC_WIRED, ma, pages);
for (i = 0; i < pages;) {
vm_page_assert_xbusied(ma[i]);
if (ma[i]->valid == VM_PAGE_BITS_ALL) {
@@ -590,8 +599,8 @@
cpu_thread_swapin(td);
}
-void
-faultin(struct proc *p)
+static void
+faultin1(struct proc *p, bool oom_swapin)
{
struct thread *td;
@@ -620,7 +629,7 @@
* swapped out.
*/
FOREACH_THREAD_IN_PROC(p, td)
- vm_thread_swapin(td);
+ vm_thread_swapin(td, oom_swapin);
PROC_LOCK(p);
swapclear(p);
p->p_swtick = ticks;
@@ -632,6 +641,13 @@
}
}
+void
+faultin(struct proc *p)
+{
+
+ faultin1(p, false);
+}
+
/*
* This swapin algorithm attempts to swap-in processes only if there
* is enough space for them. Of course, if a process waits for a long
@@ -646,7 +662,34 @@
loop:
if (vm_page_count_min()) {
- VM_WAIT;
+ /*
+ * We are low on memory. A swapped-out process might
+ * have mapped a large portion of the system's pages
+ * as anonymous memory. The only way to release that
+ * memory is to kill the process, and to do that the
+ * process must first be swapped back in.
+ */
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ /*
+ * Ensure that the pages for kernel stacks are
+ * allocated with higher priority.
+ */
+ if (p->p_state == PRS_NORMAL && (p->p_flag &
+ (P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM |
+ P_WKILLED)) == P_WKILLED) {
+ sx_sunlock(&allproc_lock);
+ faultin1(p, true);
+ PROC_UNLOCK(p);
+ goto loop;
+ }
+
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
+
+ vm_wait(swapper_swapin_oom_timeout);
goto loop;
}
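
Finally, the swapper change above covers the case where the OOM killer has chosen a process that is currently swapped out: its memory cannot be released until the process is brought back in, so under a page shortage the swapper now scans for non-resident P_WKILLED processes, faults them in with VM_ALLOC_SYSTEM priority (faultin1() with oom_swapin set), and otherwise waits with a timeout instead of sleeping indefinitely. For completeness, the knobs introduced by this diff can be inspected from user space with sysctlbyname(3); the snippet below only reads them and is not part of the patch.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        static const char *oids[] = {
                "vm.pfault_oom_attempts",
                "vm.pfault_oom_wait",
                "vm.oom_pf_secs",
                "vm.swapper_swapin_oom_timeout",
        };
        int val;
        size_t len;

        for (size_t i = 0; i < sizeof(oids) / sizeof(oids[0]); i++) {
                len = sizeof(val);
                if (sysctlbyname(oids[i], &val, &len, NULL, 0) == 0)
                        printf("%s = %d\n", oids[i], val);
                else
                        printf("%s: not present in this kernel\n", oids[i]);
        }
        return (0);
}
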
