D13671.id37194.diff

Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -134,6 +134,16 @@
static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
int backward, int forward);
+static int vm_pfault_oom_attempts = 3;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_attempts, CTLFLAG_RWTUN,
+ &vm_pfault_oom_attempts, 0,
+ "");
+
+static int vm_pfault_oom_wait = 10;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN,
+ &vm_pfault_oom_wait, 0,
+ "");
+
static inline void
release_page(struct faultstate *fs)
{
@@ -531,7 +541,7 @@
vm_pindex_t retry_pindex;
vm_prot_t prot, retry_prot;
int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
- int locked, nera, result, rv;
+ int locked, nera, oom, result, rv;
u_char behavior;
boolean_t wired; /* Passed by reference. */
bool dead, hardfault, is_first_object_locked;
@@ -542,7 +552,9 @@
nera = -1;
hardfault = false;
-RetryFault:;
+RetryFault:
+ oom = 0;
+RetryFault_oom:
/*
* Find the backing store object and offset into it to begin the
@@ -787,7 +799,17 @@
}
if (fs.m == NULL) {
unlock_and_deallocate(&fs);
- VM_WAITPFAULT;
+ if (vm_pfault_oom_attempts < 0 ||
+ oom < vm_pfault_oom_attempts) {
+ oom++;
+ vm_waitpfault(vm_pfault_oom_wait);
+ goto RetryFault_oom;
+ }
+ if (bootverbose)
+ printf(
+ "proc %d (%s) failed to alloc page on fault, starting OOM\n",
+ curproc->p_pid, curproc->p_comm);
+ vm_pageout_oom(VM_OOM_MEM_PF);
goto RetryFault;
}
}
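
Above, the page fault handler no longer sleeps unconditionally via VM_WAITPFAULT when a page allocation fails: it waits with a timeout (vm.pfault_oom_wait seconds), retries up to vm.pfault_oom_attempts times, and only then calls vm_pageout_oom(VM_OOM_MEM_PF); a negative attempt budget disables the escalation. The following is a minimal user-space sketch of that bounded wait-retry-escalate pattern; try_alloc() and the other helpers are hypothetical stand-ins, not kernel interfaces.

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static int oom_attempts = 3;    /* analogue of vm.pfault_oom_attempts */
static int oom_wait = 1;        /* analogue of vm.pfault_oom_wait, in seconds */
static int calls;

/* Hypothetical allocator that only succeeds on its fifth call. */
static bool
try_alloc(void)
{
        return (++calls >= 5);
}

int
main(void)
{
        int oom;

retry:
        oom = 0;
retry_oom:
        if (!try_alloc()) {
                /* A negative budget means "retry forever, never trigger OOM". */
                if (oom_attempts < 0 || oom < oom_attempts) {
                        oom++;
                        sleep(oom_wait);        /* timed wait for free pages */
                        goto retry_oom;
                }
                printf("attempt budget exhausted, escalating to OOM\n");
                /* the kernel calls vm_pageout_oom(VM_OOM_MEM_PF) here */
                goto retry;                     /* then restarts the whole fault */
        }
        printf("allocation succeeded after %d attempts\n", calls);
        return (0);
}
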
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -2652,7 +2652,7 @@
* - Called in various places before memory allocations.
*/
static void
-_vm_wait(void)
+_vm_wait(int timo)
{
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
@@ -2663,16 +2663,16 @@
} else {
if (pageproc == NULL)
panic("vm_wait in early boot");
- pagedaemon_wait(PVM, "vmwait");
+ pagedaemon_wait(PVM, "vmwait", timo);
}
}
void
-vm_wait(void)
+vm_wait(int timo)
{
mtx_lock(&vm_page_queue_free_mtx);
- _vm_wait();
+ _vm_wait(timo);
}
/*
@@ -2696,7 +2696,7 @@
if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) {
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
- _vm_wait();
+ _vm_wait(0);
if (object != NULL)
VM_OBJECT_WLOCK(object);
if (req & VM_ALLOC_WAITOK)
@@ -2719,11 +2719,11 @@
* this balance without careful testing first.
*/
void
-vm_waitpfault(void)
+vm_waitpfault(int timo)
{
mtx_lock(&vm_page_queue_free_mtx);
- pagedaemon_wait(PUSER, "pfault");
+ pagedaemon_wait(PUSER, "pfault", timo);
}
struct vm_pagequeue *
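
The vm_page.c changes above thread an integer timeout through _vm_wait()/vm_wait()/vm_waitpfault() down to pagedaemon_wait(); passing 0 keeps the old behaviour of sleeping until the page daemon wakes the thread, because msleep(9) treats a zero timo as no timeout. Below is a minimal user-space analogue of that optional-timeout wait, built on POSIX condition variables; the names are illustrative only, not part of the patch.

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t free_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  free_cv  = PTHREAD_COND_INITIALIZER;
static bool pages_available;

/* Analogue of pagedaemon_wait(): block until woken, or for at most timo seconds. */
static void
wait_for_pages(int timo)
{
        struct timespec deadline;

        pthread_mutex_lock(&free_mtx);
        if (timo == 0) {
                /* timo == 0: wait indefinitely, like msleep() with timo 0. */
                while (!pages_available)
                        pthread_cond_wait(&free_cv, &free_mtx);
        } else {
                clock_gettime(CLOCK_REALTIME, &deadline);
                deadline.tv_sec += timo;
                /* Give up either when pages appear or when the deadline passes. */
                while (!pages_available &&
                    pthread_cond_timedwait(&free_cv, &free_mtx, &deadline) == 0)
                        ;
        }
        pthread_mutex_unlock(&free_mtx);
}
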
Index: sys/vm/vm_pageout.h
===================================================================
--- sys/vm/vm_pageout.h
+++ sys/vm/vm_pageout.h
@@ -79,7 +79,8 @@
extern bool vm_pages_needed;
#define VM_OOM_MEM 1
-#define VM_OOM_SWAPZ 2
+#define VM_OOM_MEM_PF 2
+#define VM_OOM_SWAPZ 3
/*
* vm_lowmem flags.
@@ -95,12 +96,11 @@
* Signal pageout-daemon and wait for it.
*/
-void pagedaemon_wait(int pri, const char *wmesg);
+void pagedaemon_wait(int pri, const char *wmesg, int timo);
void pagedaemon_wakeup(void);
-#define VM_WAIT vm_wait()
-#define VM_WAITPFAULT vm_waitpfault()
-void vm_wait(void);
-void vm_waitpfault(void);
+#define VM_WAIT vm_wait(0)
+void vm_wait(int timo);
+void vm_waitpfault(int timo);
#ifdef _KERNEL
int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *);
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -1648,6 +1648,13 @@
return (res);
}
+static int vm_oom_ratelim_count;
+static int vm_oom_ratelim_last;
+static int vm_oom_pf_secs = 10;
+SYSCTL_INT(_vm, OID_AUTO, oom_pf_secs, CTLFLAG_RWTUN, &vm_oom_pf_secs, 0,
+ "");
+static struct mtx vm_oom_ratelim_mtx;
+
void
vm_pageout_oom(int shortage)
{
@@ -1655,9 +1662,32 @@
vm_offset_t size, bigsize;
struct thread *td;
struct vmspace *vm;
+ int now;
bool breakout;
/*
+ * For OOM requests originating from vm_fault(), there is a high
+ * chance that a single large process faults simultaneously in
+ * several threads. Also, on an active system running many
+ * middle-sized processes, as during a buildworld, all of them
+ * could fault almost simultaneously as well.
+ *
+ * To avoid killing too many processes, rate-limit OOMs
+ * initiated by vm_fault() time-outs on the waits for free
+ * pages.
+ */
+ mtx_lock(&vm_oom_ratelim_mtx);
+ now = ticks;
+ if ((u_int)(now - vm_oom_ratelim_last) >= hz * vm_oom_pf_secs) {
+ vm_oom_ratelim_last = now;
+ vm_oom_ratelim_count = 0;
+ } else if (vm_oom_ratelim_count++ > 0 && shortage == VM_OOM_MEM_PF) {
+ mtx_unlock(&vm_oom_ratelim_mtx);
+ return;
+ }
+ mtx_unlock(&vm_oom_ratelim_mtx);
+
+ /*
* We keep the process bigproc locked once we find it to keep anyone
* from messing with it; however, there is a possibility of
* deadlock if process B is bigproc and one of its child processes
@@ -1721,7 +1751,7 @@
continue;
}
size = vmspace_swap_count(vm);
- if (shortage == VM_OOM_MEM)
+ if (shortage == VM_OOM_MEM || shortage == VM_OOM_MEM_PF)
size += vm_pageout_oom_pagecount(vm);
vm_map_unlock_read(&vm->vm_map);
vmspace_free(vm);
@@ -1923,6 +1953,7 @@
int i;
#endif
+ mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF);
swap_pager_swap_init();
error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
0, 0, "laundry: dom0");
@@ -1966,7 +1997,7 @@
* This function returns with the free queues mutex unlocked.
*/
void
-pagedaemon_wait(int pri, const char *wmesg)
+pagedaemon_wait(int pri, const char *wmesg, int timo)
{
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
@@ -1983,5 +2014,5 @@
}
vm_pages_needed = true;
msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | pri,
- wmesg, 0);
+ wmesg, timo * hz);
}
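
The vm_pageout.c hunks above rate-limit OOM kills that originate from page-fault timeouts: the first request in each vm.oom_pf_secs window (default 10 seconds) resets the counter and proceeds, later VM_OOM_MEM_PF requests inside the same window are dropped, and OOM requests from other sources are never suppressed. A user-space sketch of that windowed rate limiter follows; allow_oom_pf() is a hypothetical name, not a kernel function.

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t ratelim_mtx = PTHREAD_MUTEX_INITIALIZER;
static time_t ratelim_last;             /* start of the current window */
static int ratelim_count;               /* requests seen in the window */
static int oom_pf_secs = 10;            /* analogue of vm.oom_pf_secs */

static bool
allow_oom_pf(void)
{
        struct timespec ts;
        bool allow;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        pthread_mutex_lock(&ratelim_mtx);
        if (ts.tv_sec - ratelim_last >= oom_pf_secs) {
                /* Window expired: start a new one and let this request through. */
                ratelim_last = ts.tv_sec;
                ratelim_count = 0;
                allow = true;
        } else {
                /* Inside the window: only the first extra request gets through. */
                allow = (ratelim_count++ == 0);
        }
        pthread_mutex_unlock(&ratelim_mtx);
        return (allow);
}
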
Index: sys/vm/vm_swapout.c
===================================================================
--- sys/vm/vm_swapout.c
+++ sys/vm/vm_swapout.c
@@ -152,6 +152,11 @@
&swap_idle_threshold2, 0,
"Time before a process will be swapped out");
+static int swapper_swapin_oom_timeout = 1;
+SYSCTL_INT(_vm, OID_AUTO, swapper_swapin_oom_timeout, CTLFLAG_RW,
+ &swapper_swapin_oom_timeout, 0,
+ "Interval for swapper to try faultin killed processes on OOM");
+
static int vm_pageout_req_swapout; /* XXX */
static int vm_daemon_needed;
static struct mtx vm_daemon_mtx;
@@ -164,7 +169,7 @@
static void vm_swapout_object_deactivate_pages(pmap_t, vm_object_t, long);
static void swapout_procs(int action);
static void vm_req_vmdaemon(int req);
-static void vm_thread_swapin(struct thread *td);
+static void vm_thread_swapin(struct thread *td, bool oom_swapin);
static void vm_thread_swapout(struct thread *td);
/*
@@ -203,6 +208,8 @@
TAILQ_FOREACH(p, &object->memq, listq) {
if (pmap_resident_count(pmap) <= desired)
goto unlock_return;
+ if (should_yield())
+ goto unlock_return;
if (vm_page_busied(p))
continue;
VM_CNT_INC(v_pdpages);
@@ -516,8 +523,10 @@
PRELE(p);
}
sx_sunlock(&allproc_lock);
- if (tryagain != 0 && attempts <= 10)
+ if (tryagain != 0 && attempts <= 10) {
+ maybe_yield();
goto again;
+ }
}
}
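
The two vm_swapout.c changes above make long scans cooperative: vm_swapout_object_deactivate_pages() bails out of its page walk once should_yield() reports the thread has run long enough, and swapout_procs() calls maybe_yield() between retries of its allproc scan. A trivial user-space sketch of the same idea, using sched_yield() as a stand-in for the kernel primitives, is shown below.

#include <sched.h>

#define YIELD_EVERY     1024            /* yield once per 1024 items */

static void
scan_items(int *items, int n)
{
        for (int i = 0; i < n; i++) {
                items[i] = 0;                   /* stand-in for real work */
                if ((i & (YIELD_EVERY - 1)) == 0)
                        sched_yield();          /* analogue of maybe_yield() */
        }
}
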
@@ -552,7 +561,7 @@
* Bring the kernel stack for a specified thread back in.
*/
static void
-vm_thread_swapin(struct thread *td)
+vm_thread_swapin(struct thread *td, bool oom_swapin)
{
vm_object_t ksobj;
vm_page_t ma[KSTACK_MAX_PAGES];
@@ -561,8 +570,8 @@
pages = td->td_kstack_pages;
ksobj = td->td_kstack_obj;
VM_OBJECT_WLOCK(ksobj);
- (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_WIRED, ma,
- pages);
+ (void)vm_page_grab_pages(ksobj, 0, (oom_swapin ? VM_ALLOC_SYSTEM :
+ VM_ALLOC_NORMAL) | VM_ALLOC_WIRED, ma, pages);
for (i = 0; i < pages;) {
vm_page_assert_xbusied(ma[i]);
if (ma[i]->valid == VM_PAGE_BITS_ALL) {
@@ -590,8 +599,8 @@
cpu_thread_swapin(td);
}
-void
-faultin(struct proc *p)
+static void
+faultin1(struct proc *p, bool oom_swapin)
{
struct thread *td;
@@ -620,7 +629,7 @@
* swapped out.
*/
FOREACH_THREAD_IN_PROC(p, td)
- vm_thread_swapin(td);
+ vm_thread_swapin(td, oom_swapin);
PROC_LOCK(p);
swapclear(p);
p->p_swtick = ticks;
@@ -632,6 +641,13 @@
}
}
+void
+faultin(struct proc *p)
+{
+
+ faultin1(p, false);
+}
+
/*
* This swapin algorithm attempts to swap-in processes only if there
* is enough space for them. Of course, if a process waits for a long
@@ -646,7 +662,34 @@
loop:
if (vm_page_count_min()) {
- VM_WAIT;
+ /*
+ * We are low on memory. A swapped-out process might
+ * have mapped a large portion of the system's pages
+ * as anonymous memory. The only way to release that
+ * memory is to kill the process, and to do that the
+ * process must first be swapped back in.
+ */
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ /*
+ * Ensure that the pages for kernel stacks are
+ * allocated with higher priority.
+ */
+ if (p->p_state == PRS_NORMAL && (p->p_flag &
+ (P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM |
+ P_WKILLED)) == P_WKILLED) {
+ sx_sunlock(&allproc_lock);
+ faultin1(p, true);
+ PROC_UNLOCK(p);
+ goto loop;
+ }
+
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
+
+ vm_wait(swapper_swapin_oom_timeout);
goto loop;
}
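
Finally, the swapper change above covers the case where the OOM killer has chosen a process that is currently swapped out: its memory cannot be released until the process is brought back in, so under a page shortage the swapper now scans for non-resident P_WKILLED processes, faults them in with VM_ALLOC_SYSTEM priority (faultin1() with oom_swapin set), and otherwise waits with a timeout instead of sleeping indefinitely. For completeness, the knobs introduced by this diff can be inspected from user space with sysctlbyname(3); the snippet below only reads them and is not part of the patch.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        static const char *oids[] = {
                "vm.pfault_oom_attempts",
                "vm.pfault_oom_wait",
                "vm.oom_pf_secs",
                "vm.swapper_swapin_oom_timeout",
        };
        int val;
        size_t len;

        for (size_t i = 0; i < sizeof(oids) / sizeof(oids[0]); i++) {
                len = sizeof(val);
                if (sysctlbyname(oids[i], &val, &len, NULL, 0) == 0)
                        printf("%s = %d\n", oids[i], val);
                else
                        printf("%s: not present in this kernel\n", oids[i]);
        }
        return (0);
}
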
