Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F153871328
D21629.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
12 KB
Referenced Files
None
Subscribers
None
D21629.diff
View Options
Index: head/sys/vm/vm_meter.c
===================================================================
--- head/sys/vm/vm_meter.c
+++ head/sys/vm/vm_meter.c
@@ -552,6 +552,9 @@
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"free_severe", CTLFLAG_RD, &vmd->vmd_free_severe, 0,
"Severe free pages");
+ SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+ "inactive_pps", CTLFLAG_RD, &vmd->vmd_inactive_pps, 0,
+ "inactive pages freed/second");
}
Index: head/sys/vm/vm_page.h
===================================================================
--- head/sys/vm/vm_page.h
+++ head/sys/vm/vm_page.h
@@ -630,6 +630,7 @@
void vm_page_free_invalid(vm_page_t);
vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
+void vm_page_init_marker(vm_page_t marker, int queue, uint16_t aflags);
int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
void vm_page_invalid(vm_page_t m);
void vm_page_launder(vm_page_t m);
Index: head/sys/vm/vm_page.c
===================================================================
--- head/sys/vm/vm_page.c
+++ head/sys/vm/vm_page.c
@@ -421,7 +421,7 @@
* In principle, this function only needs to set the flag PG_MARKER.
* Nonetheless, it write busies the page as a safety precaution.
*/
-static void
+void
vm_page_init_marker(vm_page_t marker, int queue, uint16_t aflags)
{
@@ -2488,7 +2488,7 @@
* main purpose is to replenish the store of free pages.
*/
if (vmd->vmd_severeset || curproc == pageproc ||
- !_vm_domain_allocate(vmd, VM_ALLOC_NORMAL, cnt))
+ !_vm_domain_allocate(vmd, VM_ALLOC_SYSTEM, cnt))
return (0);
domain = vmd->vmd_domain;
vm_domain_free_lock(vmd);
Index: head/sys/vm/vm_pageout.c
===================================================================
--- head/sys/vm/vm_pageout.c
+++ head/sys/vm/vm_pageout.c
@@ -82,6 +82,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/blockcount.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -163,6 +164,12 @@
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
CTLFLAG_RWTUN, &vm_pageout_update_period, 0,
"Maximum active LRU update period");
+
+/* Access with get_pageout_threads_per_domain(). */
+static int pageout_threads_per_domain = 1;
+SYSCTL_INT(_vm, OID_AUTO, pageout_threads_per_domain, CTLFLAG_RDTUN,
+ &pageout_threads_per_domain, 0,
+ "Number of worker threads comprising each per-domain pagedaemon");
SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RWTUN, &lowmem_period, 0,
"Low memory callback period");
@@ -1414,23 +1421,23 @@
vm_batchqueue_init(bq);
}
-/*
- * Attempt to reclaim the requested number of pages from the inactive queue.
- * Returns true if the shortage was addressed.
- */
-static int
-vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
- int *addl_shortage)
+static void
+vm_pageout_scan_inactive(struct vm_domain *vmd, int page_shortage)
{
+ struct timeval start, end;
struct scan_state ss;
struct vm_batchqueue rq;
+ struct vm_page marker_page;
vm_page_t m, marker;
struct vm_pagequeue *pq;
vm_object_t object;
vm_page_astate_t old, new;
- int act_delta, addl_page_shortage, deficit, page_shortage, refs;
- int starting_page_shortage;
+ int act_delta, addl_page_shortage, starting_page_shortage, refs;
+ object = NULL;
+ vm_batchqueue_init(&rq);
+ getmicrouptime(&start);
+
/*
* The addl_page_shortage is an estimate of the number of temporarily
* stuck pages in the inactive queue. In other words, the
@@ -1440,24 +1447,14 @@
addl_page_shortage = 0;
/*
- * vmd_pageout_deficit counts the number of pages requested in
- * allocations that failed because of a free page shortage. We assume
- * that the allocations will be reattempted and thus include the deficit
- * in our scan target.
- */
- deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
- starting_page_shortage = page_shortage = shortage + deficit;
-
- object = NULL;
- vm_batchqueue_init(&rq);
-
- /*
* Start scanning the inactive queue for pages that we can free. The
* scan will stop when we reach the target or we have scanned the
* entire queue. (Note that m->a.act_count is not used to make
* decisions for the inactive queue, only for the active queue.)
*/
- marker = &vmd->vmd_markers[PQ_INACTIVE];
+ starting_page_shortage = page_shortage;
+ marker = &marker_page;
+ vm_page_init_marker(marker, PQ_INACTIVE, 0);
pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
vm_pagequeue_lock(pq);
vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt);
@@ -1637,9 +1634,99 @@
vm_pageout_end_scan(&ss);
vm_pagequeue_unlock(pq);
- VM_CNT_ADD(v_dfree, starting_page_shortage - page_shortage);
+ /*
+ * Record the remaining shortage and the progress and rate it was made.
+ */
+ atomic_add_int(&vmd->vmd_addl_shortage, addl_page_shortage);
+ getmicrouptime(&end);
+ timevalsub(&end, &start);
+ atomic_add_int(&vmd->vmd_inactive_us,
+ end.tv_sec * 1000000 + end.tv_usec);
+ atomic_add_int(&vmd->vmd_inactive_freed,
+ starting_page_shortage - page_shortage);
+}
+/*
+ * Dispatch a number of inactive threads according to load and collect the
+ * results in order to present a coherent view of paging activity on
+ * this domain.
+ */
+static int
+vm_pageout_inactive_dispatch(struct vm_domain *vmd, int shortage)
+{
+ u_int freed, pps, threads, us;
+
+ vmd->vmd_inactive_shortage = shortage;
+
/*
+ * If we have more work than we can do in a quarter of our interval, we
+ * fire off multiple threads to process it.
+ */
+ if (vmd->vmd_inactive_threads > 1 && vmd->vmd_inactive_pps != 0 &&
+ shortage > vmd->vmd_inactive_pps / VM_INACT_SCAN_RATE / 4) {
+ threads = vmd->vmd_inactive_threads;
+ vm_domain_pageout_lock(vmd);
+ vmd->vmd_inactive_shortage /= threads;
+ blockcount_acquire(&vmd->vmd_inactive_starting, threads - 1);
+ blockcount_acquire(&vmd->vmd_inactive_running, threads - 1);
+ wakeup(&vmd->vmd_inactive_shortage);
+ vm_domain_pageout_unlock(vmd);
+ }
+
+ /* Run the local thread scan. */
+ vm_pageout_scan_inactive(vmd, vmd->vmd_inactive_shortage);
+
+ /*
+ * Block until helper threads report results and then accumulate
+ * totals.
+ */
+ blockcount_wait(&vmd->vmd_inactive_running, NULL, "vmpoid", PVM);
+ freed = atomic_readandclear_int(&vmd->vmd_inactive_freed);
+ VM_CNT_ADD(v_dfree, freed);
+
+ /*
+ * Calculate the per-thread paging rate with an exponential decay of
+ * prior results. Careful to avoid integer rounding errors with large
+ * us values.
+ */
+ us = max(atomic_readandclear_int(&vmd->vmd_inactive_us), 1);
+ if (us > 1000000)
+ /* Keep rounding to tenths */
+ pps = (freed * 10) / ((us * 10) / 1000000);
+ else
+ pps = (1000000 / us) * freed;
+ vmd->vmd_inactive_pps = (vmd->vmd_inactive_pps / 2) + (pps / 2);
+
+ return (shortage - freed);
+}
+
+/*
+ * Attempt to reclaim the requested number of pages from the inactive queue.
+ * Returns true if the shortage was addressed.
+ */
+static int
+vm_pageout_inactive(struct vm_domain *vmd, int shortage, int *addl_shortage)
+{
+ struct vm_pagequeue *pq;
+ u_int addl_page_shortage, deficit, page_shortage;
+ u_int starting_page_shortage;
+
+ /*
+ * vmd_pageout_deficit counts the number of pages requested in
+ * allocations that failed because of a free page shortage. We assume
+ * that the allocations will be reattempted and thus include the deficit
+ * in our scan target.
+ */
+ deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
+ starting_page_shortage = shortage + deficit;
+
+ /*
+ * Run the inactive scan on as many threads as is necessary.
+ */
+ page_shortage = vm_pageout_inactive_dispatch(vmd, starting_page_shortage);
+ addl_page_shortage = atomic_readandclear_int(&vmd->vmd_addl_shortage);
+
+ /*
* Wake up the laundry thread so that it can perform any needed
* laundering. If we didn't meet our target, we're in shortfall and
* need to launder more aggressively. If PQ_LAUNDRY is empty and no
@@ -2066,7 +2153,7 @@
if (vm_pageout_lowmem() && vmd->vmd_free_count > ofree)
shortage -= min(vmd->vmd_free_count - ofree,
(u_int)shortage);
- target_met = vm_pageout_scan_inactive(vmd, shortage,
+ target_met = vm_pageout_inactive(vmd, shortage,
&addl_shortage);
} else
addl_shortage = 0;
@@ -2082,6 +2169,72 @@
}
/*
+ * vm_pageout_helper runs additional pageout daemons in times of high paging
+ * activity.
+ */
+static void
+vm_pageout_helper(void *arg)
+{
+ struct vm_domain *vmd;
+ int domain;
+
+ domain = (uintptr_t)arg;
+ vmd = VM_DOMAIN(domain);
+
+ vm_domain_pageout_lock(vmd);
+ for (;;) {
+ msleep(&vmd->vmd_inactive_shortage,
+ vm_domain_pageout_lockptr(vmd), PVM, "psleep", 0);
+ blockcount_release(&vmd->vmd_inactive_starting, 1);
+
+ vm_domain_pageout_unlock(vmd);
+ vm_pageout_scan_inactive(vmd, vmd->vmd_inactive_shortage);
+ vm_domain_pageout_lock(vmd);
+
+ /*
+ * Release the running count while the pageout lock is held to
+ * prevent wakeup races.
+ */
+ blockcount_release(&vmd->vmd_inactive_running, 1);
+ }
+}
+
+static int
+get_pageout_threads_per_domain(void)
+{
+ static bool resolved = false;
+ int half_cpus_per_dom;
+
+ /*
+ * This is serialized externally by the sorted autoconfig portion of
+ * boot.
+ */
+ if (__predict_true(resolved))
+ return (pageout_threads_per_domain);
+
+ /*
+ * Semi-arbitrarily constrain pagedaemon threads to less than half the
+ * total number of threads in the system as an insane upper limit.
+ */
+ half_cpus_per_dom = (mp_ncpus / vm_ndomains) / 2;
+
+ if (pageout_threads_per_domain < 1) {
+ printf("Invalid tuneable vm.pageout_threads_per_domain value: "
+ "%d out of valid range: [1-%d]; clamping to 1\n",
+ pageout_threads_per_domain, half_cpus_per_dom);
+ pageout_threads_per_domain = 1;
+ } else if (pageout_threads_per_domain > half_cpus_per_dom) {
+ printf("Invalid tuneable vm.pageout_threads_per_domain value: "
+ "%d out of valid range: [1-%d]; clamping to %d\n",
+ pageout_threads_per_domain, half_cpus_per_dom,
+ half_cpus_per_dom);
+ pageout_threads_per_domain = half_cpus_per_dom;
+ }
+ resolved = true;
+ return (pageout_threads_per_domain);
+}
+
+/*
* Initialize basic pageout daemon settings. See the comment above the
* definition of vm_domain for some explanation of how these thresholds are
* used.
@@ -2134,6 +2287,8 @@
oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO,
"pidctrl", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
pidctrl_init_sysctl(&vmd->vmd_pid, SYSCTL_CHILDREN(oid));
+
+ vmd->vmd_inactive_threads = get_pageout_threads_per_domain();
}
static void
@@ -2184,10 +2339,11 @@
{
struct proc *p;
struct thread *td;
- int error, first, i;
+ int error, first, i, j, pageout_threads;
p = curproc;
td = curthread;
+ pageout_threads = get_pageout_threads_per_domain();
mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF);
swap_pager_swap_init();
@@ -2206,6 +2362,14 @@
if (error != 0)
panic("starting pageout for domain %d: %d\n",
i, error);
+ }
+ for (j = 0; j < pageout_threads - 1; j++) {
+ error = kthread_add(vm_pageout_helper,
+ (void *)(uintptr_t)i, p, NULL, 0, 0,
+ "dom%d helper%d", i, j);
+ if (error != 0)
+ panic("starting pageout helper %d for domain "
+ "%d: %d\n", j, i, error);
}
error = kthread_add(vm_pageout_laundry_worker,
(void *)(uintptr_t)i, p, NULL, 0, 0, "laundry: dom%d", i);
Index: head/sys/vm/vm_pagequeue.h
===================================================================
--- head/sys/vm/vm_pagequeue.h
+++ head/sys/vm/vm_pagequeue.h
@@ -84,6 +84,7 @@
} __aligned(CACHE_LINE_SIZE);
#include <vm/uma.h>
+#include <sys/_blockcount.h>
#include <sys/pidctrl.h>
struct sysctl_oid;
@@ -254,6 +255,14 @@
/* Paging control variables, used within single threaded page daemon. */
struct pidctrl vmd_pid; /* Pageout controller. */
boolean_t vmd_oom;
+ u_int vmd_inactive_threads;
+ u_int vmd_inactive_shortage; /* Per-thread shortage. */
+ blockcount_t vmd_inactive_running; /* Number of inactive threads. */
+ blockcount_t vmd_inactive_starting; /* Number of threads started. */
+ volatile u_int vmd_addl_shortage; /* Shortage accumulator. */
+ volatile u_int vmd_inactive_freed; /* Successful inactive frees. */
+ volatile u_int vmd_inactive_us; /* Microseconds for above. */
+ u_int vmd_inactive_pps; /* Exponential decay frees/second. */
int vmd_oom_seq;
int vmd_last_active_scan;
struct vm_page vmd_markers[PQ_COUNT]; /* (q) markers for queue scans */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Apr 25, 10:25 AM (12 h, 11 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32089547
Default Alt Text
D21629.diff (12 KB)
Attached To
Mode
D21629: Add support for multithreading the inactive queue pageout within a domain.
Attached
Detach File
Event Timeline
Log In to Comment