Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F153871328
D21629.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
12 KB
Referenced Files
None
Subscribers
None
D21629.diff
View Options
Index: head/sys/vm/vm_meter.c
===================================================================
--- head/sys/vm/vm_meter.c
+++ head/sys/vm/vm_meter.c
@@ -552,6 +552,9 @@
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"free_severe", CTLFLAG_RD, &vmd->vmd_free_severe, 0,
"Severe free pages");
+ SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+ "inactive_pps", CTLFLAG_RD, &vmd->vmd_inactive_pps, 0,
+ "inactive pages freed/second");
}
Index: head/sys/vm/vm_page.h
===================================================================
--- head/sys/vm/vm_page.h
+++ head/sys/vm/vm_page.h
@@ -630,6 +630,7 @@
void vm_page_free_invalid(vm_page_t);
vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
+void vm_page_init_marker(vm_page_t marker, int queue, uint16_t aflags);
int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
void vm_page_invalid(vm_page_t m);
void vm_page_launder(vm_page_t m);
Index: head/sys/vm/vm_page.c
===================================================================
--- head/sys/vm/vm_page.c
+++ head/sys/vm/vm_page.c
@@ -421,7 +421,7 @@
* In principle, this function only needs to set the flag PG_MARKER.
* Nonetheless, it write busies the page as a safety precaution.
*/
-static void
+void
vm_page_init_marker(vm_page_t marker, int queue, uint16_t aflags)
{
@@ -2488,7 +2488,7 @@
* main purpose is to replenish the store of free pages.
*/
if (vmd->vmd_severeset || curproc == pageproc ||
- !_vm_domain_allocate(vmd, VM_ALLOC_NORMAL, cnt))
+ !_vm_domain_allocate(vmd, VM_ALLOC_SYSTEM, cnt))
return (0);
domain = vmd->vmd_domain;
vm_domain_free_lock(vmd);
Index: head/sys/vm/vm_pageout.c
===================================================================
--- head/sys/vm/vm_pageout.c
+++ head/sys/vm/vm_pageout.c
@@ -82,6 +82,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/blockcount.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -163,6 +164,12 @@
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
CTLFLAG_RWTUN, &vm_pageout_update_period, 0,
"Maximum active LRU update period");
+
+/* Access with get_pageout_threads_per_domain(). */
+static int pageout_threads_per_domain = 1;
+SYSCTL_INT(_vm, OID_AUTO, pageout_threads_per_domain, CTLFLAG_RDTUN,
+ &pageout_threads_per_domain, 0,
+ "Number of worker threads comprising each per-domain pagedaemon");
SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RWTUN, &lowmem_period, 0,
"Low memory callback period");
@@ -1414,23 +1421,23 @@
vm_batchqueue_init(bq);
}
-/*
- * Attempt to reclaim the requested number of pages from the inactive queue.
- * Returns true if the shortage was addressed.
- */
-static int
-vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
- int *addl_shortage)
+static void
+vm_pageout_scan_inactive(struct vm_domain *vmd, int page_shortage)
{
+ struct timeval start, end;
struct scan_state ss;
struct vm_batchqueue rq;
+ struct vm_page marker_page;
vm_page_t m, marker;
struct vm_pagequeue *pq;
vm_object_t object;
vm_page_astate_t old, new;
- int act_delta, addl_page_shortage, deficit, page_shortage, refs;
- int starting_page_shortage;
+ int act_delta, addl_page_shortage, starting_page_shortage, refs;
+ object = NULL;
+ vm_batchqueue_init(&rq);
+ getmicrouptime(&start);
+
/*
* The addl_page_shortage is an estimate of the number of temporarily
* stuck pages in the inactive queue. In other words, the
@@ -1440,24 +1447,14 @@
addl_page_shortage = 0;
/*
- * vmd_pageout_deficit counts the number of pages requested in
- * allocations that failed because of a free page shortage. We assume
- * that the allocations will be reattempted and thus include the deficit
- * in our scan target.
- */
- deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
- starting_page_shortage = page_shortage = shortage + deficit;
-
- object = NULL;
- vm_batchqueue_init(&rq);
-
- /*
* Start scanning the inactive queue for pages that we can free. The
* scan will stop when we reach the target or we have scanned the
* entire queue. (Note that m->a.act_count is not used to make
* decisions for the inactive queue, only for the active queue.)
*/
- marker = &vmd->vmd_markers[PQ_INACTIVE];
+ starting_page_shortage = page_shortage;
+ marker = &marker_page;
+ vm_page_init_marker(marker, PQ_INACTIVE, 0);
pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
vm_pagequeue_lock(pq);
vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt);
@@ -1637,9 +1634,99 @@
vm_pageout_end_scan(&ss);
vm_pagequeue_unlock(pq);
- VM_CNT_ADD(v_dfree, starting_page_shortage - page_shortage);
+ /*
+ * Record the remaining shortage and the progress and rate it was made.
+ */
+ atomic_add_int(&vmd->vmd_addl_shortage, addl_page_shortage);
+ getmicrouptime(&end);
+ timevalsub(&end, &start);
+ atomic_add_int(&vmd->vmd_inactive_us,
+ end.tv_sec * 1000000 + end.tv_usec);
+ atomic_add_int(&vmd->vmd_inactive_freed,
+ starting_page_shortage - page_shortage);
+}
+/*
+ * Dispatch a number of inactive threads according to load and collect the
+ * results in order to present a coherent view of paging activity on
+ * this domain.
+ */
+static int
+vm_pageout_inactive_dispatch(struct vm_domain *vmd, int shortage)
+{
+ u_int freed, pps, threads, us;
+
+ vmd->vmd_inactive_shortage = shortage;
+
/*
+ * If we have more work than we can do in a quarter of our interval, we
+ * fire off multiple threads to process it.
+ */
+ if (vmd->vmd_inactive_threads > 1 && vmd->vmd_inactive_pps != 0 &&
+ shortage > vmd->vmd_inactive_pps / VM_INACT_SCAN_RATE / 4) {
+ threads = vmd->vmd_inactive_threads;
+ vm_domain_pageout_lock(vmd);
+ vmd->vmd_inactive_shortage /= threads;
+ blockcount_acquire(&vmd->vmd_inactive_starting, threads - 1);
+ blockcount_acquire(&vmd->vmd_inactive_running, threads - 1);
+ wakeup(&vmd->vmd_inactive_shortage);
+ vm_domain_pageout_unlock(vmd);
+ }
+
+ /* Run the local thread scan. */
+ vm_pageout_scan_inactive(vmd, vmd->vmd_inactive_shortage);
+
+ /*
+ * Block until helper threads report results and then accumulate
+ * totals.
+ */
+ blockcount_wait(&vmd->vmd_inactive_running, NULL, "vmpoid", PVM);
+ freed = atomic_readandclear_int(&vmd->vmd_inactive_freed);
+ VM_CNT_ADD(v_dfree, freed);
+
+ /*
+ * Calculate the per-thread paging rate with an exponential decay of
+ * prior results. Careful to avoid integer rounding errors with large
+ * us values.
+ */
+ us = max(atomic_readandclear_int(&vmd->vmd_inactive_us), 1);
+ if (us > 1000000)
+ /* Keep rounding to tenths */
+ pps = (freed * 10) / ((us * 10) / 1000000);
+ else
+ pps = (1000000 / us) * freed;
+ vmd->vmd_inactive_pps = (vmd->vmd_inactive_pps / 2) + (pps / 2);
+
+ return (shortage - freed);
+}
+
+/*
+ * Attempt to reclaim the requested number of pages from the inactive queue.
+ * Returns true if the shortage was addressed.
+ */
+static int
+vm_pageout_inactive(struct vm_domain *vmd, int shortage, int *addl_shortage)
+{
+ struct vm_pagequeue *pq;
+ u_int addl_page_shortage, deficit, page_shortage;
+ u_int starting_page_shortage;
+
+ /*
+ * vmd_pageout_deficit counts the number of pages requested in
+ * allocations that failed because of a free page shortage. We assume
+ * that the allocations will be reattempted and thus include the deficit
+ * in our scan target.
+ */
+ deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
+ starting_page_shortage = shortage + deficit;
+
+ /*
+ * Run the inactive scan on as many threads as is necessary.
+ */
+ page_shortage = vm_pageout_inactive_dispatch(vmd, starting_page_shortage);
+ addl_page_shortage = atomic_readandclear_int(&vmd->vmd_addl_shortage);
+
+ /*
* Wake up the laundry thread so that it can perform any needed
* laundering. If we didn't meet our target, we're in shortfall and
* need to launder more aggressively. If PQ_LAUNDRY is empty and no
@@ -2066,7 +2153,7 @@
if (vm_pageout_lowmem() && vmd->vmd_free_count > ofree)
shortage -= min(vmd->vmd_free_count - ofree,
(u_int)shortage);
- target_met = vm_pageout_scan_inactive(vmd, shortage,
+ target_met = vm_pageout_inactive(vmd, shortage,
&addl_shortage);
} else
addl_shortage = 0;
@@ -2082,6 +2169,72 @@
}
/*
+ * vm_pageout_helper runs additional pageout daemons in times of high paging
+ * activity.
+ */
+static void
+vm_pageout_helper(void *arg)
+{
+ struct vm_domain *vmd;
+ int domain;
+
+ domain = (uintptr_t)arg;
+ vmd = VM_DOMAIN(domain);
+
+ vm_domain_pageout_lock(vmd);
+ for (;;) {
+ msleep(&vmd->vmd_inactive_shortage,
+ vm_domain_pageout_lockptr(vmd), PVM, "psleep", 0);
+ blockcount_release(&vmd->vmd_inactive_starting, 1);
+
+ vm_domain_pageout_unlock(vmd);
+ vm_pageout_scan_inactive(vmd, vmd->vmd_inactive_shortage);
+ vm_domain_pageout_lock(vmd);
+
+ /*
+ * Release the running count while the pageout lock is held to
+ * prevent wakeup races.
+ */
+ blockcount_release(&vmd->vmd_inactive_running, 1);
+ }
+}
+
+static int
+get_pageout_threads_per_domain(void)
+{
+ static bool resolved = false;
+ int half_cpus_per_dom;
+
+ /*
+ * This is serialized externally by the sorted autoconfig portion of
+ * boot.
+ */
+ if (__predict_true(resolved))
+ return (pageout_threads_per_domain);
+
+ /*
+ * Semi-arbitrarily constrain pagedaemon threads to less than half the
+ * total number of threads in the system as an insane upper limit.
+ */
+ half_cpus_per_dom = (mp_ncpus / vm_ndomains) / 2;
+
+ if (pageout_threads_per_domain < 1) {
+ printf("Invalid tuneable vm.pageout_threads_per_domain value: "
+ "%d out of valid range: [1-%d]; clamping to 1\n",
+ pageout_threads_per_domain, half_cpus_per_dom);
+ pageout_threads_per_domain = 1;
+ } else if (pageout_threads_per_domain > half_cpus_per_dom) {
+ printf("Invalid tuneable vm.pageout_threads_per_domain value: "
+ "%d out of valid range: [1-%d]; clamping to %d\n",
+ pageout_threads_per_domain, half_cpus_per_dom,
+ half_cpus_per_dom);
+ pageout_threads_per_domain = half_cpus_per_dom;
+ }
+ resolved = true;
+ return (pageout_threads_per_domain);
+}
+
+/*
* Initialize basic pageout daemon settings. See the comment above the
* definition of vm_domain for some explanation of how these thresholds are
* used.
@@ -2134,6 +2287,8 @@
oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO,
"pidctrl", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
pidctrl_init_sysctl(&vmd->vmd_pid, SYSCTL_CHILDREN(oid));
+
+ vmd->vmd_inactive_threads = get_pageout_threads_per_domain();
}
static void
@@ -2184,10 +2339,11 @@
{
struct proc *p;
struct thread *td;
- int error, first, i;
+ int error, first, i, j, pageout_threads;
p = curproc;
td = curthread;
+ pageout_threads = get_pageout_threads_per_domain();
mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF);
swap_pager_swap_init();
@@ -2206,6 +2362,14 @@
if (error != 0)
panic("starting pageout for domain %d: %d\n",
i, error);
+ }
+ for (j = 0; j < pageout_threads - 1; j++) {
+ error = kthread_add(vm_pageout_helper,
+ (void *)(uintptr_t)i, p, NULL, 0, 0,
+ "dom%d helper%d", i, j);
+ if (error != 0)
+ panic("starting pageout helper %d for domain "
+ "%d: %d\n", j, i, error);
}
error = kthread_add(vm_pageout_laundry_worker,
(void *)(uintptr_t)i, p, NULL, 0, 0, "laundry: dom%d", i);
Index: head/sys/vm/vm_pagequeue.h
===================================================================
--- head/sys/vm/vm_pagequeue.h
+++ head/sys/vm/vm_pagequeue.h
@@ -84,6 +84,7 @@
} __aligned(CACHE_LINE_SIZE);
#include <vm/uma.h>
+#include <sys/_blockcount.h>
#include <sys/pidctrl.h>
struct sysctl_oid;
@@ -254,6 +255,14 @@
/* Paging control variables, used within single threaded page daemon. */
struct pidctrl vmd_pid; /* Pageout controller. */
boolean_t vmd_oom;
+ u_int vmd_inactive_threads;
+ u_int vmd_inactive_shortage; /* Per-thread shortage. */
+ blockcount_t vmd_inactive_running; /* Number of inactive threads. */
+ blockcount_t vmd_inactive_starting; /* Number of threads started. */
+ volatile u_int vmd_addl_shortage; /* Shortage accumulator. */
+ volatile u_int vmd_inactive_freed; /* Successful inactive frees. */
+ volatile u_int vmd_inactive_us; /* Microseconds for above. */
+ u_int vmd_inactive_pps; /* Exponential decay frees/second. */
int vmd_oom_seq;
int vmd_last_active_scan;
struct vm_page vmd_markers[PQ_COUNT]; /* (q) markers for queue scans */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Apr 25, 10:25 AM (12 h, 11 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32089547
Default Alt Text
D21629.diff (12 KB)
Attached To
Mode
D21629: Add support for multithreading the inactive queue pageout within a domain.
Attached
Detach File
Event Timeline
Log In to Comment