Page MenuHomeFreeBSD

D21629.id62014.diff
No One · Temporary

D21629.id62014.diff

Index: sys/conf/options
===================================================================
--- sys/conf/options
+++ sys/conf/options
@@ -614,6 +614,7 @@
MALLOC_MAKE_FAILURES opt_vm.h
MALLOC_PROFILE opt_vm.h
MALLOC_DEBUG_MAXZONES opt_vm.h
+PAGEOUT_THREADS opt_vm.h
UMA_XDOMAIN opt_vm.h
UMA_FIRSTTOUCH opt_vm.h
Index: sys/sys/refcount.h
===================================================================
--- sys/sys/refcount.h
+++ sys/sys/refcount.h
@@ -180,16 +180,19 @@
{
u_int old;
- KASSERT(n > 0,
- ("refcount_release_if_gt: Use refcount_release for final ref"));
+ if (n == 0)
+ atomic_thread_fence_rel();
old = *count;
for (;;) {
if (REFCOUNT_COUNT(old) <= n)
return (false);
if (__predict_false(REFCOUNT_SATURATED(old)))
return (true);
- if (atomic_fcmpset_int(count, &old, old - 1))
+ if (atomic_fcmpset_int(count, &old, old - 1)) {
+ if (__predict_false(REFCOUNT_COUNT(old) == 1))
+ return (refcount_release_last(count, 1, old));
return (true);
+ }
}
}
Index: sys/vm/vm_meter.c
===================================================================
--- sys/vm/vm_meter.c
+++ sys/vm/vm_meter.c
@@ -550,6 +550,9 @@
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"free_severe", CTLFLAG_RD, &vmd->vmd_free_severe, 0,
"Severe free pages");
+ SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+ "inactive_pps", CTLFLAG_RD, &vmd->vmd_inactive_pps, 0,
+ "inactive pages freed/second");
}
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -595,6 +595,7 @@
bool vm_page_free_prep(vm_page_t m);
vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
+void vm_page_init_marker(vm_page_t m, int queue, uint8_t aflags);
int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
void vm_page_invalid(vm_page_t m);
void vm_page_launder(vm_page_t m);
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -437,7 +437,7 @@
* In principle, this function only needs to set the flag PG_MARKER.
* Nonetheless, it write busies the page as a safety precaution.
*/
-static void
+void
vm_page_init_marker(vm_page_t marker, int queue, uint8_t aflags)
{
@@ -2318,7 +2318,7 @@
pgcache = arg;
vmd = VM_DOMAIN(pgcache->domain);
/* Only import if we can bring in a full bucket. */
- if (cnt == 1 || !vm_domain_allocate(vmd, VM_ALLOC_NORMAL, cnt))
+ if (cnt == 1 || !vm_domain_allocate(vmd, VM_ALLOC_SYSTEM, cnt))
return (0);
domain = vmd->vmd_domain;
vm_domain_free_lock(vmd);
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -90,6 +90,7 @@
#include <sys/ktr.h>
#include <sys/mount.h>
#include <sys/racct.h>
+#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
@@ -1408,22 +1409,23 @@
vm_batchqueue_init(bq);
}
-/*
- * Attempt to reclaim the requested number of pages from the inactive queue.
- * Returns true if the shortage was addressed.
- */
-static int
-vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
- int *addl_shortage)
+static void
+vm_pageout_scan_inactive(struct vm_domain *vmd, int page_shortage)
{
+ struct timeval start, end;
struct scan_state ss;
struct vm_batchqueue rq;
+ struct vm_page marker_page;
struct mtx *mtx;
vm_page_t m, marker;
struct vm_pagequeue *pq;
vm_object_t object;
- int act_delta, addl_page_shortage, deficit, page_shortage;
- int starting_page_shortage;
+ int act_delta, addl_page_shortage, starting_page_shortage;
+
+ mtx = NULL;
+ object = NULL;
+ vm_batchqueue_init(&rq);
+ getmicrouptime(&start);
/*
* The addl_page_shortage is an estimate of the number of temporarily
@@ -1433,18 +1435,6 @@
*/
addl_page_shortage = 0;
- /*
- * vmd_pageout_deficit counts the number of pages requested in
- * allocations that failed because of a free page shortage. We assume
- * that the allocations will be reattempted and thus include the deficit
- * in our scan target.
- */
- deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
- starting_page_shortage = page_shortage = shortage + deficit;
-
- mtx = NULL;
- object = NULL;
- vm_batchqueue_init(&rq);
/*
* Start scanning the inactive queue for pages that we can free. The
@@ -1452,7 +1442,9 @@
* entire queue. (Note that m->act_count is not used to make
* decisions for the inactive queue, only for the active queue.)
*/
- marker = &vmd->vmd_markers[PQ_INACTIVE];
+ starting_page_shortage = page_shortage;
+ marker = &marker_page;
+ vm_page_init_marker(marker, PQ_INACTIVE, 0);
pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
vm_pagequeue_lock(pq);
vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt);
@@ -1651,7 +1643,99 @@
vm_pageout_end_scan(&ss);
vm_pagequeue_unlock(pq);
- VM_CNT_ADD(v_dfree, starting_page_shortage - page_shortage);
+ /*
+ * Record the remaining shortage and the progress and rate it was
+ * made.
+ */
+ atomic_add_int(&vmd->vmd_addl_shortage, addl_page_shortage);
+ getmicrouptime(&end);
+ timevalsub(&end, &start);
+ atomic_add_int(&vmd->vmd_inactive_us,
+ end.tv_sec * 1000000 + end.tv_usec);
+ atomic_add_int(&vmd->vmd_inactive_freed,
+ starting_page_shortage - page_shortage);
+}
+
+/*
+ * Dispatch a number of inactive threads according to load and collect the
+ * results to prevent a coherent view of paging activity on this domain.
+ */
+static int
+vm_pageout_inactive_dispatch(struct vm_domain *vmd, int shortage)
+{
+ u_int freed, pps, threads, us;
+
+ vmd->vmd_inactive_shortage = shortage;
+
+ /*
+ * If we have more work than we can do in a quarter of our interval
+ * we fire off multiple threads to process it.
+ */
+ if (vmd->vmd_inactive_threads > 1 && vmd->vmd_inactive_pps != 0 &&
+ shortage > vmd->vmd_inactive_pps / VM_INACT_SCAN_RATE / 4) {
+ threads = vmd->vmd_inactive_threads;
+ vm_domain_pageout_lock(vmd);
+ vmd->vmd_inactive_shortage /= threads;
+ refcount_acquiren(&vmd->vmd_inactive_starting, threads - 1);
+ refcount_acquiren(&vmd->vmd_inactive_running, threads - 1);
+ wakeup(&vmd->vmd_inactive_shortage);
+ vm_domain_pageout_unlock(vmd);
+ }
+
+ /* Run the local scan. */
+ vm_pageout_scan_inactive(vmd, vmd->vmd_inactive_shortage);
+
+ /*
+ * Block until helper threads report results and then accumulate
+ * totals.
+ */
+ refcount_wait(&vmd->vmd_inactive_running, "vmpoid", PVM);
+ freed = atomic_readandclear_int(&vmd->vmd_inactive_freed);
+ VM_CNT_ADD(v_dfree, freed);
+
+ /*
+ * Calculate the per-thread paging rate with an exponential decay
+ * of prior results. Careful to avoid integer rounding errors with
+ * large us values.
+ */
+ us = max(atomic_readandclear_int(&vmd->vmd_inactive_us), 1);
+ if (us > 1000000)
+ /* Keep rounding to tenths */
+ pps = (freed * 10) / ((us * 10) / 1000000);
+ else
+ pps = (1000000 / us) * freed;
+ vmd->vmd_inactive_pps = (vmd->vmd_inactive_pps / 2) + (pps / 2);
+
+ return (shortage - freed);
+}
+
+/*
+ * Attempt to reclaim the requested number of pages from the inactive queue.
+ * Returns true if the shortage was addressed.
+ */
+static int
+vm_pageout_inactive(struct vm_domain *vmd, int shortage,
+ int *addl_shortage)
+{
+ struct vm_pagequeue *pq;
+ u_int addl_page_shortage, deficit, page_shortage;
+ u_int starting_page_shortage;
+
+ /*
+ * vmd_pageout_deficit counts the number of pages requested in
+ * allocations that failed because of a free page shortage. We assume
+ * that the allocations will be reattempted and thus include the deficit
+ * in our scan target.
+ */
+ deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
+ starting_page_shortage = shortage + deficit;
+
+ /*
+ * Run the inactive scan on as many threads as is necessary.
+ */
+ page_shortage = vm_pageout_inactive_dispatch(vmd,
+ starting_page_shortage);
+ addl_page_shortage = atomic_readandclear_int(&vmd->vmd_addl_shortage);
/*
* Wake up the laundry thread so that it can perform any needed
@@ -2060,7 +2144,7 @@
if (vm_pageout_lowmem() && vmd->vmd_free_count > ofree)
shortage -= min(vmd->vmd_free_count - ofree,
(u_int)shortage);
- target_met = vm_pageout_scan_inactive(vmd, shortage,
+ target_met = vm_pageout_inactive(vmd, shortage,
&addl_shortage);
} else
addl_shortage = 0;
@@ -2075,6 +2159,41 @@
}
}
+/*
+ * vm_pageout_helper runs additional pageout daemons in times of high
+ * paging activity.
+ */
+static void
+vm_pageout_helper(void *arg)
+{
+ struct vm_domain *vmd;
+ int domain;
+
+ domain = (uintptr_t)arg;
+ vmd = VM_DOMAIN(domain);
+
+ vm_domain_pageout_lock(vmd);
+ while (TRUE) {
+ while (TRUE) {
+ msleep(&vmd->vmd_inactive_shortage,
+ vm_domain_pageout_lockptr(vmd), PVM, "psleep", 0);
+ /* Check for spurious wakeups. */
+ if (refcount_release_if_gt(
+ &vmd->vmd_inactive_starting, 0))
+ break;
+ }
+ vm_domain_pageout_unlock(vmd);
+ vm_pageout_scan_inactive(vmd, vmd->vmd_inactive_shortage);
+ vm_domain_pageout_lock(vmd);
+
+ /*
+ * Release the running count while the pageout lock is
+ * held to prevent wakeup races.
+ */
+ refcount_release(&vmd->vmd_inactive_running);
+ }
+}
+
/*
* vm_pageout_init initialises basic pageout daemon settings.
*/
@@ -2129,6 +2248,8 @@
oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO,
"pidctrl", CTLFLAG_RD, NULL, "");
pidctrl_init_sysctl(&vmd->vmd_pid, SYSCTL_CHILDREN(oid));
+
+ vmd->vmd_inactive_threads = PAGEOUT_THREADS;
}
static void
@@ -2179,7 +2300,7 @@
{
struct proc *p;
struct thread *td;
- int error, first, i;
+ int error, first, i, j;
p = curproc;
td = curthread;
@@ -2202,6 +2323,13 @@
panic("starting pageout for domain %d: %d\n",
i, error);
}
+ for (j = 0; j < PAGEOUT_THREADS - 1; j++) {
+ error = kthread_add(vm_pageout_helper,
+ (void *)(uintptr_t)i, p, NULL, 0, 0, "dom%d", i);
+ if (error != 0)
+ panic("starting pageout helper for domain %d: %d\n",
+ i, error);
+ }
error = kthread_add(vm_pageout_laundry_worker,
(void *)(uintptr_t)i, p, NULL, 0, 0, "laundry: dom%d", i);
if (error != 0)
Index: sys/vm/vm_pagequeue.h
===================================================================
--- sys/vm/vm_pagequeue.h
+++ sys/vm/vm_pagequeue.h
@@ -120,6 +120,14 @@
/* Paging control variables, used within single threaded page daemon. */
struct pidctrl vmd_pid; /* Pageout controller. */
boolean_t vmd_oom;
+ u_int vmd_inactive_threads;
+ u_int vmd_inactive_shortage; /* per-thread shortage. */
+ volatile u_int vmd_inactive_running; /* Number of inactive threads.*/
+ volatile u_int vmd_inactive_starting; /* Number threads started. */
+ volatile u_int vmd_addl_shortage; /* Shortage accumulator. */
+ volatile u_int vmd_inactive_freed; /* Successful inactive frees. */
+ volatile u_int vmd_inactive_us; /* Microseconds for above. */
+ u_int vmd_inactive_pps; /* Exponential decay frees/second. */
int vmd_oom_seq;
int vmd_last_active_scan;
struct vm_page vmd_markers[PQ_COUNT]; /* (q) markers for queue scans */
Index: sys/vm/vm_param.h
===================================================================
--- sys/vm/vm_param.h
+++ sys/vm/vm_param.h
@@ -131,6 +131,10 @@
#endif
#define PHYS_AVAIL_COUNT (PHYS_AVAIL_ENTRIES + 2)
+#ifndef PAGEOUT_THREADS
+#define PAGEOUT_THREADS 1
+#endif
+
#ifndef ASSEMBLER
#ifdef _KERNEL
#define num_pages(x) \

File Metadata

Mime Type
text/plain
Expires
Sat, Apr 25, 10:09 PM (17 h, 44 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32100592
Default Alt Text
D21629.id62014.diff (11 KB)

Event Timeline