D8302.diff (49 KB), attached to D8302: PQ_LAUNDRY
Index: head/sys/sys/vmmeter.h
===================================================================
--- head/sys/sys/vmmeter.h
+++ head/sys/sys/vmmeter.h
@@ -75,9 +75,10 @@
u_int v_vnodepgsin; /* (p) vnode_pager pages paged in */
u_int v_vnodepgsout; /* (p) vnode pager pages paged out */
u_int v_intrans; /* (p) intransit blocking page faults */
- u_int v_reactivated; /* (f) pages reactivated from free list */
+ u_int v_reactivated; /* (p) pages reactivated by the pagedaemon */
u_int v_pdwakeups; /* (p) times daemon has awaken from sleep */
u_int v_pdpages; /* (p) pages analyzed by daemon */
+ u_int v_pdshortfalls; /* (p) page reclamation shortfalls */
u_int v_tcached; /* (p) total pages cached */
u_int v_dfree; /* (p) pages freed by daemon */
@@ -96,6 +97,7 @@
u_int v_active_count; /* (q) pages active */
u_int v_inactive_target; /* (c) pages desired inactive */
u_int v_inactive_count; /* (q) pages inactive */
+ u_int v_laundry_count; /* (q) pages eligible for laundering */
u_int v_cache_count; /* (f) pages on cache queue */
u_int v_pageout_free_min; /* (c) min pages reserved for kernel */
u_int v_interrupt_free_min; /* (c) reserved pages for int code */
@@ -111,7 +113,6 @@
u_int v_vforkpages; /* (p) VM pages affected by vfork() */
u_int v_rforkpages; /* (p) VM pages affected by rfork() */
u_int v_kthreadpages; /* (p) VM pages affected by fork() by kernel */
- u_int v_spare[2];
};
#ifdef _KERNEL
@@ -184,6 +185,25 @@
vm_pageout_wakeup_thresh);
}
+/*
+ * Return the number of pages we need to launder.
+ * A positive number indicates that we have a shortfall of clean pages.
+ */
+static inline int
+vm_laundry_target(void)
+{
+
+ return (vm_paging_target());
+}
+
+/*
+ * Obtain the value of a per-CPU counter.
+ */
+#define VM_METER_PCPU_CNT(member) \
+ vm_meter_cnt(__offsetof(struct vmmeter, member))
+
+u_int vm_meter_cnt(size_t);
+
#endif
/* systemwide totals computed every five seconds */
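The vmmeter.h hunks above add VM_METER_PCPU_CNT() and vm_meter_cnt(), which sum a counter across the global vmmeter structure and every CPU's private copy by walking a byte offset into each structure. The following standalone sketch is not part of the patch; the struct, array, and function names are illustrative. It models that offset-based accumulation in userland.

/*
 * Minimal userland sketch of the offset-based per-CPU counter
 * accumulation performed by vm_meter_cnt().  "meter", "NCPU",
 * "pcpu_meters", and "meter_cnt" are hypothetical names.
 */
#include <stddef.h>
#include <stdio.h>

#define NCPU 4

struct meter {
	unsigned int v_pdwakeups;
	unsigned int v_pdshortfalls;
};

/* Global totals plus one private copy per CPU, as in the kernel. */
static struct meter global_meter = { .v_pdwakeups = 10 };
static struct meter pcpu_meters[NCPU] = {
	{ .v_pdwakeups = 1 }, { .v_pdwakeups = 2 },
	{ .v_pdwakeups = 3 }, { .v_pdwakeups = 4 },
};

/* Sum one counter, identified by its byte offset within struct meter. */
static unsigned int
meter_cnt(size_t offset)
{
	unsigned int count;
	int i;

	count = *(unsigned int *)((char *)&global_meter + offset);
	for (i = 0; i < NCPU; i++)
		count += *(unsigned int *)((char *)&pcpu_meters[i] + offset);
	return (count);
}

#define METER_PCPU_CNT(member)	meter_cnt(offsetof(struct meter, member))

int
main(void)
{
	/* Prints 20: the global value plus the per-CPU contributions. */
	printf("v_pdwakeups = %u\n", METER_PCPU_CNT(v_pdwakeups));
	return (0);
}

In the kernel, VM_METER_PCPU_CNT(v_pdwakeups) expands to vm_meter_cnt(__offsetof(struct vmmeter, v_pdwakeups)) in the same fashion.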
Index: head/sys/vm/swap_pager.c
===================================================================
--- head/sys/vm/swap_pager.c
+++ head/sys/vm/swap_pager.c
@@ -1549,17 +1549,18 @@
* For write success, clear the dirty
* status, then finish the I/O ( which decrements the
* busy count and possibly wakes waiter's up ).
+ * A page is only written to swap after a period of
+ * inactivity. Therefore, we do not expect it to be
+ * reused.
*/
KASSERT(!pmap_page_is_write_mapped(m),
("swp_pager_async_iodone: page %p is not write"
" protected", m));
vm_page_undirty(m);
+ vm_page_lock(m);
+ vm_page_deactivate_noreuse(m);
+ vm_page_unlock(m);
vm_page_sunbusy(m);
- if (vm_page_count_severe()) {
- vm_page_lock(m);
- vm_page_try_to_cache(m);
- vm_page_unlock(m);
- }
}
}
@@ -1635,12 +1636,15 @@
/*
* SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in
*
- * This routine dissociates the page at the given index within a
- * swap block from its backing store, paging it in if necessary.
- * If the page is paged in, it is placed in the inactive queue,
- * since it had its backing store ripped out from under it.
- * We also attempt to swap in all other pages in the swap block,
- * we only guarantee that the one at the specified index is
+ * This routine dissociates the page at the given index within an object
+ * from its backing store, paging it in if it does not reside in memory.
+ * If the page is paged in, it is marked dirty and placed in the laundry
+ * queue. The page is marked dirty because it no longer has backing
+ * store. It is placed in the laundry queue because it has not been
+ * accessed recently. Otherwise, it would already reside in memory.
+ *
+ * We also attempt to swap in all other pages in the swap block.
+ * However, we only guarantee that the one at the specified index is
* paged in.
*
* XXX - The code to page the whole block in doesn't work, so we
@@ -1669,7 +1673,7 @@
vm_object_pip_wakeup(object);
vm_page_dirty(m);
vm_page_lock(m);
- vm_page_deactivate(m);
+ vm_page_launder(m);
vm_page_unlock(m);
vm_page_xunbusy(m);
vm_pager_page_unswapped(m);
Index: head/sys/vm/vm_fault.c
===================================================================
--- head/sys/vm/vm_fault.c
+++ head/sys/vm/vm_fault.c
@@ -290,12 +290,13 @@
vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags, vm_page_t *m_hold)
{
- vm_prot_t prot;
- vm_object_t next_object;
struct faultstate fs;
struct vnode *vp;
+ vm_object_t next_object, retry_object;
vm_offset_t e_end, e_start;
vm_page_t m;
+ vm_pindex_t retry_pindex;
+ vm_prot_t prot, retry_prot;
int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
int locked, map_generation, nera, result, rv;
u_char behavior;
@@ -946,10 +947,6 @@
* lookup.
*/
if (!fs.lookup_still_valid) {
- vm_object_t retry_object;
- vm_pindex_t retry_pindex;
- vm_prot_t retry_prot;
-
if (!vm_map_trylock_read(fs.map)) {
release_page(&fs);
unlock_and_deallocate(&fs);
Index: head/sys/vm/vm_meter.c
===================================================================
--- head/sys/vm/vm_meter.c
+++ head/sys/vm/vm_meter.c
@@ -216,29 +216,37 @@
}
/*
- * vcnt() - accumulate statistics from all cpus and the global cnt
- * structure.
+ * vm_meter_cnt() - accumulate statistics from all cpus and the global cnt
+ * structure.
*
* The vmmeter structure is now per-cpu as well as global. Those
* statistics which can be kept on a per-cpu basis (to avoid cache
* stalls between cpus) can be moved to the per-cpu vmmeter. Remaining
* statistics, such as v_free_reserved, are left in the global
* structure.
- *
- * (sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
*/
-static int
-vcnt(SYSCTL_HANDLER_ARGS)
+u_int
+vm_meter_cnt(size_t offset)
{
- int count = *(int *)arg1;
- int offset = (char *)arg1 - (char *)&vm_cnt;
+ struct pcpu *pcpu;
+ u_int count;
int i;
+ count = *(u_int *)((char *)&vm_cnt + offset);
CPU_FOREACH(i) {
- struct pcpu *pcpu = pcpu_find(i);
- count += *(int *)((char *)&pcpu->pc_cnt + offset);
+ pcpu = pcpu_find(i);
+ count += *(u_int *)((char *)&pcpu->pc_cnt + offset);
}
- return (SYSCTL_OUT(req, &count, sizeof(int)));
+ return (count);
+}
+
+static int
+cnt_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ u_int count;
+
+ count = vm_meter_cnt((char *)arg1 - (char *)&vm_cnt);
+ return (SYSCTL_OUT(req, &count, sizeof(count)));
}
SYSCTL_PROC(_vm, VM_TOTAL, vmtotal, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE,
@@ -253,8 +261,8 @@
#define VM_STATS(parent, var, descr) \
SYSCTL_PROC(parent, OID_AUTO, var, \
- CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, &vm_cnt.var, 0, vcnt, \
- "IU", descr)
+ CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, &vm_cnt.var, 0, \
+ cnt_sysctl, "IU", descr)
#define VM_STATS_VM(var, descr) VM_STATS(_vm_stats_vm, var, descr)
#define VM_STATS_SYS(var, descr) VM_STATS(_vm_stats_sys, var, descr)
@@ -278,9 +286,10 @@
VM_STATS_VM(v_vnodepgsin, "Vnode pages paged in");
VM_STATS_VM(v_vnodepgsout, "Vnode pages paged out");
VM_STATS_VM(v_intrans, "In transit page faults");
-VM_STATS_VM(v_reactivated, "Pages reactivated from free list");
+VM_STATS_VM(v_reactivated, "Pages reactivated by pagedaemon");
VM_STATS_VM(v_pdwakeups, "Pagedaemon wakeups");
VM_STATS_VM(v_pdpages, "Pages analyzed by pagedaemon");
+VM_STATS_VM(v_pdshortfalls, "Page reclamation shortfalls");
VM_STATS_VM(v_tcached, "Total pages cached");
VM_STATS_VM(v_dfree, "Pages freed by pagedaemon");
VM_STATS_VM(v_pfree, "Pages freed by exiting processes");
@@ -295,6 +304,7 @@
VM_STATS_VM(v_active_count, "Active pages");
VM_STATS_VM(v_inactive_target, "Desired inactive pages");
VM_STATS_VM(v_inactive_count, "Inactive pages");
+VM_STATS_VM(v_laundry_count, "Pages eligible for laundering");
VM_STATS_VM(v_cache_count, "Pages on cache queue");
VM_STATS_VM(v_pageout_free_min, "Min pages reserved for kernel");
VM_STATS_VM(v_interrupt_free_min, "Reserved pages for interrupt code");
Index: head/sys/vm/vm_object.c
===================================================================
--- head/sys/vm/vm_object.c
+++ head/sys/vm/vm_object.c
@@ -2329,9 +2329,9 @@
* sysctl is only meant to give an
* approximation of the system anyway.
*/
- if (m->queue == PQ_ACTIVE)
+ if (vm_page_active(m))
kvo.kvo_active++;
- else if (m->queue == PQ_INACTIVE)
+ else if (vm_page_inactive(m))
kvo.kvo_inactive++;
}
Index: head/sys/vm/vm_page.h
===================================================================
--- head/sys/vm/vm_page.h
+++ head/sys/vm/vm_page.h
@@ -206,7 +206,8 @@
#define PQ_NONE 255
#define PQ_INACTIVE 0
#define PQ_ACTIVE 1
-#define PQ_COUNT 2
+#define PQ_LAUNDRY 2
+#define PQ_COUNT 3
TAILQ_HEAD(pglist, vm_page);
SLIST_HEAD(spglist, vm_page);
@@ -228,6 +229,7 @@
boolean_t vmd_oom;
int vmd_oom_seq;
int vmd_last_active_scan;
+ struct vm_page vmd_laundry_marker;
struct vm_page vmd_marker; /* marker for pagedaemon private use */
struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */
};
@@ -236,6 +238,7 @@
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
+#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
#ifdef _KERNEL
@@ -327,7 +330,6 @@
#define PG_FICTITIOUS 0x0004 /* physical page doesn't exist */
#define PG_ZERO 0x0008 /* page is zeroed */
#define PG_MARKER 0x0010 /* special queue marker page */
-#define PG_WINATCFLS 0x0040 /* flush dirty page on inactive q */
#define PG_NODUMP 0x0080 /* don't include this page in a dump */
#define PG_UNHOLDFREE 0x0100 /* delayed free of a held page */
@@ -451,10 +453,8 @@
vm_paddr_t boundary, vm_memattr_t memattr);
vm_page_t vm_page_alloc_freelist(int, int);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
-void vm_page_cache(vm_page_t);
void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t);
void vm_page_cache_transfer(vm_object_t, vm_pindex_t, vm_object_t);
-int vm_page_try_to_cache (vm_page_t);
int vm_page_try_to_free (vm_page_t);
void vm_page_deactivate (vm_page_t);
void vm_page_deactivate_noreuse(vm_page_t);
@@ -465,6 +465,7 @@
void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
boolean_t vm_page_is_cached(vm_object_t object, vm_pindex_t pindex);
+void vm_page_launder(vm_page_t m);
vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
vm_page_t vm_page_next(vm_page_t m);
int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
@@ -697,5 +698,26 @@
(void)mret;
}
+static inline bool
+vm_page_active(vm_page_t m)
+{
+
+ return (m->queue == PQ_ACTIVE);
+}
+
+static inline bool
+vm_page_inactive(vm_page_t m)
+{
+
+ return (m->queue == PQ_INACTIVE);
+}
+
+static inline bool
+vm_page_in_laundry(vm_page_t m)
+{
+
+ return (m->queue == PQ_LAUNDRY);
+}
+
#endif /* _KERNEL */
#endif /* !_VM_PAGE_ */
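vm_page.h now defines PQ_LAUNDRY as a third page queue and provides inline predicates so that callers test a page's queue membership instead of comparing m->queue directly. Below is a standalone model, not kernel code and with stand-in identifiers, of those queue states and of the transition that vm_page_launder() applies to an unwired, managed page.

/*
 * Standalone model of the page queue states added by this patch and of
 * the transition performed by vm_page_launder().
 */
#include <stdbool.h>
#include <stdio.h>

#define	PQ_NONE		255
#define	PQ_INACTIVE	0
#define	PQ_ACTIVE	1
#define	PQ_LAUNDRY	2

struct page {
	int	queue;		/* which queue the page sits on */
	int	wire_count;	/* wired pages are never queued */
};

static bool page_active(struct page *m)     { return (m->queue == PQ_ACTIVE); }
static bool page_inactive(struct page *m)   { return (m->queue == PQ_INACTIVE); }
static bool page_in_laundry(struct page *m) { return (m->queue == PQ_LAUNDRY); }

/* Move an unwired page to the laundry queue, as vm_page_launder() does. */
static void
page_launder(struct page *m)
{
	if (m->queue != PQ_LAUNDRY && m->wire_count == 0)
		m->queue = PQ_LAUNDRY;	/* dequeue + enqueue in the kernel */
}

int
main(void)
{
	struct page m = { .queue = PQ_INACTIVE, .wire_count = 0 };

	printf("inactive: %d\n", page_inactive(&m));	 /* 1 */
	page_launder(&m);
	printf("in laundry: %d\n", page_in_laundry(&m)); /* 1 */
	printf("active: %d\n", page_active(&m));	 /* 0 */
	return (0);
}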
Index: head/sys/vm/vm_page.c
===================================================================
--- head/sys/vm/vm_page.c
+++ head/sys/vm/vm_page.c
@@ -390,6 +390,10 @@
"vm active pagequeue";
*__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) =
&vm_cnt.v_active_count;
+ *__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) =
+ "vm laundry pagequeue";
+ *__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) =
+ &vm_cnt.v_laundry_count;
vmd->vmd_page_count = 0;
vmd->vmd_free_count = 0;
vmd->vmd_segs = 0;
@@ -1730,9 +1734,7 @@
("vm_page_alloc: cached page %p is PG_ZERO", m));
KASSERT(m->valid != 0,
("vm_page_alloc: cached page %p is invalid", m));
- if (m->object == object && m->pindex == pindex)
- vm_cnt.v_reactivated++;
- else
+ if (m->object != object || m->pindex != pindex)
m->valid = 0;
m_object = m->object;
vm_page_cache_remove(m);
@@ -2254,7 +2256,7 @@
}
KASSERT((m->flags & PG_UNHOLDFREE) == 0,
("page %p is PG_UNHOLDFREE", m));
- /* Don't care: PG_NODUMP, PG_WINATCFLS, PG_ZERO. */
+ /* Don't care: PG_NODUMP, PG_ZERO. */
if (object->type != OBJT_DEFAULT &&
object->type != OBJT_SWAP &&
object->type != OBJT_VNODE)
@@ -2450,7 +2452,7 @@
}
KASSERT((m->flags & PG_UNHOLDFREE) == 0,
("page %p is PG_UNHOLDFREE", m));
- /* Don't care: PG_NODUMP, PG_WINATCFLS, PG_ZERO. */
+ /* Don't care: PG_NODUMP, PG_ZERO. */
if (object->type != OBJT_DEFAULT &&
object->type != OBJT_SWAP &&
object->type != OBJT_VNODE)
@@ -2778,7 +2780,10 @@
vm_page_pagequeue(vm_page_t m)
{
- return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
+ if (vm_page_in_laundry(m))
+ return (&vm_dom[0].vmd_pagequeues[m->queue]);
+ else
+ return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
}
/*
@@ -2840,7 +2845,10 @@
KASSERT(queue < PQ_COUNT,
("vm_page_enqueue: invalid queue %u request for page %p",
queue, m));
- pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
+ if (queue == PQ_LAUNDRY)
+ pq = &vm_dom[0].vmd_pagequeues[queue];
+ else
+ pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
vm_pagequeue_lock(pq);
m->queue = queue;
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
@@ -3124,11 +3132,8 @@
if (m->wire_count == 0) {
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
if ((m->oflags & VPO_UNMANAGED) == 0 &&
- m->object != NULL && queue != PQ_NONE) {
- if (queue == PQ_INACTIVE)
- m->flags &= ~PG_WINATCFLS;
+ m->object != NULL && queue != PQ_NONE)
vm_page_enqueue(queue, m);
- }
return (TRUE);
} else
return (FALSE);
@@ -3181,7 +3186,6 @@
} else {
if (queue != PQ_NONE)
vm_page_dequeue(m);
- m->flags &= ~PG_WINATCFLS;
vm_pagequeue_lock(pq);
}
m->queue = PQ_INACTIVE;
@@ -3221,24 +3225,25 @@
}
/*
- * vm_page_try_to_cache:
+ * vm_page_launder
*
- * Returns 0 on failure, 1 on success
+ * Put a page in the laundry.
*/
-int
-vm_page_try_to_cache(vm_page_t m)
+void
+vm_page_launder(vm_page_t m)
{
+ int queue;
- vm_page_lock_assert(m, MA_OWNED);
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (m->dirty || m->hold_count || m->wire_count ||
- (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m))
- return (0);
- pmap_remove_all(m);
- if (m->dirty)
- return (0);
- vm_page_cache(m);
- return (1);
+ vm_page_assert_locked(m);
+ if ((queue = m->queue) != PQ_LAUNDRY) {
+ if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
+ if (queue != PQ_NONE)
+ vm_page_dequeue(m);
+ vm_page_enqueue(PQ_LAUNDRY, m);
+ } else
+ KASSERT(queue == PQ_NONE,
+ ("wired page %p is queued", m));
+ }
}
/*
@@ -3265,112 +3270,6 @@
}
/*
- * vm_page_cache
- *
- * Put the specified page onto the page cache queue (if appropriate).
- *
- * The object and page must be locked.
- */
-void
-vm_page_cache(vm_page_t m)
-{
- vm_object_t object;
- boolean_t cache_was_empty;
-
- vm_page_lock_assert(m, MA_OWNED);
- object = m->object;
- VM_OBJECT_ASSERT_WLOCKED(object);
- if (vm_page_busied(m) || (m->oflags & VPO_UNMANAGED) ||
- m->hold_count || m->wire_count)
- panic("vm_page_cache: attempting to cache busy page");
- KASSERT(!pmap_page_is_mapped(m),
- ("vm_page_cache: page %p is mapped", m));
- KASSERT(m->dirty == 0, ("vm_page_cache: page %p is dirty", m));
- if (m->valid == 0 || object->type == OBJT_DEFAULT ||
- (object->type == OBJT_SWAP &&
- !vm_pager_has_page(object, m->pindex, NULL, NULL))) {
- /*
- * Hypothesis: A cache-eligible page belonging to a
- * default object or swap object but without a backing
- * store must be zero filled.
- */
- vm_page_free(m);
- return;
- }
- KASSERT((m->flags & PG_CACHED) == 0,
- ("vm_page_cache: page %p is already cached", m));
-
- /*
- * Remove the page from the paging queues.
- */
- vm_page_remque(m);
-
- /*
- * Remove the page from the object's collection of resident
- * pages.
- */
- vm_radix_remove(&object->rtree, m->pindex);
- TAILQ_REMOVE(&object->memq, m, listq);
- object->resident_page_count--;
-
- /*
- * Restore the default memory attribute to the page.
- */
- if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
- pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
-
- /*
- * Insert the page into the object's collection of cached pages
- * and the physical memory allocator's cache/free page queues.
- */
- m->flags &= ~PG_ZERO;
- mtx_lock(&vm_page_queue_free_mtx);
- cache_was_empty = vm_radix_is_empty(&object->cache);
- if (vm_radix_insert(&object->cache, m)) {
- mtx_unlock(&vm_page_queue_free_mtx);
- if (object->type == OBJT_VNODE &&
- object->resident_page_count == 0)
- vdrop(object->handle);
- m->object = NULL;
- vm_page_free(m);
- return;
- }
-
- /*
- * The above call to vm_radix_insert() could reclaim the one pre-
- * existing cached page from this object, resulting in a call to
- * vdrop().
- */
- if (!cache_was_empty)
- cache_was_empty = vm_radix_is_singleton(&object->cache);
-
- m->flags |= PG_CACHED;
- vm_cnt.v_cache_count++;
- PCPU_INC(cnt.v_tcached);
-#if VM_NRESERVLEVEL > 0
- if (!vm_reserv_free_page(m)) {
-#else
- if (TRUE) {
-#endif
- vm_phys_free_pages(m, 0);
- }
- vm_page_free_wakeup();
- mtx_unlock(&vm_page_queue_free_mtx);
-
- /*
- * Increment the vnode's hold count if this is the object's only
- * cached page. Decrement the vnode's hold count if this was
- * the object's only resident page.
- */
- if (object->type == OBJT_VNODE) {
- if (cache_was_empty && object->resident_page_count != 0)
- vhold(object->handle);
- else if (!cache_was_empty && object->resident_page_count == 0)
- vdrop(object->handle);
- }
-}
-
-/*
* vm_page_advise
*
* Deactivate or do nothing, as appropriate.
@@ -3413,11 +3312,13 @@
/*
* Place clean pages near the head of the inactive queue rather than
* the tail, thus defeating the queue's LRU operation and ensuring that
- * the page will be reused quickly. Dirty pages are given a chance to
- * cycle once through the inactive queue before becoming eligible for
- * laundering.
+ * the page will be reused quickly. Dirty pages not already in the
+ * laundry are moved there.
*/
- _vm_page_deactivate(m, m->dirty == 0);
+ if (m->dirty == 0)
+ vm_page_deactivate_noreuse(m);
+ else
+ vm_page_launder(m);
}
/*
@@ -3926,6 +3827,7 @@
db_printf("vm_cnt.v_cache_count: %d\n", vm_cnt.v_cache_count);
db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count);
db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count);
+ db_printf("vm_cnt.v_laundry_count: %d\n", vm_cnt.v_laundry_count);
db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count);
db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved);
db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min);
@@ -3940,12 +3842,14 @@
db_printf("pq_free %d pq_cache %d\n",
vm_cnt.v_free_count, vm_cnt.v_cache_count);
for (dom = 0; dom < vm_ndomains; dom++) {
- db_printf("dom %d page_cnt %d free %d pq_act %d pq_inact %d\n",
+ db_printf(
+ "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d\n",
dom,
vm_dom[dom].vmd_page_count,
vm_dom[dom].vmd_free_count,
vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt,
- vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt);
+ vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt,
+ vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt);
}
}
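With vm_page_cache() removed, vm_page_advise() above disposes of pages by cleanliness: clean pages are placed at the head of the inactive queue for quick reclamation, while dirty pages are sent to the laundry queue to await writeback. A minimal sketch of that decision follows; it is not kernel code, and the helpers are hypothetical stand-ins for vm_page_deactivate_noreuse() and vm_page_launder().

/*
 * Sketch of the clean/dirty disposition used by vm_page_advise() and by
 * the inactive queue scan in this patch.
 */
#include <stdio.h>

struct page {
	int dirty;	/* nonzero if the page has modified contents */
};

static void
deactivate_noreuse(struct page *m)
{
	/* Head of the inactive queue: reclaimed soon, contents discardable. */
	(void)m;
	printf("clean page -> head of inactive queue\n");
}

static void
launder(struct page *m)
{
	/* Laundry queue: must be written to swap or a file before reuse. */
	(void)m;
	printf("dirty page -> laundry queue\n");
}

static void
dispose(struct page *m)
{
	if (m->dirty == 0)
		deactivate_noreuse(m);
	else
		launder(m);
}

int
main(void)
{
	struct page clean = { .dirty = 0 }, dirty = { .dirty = 1 };

	dispose(&clean);
	dispose(&dirty);
	return (0);
}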
Index: head/sys/vm/vm_pageout.c
===================================================================
--- head/sys/vm/vm_pageout.c
+++ head/sys/vm/vm_pageout.c
@@ -119,7 +119,7 @@
/* the kernel process "vm_pageout"*/
static void vm_pageout(void);
static void vm_pageout_init(void);
-static int vm_pageout_clean(vm_page_t m);
+static int vm_pageout_clean(vm_page_t m, int *numpagedout);
static int vm_pageout_cluster(vm_page_t m);
static bool vm_pageout_scan(struct vm_domain *vmd, int pass);
static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
@@ -154,6 +154,9 @@
SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp);
#endif
+/* Pagedaemon activity rates, in subdivisions of one second. */
+#define VM_LAUNDER_RATE 10
+#define VM_INACT_SCAN_RATE 2
int vm_pageout_deficit; /* Estimated number of pages deficit */
u_int vm_pageout_wakeup_thresh;
@@ -161,6 +164,13 @@
bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */
bool vm_pages_needed; /* Are threads waiting for free pages? */
+/* Pending request for dirty page laundering. */
+static enum {
+ VM_LAUNDRY_IDLE,
+ VM_LAUNDRY_BACKGROUND,
+ VM_LAUNDRY_SHORTFALL
+} vm_laundry_request = VM_LAUNDRY_IDLE;
+
#if !defined(NO_SWAPPING)
static int vm_pageout_req_swapout; /* XXX */
static int vm_daemon_needed;
@@ -168,9 +178,7 @@
/* Allow for use by vm_pageout before vm_daemon is initialized. */
MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF);
#endif
-static int vm_max_launder = 32;
static int vm_pageout_update_period;
-static int defer_swap_pageouts;
static int disable_swap_pageouts;
static int lowmem_period = 10;
static time_t lowmem_uptime;
@@ -193,9 +201,6 @@
CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0,
"free page threshold for waking up the pageout daemon");
-SYSCTL_INT(_vm, OID_AUTO, max_launder,
- CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout");
-
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
CTLFLAG_RW, &vm_pageout_update_period, 0,
"Maximum active LRU update period");
@@ -215,9 +220,6 @@
CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
#endif
-SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts,
- CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem");
-
SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
@@ -229,6 +231,25 @@
CTLFLAG_RW, &vm_pageout_oom_seq, 0,
"back-to-back calls to oom detector to start OOM");
+static int act_scan_laundry_weight = 3;
+SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RW,
+ &act_scan_laundry_weight, 0,
+ "weight given to clean vs. dirty pages in active queue scans");
+
+static u_int vm_background_launder_target;
+SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RW,
+ &vm_background_launder_target, 0,
+ "background laundering target, in pages");
+
+static u_int vm_background_launder_rate = 4096;
+SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RW,
+ &vm_background_launder_rate, 0,
+ "background laundering rate, in kilobytes per second");
+
+static u_int vm_background_launder_max = 20 * 1024;
+SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RW,
+ &vm_background_launder_max, 0, "background laundering cap, in kilobytes");
+
#define VM_PAGEOUT_PAGE_COUNT 16
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
@@ -236,7 +257,11 @@
SYSCTL_INT(_vm, OID_AUTO, max_wired,
CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
+static u_int isqrt(u_int num);
static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
+static int vm_pageout_launder(struct vm_domain *vmd, int launder,
+ bool in_shortfall);
+static void vm_pageout_laundry_worker(void *arg);
#if !defined(NO_SWAPPING)
static void vm_pageout_map_deactivate_pages(vm_map_t, long);
static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
@@ -387,7 +412,7 @@
/*
* We can cluster only if the page is not clean, busy, or held, and
- * the page is inactive.
+ * the page is in the laundry queue.
*
* During heavy mmap/modification loads the pageout
* daemon can really fragment the underlying file
@@ -413,7 +438,7 @@
break;
}
vm_page_lock(p);
- if (p->queue != PQ_INACTIVE ||
+ if (!vm_page_in_laundry(p) ||
p->hold_count != 0) { /* may be undergoing I/O */
vm_page_unlock(p);
ib = 0;
@@ -439,7 +464,7 @@
if (p->dirty == 0)
break;
vm_page_lock(p);
- if (p->queue != PQ_INACTIVE ||
+ if (!vm_page_in_laundry(p) ||
p->hold_count != 0) { /* may be undergoing I/O */
vm_page_unlock(p);
break;
@@ -519,23 +544,33 @@
("vm_pageout_flush: page %p is not write protected", mt));
switch (pageout_status[i]) {
case VM_PAGER_OK:
+ vm_page_lock(mt);
+ if (vm_page_in_laundry(mt))
+ vm_page_deactivate_noreuse(mt);
+ vm_page_unlock(mt);
+ /* FALLTHROUGH */
case VM_PAGER_PEND:
numpagedout++;
break;
case VM_PAGER_BAD:
/*
- * Page outside of range of object. Right now we
- * essentially lose the changes by pretending it
- * worked.
+ * The page is outside the object's range. We pretend
+ * that the page out worked and clean the page, so the
+ * changes will be lost if the page is reclaimed by
+ * the page daemon.
*/
vm_page_undirty(mt);
+ vm_page_lock(mt);
+ if (vm_page_in_laundry(mt))
+ vm_page_deactivate_noreuse(mt);
+ vm_page_unlock(mt);
break;
case VM_PAGER_ERROR:
case VM_PAGER_FAIL:
/*
- * If page couldn't be paged out, then reactivate the
- * page so it doesn't clog the inactive list. (We
- * will try paging out it again later).
+ * If the page couldn't be paged out, then reactivate
+ * it so that it doesn't clog the laundry and inactive
+ * queues. (We will try paging it out again later).
*/
vm_page_lock(mt);
vm_page_activate(mt);
@@ -617,10 +652,10 @@
act_delta = 1;
vm_page_aflag_clear(p, PGA_REFERENCED);
}
- if (p->queue != PQ_ACTIVE && act_delta != 0) {
+ if (!vm_page_active(p) && act_delta != 0) {
vm_page_activate(p);
p->act_count += act_delta;
- } else if (p->queue == PQ_ACTIVE) {
+ } else if (vm_page_active(p)) {
if (act_delta == 0) {
p->act_count -= min(p->act_count,
ACT_DECLINE);
@@ -636,7 +671,7 @@
p->act_count += ACT_ADVANCE;
vm_page_requeue(p);
}
- } else if (p->queue == PQ_INACTIVE)
+ } else if (vm_page_inactive(p))
pmap_remove_all(p);
vm_page_unlock(p);
}
@@ -739,7 +774,7 @@
* Returns 0 on success and an errno otherwise.
*/
static int
-vm_pageout_clean(vm_page_t m)
+vm_pageout_clean(vm_page_t m, int *numpagedout)
{
struct vnode *vp;
struct mount *mp;
@@ -797,7 +832,7 @@
* (3) reallocated to a different offset, or
* (4) cleaned.
*/
- if (m->queue != PQ_INACTIVE || m->object != object ||
+ if (!vm_page_in_laundry(m) || m->object != object ||
m->pindex != pindex || m->dirty == 0) {
vm_page_unlock(m);
error = ENXIO;
@@ -821,7 +856,7 @@
* laundry. If it is still in the laundry, then we
* start the cleaning operation.
*/
- if (vm_pageout_cluster(m) == 0)
+ if ((*numpagedout = vm_pageout_cluster(m)) == 0)
error = EIO;
unlock_all:
@@ -840,11 +875,390 @@
}
/*
+ * Attempt to launder the specified number of pages.
+ *
+ * Returns the number of pages successfully laundered.
+ */
+static int
+vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
+{
+ struct vm_pagequeue *pq;
+ vm_object_t object;
+ vm_page_t m, next;
+ int act_delta, error, maxscan, numpagedout, starting_target;
+ int vnodes_skipped;
+ bool pageout_ok, queue_locked;
+
+ starting_target = launder;
+ vnodes_skipped = 0;
+
+ /*
+ * Scan the laundry queue for pages eligible to be laundered. We stop
+ * once the target number of dirty pages have been laundered, or once
+ * we've reached the end of the queue. A single iteration of this loop
+ * may cause more than one page to be laundered because of clustering.
+ *
+ * maxscan ensures that we don't re-examine requeued pages. Any
+ * additional pages written as part of a cluster are subtracted from
+ * maxscan since they must be taken from the laundry queue.
+ */
+ pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
+ maxscan = pq->pq_cnt;
+
+ vm_pagequeue_lock(pq);
+ queue_locked = true;
+ for (m = TAILQ_FIRST(&pq->pq_pl);
+ m != NULL && maxscan-- > 0 && launder > 0;
+ m = next) {
+ vm_pagequeue_assert_locked(pq);
+ KASSERT(queue_locked, ("unlocked laundry queue"));
+ KASSERT(vm_page_in_laundry(m),
+ ("page %p has an inconsistent queue", m));
+ next = TAILQ_NEXT(m, plinks.q);
+ if ((m->flags & PG_MARKER) != 0)
+ continue;
+ KASSERT((m->flags & PG_FICTITIOUS) == 0,
+ ("PG_FICTITIOUS page %p cannot be in laundry queue", m));
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("VPO_UNMANAGED page %p cannot be in laundry queue", m));
+ if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
+ vm_page_unlock(m);
+ continue;
+ }
+ object = m->object;
+ if ((!VM_OBJECT_TRYWLOCK(object) &&
+ (!vm_pageout_fallback_object_lock(m, &next) ||
+ m->hold_count != 0)) || vm_page_busied(m)) {
+ VM_OBJECT_WUNLOCK(object);
+ vm_page_unlock(m);
+ continue;
+ }
+
+ /*
+ * Unlock the laundry queue, invalidating the 'next' pointer.
+ * Use a marker to remember our place in the laundry queue.
+ */
+ TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_laundry_marker,
+ plinks.q);
+ vm_pagequeue_unlock(pq);
+ queue_locked = false;
+
+ /*
+ * Invalid pages can be easily freed. They cannot be
+ * mapped; vm_page_free() asserts this.
+ */
+ if (m->valid == 0)
+ goto free_page;
+
+ /*
+ * If the page has been referenced and the object is not dead,
+ * reactivate or requeue the page depending on whether the
+ * object is mapped.
+ */
+ if ((m->aflags & PGA_REFERENCED) != 0) {
+ vm_page_aflag_clear(m, PGA_REFERENCED);
+ act_delta = 1;
+ } else
+ act_delta = 0;
+ if (object->ref_count != 0)
+ act_delta += pmap_ts_referenced(m);
+ else {
+ KASSERT(!pmap_page_is_mapped(m),
+ ("page %p is mapped", m));
+ }
+ if (act_delta != 0) {
+ if (object->ref_count != 0) {
+ PCPU_INC(cnt.v_reactivated);
+ vm_page_activate(m);
+
+ /*
+ * Increase the activation count if the page
+ * was referenced while in the laundry queue.
+ * This makes it less likely that the page will
+ * be returned prematurely to the inactive
+ * queue.
+ */
+ m->act_count += act_delta + ACT_ADVANCE;
+
+ /*
+ * If this was a background laundering, count
+ * activated pages towards our target. The
+ * purpose of background laundering is to ensure
+ * that pages are eventually cycled through the
+ * laundry queue, and an activation is a valid
+ * way out.
+ */
+ if (!in_shortfall)
+ launder--;
+ goto drop_page;
+ } else if ((object->flags & OBJ_DEAD) == 0)
+ goto requeue_page;
+ }
+
+ /*
+ * If the page appears to be clean at the machine-independent
+ * layer, then remove all of its mappings from the pmap in
+ * anticipation of freeing it. If, however, any of the page's
+ * mappings allow write access, then the page may still be
+ * modified until the last of those mappings are removed.
+ */
+ if (object->ref_count != 0) {
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
+ }
+
+ /*
+ * Clean pages are freed, and dirty pages are paged out unless
+ * they belong to a dead object. Requeueing dirty pages from
+ * dead objects is pointless, as they are being paged out and
+ * freed by the thread that destroyed the object.
+ */
+ if (m->dirty == 0) {
+free_page:
+ vm_page_free(m);
+ PCPU_INC(cnt.v_dfree);
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ if (object->type != OBJT_SWAP &&
+ object->type != OBJT_DEFAULT)
+ pageout_ok = true;
+ else if (disable_swap_pageouts)
+ pageout_ok = false;
+ else
+ pageout_ok = true;
+ if (!pageout_ok) {
+requeue_page:
+ vm_pagequeue_lock(pq);
+ queue_locked = true;
+ vm_page_requeue_locked(m);
+ goto drop_page;
+ }
+
+ /*
+ * Form a cluster with adjacent, dirty pages from the
+ * same object, and page out that entire cluster.
+ *
+ * The adjacent, dirty pages must also be in the
+ * laundry. However, their mappings are not checked
+ * for new references. Consequently, a recently
+ * referenced page may be paged out. However, that
+ * page will not be prematurely reclaimed. After page
+ * out, the page will be placed in the inactive queue,
+ * where any new references will be detected and the
+ * page reactivated.
+ */
+ error = vm_pageout_clean(m, &numpagedout);
+ if (error == 0) {
+ launder -= numpagedout;
+ maxscan -= numpagedout - 1;
+ } else if (error == EDEADLK) {
+ pageout_lock_miss++;
+ vnodes_skipped++;
+ }
+ goto relock_queue;
+ }
+drop_page:
+ vm_page_unlock(m);
+ VM_OBJECT_WUNLOCK(object);
+relock_queue:
+ if (!queue_locked) {
+ vm_pagequeue_lock(pq);
+ queue_locked = true;
+ }
+ next = TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q);
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
+ }
+ vm_pagequeue_unlock(pq);
+
+ /*
+ * Wakeup the sync daemon if we skipped a vnode in a writeable object
+ * and we didn't launder enough pages.
+ */
+ if (vnodes_skipped > 0 && launder > 0)
+ (void)speedup_syncer();
+
+ return (starting_target - launder);
+}
+
+/*
+ * Compute the integer square root.
+ */
+static u_int
+isqrt(u_int num)
+{
+ u_int bit, root, tmp;
+
+ bit = 1u << ((NBBY * sizeof(u_int)) - 2);
+ while (bit > num)
+ bit >>= 2;
+ root = 0;
+ while (bit != 0) {
+ tmp = root + bit;
+ root >>= 1;
+ if (num >= tmp) {
+ num -= tmp;
+ root += bit;
+ }
+ bit >>= 2;
+ }
+ return (root);
+}
+
+/*
+ * Perform the work of the laundry thread: periodically wake up and determine
+ * whether any pages need to be laundered. If so, determine the number of pages
+ * that need to be laundered, and launder them.
+ */
+static void
+vm_pageout_laundry_worker(void *arg)
+{
+ struct vm_domain *domain;
+ struct vm_pagequeue *pq;
+ uint64_t nclean, ndirty;
+ u_int last_launder, wakeups;
+ int domidx, last_target, launder, shortfall, shortfall_cycle, target;
+ bool in_shortfall;
+
+ domidx = (uintptr_t)arg;
+ domain = &vm_dom[domidx];
+ pq = &domain->vmd_pagequeues[PQ_LAUNDRY];
+ KASSERT(domain->vmd_segs != 0, ("domain without segments"));
+ vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY);
+
+ shortfall = 0;
+ in_shortfall = false;
+ shortfall_cycle = 0;
+ target = 0;
+ last_launder = 0;
+
+ /*
+ * The pageout laundry worker is never done, so loop forever.
+ */
+ for (;;) {
+ KASSERT(target >= 0, ("negative target %d", target));
+ KASSERT(shortfall_cycle >= 0,
+ ("negative cycle %d", shortfall_cycle));
+ launder = 0;
+ wakeups = VM_METER_PCPU_CNT(v_pdwakeups);
+
+ /*
+ * First determine whether we need to launder pages to meet a
+ * shortage of free pages.
+ */
+ if (shortfall > 0) {
+ in_shortfall = true;
+ shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE;
+ target = shortfall;
+ } else if (!in_shortfall)
+ goto trybackground;
+ else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) {
+ /*
+ * We recently entered shortfall and began laundering
+ * pages. If we have completed that laundering run
+ * (and we are no longer in shortfall) or we have met
+ * our laundry target through other activity, then we
+ * can stop laundering pages.
+ */
+ in_shortfall = false;
+ target = 0;
+ goto trybackground;
+ }
+ last_launder = wakeups;
+ launder = target / shortfall_cycle--;
+ goto dolaundry;
+
+ /*
+ * There's no immediate need to launder any pages; see if we
+ * meet the conditions to perform background laundering:
+ *
+ * 1. The ratio of dirty to clean inactive pages exceeds the
+ * background laundering threshold and the pagedaemon has
+ * been woken up to reclaim pages since our last
+ * laundering, or
+ * 2. we haven't yet reached the target of the current
+ * background laundering run.
+ *
+ * The background laundering threshold is not a constant.
+ * Instead, it is a slowly growing function of the number of
+ * page daemon wakeups since the last laundering. Thus, as the
+ * ratio of dirty to clean inactive pages grows, the amount of
+ * memory pressure required to trigger laundering decreases.
+ */
+trybackground:
+ nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count;
+ ndirty = vm_cnt.v_laundry_count;
+ if (target == 0 && wakeups != last_launder &&
+ ndirty * isqrt(wakeups - last_launder) >= nclean) {
+ target = vm_background_launder_target;
+ }
+
+ /*
+ * We have a non-zero background laundering target. If we've
+ * laundered up to our maximum without observing a page daemon
+ * wakeup, just stop. This is a safety belt that ensures we
+ * don't launder an excessive amount if memory pressure is low
+ * and the ratio of dirty to clean pages is large. Otherwise,
+ * proceed at the background laundering rate.
+ */
+ if (target > 0) {
+ if (wakeups != last_launder) {
+ last_launder = wakeups;
+ last_target = target;
+ } else if (last_target - target >=
+ vm_background_launder_max * PAGE_SIZE / 1024) {
+ target = 0;
+ }
+ launder = vm_background_launder_rate * PAGE_SIZE / 1024;
+ launder /= VM_LAUNDER_RATE;
+ if (launder > target)
+ launder = target;
+ }
+
+dolaundry:
+ if (launder > 0) {
+ /*
+ * Because of I/O clustering, the number of laundered
+ * pages could exceed "target" by the maximum size of
+ * a cluster minus one.
+ */
+ target -= min(vm_pageout_launder(domain, launder,
+ in_shortfall), target);
+ pause("laundp", hz / VM_LAUNDER_RATE);
+ }
+
+ /*
+ * If we're not currently laundering pages and the page daemon
+ * hasn't posted a new request, sleep until the page daemon
+ * kicks us.
+ */
+ vm_pagequeue_lock(pq);
+ if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE)
+ (void)mtx_sleep(&vm_laundry_request,
+ vm_pagequeue_lockptr(pq), PVM, "launds", 0);
+
+ /*
+ * If the pagedaemon has indicated that it's in shortfall, start
+ * a shortfall laundering unless we're already in the middle of
+ * one. This may preempt a background laundering.
+ */
+ if (vm_laundry_request == VM_LAUNDRY_SHORTFALL &&
+ (!in_shortfall || shortfall_cycle == 0)) {
+ shortfall = vm_laundry_target() + vm_pageout_deficit;
+ target = 0;
+ } else
+ shortfall = 0;
+
+ if (target == 0)
+ vm_laundry_request = VM_LAUNDRY_IDLE;
+ vm_pagequeue_unlock(pq);
+ }
+}
+
+/*
* vm_pageout_scan does the dirty work for the pageout daemon.
*
- * pass 0 - Update active LRU/deactivate pages
- * pass 1 - Free inactive pages
- * pass 2 - Launder dirty pages
+ * pass == 0: Update active LRU/deactivate pages
+ * pass >= 1: Free inactive pages
*
* Returns true if pass was zero or enough pages were freed by the inactive
* queue scan to meet the target.
@@ -856,10 +1270,9 @@
struct vm_pagequeue *pq;
vm_object_t object;
long min_scan;
- int act_delta, addl_page_shortage, deficit, error, inactq_shortage;
- int maxlaunder, maxscan, page_shortage, scan_tick, scanned;
- int starting_page_shortage, vnodes_skipped;
- boolean_t pageout_ok, queue_locked;
+ int act_delta, addl_page_shortage, deficit, inactq_shortage, maxscan;
+ int page_shortage, scan_tick, scanned, starting_page_shortage;
+ boolean_t queue_locked;
/*
* If we need to reclaim memory ask kernel caches to return
@@ -901,23 +1314,6 @@
starting_page_shortage = page_shortage;
/*
- * maxlaunder limits the number of dirty pages we flush per scan.
- * For most systems a smaller value (16 or 32) is more robust under
- * extreme memory and disk pressure because any unnecessary writes
- * to disk can result in extreme performance degredation. However,
- * systems with excessive dirty pages (especially when MAP_NOSYNC is
- * used) will die horribly with limited laundering. If the pageout
- * daemon cannot clean enough pages in the first pass, we let it go
- * all out in succeeding passes.
- */
- if ((maxlaunder = vm_max_launder) <= 1)
- maxlaunder = 1;
- if (pass > 1)
- maxlaunder = 10000;
-
- vnodes_skipped = 0;
-
- /*
* Start scanning the inactive queue for pages that we can free. The
* scan will stop when we reach the target or we have scanned the
* entire queue. (Note that m->act_count is not used to make
@@ -932,7 +1328,7 @@
m = next) {
vm_pagequeue_assert_locked(pq);
KASSERT(queue_locked, ("unlocked inactive queue"));
- KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m));
+ KASSERT(vm_page_inactive(m), ("Inactive queue %p", m));
PCPU_INC(cnt.v_pdpages);
next = TAILQ_NEXT(m, plinks.q);
@@ -995,11 +1391,15 @@
KASSERT(m->hold_count == 0, ("Held page %p", m));
/*
- * We unlock the inactive page queue, invalidating the
- * 'next' pointer. Use our marker to remember our
- * place.
+ * Dequeue the inactive page and unlock the inactive page
+ * queue, invalidating the 'next' pointer. Dequeueing the
+ * page here avoids a later reacquisition (and release) of
+ * the inactive page queue lock when vm_page_activate(),
+ * vm_page_free(), or vm_page_launder() is called. Use a
+ * marker to remember our place in the inactive queue.
*/
TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q);
+ vm_page_dequeue_locked(m);
vm_pagequeue_unlock(pq);
queue_locked = FALSE;
@@ -1028,6 +1428,7 @@
}
if (act_delta != 0) {
if (object->ref_count != 0) {
+ PCPU_INC(cnt.v_reactivated);
vm_page_activate(m);
/*
@@ -1039,8 +1440,14 @@
*/
m->act_count += act_delta + ACT_ADVANCE;
goto drop_page;
- } else if ((object->flags & OBJ_DEAD) == 0)
- goto requeue_page;
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_pagequeue_lock(pq);
+ queue_locked = TRUE;
+ m->queue = PQ_INACTIVE;
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ vm_pagequeue_cnt_inc(pq);
+ goto drop_page;
+ }
}
/*
@@ -1056,83 +1463,23 @@
pmap_remove_all(m);
}
+ /*
+ * Clean pages can be freed, but dirty pages must be sent back
+ * to the laundry, unless they belong to a dead object.
+ * Requeueing dirty pages from dead objects is pointless, as
+ * they are being paged out and freed by the thread that
+ * destroyed the object.
+ */
if (m->dirty == 0) {
- /*
- * Clean pages can be freed.
- */
free_page:
vm_page_free(m);
PCPU_INC(cnt.v_dfree);
--page_shortage;
- } else if ((object->flags & OBJ_DEAD) != 0) {
- /*
- * Leave dirty pages from dead objects at the front of
- * the queue. They are being paged out and freed by
- * the thread that destroyed the object. They will
- * leave the queue shortly after the scan finishes, so
- * they should be discounted from the inactive count.
- */
- addl_page_shortage++;
- } else if ((m->flags & PG_WINATCFLS) == 0 && pass < 2) {
- /*
- * Dirty pages need to be paged out, but flushing
- * a page is extremely expensive versus freeing
- * a clean page. Rather then artificially limiting
- * the number of pages we can flush, we instead give
- * dirty pages extra priority on the inactive queue
- * by forcing them to be cycled through the queue
- * twice before being flushed, after which the
- * (now clean) page will cycle through once more
- * before being freed. This significantly extends
- * the thrash point for a heavily loaded machine.
- */
- m->flags |= PG_WINATCFLS;
-requeue_page:
- vm_pagequeue_lock(pq);
- queue_locked = TRUE;
- vm_page_requeue_locked(m);
- } else if (maxlaunder > 0) {
- /*
- * We always want to try to flush some dirty pages if
- * we encounter them, to keep the system stable.
- * Normally this number is small, but under extreme
- * pressure where there are insufficient clean pages
- * on the inactive queue, we may have to go all out.
- */
-
- if (object->type != OBJT_SWAP &&
- object->type != OBJT_DEFAULT)
- pageout_ok = TRUE;
- else if (disable_swap_pageouts)
- pageout_ok = FALSE;
- else if (defer_swap_pageouts)
- pageout_ok = vm_page_count_min();
- else
- pageout_ok = TRUE;
- if (!pageout_ok)
- goto requeue_page;
- error = vm_pageout_clean(m);
- /*
- * Decrement page_shortage on success to account for
- * the (future) cleaned page. Otherwise we could wind
- * up laundering or cleaning too many pages.
- */
- if (error == 0) {
- page_shortage--;
- maxlaunder--;
- } else if (error == EDEADLK) {
- pageout_lock_miss++;
- vnodes_skipped++;
- } else if (error == EBUSY) {
- addl_page_shortage++;
- }
- vm_page_lock_assert(m, MA_NOTOWNED);
- goto relock_queue;
- }
+ } else if ((object->flags & OBJ_DEAD) == 0)
+ vm_page_launder(m);
drop_page:
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
-relock_queue:
if (!queue_locked) {
vm_pagequeue_lock(pq);
queue_locked = TRUE;
@@ -1142,6 +1489,24 @@
}
vm_pagequeue_unlock(pq);
+ /*
+ * Wake up the laundry thread so that it can perform any needed
+ * laundering. If we didn't meet our target, we're in shortfall and
+ * need to launder more aggressively.
+ */
+ if (vm_laundry_request == VM_LAUNDRY_IDLE &&
+ starting_page_shortage > 0) {
+ pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY];
+ vm_pagequeue_lock(pq);
+ if (page_shortage > 0) {
+ vm_laundry_request = VM_LAUNDRY_SHORTFALL;
+ PCPU_INC(cnt.v_pdshortfalls);
+ } else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL)
+ vm_laundry_request = VM_LAUNDRY_BACKGROUND;
+ wakeup(&vm_laundry_request);
+ vm_pagequeue_unlock(pq);
+ }
+
#if !defined(NO_SWAPPING)
/*
* Wakeup the swapout daemon if we didn't free the targeted number of
@@ -1152,14 +1517,6 @@
#endif
/*
- * Wakeup the sync daemon if we skipped a vnode in a writeable object
- * and we didn't free enough pages.
- */
- if (vnodes_skipped > 0 && page_shortage > vm_cnt.v_free_target -
- vm_cnt.v_free_min)
- (void)speedup_syncer();
-
- /*
* If the inactive queue scan fails repeatedly to meet its
* target, kill the largest process.
*/
@@ -1167,10 +1524,20 @@
/*
* Compute the number of pages we want to try to move from the
- * active queue to the inactive queue.
+ * active queue to either the inactive or laundry queue.
+ *
+ * When scanning active pages, we make clean pages count more heavily
+ * towards the page shortage than dirty pages. This is because dirty
+ * pages must be laundered before they can be reused and thus have less
+ * utility when attempting to quickly alleviate a shortage. However,
+ * this weighting also causes the scan to deactivate dirty pages more
+ * more aggressively, improving the effectiveness of clustering and
+ * ensuring that they can eventually be reused.
*/
- inactq_shortage = vm_cnt.v_inactive_target - vm_cnt.v_inactive_count +
+ inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count +
+ vm_cnt.v_laundry_count / act_scan_laundry_weight) +
vm_paging_target() + deficit + addl_page_shortage;
+ page_shortage *= act_scan_laundry_weight;
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
vm_pagequeue_lock(pq);
@@ -1254,14 +1621,44 @@
m->act_count -= min(m->act_count, ACT_DECLINE);
/*
- * Move this page to the tail of the active or inactive
+ * Move this page to the tail of the active, inactive or laundry
* queue depending on usage.
*/
if (m->act_count == 0) {
/* Dequeue to avoid later lock recursion. */
vm_page_dequeue_locked(m);
- vm_page_deactivate(m);
- inactq_shortage--;
+
+ /*
+ * When not short for inactive pages, let dirty pages go
+ * through the inactive queue before moving to the
+ * laundry queues. This gives them some extra time to
+ * be reactivated, potentially avoiding an expensive
+ * pageout. During a page shortage, the inactive queue
+ * is necessarily small, so we may move dirty pages
+ * directly to the laundry queue.
+ */
+ if (inactq_shortage <= 0)
+ vm_page_deactivate(m);
+ else {
+ /*
+ * Calling vm_page_test_dirty() here would
+ * require acquisition of the object's write
+ * lock. However, during a page shortage,
+ * directing dirty pages into the laundry
+ * queue is only an optimization and not a
+ * requirement. Therefore, we simply rely on
+ * the opportunistic updates to the page's
+ * dirty field by the pmap.
+ */
+ if (m->dirty == 0) {
+ vm_page_deactivate(m);
+ inactq_shortage -=
+ act_scan_laundry_weight;
+ } else {
+ vm_page_launder(m);
+ inactq_shortage--;
+ }
+ }
} else
vm_page_requeue_locked(m);
vm_page_unlock(m);
@@ -1570,14 +1967,14 @@
* thread during the previous scan, which must have
* been a level 0 scan, or vm_pageout_wanted was
* already set and the scan failed to free enough
- * pages. If we haven't yet performed a level >= 2
- * scan (unlimited dirty cleaning), then upgrade the
- * level and scan again now. Otherwise, sleep a bit
- * and try again later.
+ * pages. If we haven't yet performed a level >= 1
+ * (page reclamation) scan, then increase the level
+ * and scan again now. Otherwise, sleep a bit and
+ * try again later.
*/
mtx_unlock(&vm_page_queue_free_mtx);
- if (pass > 1)
- pause("psleep", hz / 2);
+ if (pass >= 1)
+ pause("psleep", hz / VM_INACT_SCAN_RATE);
pass++;
} else {
/*
@@ -1648,6 +2045,14 @@
/* XXX does not really belong here */
if (vm_page_max_wired == 0)
vm_page_max_wired = vm_cnt.v_free_count / 3;
+
+ /*
+ * Target amount of memory to move out of the laundry queue during a
+ * background laundering. This is proportional to the amount of system
+ * memory.
+ */
+ vm_background_launder_target = (vm_cnt.v_free_target -
+ vm_cnt.v_free_min) / 10;
}
/*
@@ -1662,6 +2067,10 @@
#endif
swap_pager_swap_init();
+ error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
+ 0, 0, "laundry: dom0");
+ if (error != 0)
+ panic("starting laundry for domain 0, error %d", error);
#ifdef VM_NUMA_ALLOC
for (i = 1; i < vm_ndomains; i++) {
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
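The background policy implemented by vm_pageout_laundry_worker() starts a laundering run once ndirty * isqrt(wakeups since the last laundering) >= nclean, so the memory pressure needed to trigger laundering shrinks as the ratio of dirty to clean inactive pages grows. The standalone sketch below is not part of the patch: it reuses the patch's isqrt() and, for a made-up mix of clean and dirty pages, reports how many page daemon wakeups elapse before background laundering would begin.

/*
 * Standalone sketch of the background laundering trigger used by
 * vm_pageout_laundry_worker(): launder when
 *     ndirty * isqrt(wakeups - last_launder) >= nclean.
 * isqrt() is copied from the patch; the page counts are made up.
 */
#include <stdio.h>

#define	NBBY	8	/* bits per byte, as in <sys/param.h> */

/* Integer square root, as implemented in vm_pageout.c by this patch. */
static unsigned int
isqrt(unsigned int num)
{
	unsigned int bit, root, tmp;

	bit = 1u << ((NBBY * sizeof(unsigned int)) - 2);
	while (bit > num)
		bit >>= 2;
	root = 0;
	while (bit != 0) {
		tmp = root + bit;
		root >>= 1;
		if (num >= tmp) {
			num -= tmp;
			root += bit;
		}
		bit >>= 2;
	}
	return (root);
}

int
main(void)
{
	/* Hypothetical inventories, in pages. */
	unsigned long long nclean = 100000;	/* inactive + free */
	unsigned long long ndirty = 20000;	/* laundry queue */
	unsigned int wakeups;

	for (wakeups = 1; wakeups <= 36; wakeups++) {
		if (ndirty * isqrt(wakeups) >= nclean) {
			printf("background laundering after %u wakeups\n",
			    wakeups);
			break;
		}
	}
	return (0);
}

With these example inventories the condition is first met at 25 wakeups, since isqrt(25) = 5 and 20000 * 5 >= 100000; a larger dirty/clean ratio would cross the threshold after fewer wakeups.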