D8302.diff
Index: head/sys/sys/vmmeter.h
===================================================================
--- head/sys/sys/vmmeter.h
+++ head/sys/sys/vmmeter.h
@@ -75,9 +75,10 @@
u_int v_vnodepgsin; /* (p) vnode_pager pages paged in */
u_int v_vnodepgsout; /* (p) vnode pager pages paged out */
u_int v_intrans; /* (p) intransit blocking page faults */
- u_int v_reactivated; /* (f) pages reactivated from free list */
+ u_int v_reactivated; /* (p) pages reactivated by the pagedaemon */
u_int v_pdwakeups; /* (p) times daemon has awaken from sleep */
u_int v_pdpages; /* (p) pages analyzed by daemon */
+ u_int v_pdshortfalls; /* (p) page reclamation shortfalls */
u_int v_tcached; /* (p) total pages cached */
u_int v_dfree; /* (p) pages freed by daemon */
@@ -96,6 +97,7 @@
u_int v_active_count; /* (q) pages active */
u_int v_inactive_target; /* (c) pages desired inactive */
u_int v_inactive_count; /* (q) pages inactive */
+ u_int v_laundry_count; /* (q) pages eligible for laundering */
u_int v_cache_count; /* (f) pages on cache queue */
u_int v_pageout_free_min; /* (c) min pages reserved for kernel */
u_int v_interrupt_free_min; /* (c) reserved pages for int code */
@@ -111,7 +113,6 @@
u_int v_vforkpages; /* (p) VM pages affected by vfork() */
u_int v_rforkpages; /* (p) VM pages affected by rfork() */
u_int v_kthreadpages; /* (p) VM pages affected by fork() by kernel */
- u_int v_spare[2];
};
#ifdef _KERNEL
@@ -184,6 +185,25 @@
vm_pageout_wakeup_thresh);
}
+/*
+ * Return the number of pages we need to launder.
+ * A positive number indicates that we have a shortfall of clean pages.
+ */
+static inline int
+vm_laundry_target(void)
+{
+
+ return (vm_paging_target());
+}
+
+/*
+ * Obtain the value of a per-CPU counter.
+ */
+#define VM_METER_PCPU_CNT(member) \
+ vm_meter_cnt(__offsetof(struct vmmeter, member))
+
+u_int vm_meter_cnt(size_t);
+
#endif
/* systemwide totals computed every five seconds */
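For reference, the VM_METER_PCPU_CNT() macro defined above is how callers read a statistic that is split between the global vmmeter and the per-CPU copies. The laundry thread added later in this diff uses it exactly this way:

    u_int wakeups;

    /* Sum the global count and every CPU's count of pagedaemon wakeups. */
    wakeups = VM_METER_PCPU_CNT(v_pdwakeups);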
Index: head/sys/vm/swap_pager.c
===================================================================
--- head/sys/vm/swap_pager.c
+++ head/sys/vm/swap_pager.c
@@ -1549,17 +1549,18 @@
* For write success, clear the dirty
* status, then finish the I/O ( which decrements the
* busy count and possibly wakes waiter's up ).
+ * A page is only written to swap after a period of
+ * inactivity. Therefore, we do not expect it to be
+ * reused.
*/
KASSERT(!pmap_page_is_write_mapped(m),
("swp_pager_async_iodone: page %p is not write"
" protected", m));
vm_page_undirty(m);
+ vm_page_lock(m);
+ vm_page_deactivate_noreuse(m);
+ vm_page_unlock(m);
vm_page_sunbusy(m);
- if (vm_page_count_severe()) {
- vm_page_lock(m);
- vm_page_try_to_cache(m);
- vm_page_unlock(m);
- }
}
}
@@ -1635,12 +1636,15 @@
/*
* SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in
*
- * This routine dissociates the page at the given index within a
- * swap block from its backing store, paging it in if necessary.
- * If the page is paged in, it is placed in the inactive queue,
- * since it had its backing store ripped out from under it.
- * We also attempt to swap in all other pages in the swap block,
- * we only guarantee that the one at the specified index is
+ * This routine dissociates the page at the given index within an object
+ * from its backing store, paging it in if it does not reside in memory.
+ * If the page is paged in, it is marked dirty and placed in the laundry
+ * queue. The page is marked dirty because it no longer has backing
+ * store. It is placed in the laundry queue because it has not been
+ * accessed recently. Otherwise, it would already reside in memory.
+ *
+ * We also attempt to swap in all other pages in the swap block.
+ * However, we only guarantee that the one at the specified index is
* paged in.
*
* XXX - The code to page the whole block in doesn't work, so we
@@ -1669,7 +1673,7 @@
vm_object_pip_wakeup(object);
vm_page_dirty(m);
vm_page_lock(m);
- vm_page_deactivate(m);
+ vm_page_launder(m);
vm_page_unlock(m);
vm_page_xunbusy(m);
vm_pager_page_unswapped(m);
Index: head/sys/vm/vm_fault.c
===================================================================
--- head/sys/vm/vm_fault.c
+++ head/sys/vm/vm_fault.c
@@ -290,12 +290,13 @@
vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags, vm_page_t *m_hold)
{
- vm_prot_t prot;
- vm_object_t next_object;
struct faultstate fs;
struct vnode *vp;
+ vm_object_t next_object, retry_object;
vm_offset_t e_end, e_start;
vm_page_t m;
+ vm_pindex_t retry_pindex;
+ vm_prot_t prot, retry_prot;
int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
int locked, map_generation, nera, result, rv;
u_char behavior;
@@ -946,10 +947,6 @@
* lookup.
*/
if (!fs.lookup_still_valid) {
- vm_object_t retry_object;
- vm_pindex_t retry_pindex;
- vm_prot_t retry_prot;
-
if (!vm_map_trylock_read(fs.map)) {
release_page(&fs);
unlock_and_deallocate(&fs);
Index: head/sys/vm/vm_meter.c
===================================================================
--- head/sys/vm/vm_meter.c
+++ head/sys/vm/vm_meter.c
@@ -216,29 +216,37 @@
}
/*
- * vcnt() - accumulate statistics from all cpus and the global cnt
- * structure.
+ * vm_meter_cnt() - accumulate statistics from all cpus and the global cnt
+ * structure.
*
* The vmmeter structure is now per-cpu as well as global. Those
* statistics which can be kept on a per-cpu basis (to avoid cache
* stalls between cpus) can be moved to the per-cpu vmmeter. Remaining
* statistics, such as v_free_reserved, are left in the global
* structure.
- *
- * (sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
*/
-static int
-vcnt(SYSCTL_HANDLER_ARGS)
+u_int
+vm_meter_cnt(size_t offset)
{
- int count = *(int *)arg1;
- int offset = (char *)arg1 - (char *)&vm_cnt;
+ struct pcpu *pcpu;
+ u_int count;
int i;
+ count = *(u_int *)((char *)&vm_cnt + offset);
CPU_FOREACH(i) {
- struct pcpu *pcpu = pcpu_find(i);
- count += *(int *)((char *)&pcpu->pc_cnt + offset);
+ pcpu = pcpu_find(i);
+ count += *(u_int *)((char *)&pcpu->pc_cnt + offset);
}
- return (SYSCTL_OUT(req, &count, sizeof(int)));
+ return (count);
+}
+
+static int
+cnt_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ u_int count;
+
+ count = vm_meter_cnt((char *)arg1 - (char *)&vm_cnt);
+ return (SYSCTL_OUT(req, &count, sizeof(count)));
}
SYSCTL_PROC(_vm, VM_TOTAL, vmtotal, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE,
@@ -253,8 +261,8 @@
#define VM_STATS(parent, var, descr) \
SYSCTL_PROC(parent, OID_AUTO, var, \
- CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, &vm_cnt.var, 0, vcnt, \
- "IU", descr)
+ CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, &vm_cnt.var, 0, \
+ cnt_sysctl, "IU", descr)
#define VM_STATS_VM(var, descr) VM_STATS(_vm_stats_vm, var, descr)
#define VM_STATS_SYS(var, descr) VM_STATS(_vm_stats_sys, var, descr)
@@ -278,9 +286,10 @@
VM_STATS_VM(v_vnodepgsin, "Vnode pages paged in");
VM_STATS_VM(v_vnodepgsout, "Vnode pages paged out");
VM_STATS_VM(v_intrans, "In transit page faults");
-VM_STATS_VM(v_reactivated, "Pages reactivated from free list");
+VM_STATS_VM(v_reactivated, "Pages reactivated by pagedaemon");
VM_STATS_VM(v_pdwakeups, "Pagedaemon wakeups");
VM_STATS_VM(v_pdpages, "Pages analyzed by pagedaemon");
+VM_STATS_VM(v_pdshortfalls, "Page reclamation shortfalls");
VM_STATS_VM(v_tcached, "Total pages cached");
VM_STATS_VM(v_dfree, "Pages freed by pagedaemon");
VM_STATS_VM(v_pfree, "Pages freed by exiting processes");
@@ -295,6 +304,7 @@
VM_STATS_VM(v_active_count, "Active pages");
VM_STATS_VM(v_inactive_target, "Desired inactive pages");
VM_STATS_VM(v_inactive_count, "Inactive pages");
+VM_STATS_VM(v_laundry_count, "Pages eligible for laundering");
VM_STATS_VM(v_cache_count, "Pages on cache queue");
VM_STATS_VM(v_pageout_free_min, "Min pages reserved for kernel");
VM_STATS_VM(v_interrupt_free_min, "Reserved pages for interrupt code");
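The offset arithmetic in vm_meter_cnt() above is easier to see in a standalone form. The sketch below uses hypothetical names (toy_meter, toy_global, toy_pcpu, NTOYCPU) and a fixed CPU count; it only illustrates the pattern of summing one member, identified by its byte offset, across the global structure and all per-CPU copies, not FreeBSD's actual per-CPU machinery:

    #include <stddef.h>

    #define NTOYCPU 4

    struct toy_meter {
        unsigned int m_wakeups;     /* e.g. pagedaemon wakeups */
        unsigned int m_shortfalls;  /* e.g. reclamation shortfalls */
    };

    static struct toy_meter toy_global;          /* global counters */
    static struct toy_meter toy_pcpu[NTOYCPU];   /* per-CPU counters */

    /* Sum one member, identified by its byte offset, across all copies. */
    static unsigned int
    toy_meter_cnt(size_t offset)
    {
        unsigned int count;
        int i;

        count = *(unsigned int *)((char *)&toy_global + offset);
        for (i = 0; i < NTOYCPU; i++)
            count += *(unsigned int *)((char *)&toy_pcpu[i] + offset);
        return (count);
    }

    #define TOY_METER_CNT(member) \
        toy_meter_cnt(offsetof(struct toy_meter, member))

A caller would then write TOY_METER_CNT(m_wakeups), which parallels VM_METER_PCPU_CNT(v_pdwakeups) in the kernel.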
Index: head/sys/vm/vm_object.c
===================================================================
--- head/sys/vm/vm_object.c
+++ head/sys/vm/vm_object.c
@@ -2329,9 +2329,9 @@
* sysctl is only meant to give an
* approximation of the system anyway.
*/
- if (m->queue == PQ_ACTIVE)
+ if (vm_page_active(m))
kvo.kvo_active++;
- else if (m->queue == PQ_INACTIVE)
+ else if (vm_page_inactive(m))
kvo.kvo_inactive++;
}
Index: head/sys/vm/vm_page.h
===================================================================
--- head/sys/vm/vm_page.h
+++ head/sys/vm/vm_page.h
@@ -206,7 +206,8 @@
#define PQ_NONE 255
#define PQ_INACTIVE 0
#define PQ_ACTIVE 1
-#define PQ_COUNT 2
+#define PQ_LAUNDRY 2
+#define PQ_COUNT 3
TAILQ_HEAD(pglist, vm_page);
SLIST_HEAD(spglist, vm_page);
@@ -228,6 +229,7 @@
boolean_t vmd_oom;
int vmd_oom_seq;
int vmd_last_active_scan;
+ struct vm_page vmd_laundry_marker;
struct vm_page vmd_marker; /* marker for pagedaemon private use */
struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */
};
@@ -236,6 +238,7 @@
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
+#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
#ifdef _KERNEL
@@ -327,7 +330,6 @@
#define PG_FICTITIOUS 0x0004 /* physical page doesn't exist */
#define PG_ZERO 0x0008 /* page is zeroed */
#define PG_MARKER 0x0010 /* special queue marker page */
-#define PG_WINATCFLS 0x0040 /* flush dirty page on inactive q */
#define PG_NODUMP 0x0080 /* don't include this page in a dump */
#define PG_UNHOLDFREE 0x0100 /* delayed free of a held page */
@@ -451,10 +453,8 @@
vm_paddr_t boundary, vm_memattr_t memattr);
vm_page_t vm_page_alloc_freelist(int, int);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
-void vm_page_cache(vm_page_t);
void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t);
void vm_page_cache_transfer(vm_object_t, vm_pindex_t, vm_object_t);
-int vm_page_try_to_cache (vm_page_t);
int vm_page_try_to_free (vm_page_t);
void vm_page_deactivate (vm_page_t);
void vm_page_deactivate_noreuse(vm_page_t);
@@ -465,6 +465,7 @@
void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
boolean_t vm_page_is_cached(vm_object_t object, vm_pindex_t pindex);
+void vm_page_launder(vm_page_t m);
vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
vm_page_t vm_page_next(vm_page_t m);
int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
@@ -697,5 +698,26 @@
(void)mret;
}
+static inline bool
+vm_page_active(vm_page_t m)
+{
+
+ return (m->queue == PQ_ACTIVE);
+}
+
+static inline bool
+vm_page_inactive(vm_page_t m)
+{
+
+ return (m->queue == PQ_INACTIVE);
+}
+
+static inline bool
+vm_page_in_laundry(vm_page_t m)
+{
+
+ return (m->queue == PQ_LAUNDRY);
+}
+
#endif /* _KERNEL */
#endif /* !_VM_PAGE_ */
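Taken together, the new queue predicates and vm_page_launder() replace the old cache-based retirement path. A minimal sketch of how a caller might retire a page it does not expect to need again soon, modeled on the vm_page_advise() and swap pager changes elsewhere in this diff (it assumes the usual locking: the page's object is locked so m->dirty is stable, and the page lock is taken around the queue operations):

    vm_page_lock(m);
    if (m->dirty == 0)
        vm_page_deactivate_noreuse(m);  /* clean: near the head of the inactive queue */
    else
        vm_page_launder(m);             /* dirty: queue it for the laundry thread */
    vm_page_unlock(m);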
Index: head/sys/vm/vm_page.c
===================================================================
--- head/sys/vm/vm_page.c
+++ head/sys/vm/vm_page.c
@@ -390,6 +390,10 @@
"vm active pagequeue";
*__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) =
&vm_cnt.v_active_count;
+ *__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) =
+ "vm laundry pagequeue";
+ *__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) =
+ &vm_cnt.v_laundry_count;
vmd->vmd_page_count = 0;
vmd->vmd_free_count = 0;
vmd->vmd_segs = 0;
@@ -1730,9 +1734,7 @@
("vm_page_alloc: cached page %p is PG_ZERO", m));
KASSERT(m->valid != 0,
("vm_page_alloc: cached page %p is invalid", m));
- if (m->object == object && m->pindex == pindex)
- vm_cnt.v_reactivated++;
- else
+ if (m->object != object || m->pindex != pindex)
m->valid = 0;
m_object = m->object;
vm_page_cache_remove(m);
@@ -2254,7 +2256,7 @@
}
KASSERT((m->flags & PG_UNHOLDFREE) == 0,
("page %p is PG_UNHOLDFREE", m));
- /* Don't care: PG_NODUMP, PG_WINATCFLS, PG_ZERO. */
+ /* Don't care: PG_NODUMP, PG_ZERO. */
if (object->type != OBJT_DEFAULT &&
object->type != OBJT_SWAP &&
object->type != OBJT_VNODE)
@@ -2450,7 +2452,7 @@
}
KASSERT((m->flags & PG_UNHOLDFREE) == 0,
("page %p is PG_UNHOLDFREE", m));
- /* Don't care: PG_NODUMP, PG_WINATCFLS, PG_ZERO. */
+ /* Don't care: PG_NODUMP, PG_ZERO. */
if (object->type != OBJT_DEFAULT &&
object->type != OBJT_SWAP &&
object->type != OBJT_VNODE)
@@ -2778,7 +2780,10 @@
vm_page_pagequeue(vm_page_t m)
{
- return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
+ if (vm_page_in_laundry(m))
+ return (&vm_dom[0].vmd_pagequeues[m->queue]);
+ else
+ return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
}
/*
@@ -2840,7 +2845,10 @@
KASSERT(queue < PQ_COUNT,
("vm_page_enqueue: invalid queue %u request for page %p",
queue, m));
- pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
+ if (queue == PQ_LAUNDRY)
+ pq = &vm_dom[0].vmd_pagequeues[queue];
+ else
+ pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
vm_pagequeue_lock(pq);
m->queue = queue;
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
@@ -3124,11 +3132,8 @@
if (m->wire_count == 0) {
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
if ((m->oflags & VPO_UNMANAGED) == 0 &&
- m->object != NULL && queue != PQ_NONE) {
- if (queue == PQ_INACTIVE)
- m->flags &= ~PG_WINATCFLS;
+ m->object != NULL && queue != PQ_NONE)
vm_page_enqueue(queue, m);
- }
return (TRUE);
} else
return (FALSE);
@@ -3181,7 +3186,6 @@
} else {
if (queue != PQ_NONE)
vm_page_dequeue(m);
- m->flags &= ~PG_WINATCFLS;
vm_pagequeue_lock(pq);
}
m->queue = PQ_INACTIVE;
@@ -3221,24 +3225,25 @@
}
/*
- * vm_page_try_to_cache:
+ * vm_page_launder
*
- * Returns 0 on failure, 1 on success
+ * Put a page in the laundry.
*/
-int
-vm_page_try_to_cache(vm_page_t m)
+void
+vm_page_launder(vm_page_t m)
{
+ int queue;
- vm_page_lock_assert(m, MA_OWNED);
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (m->dirty || m->hold_count || m->wire_count ||
- (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m))
- return (0);
- pmap_remove_all(m);
- if (m->dirty)
- return (0);
- vm_page_cache(m);
- return (1);
+ vm_page_assert_locked(m);
+ if ((queue = m->queue) != PQ_LAUNDRY) {
+ if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
+ if (queue != PQ_NONE)
+ vm_page_dequeue(m);
+ vm_page_enqueue(PQ_LAUNDRY, m);
+ } else
+ KASSERT(queue == PQ_NONE,
+ ("wired page %p is queued", m));
+ }
}
/*
@@ -3265,112 +3270,6 @@
}
/*
- * vm_page_cache
- *
- * Put the specified page onto the page cache queue (if appropriate).
- *
- * The object and page must be locked.
- */
-void
-vm_page_cache(vm_page_t m)
-{
- vm_object_t object;
- boolean_t cache_was_empty;
-
- vm_page_lock_assert(m, MA_OWNED);
- object = m->object;
- VM_OBJECT_ASSERT_WLOCKED(object);
- if (vm_page_busied(m) || (m->oflags & VPO_UNMANAGED) ||
- m->hold_count || m->wire_count)
- panic("vm_page_cache: attempting to cache busy page");
- KASSERT(!pmap_page_is_mapped(m),
- ("vm_page_cache: page %p is mapped", m));
- KASSERT(m->dirty == 0, ("vm_page_cache: page %p is dirty", m));
- if (m->valid == 0 || object->type == OBJT_DEFAULT ||
- (object->type == OBJT_SWAP &&
- !vm_pager_has_page(object, m->pindex, NULL, NULL))) {
- /*
- * Hypothesis: A cache-eligible page belonging to a
- * default object or swap object but without a backing
- * store must be zero filled.
- */
- vm_page_free(m);
- return;
- }
- KASSERT((m->flags & PG_CACHED) == 0,
- ("vm_page_cache: page %p is already cached", m));
-
- /*
- * Remove the page from the paging queues.
- */
- vm_page_remque(m);
-
- /*
- * Remove the page from the object's collection of resident
- * pages.
- */
- vm_radix_remove(&object->rtree, m->pindex);
- TAILQ_REMOVE(&object->memq, m, listq);
- object->resident_page_count--;
-
- /*
- * Restore the default memory attribute to the page.
- */
- if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
- pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
-
- /*
- * Insert the page into the object's collection of cached pages
- * and the physical memory allocator's cache/free page queues.
- */
- m->flags &= ~PG_ZERO;
- mtx_lock(&vm_page_queue_free_mtx);
- cache_was_empty = vm_radix_is_empty(&object->cache);
- if (vm_radix_insert(&object->cache, m)) {
- mtx_unlock(&vm_page_queue_free_mtx);
- if (object->type == OBJT_VNODE &&
- object->resident_page_count == 0)
- vdrop(object->handle);
- m->object = NULL;
- vm_page_free(m);
- return;
- }
-
- /*
- * The above call to vm_radix_insert() could reclaim the one pre-
- * existing cached page from this object, resulting in a call to
- * vdrop().
- */
- if (!cache_was_empty)
- cache_was_empty = vm_radix_is_singleton(&object->cache);
-
- m->flags |= PG_CACHED;
- vm_cnt.v_cache_count++;
- PCPU_INC(cnt.v_tcached);
-#if VM_NRESERVLEVEL > 0
- if (!vm_reserv_free_page(m)) {
-#else
- if (TRUE) {
-#endif
- vm_phys_free_pages(m, 0);
- }
- vm_page_free_wakeup();
- mtx_unlock(&vm_page_queue_free_mtx);
-
- /*
- * Increment the vnode's hold count if this is the object's only
- * cached page. Decrement the vnode's hold count if this was
- * the object's only resident page.
- */
- if (object->type == OBJT_VNODE) {
- if (cache_was_empty && object->resident_page_count != 0)
- vhold(object->handle);
- else if (!cache_was_empty && object->resident_page_count == 0)
- vdrop(object->handle);
- }
-}
-
-/*
* vm_page_advise
*
* Deactivate or do nothing, as appropriate.
@@ -3413,11 +3312,13 @@
/*
* Place clean pages near the head of the inactive queue rather than
* the tail, thus defeating the queue's LRU operation and ensuring that
- * the page will be reused quickly. Dirty pages are given a chance to
- * cycle once through the inactive queue before becoming eligible for
- * laundering.
+ * the page will be reused quickly. Dirty pages not already in the
+ * laundry are moved there.
*/
- _vm_page_deactivate(m, m->dirty == 0);
+ if (m->dirty == 0)
+ vm_page_deactivate_noreuse(m);
+ else
+ vm_page_launder(m);
}
/*
@@ -3926,6 +3827,7 @@
db_printf("vm_cnt.v_cache_count: %d\n", vm_cnt.v_cache_count);
db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count);
db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count);
+ db_printf("vm_cnt.v_laundry_count: %d\n", vm_cnt.v_laundry_count);
db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count);
db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved);
db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min);
@@ -3940,12 +3842,14 @@
db_printf("pq_free %d pq_cache %d\n",
vm_cnt.v_free_count, vm_cnt.v_cache_count);
for (dom = 0; dom < vm_ndomains; dom++) {
- db_printf("dom %d page_cnt %d free %d pq_act %d pq_inact %d\n",
+ db_printf(
+ "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d\n",
dom,
vm_dom[dom].vmd_page_count,
vm_dom[dom].vmd_free_count,
vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt,
- vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt);
+ vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt,
+ vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt);
}
}
Index: head/sys/vm/vm_pageout.c
===================================================================
--- head/sys/vm/vm_pageout.c
+++ head/sys/vm/vm_pageout.c
@@ -119,7 +119,7 @@
/* the kernel process "vm_pageout"*/
static void vm_pageout(void);
static void vm_pageout_init(void);
-static int vm_pageout_clean(vm_page_t m);
+static int vm_pageout_clean(vm_page_t m, int *numpagedout);
static int vm_pageout_cluster(vm_page_t m);
static bool vm_pageout_scan(struct vm_domain *vmd, int pass);
static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
@@ -154,6 +154,9 @@
SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp);
#endif
+/* Pagedaemon activity rates, in subdivisions of one second. */
+#define VM_LAUNDER_RATE 10
+#define VM_INACT_SCAN_RATE 2
int vm_pageout_deficit; /* Estimated number of pages deficit */
u_int vm_pageout_wakeup_thresh;
@@ -161,6 +164,13 @@
bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */
bool vm_pages_needed; /* Are threads waiting for free pages? */
+/* Pending request for dirty page laundering. */
+static enum {
+ VM_LAUNDRY_IDLE,
+ VM_LAUNDRY_BACKGROUND,
+ VM_LAUNDRY_SHORTFALL
+} vm_laundry_request = VM_LAUNDRY_IDLE;
+
#if !defined(NO_SWAPPING)
static int vm_pageout_req_swapout; /* XXX */
static int vm_daemon_needed;
@@ -168,9 +178,7 @@
/* Allow for use by vm_pageout before vm_daemon is initialized. */
MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF);
#endif
-static int vm_max_launder = 32;
static int vm_pageout_update_period;
-static int defer_swap_pageouts;
static int disable_swap_pageouts;
static int lowmem_period = 10;
static time_t lowmem_uptime;
@@ -193,9 +201,6 @@
CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0,
"free page threshold for waking up the pageout daemon");
-SYSCTL_INT(_vm, OID_AUTO, max_launder,
- CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout");
-
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
CTLFLAG_RW, &vm_pageout_update_period, 0,
"Maximum active LRU update period");
@@ -215,9 +220,6 @@
CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
#endif
-SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts,
- CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem");
-
SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
@@ -229,6 +231,25 @@
CTLFLAG_RW, &vm_pageout_oom_seq, 0,
"back-to-back calls to oom detector to start OOM");
+static int act_scan_laundry_weight = 3;
+SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RW,
+ &act_scan_laundry_weight, 0,
+ "weight given to clean vs. dirty pages in active queue scans");
+
+static u_int vm_background_launder_target;
+SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RW,
+ &vm_background_launder_target, 0,
+ "background laundering target, in pages");
+
+static u_int vm_background_launder_rate = 4096;
+SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RW,
+ &vm_background_launder_rate, 0,
+ "background laundering rate, in kilobytes per second");
+
+static u_int vm_background_launder_max = 20 * 1024;
+SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RW,
+ &vm_background_launder_max, 0, "background laundering cap, in kilobytes");
+
#define VM_PAGEOUT_PAGE_COUNT 16
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
@@ -236,7 +257,11 @@
SYSCTL_INT(_vm, OID_AUTO, max_wired,
CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
+static u_int isqrt(u_int num);
static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
+static int vm_pageout_launder(struct vm_domain *vmd, int launder,
+ bool in_shortfall);
+static void vm_pageout_laundry_worker(void *arg);
#if !defined(NO_SWAPPING)
static void vm_pageout_map_deactivate_pages(vm_map_t, long);
static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
@@ -387,7 +412,7 @@
/*
* We can cluster only if the page is not clean, busy, or held, and
- * the page is inactive.
+ * the page is in the laundry queue.
*
* During heavy mmap/modification loads the pageout
* daemon can really fragment the underlying file
@@ -413,7 +438,7 @@
break;
}
vm_page_lock(p);
- if (p->queue != PQ_INACTIVE ||
+ if (!vm_page_in_laundry(p) ||
p->hold_count != 0) { /* may be undergoing I/O */
vm_page_unlock(p);
ib = 0;
@@ -439,7 +464,7 @@
if (p->dirty == 0)
break;
vm_page_lock(p);
- if (p->queue != PQ_INACTIVE ||
+ if (!vm_page_in_laundry(p) ||
p->hold_count != 0) { /* may be undergoing I/O */
vm_page_unlock(p);
break;
@@ -519,23 +544,33 @@
("vm_pageout_flush: page %p is not write protected", mt));
switch (pageout_status[i]) {
case VM_PAGER_OK:
+ vm_page_lock(mt);
+ if (vm_page_in_laundry(mt))
+ vm_page_deactivate_noreuse(mt);
+ vm_page_unlock(mt);
+ /* FALLTHROUGH */
case VM_PAGER_PEND:
numpagedout++;
break;
case VM_PAGER_BAD:
/*
- * Page outside of range of object. Right now we
- * essentially lose the changes by pretending it
- * worked.
+ * The page is outside the object's range. We pretend
+ * that the page out worked and clean the page, so the
+ * changes will be lost if the page is reclaimed by
+ * the page daemon.
*/
vm_page_undirty(mt);
+ vm_page_lock(mt);
+ if (vm_page_in_laundry(mt))
+ vm_page_deactivate_noreuse(mt);
+ vm_page_unlock(mt);
break;
case VM_PAGER_ERROR:
case VM_PAGER_FAIL:
/*
- * If page couldn't be paged out, then reactivate the
- * page so it doesn't clog the inactive list. (We
- * will try paging out it again later).
+ * If the page couldn't be paged out, then reactivate
+ * it so that it doesn't clog the laundry and inactive
+ * queues. (We will try paging it out again later).
*/
vm_page_lock(mt);
vm_page_activate(mt);
@@ -617,10 +652,10 @@
act_delta = 1;
vm_page_aflag_clear(p, PGA_REFERENCED);
}
- if (p->queue != PQ_ACTIVE && act_delta != 0) {
+ if (!vm_page_active(p) && act_delta != 0) {
vm_page_activate(p);
p->act_count += act_delta;
- } else if (p->queue == PQ_ACTIVE) {
+ } else if (vm_page_active(p)) {
if (act_delta == 0) {
p->act_count -= min(p->act_count,
ACT_DECLINE);
@@ -636,7 +671,7 @@
p->act_count += ACT_ADVANCE;
vm_page_requeue(p);
}
- } else if (p->queue == PQ_INACTIVE)
+ } else if (vm_page_inactive(p))
pmap_remove_all(p);
vm_page_unlock(p);
}
@@ -739,7 +774,7 @@
* Returns 0 on success and an errno otherwise.
*/
static int
-vm_pageout_clean(vm_page_t m)
+vm_pageout_clean(vm_page_t m, int *numpagedout)
{
struct vnode *vp;
struct mount *mp;
@@ -797,7 +832,7 @@
* (3) reallocated to a different offset, or
* (4) cleaned.
*/
- if (m->queue != PQ_INACTIVE || m->object != object ||
+ if (!vm_page_in_laundry(m) || m->object != object ||
m->pindex != pindex || m->dirty == 0) {
vm_page_unlock(m);
error = ENXIO;
@@ -821,7 +856,7 @@
* laundry. If it is still in the laundry, then we
* start the cleaning operation.
*/
- if (vm_pageout_cluster(m) == 0)
+ if ((*numpagedout = vm_pageout_cluster(m)) == 0)
error = EIO;
unlock_all:
@@ -840,11 +875,390 @@
}
/*
+ * Attempt to launder the specified number of pages.
+ *
+ * Returns the number of pages successfully laundered.
+ */
+static int
+vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
+{
+ struct vm_pagequeue *pq;
+ vm_object_t object;
+ vm_page_t m, next;
+ int act_delta, error, maxscan, numpagedout, starting_target;
+ int vnodes_skipped;
+ bool pageout_ok, queue_locked;
+
+ starting_target = launder;
+ vnodes_skipped = 0;
+
+ /*
+ * Scan the laundry queue for pages eligible to be laundered. We stop
+ * once the target number of dirty pages have been laundered, or once
+ * we've reached the end of the queue. A single iteration of this loop
+ * may cause more than one page to be laundered because of clustering.
+ *
+ * maxscan ensures that we don't re-examine requeued pages. Any
+ * additional pages written as part of a cluster are subtracted from
+ * maxscan since they must be taken from the laundry queue.
+ */
+ pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
+ maxscan = pq->pq_cnt;
+
+ vm_pagequeue_lock(pq);
+ queue_locked = true;
+ for (m = TAILQ_FIRST(&pq->pq_pl);
+ m != NULL && maxscan-- > 0 && launder > 0;
+ m = next) {
+ vm_pagequeue_assert_locked(pq);
+ KASSERT(queue_locked, ("unlocked laundry queue"));
+ KASSERT(vm_page_in_laundry(m),
+ ("page %p has an inconsistent queue", m));
+ next = TAILQ_NEXT(m, plinks.q);
+ if ((m->flags & PG_MARKER) != 0)
+ continue;
+ KASSERT((m->flags & PG_FICTITIOUS) == 0,
+ ("PG_FICTITIOUS page %p cannot be in laundry queue", m));
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("VPO_UNMANAGED page %p cannot be in laundry queue", m));
+ if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
+ vm_page_unlock(m);
+ continue;
+ }
+ object = m->object;
+ if ((!VM_OBJECT_TRYWLOCK(object) &&
+ (!vm_pageout_fallback_object_lock(m, &next) ||
+ m->hold_count != 0)) || vm_page_busied(m)) {
+ VM_OBJECT_WUNLOCK(object);
+ vm_page_unlock(m);
+ continue;
+ }
+
+ /*
+ * Unlock the laundry queue, invalidating the 'next' pointer.
+ * Use a marker to remember our place in the laundry queue.
+ */
+ TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_laundry_marker,
+ plinks.q);
+ vm_pagequeue_unlock(pq);
+ queue_locked = false;
+
+ /*
+ * Invalid pages can be easily freed. They cannot be
+ * mapped; vm_page_free() asserts this.
+ */
+ if (m->valid == 0)
+ goto free_page;
+
+ /*
+ * If the page has been referenced and the object is not dead,
+ * reactivate or requeue the page depending on whether the
+ * object is mapped.
+ */
+ if ((m->aflags & PGA_REFERENCED) != 0) {
+ vm_page_aflag_clear(m, PGA_REFERENCED);
+ act_delta = 1;
+ } else
+ act_delta = 0;
+ if (object->ref_count != 0)
+ act_delta += pmap_ts_referenced(m);
+ else {
+ KASSERT(!pmap_page_is_mapped(m),
+ ("page %p is mapped", m));
+ }
+ if (act_delta != 0) {
+ if (object->ref_count != 0) {
+ PCPU_INC(cnt.v_reactivated);
+ vm_page_activate(m);
+
+ /*
+ * Increase the activation count if the page
+ * was referenced while in the laundry queue.
+ * This makes it less likely that the page will
+ * be returned prematurely to the inactive
+ * queue.
+ */
+ m->act_count += act_delta + ACT_ADVANCE;
+
+ /*
+ * If this was a background laundering, count
+ * activated pages towards our target. The
+ * purpose of background laundering is to ensure
+ * that pages are eventually cycled through the
+ * laundry queue, and an activation is a valid
+ * way out.
+ */
+ if (!in_shortfall)
+ launder--;
+ goto drop_page;
+ } else if ((object->flags & OBJ_DEAD) == 0)
+ goto requeue_page;
+ }
+
+ /*
+ * If the page appears to be clean at the machine-independent
+ * layer, then remove all of its mappings from the pmap in
+ * anticipation of freeing it. If, however, any of the page's
+ * mappings allow write access, then the page may still be
+ * modified until the last of those mappings are removed.
+ */
+ if (object->ref_count != 0) {
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
+ }
+
+ /*
+ * Clean pages are freed, and dirty pages are paged out unless
+ * they belong to a dead object. Requeueing dirty pages from
+ * dead objects is pointless, as they are being paged out and
+ * freed by the thread that destroyed the object.
+ */
+ if (m->dirty == 0) {
+free_page:
+ vm_page_free(m);
+ PCPU_INC(cnt.v_dfree);
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ if (object->type != OBJT_SWAP &&
+ object->type != OBJT_DEFAULT)
+ pageout_ok = true;
+ else if (disable_swap_pageouts)
+ pageout_ok = false;
+ else
+ pageout_ok = true;
+ if (!pageout_ok) {
+requeue_page:
+ vm_pagequeue_lock(pq);
+ queue_locked = true;
+ vm_page_requeue_locked(m);
+ goto drop_page;
+ }
+
+ /*
+ * Form a cluster with adjacent, dirty pages from the
+ * same object, and page out that entire cluster.
+ *
+ * The adjacent, dirty pages must also be in the
+ * laundry. However, their mappings are not checked
+ * for new references. Consequently, a recently
+ * referenced page may be paged out. However, that
+ * page will not be prematurely reclaimed. After page
+ * out, the page will be placed in the inactive queue,
+ * where any new references will be detected and the
+ * page reactivated.
+ */
+ error = vm_pageout_clean(m, &numpagedout);
+ if (error == 0) {
+ launder -= numpagedout;
+ maxscan -= numpagedout - 1;
+ } else if (error == EDEADLK) {
+ pageout_lock_miss++;
+ vnodes_skipped++;
+ }
+ goto relock_queue;
+ }
+drop_page:
+ vm_page_unlock(m);
+ VM_OBJECT_WUNLOCK(object);
+relock_queue:
+ if (!queue_locked) {
+ vm_pagequeue_lock(pq);
+ queue_locked = true;
+ }
+ next = TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q);
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
+ }
+ vm_pagequeue_unlock(pq);
+
+ /*
+ * Wakeup the sync daemon if we skipped a vnode in a writeable object
+ * and we didn't launder enough pages.
+ */
+ if (vnodes_skipped > 0 && launder > 0)
+ (void)speedup_syncer();
+
+ return (starting_target - launder);
+}
+
+/*
+ * Compute the integer square root.
+ */
+static u_int
+isqrt(u_int num)
+{
+ u_int bit, root, tmp;
+
+ bit = 1u << ((NBBY * sizeof(u_int)) - 2);
+ while (bit > num)
+ bit >>= 2;
+ root = 0;
+ while (bit != 0) {
+ tmp = root + bit;
+ root >>= 1;
+ if (num >= tmp) {
+ num -= tmp;
+ root += bit;
+ }
+ bit >>= 2;
+ }
+ return (root);
+}
+
+/*
+ * Perform the work of the laundry thread: periodically wake up and determine
+ * whether any pages need to be laundered. If so, determine the number of pages
+ * that need to be laundered, and launder them.
+ */
+static void
+vm_pageout_laundry_worker(void *arg)
+{
+ struct vm_domain *domain;
+ struct vm_pagequeue *pq;
+ uint64_t nclean, ndirty;
+ u_int last_launder, wakeups;
+ int domidx, last_target, launder, shortfall, shortfall_cycle, target;
+ bool in_shortfall;
+
+ domidx = (uintptr_t)arg;
+ domain = &vm_dom[domidx];
+ pq = &domain->vmd_pagequeues[PQ_LAUNDRY];
+ KASSERT(domain->vmd_segs != 0, ("domain without segments"));
+ vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY);
+
+ shortfall = 0;
+ in_shortfall = false;
+ shortfall_cycle = 0;
+ target = 0;
+ last_launder = 0;
+
+ /*
+ * The pageout laundry worker is never done, so loop forever.
+ */
+ for (;;) {
+ KASSERT(target >= 0, ("negative target %d", target));
+ KASSERT(shortfall_cycle >= 0,
+ ("negative cycle %d", shortfall_cycle));
+ launder = 0;
+ wakeups = VM_METER_PCPU_CNT(v_pdwakeups);
+
+ /*
+ * First determine whether we need to launder pages to meet a
+ * shortage of free pages.
+ */
+ if (shortfall > 0) {
+ in_shortfall = true;
+ shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE;
+ target = shortfall;
+ } else if (!in_shortfall)
+ goto trybackground;
+ else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) {
+ /*
+ * We recently entered shortfall and began laundering
+ * pages. If we have completed that laundering run
+ * (and we are no longer in shortfall) or we have met
+ * our laundry target through other activity, then we
+ * can stop laundering pages.
+ */
+ in_shortfall = false;
+ target = 0;
+ goto trybackground;
+ }
+ last_launder = wakeups;
+ launder = target / shortfall_cycle--;
+ goto dolaundry;
+
+ /*
+ * There's no immediate need to launder any pages; see if we
+ * meet the conditions to perform background laundering:
+ *
+ * 1. The ratio of dirty to clean inactive pages exceeds the
+ * background laundering threshold and the pagedaemon has
+ * been woken up to reclaim pages since our last
+ * laundering, or
+ * 2. we haven't yet reached the target of the current
+ * background laundering run.
+ *
+ * The background laundering threshold is not a constant.
+ * Instead, it is a slowly growing function of the number of
+ * page daemon wakeups since the last laundering. Thus, as the
+ * ratio of dirty to clean inactive pages grows, the amount of
+ * memory pressure required to trigger laundering decreases.
+ */
+trybackground:
+ nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count;
+ ndirty = vm_cnt.v_laundry_count;
+ if (target == 0 && wakeups != last_launder &&
+ ndirty * isqrt(wakeups - last_launder) >= nclean) {
+ target = vm_background_launder_target;
+ }
+
+ /*
+ * We have a non-zero background laundering target. If we've
+ * laundered up to our maximum without observing a page daemon
+ * wakeup, just stop. This is a safety belt that ensures we
+ * don't launder an excessive amount if memory pressure is low
+ * and the ratio of dirty to clean pages is large. Otherwise,
+ * proceed at the background laundering rate.
+ */
+ if (target > 0) {
+ if (wakeups != last_launder) {
+ last_launder = wakeups;
+ last_target = target;
+ } else if (last_target - target >=
+ vm_background_launder_max * PAGE_SIZE / 1024) {
+ target = 0;
+ }
+ launder = vm_background_launder_rate * PAGE_SIZE / 1024;
+ launder /= VM_LAUNDER_RATE;
+ if (launder > target)
+ launder = target;
+ }
+
+dolaundry:
+ if (launder > 0) {
+ /*
+ * Because of I/O clustering, the number of laundered
+ * pages could exceed "target" by the maximum size of
+ * a cluster minus one.
+ */
+ target -= min(vm_pageout_launder(domain, launder,
+ in_shortfall), target);
+ pause("laundp", hz / VM_LAUNDER_RATE);
+ }
+
+ /*
+ * If we're not currently laundering pages and the page daemon
+ * hasn't posted a new request, sleep until the page daemon
+ * kicks us.
+ */
+ vm_pagequeue_lock(pq);
+ if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE)
+ (void)mtx_sleep(&vm_laundry_request,
+ vm_pagequeue_lockptr(pq), PVM, "launds", 0);
+
+ /*
+ * If the pagedaemon has indicated that it's in shortfall, start
+ * a shortfall laundering unless we're already in the middle of
+ * one. This may preempt a background laundering.
+ */
+ if (vm_laundry_request == VM_LAUNDRY_SHORTFALL &&
+ (!in_shortfall || shortfall_cycle == 0)) {
+ shortfall = vm_laundry_target() + vm_pageout_deficit;
+ target = 0;
+ } else
+ shortfall = 0;
+
+ if (target == 0)
+ vm_laundry_request = VM_LAUNDRY_IDLE;
+ vm_pagequeue_unlock(pq);
+ }
+}
+
+/*
* vm_pageout_scan does the dirty work for the pageout daemon.
*
- * pass 0 - Update active LRU/deactivate pages
- * pass 1 - Free inactive pages
- * pass 2 - Launder dirty pages
+ * pass == 0: Update active LRU/deactivate pages
+ * pass >= 1: Free inactive pages
*
* Returns true if pass was zero or enough pages were freed by the inactive
* queue scan to meet the target.
@@ -856,10 +1270,9 @@
struct vm_pagequeue *pq;
vm_object_t object;
long min_scan;
- int act_delta, addl_page_shortage, deficit, error, inactq_shortage;
- int maxlaunder, maxscan, page_shortage, scan_tick, scanned;
- int starting_page_shortage, vnodes_skipped;
- boolean_t pageout_ok, queue_locked;
+ int act_delta, addl_page_shortage, deficit, inactq_shortage, maxscan;
+ int page_shortage, scan_tick, scanned, starting_page_shortage;
+ boolean_t queue_locked;
/*
* If we need to reclaim memory ask kernel caches to return
@@ -901,23 +1314,6 @@
starting_page_shortage = page_shortage;
/*
- * maxlaunder limits the number of dirty pages we flush per scan.
- * For most systems a smaller value (16 or 32) is more robust under
- * extreme memory and disk pressure because any unnecessary writes
- * to disk can result in extreme performance degredation. However,
- * systems with excessive dirty pages (especially when MAP_NOSYNC is
- * used) will die horribly with limited laundering. If the pageout
- * daemon cannot clean enough pages in the first pass, we let it go
- * all out in succeeding passes.
- */
- if ((maxlaunder = vm_max_launder) <= 1)
- maxlaunder = 1;
- if (pass > 1)
- maxlaunder = 10000;
-
- vnodes_skipped = 0;
-
- /*
* Start scanning the inactive queue for pages that we can free. The
* scan will stop when we reach the target or we have scanned the
* entire queue. (Note that m->act_count is not used to make
@@ -932,7 +1328,7 @@
m = next) {
vm_pagequeue_assert_locked(pq);
KASSERT(queue_locked, ("unlocked inactive queue"));
- KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m));
+ KASSERT(vm_page_inactive(m), ("Inactive queue %p", m));
PCPU_INC(cnt.v_pdpages);
next = TAILQ_NEXT(m, plinks.q);
@@ -995,11 +1391,15 @@
KASSERT(m->hold_count == 0, ("Held page %p", m));
/*
- * We unlock the inactive page queue, invalidating the
- * 'next' pointer. Use our marker to remember our
- * place.
+ * Dequeue the inactive page and unlock the inactive page
+ * queue, invalidating the 'next' pointer. Dequeueing the
+ * page here avoids a later reacquisition (and release) of
+ * the inactive page queue lock when vm_page_activate(),
+ * vm_page_free(), or vm_page_launder() is called. Use a
+ * marker to remember our place in the inactive queue.
*/
TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q);
+ vm_page_dequeue_locked(m);
vm_pagequeue_unlock(pq);
queue_locked = FALSE;
@@ -1028,6 +1428,7 @@
}
if (act_delta != 0) {
if (object->ref_count != 0) {
+ PCPU_INC(cnt.v_reactivated);
vm_page_activate(m);
/*
@@ -1039,8 +1440,14 @@
*/
m->act_count += act_delta + ACT_ADVANCE;
goto drop_page;
- } else if ((object->flags & OBJ_DEAD) == 0)
- goto requeue_page;
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_pagequeue_lock(pq);
+ queue_locked = TRUE;
+ m->queue = PQ_INACTIVE;
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ vm_pagequeue_cnt_inc(pq);
+ goto drop_page;
+ }
}
/*
@@ -1056,83 +1463,23 @@
pmap_remove_all(m);
}
+ /*
+ * Clean pages can be freed, but dirty pages must be sent back
+ * to the laundry, unless they belong to a dead object.
+ * Requeueing dirty pages from dead objects is pointless, as
+ * they are being paged out and freed by the thread that
+ * destroyed the object.
+ */
if (m->dirty == 0) {
- /*
- * Clean pages can be freed.
- */
free_page:
vm_page_free(m);
PCPU_INC(cnt.v_dfree);
--page_shortage;
- } else if ((object->flags & OBJ_DEAD) != 0) {
- /*
- * Leave dirty pages from dead objects at the front of
- * the queue. They are being paged out and freed by
- * the thread that destroyed the object. They will
- * leave the queue shortly after the scan finishes, so
- * they should be discounted from the inactive count.
- */
- addl_page_shortage++;
- } else if ((m->flags & PG_WINATCFLS) == 0 && pass < 2) {
- /*
- * Dirty pages need to be paged out, but flushing
- * a page is extremely expensive versus freeing
- * a clean page. Rather then artificially limiting
- * the number of pages we can flush, we instead give
- * dirty pages extra priority on the inactive queue
- * by forcing them to be cycled through the queue
- * twice before being flushed, after which the
- * (now clean) page will cycle through once more
- * before being freed. This significantly extends
- * the thrash point for a heavily loaded machine.
- */
- m->flags |= PG_WINATCFLS;
-requeue_page:
- vm_pagequeue_lock(pq);
- queue_locked = TRUE;
- vm_page_requeue_locked(m);
- } else if (maxlaunder > 0) {
- /*
- * We always want to try to flush some dirty pages if
- * we encounter them, to keep the system stable.
- * Normally this number is small, but under extreme
- * pressure where there are insufficient clean pages
- * on the inactive queue, we may have to go all out.
- */
-
- if (object->type != OBJT_SWAP &&
- object->type != OBJT_DEFAULT)
- pageout_ok = TRUE;
- else if (disable_swap_pageouts)
- pageout_ok = FALSE;
- else if (defer_swap_pageouts)
- pageout_ok = vm_page_count_min();
- else
- pageout_ok = TRUE;
- if (!pageout_ok)
- goto requeue_page;
- error = vm_pageout_clean(m);
- /*
- * Decrement page_shortage on success to account for
- * the (future) cleaned page. Otherwise we could wind
- * up laundering or cleaning too many pages.
- */
- if (error == 0) {
- page_shortage--;
- maxlaunder--;
- } else if (error == EDEADLK) {
- pageout_lock_miss++;
- vnodes_skipped++;
- } else if (error == EBUSY) {
- addl_page_shortage++;
- }
- vm_page_lock_assert(m, MA_NOTOWNED);
- goto relock_queue;
- }
+ } else if ((object->flags & OBJ_DEAD) == 0)
+ vm_page_launder(m);
drop_page:
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
-relock_queue:
if (!queue_locked) {
vm_pagequeue_lock(pq);
queue_locked = TRUE;
@@ -1142,6 +1489,24 @@
}
vm_pagequeue_unlock(pq);
+ /*
+ * Wake up the laundry thread so that it can perform any needed
+ * laundering. If we didn't meet our target, we're in shortfall and
+ * need to launder more aggressively.
+ */
+ if (vm_laundry_request == VM_LAUNDRY_IDLE &&
+ starting_page_shortage > 0) {
+ pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY];
+ vm_pagequeue_lock(pq);
+ if (page_shortage > 0) {
+ vm_laundry_request = VM_LAUNDRY_SHORTFALL;
+ PCPU_INC(cnt.v_pdshortfalls);
+ } else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL)
+ vm_laundry_request = VM_LAUNDRY_BACKGROUND;
+ wakeup(&vm_laundry_request);
+ vm_pagequeue_unlock(pq);
+ }
+
#if !defined(NO_SWAPPING)
/*
* Wakeup the swapout daemon if we didn't free the targeted number of
@@ -1152,14 +1517,6 @@
#endif
/*
- * Wakeup the sync daemon if we skipped a vnode in a writeable object
- * and we didn't free enough pages.
- */
- if (vnodes_skipped > 0 && page_shortage > vm_cnt.v_free_target -
- vm_cnt.v_free_min)
- (void)speedup_syncer();
-
- /*
* If the inactive queue scan fails repeatedly to meet its
* target, kill the largest process.
*/
@@ -1167,10 +1524,20 @@
/*
* Compute the number of pages we want to try to move from the
- * active queue to the inactive queue.
+ * active queue to either the inactive or laundry queue.
+ *
+ * When scanning active pages, we make clean pages count more heavily
+ * towards the page shortage than dirty pages. This is because dirty
+ * pages must be laundered before they can be reused and thus have less
+ * utility when attempting to quickly alleviate a shortage. However,
+ * this weighting also causes the scan to deactivate dirty pages more
+ * aggressively, improving the effectiveness of clustering and
+ * ensuring that they can eventually be reused.
*/
- inactq_shortage = vm_cnt.v_inactive_target - vm_cnt.v_inactive_count +
+ inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count +
+ vm_cnt.v_laundry_count / act_scan_laundry_weight) +
vm_paging_target() + deficit + addl_page_shortage;
+ page_shortage *= act_scan_laundry_weight;
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
vm_pagequeue_lock(pq);
@@ -1254,14 +1621,44 @@
m->act_count -= min(m->act_count, ACT_DECLINE);
/*
- * Move this page to the tail of the active or inactive
+ * Move this page to the tail of the active, inactive or laundry
* queue depending on usage.
*/
if (m->act_count == 0) {
/* Dequeue to avoid later lock recursion. */
vm_page_dequeue_locked(m);
- vm_page_deactivate(m);
- inactq_shortage--;
+
+ /*
+ * When not short for inactive pages, let dirty pages go
+ * through the inactive queue before moving to the
+ * laundry queues. This gives them some extra time to
+ * be reactivated, potentially avoiding an expensive
+ * pageout. During a page shortage, the inactive queue
+ * is necessarily small, so we may move dirty pages
+ * directly to the laundry queue.
+ */
+ if (inactq_shortage <= 0)
+ vm_page_deactivate(m);
+ else {
+ /*
+ * Calling vm_page_test_dirty() here would
+ * require acquisition of the object's write
+ * lock. However, during a page shortage,
+ * directing dirty pages into the laundry
+ * queue is only an optimization and not a
+ * requirement. Therefore, we simply rely on
+ * the opportunistic updates to the page's
+ * dirty field by the pmap.
+ */
+ if (m->dirty == 0) {
+ vm_page_deactivate(m);
+ inactq_shortage -=
+ act_scan_laundry_weight;
+ } else {
+ vm_page_launder(m);
+ inactq_shortage--;
+ }
+ }
} else
vm_page_requeue_locked(m);
vm_page_unlock(m);
@@ -1570,14 +1967,14 @@
* thread during the previous scan, which must have
* been a level 0 scan, or vm_pageout_wanted was
* already set and the scan failed to free enough
- * pages. If we haven't yet performed a level >= 2
- * scan (unlimited dirty cleaning), then upgrade the
- * level and scan again now. Otherwise, sleep a bit
- * and try again later.
+ * pages. If we haven't yet performed a level >= 1
+ * (page reclamation) scan, then increase the level
+ * and scan again now. Otherwise, sleep a bit and
+ * try again later.
*/
mtx_unlock(&vm_page_queue_free_mtx);
- if (pass > 1)
- pause("psleep", hz / 2);
+ if (pass >= 1)
+ pause("psleep", hz / VM_INACT_SCAN_RATE);
pass++;
} else {
/*
@@ -1648,6 +2045,14 @@
/* XXX does not really belong here */
if (vm_page_max_wired == 0)
vm_page_max_wired = vm_cnt.v_free_count / 3;
+
+ /*
+ * Target amount of memory to move out of the laundry queue during a
+ * background laundering. This is proportional to the amount of system
+ * memory.
+ */
+ vm_background_launder_target = (vm_cnt.v_free_target -
+ vm_cnt.v_free_min) / 10;
}
/*
@@ -1662,6 +2067,10 @@
#endif
swap_pager_swap_init();
+ error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
+ 0, 0, "laundry: dom0");
+ if (error != 0)
+ panic("starting laundry for domain 0, error %d", error);
#ifdef VM_NUMA_ALLOC
for (i = 1; i < vm_ndomains; i++) {
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
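A worked example of the background-laundering trigger implemented in vm_pageout_laundry_worker() above, using the formula ndirty * isqrt(wakeups - last_launder) >= nclean with illustrative page counts (not measurements):

    nclean = v_inactive_count + v_free_count = 200,000 pages
    ndirty = v_laundry_count                 =  50,000 pages

    50,000 * isqrt(16) = 50,000 * 4 = 200,000 >= 200,000
        -> a background run starts once 16 pagedaemon wakeups have
           accumulated since the last laundering
    100,000 * isqrt(4) = 100,000 * 2 = 200,000 >= 200,000
        -> with twice as many dirty pages, only 4 wakeups are needed

In other words, the dirtier the mix of reclaimable pages, the less accumulated memory pressure is required before background laundering begins. A shortfall request, by contrast, bypasses this test: the shortfall target is laundered over VM_LAUNDER_RATE / VM_INACT_SCAN_RATE = 5 passes, with a pause of hz / VM_LAUNDER_RATE ticks between passes.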
