head/sys/vm/vm_pageout.c
(113 lines elided)
/* | /* | ||||
* System initialization | * System initialization | ||||
*/ | */ | ||||
/* the kernel process "vm_pageout"*/ | /* the kernel process "vm_pageout"*/ | ||||
static void vm_pageout(void); | static void vm_pageout(void); | ||||
static void vm_pageout_init(void); | static void vm_pageout_init(void); | ||||
static int vm_pageout_clean(vm_page_t m); | static int vm_pageout_clean(vm_page_t m, int *numpagedout); | ||||
static int vm_pageout_cluster(vm_page_t m); | static int vm_pageout_cluster(vm_page_t m); | ||||
static bool vm_pageout_scan(struct vm_domain *vmd, int pass); | static bool vm_pageout_scan(struct vm_domain *vmd, int pass); | ||||
static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, | static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, | ||||
int starting_page_shortage); | int starting_page_shortage); | ||||
SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, | SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, | ||||
NULL); | NULL); | ||||
(18 lines elided)
static struct kproc_desc vm_kp = { | static struct kproc_desc vm_kp = { | ||||
"vmdaemon", | "vmdaemon", | ||||
vm_daemon, | vm_daemon, | ||||
&vmproc | &vmproc | ||||
}; | }; | ||||
SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); | SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); | ||||
#endif | #endif | ||||
/* Pagedaemon activity rates, in subdivisions of one second. */ | |||||
#define VM_LAUNDER_RATE 10 | |||||
#define VM_INACT_SCAN_RATE 2 | |||||
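(Editorial aside, not part of the change: a standalone sketch of how these rate macros divide one second. The hz value below is an assumption; it is a kernel tunable, commonly 1000. The arithmetic mirrors the pause() calls and the shortfall_cycle computation added further down in this diff.)

/* Standalone illustration; compiles with any C compiler. */
#include <stdio.h>

#define VM_LAUNDER_RATE		10	/* laundering batches per second */
#define VM_INACT_SCAN_RATE	2	/* inactive queue scans per second */

int
main(void)
{
	int hz = 1000;	/* assumed ticks per second; a kernel tunable */

	/* The laundry thread sleeps this long between batches. */
	printf("laundering batch interval: %d ticks (0.1 s)\n",
	    hz / VM_LAUNDER_RATE);
	/* The page daemon re-runs a shortfall scan at this interval. */
	printf("inactive scan interval: %d ticks (0.5 s)\n",
	    hz / VM_INACT_SCAN_RATE);
	/* A shortfall target is spread over this many laundering batches. */
	printf("batches per shortfall cycle: %d\n",
	    VM_LAUNDER_RATE / VM_INACT_SCAN_RATE);
	return (0);
}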
int vm_pageout_deficit; /* Estimated number of pages deficit */ | int vm_pageout_deficit; /* Estimated number of pages deficit */ | ||||
u_int vm_pageout_wakeup_thresh; | u_int vm_pageout_wakeup_thresh; | ||||
static int vm_pageout_oom_seq = 12; | static int vm_pageout_oom_seq = 12; | ||||
bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */ | bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */ | ||||
bool vm_pages_needed; /* Are threads waiting for free pages? */ | bool vm_pages_needed; /* Are threads waiting for free pages? */ | ||||
/* Pending request for dirty page laundering. */ | |||||
static enum { | |||||
VM_LAUNDRY_IDLE, | |||||
VM_LAUNDRY_BACKGROUND, | |||||
VM_LAUNDRY_SHORTFALL | |||||
} vm_laundry_request = VM_LAUNDRY_IDLE; | |||||
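(Editorial aside: the request/acknowledge handshake around this variable, condensed from the code this change adds to vm_pageout_scan() and vm_pageout_laundry_worker() below. A sketch only; see those functions for the full logic, including the shortfall computation.)

/* Page daemon side: post a request and wake the laundry thread. */
if (vm_laundry_request == VM_LAUNDRY_IDLE && starting_page_shortage > 0) {
	vm_pagequeue_lock(pq);
	vm_laundry_request = (page_shortage > 0) ?
	    VM_LAUNDRY_SHORTFALL : VM_LAUNDRY_BACKGROUND;
	wakeup(&vm_laundry_request);
	vm_pagequeue_unlock(pq);
}

/* Laundry thread side: sleep until a request arrives, then reset it. */
vm_pagequeue_lock(pq);
if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE)
	(void)mtx_sleep(&vm_laundry_request, vm_pagequeue_lockptr(pq),
	    PVM, "launds", 0);
if (target == 0)
	vm_laundry_request = VM_LAUNDRY_IDLE;
vm_pagequeue_unlock(pq);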
#if !defined(NO_SWAPPING) | #if !defined(NO_SWAPPING) | ||||
static int vm_pageout_req_swapout; /* XXX */ | static int vm_pageout_req_swapout; /* XXX */ | ||||
static int vm_daemon_needed; | static int vm_daemon_needed; | ||||
static struct mtx vm_daemon_mtx; | static struct mtx vm_daemon_mtx; | ||||
/* Allow for use by vm_pageout before vm_daemon is initialized. */ | /* Allow for use by vm_pageout before vm_daemon is initialized. */ | ||||
MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); | MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); | ||||
#endif | #endif | ||||
static int vm_max_launder = 32; | |||||
static int vm_pageout_update_period; | static int vm_pageout_update_period; | ||||
static int defer_swap_pageouts; | |||||
static int disable_swap_pageouts; | static int disable_swap_pageouts; | ||||
static int lowmem_period = 10; | static int lowmem_period = 10; | ||||
static time_t lowmem_uptime; | static time_t lowmem_uptime; | ||||
#if defined(NO_SWAPPING) | #if defined(NO_SWAPPING) | ||||
static int vm_swap_enabled = 0; | static int vm_swap_enabled = 0; | ||||
static int vm_swap_idle_enabled = 0; | static int vm_swap_idle_enabled = 0; | ||||
#else | #else | ||||
static int vm_swap_enabled = 1; | static int vm_swap_enabled = 1; | ||||
static int vm_swap_idle_enabled = 0; | static int vm_swap_idle_enabled = 0; | ||||
#endif | #endif | ||||
static int vm_panic_on_oom = 0; | static int vm_panic_on_oom = 0; | ||||
SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, | SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, | ||||
CTLFLAG_RWTUN, &vm_panic_on_oom, 0, | CTLFLAG_RWTUN, &vm_panic_on_oom, 0, | ||||
"panic on out of memory instead of killing the largest process"); | "panic on out of memory instead of killing the largest process"); | ||||
SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, | SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, | ||||
CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0, | CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0, | ||||
"free page threshold for waking up the pageout daemon"); | "free page threshold for waking up the pageout daemon"); | ||||
SYSCTL_INT(_vm, OID_AUTO, max_launder, | |||||
CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout"); | |||||
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, | SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, | ||||
CTLFLAG_RW, &vm_pageout_update_period, 0, | CTLFLAG_RW, &vm_pageout_update_period, 0, | ||||
"Maximum active LRU update period"); | "Maximum active LRU update period"); | ||||
SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0, | SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0, | ||||
"Low memory callback period"); | "Low memory callback period"); | ||||
#if defined(NO_SWAPPING) | #if defined(NO_SWAPPING) | ||||
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, | SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, | ||||
CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); | CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); | ||||
SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, | SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, | ||||
CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); | CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); | ||||
#else | #else | ||||
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, | SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, | ||||
CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); | CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); | ||||
SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, | SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, | ||||
CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); | CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); | ||||
#endif | #endif | ||||
SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts, | |||||
CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem"); | |||||
SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, | SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, | ||||
CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); | CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); | ||||
static int pageout_lock_miss; | static int pageout_lock_miss; | ||||
SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, | SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, | ||||
CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); | CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); | ||||
SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, | SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, | ||||
CTLFLAG_RW, &vm_pageout_oom_seq, 0, | CTLFLAG_RW, &vm_pageout_oom_seq, 0, | ||||
"back-to-back calls to oom detector to start OOM"); | "back-to-back calls to oom detector to start OOM"); | ||||
static int act_scan_laundry_weight = 3; | |||||
SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RW, | |||||
&act_scan_laundry_weight, 0, | |||||
"weight given to clean vs. dirty pages in active queue scans"); | |||||
static u_int vm_background_launder_target; | |||||
SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RW, | |||||
&vm_background_launder_target, 0, | |||||
"background laundering target, in pages"); | |||||
static u_int vm_background_launder_rate = 4096; | |||||
SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RW, | |||||
&vm_background_launder_rate, 0, | |||||
"background laundering rate, in kilobytes per second"); | |||||
static u_int vm_background_launder_max = 20 * 1024; | |||||
SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RW, | |||||
&vm_background_launder_max, 0, "background laundering cap, in kilobytes"); | |||||
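(Editorial aside: the defaults above restated in more familiar units. This is pure unit conversion on the values hard-coded in this diff; nothing is read from a running kernel.)

#include <stdio.h>

int
main(void)
{
	unsigned rate_kb = 4096;	/* vm_background_launder_rate, KB/s */
	unsigned max_kb = 20 * 1024;	/* vm_background_launder_max, KB */

	printf("background laundering rate: %u MB/s\n", rate_kb / 1024);
	printf("cap per run without a page daemon wakeup: %u MB\n",
	    max_kb / 1024);
	/* At the default rate, the cap is reached after max_kb / rate_kb
	 * seconds of uninterrupted background laundering. */
	printf("seconds to reach the cap: %u\n", max_kb / rate_kb);
	return (0);
}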
#define VM_PAGEOUT_PAGE_COUNT 16 | #define VM_PAGEOUT_PAGE_COUNT 16 | ||||
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; | int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; | ||||
int vm_page_max_wired; /* XXX max # of wired pages system-wide */ | int vm_page_max_wired; /* XXX max # of wired pages system-wide */ | ||||
SYSCTL_INT(_vm, OID_AUTO, max_wired, | SYSCTL_INT(_vm, OID_AUTO, max_wired, | ||||
CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); | CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); | ||||
static u_int isqrt(u_int num); | |||||
static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); | static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); | ||||
static int vm_pageout_launder(struct vm_domain *vmd, int launder, | |||||
bool in_shortfall); | |||||
static void vm_pageout_laundry_worker(void *arg); | |||||
#if !defined(NO_SWAPPING) | #if !defined(NO_SWAPPING) | ||||
static void vm_pageout_map_deactivate_pages(vm_map_t, long); | static void vm_pageout_map_deactivate_pages(vm_map_t, long); | ||||
static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long); | static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long); | ||||
static void vm_req_vmdaemon(int req); | static void vm_req_vmdaemon(int req); | ||||
#endif | #endif | ||||
static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); | static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); | ||||
/* | /* | ||||
(134 lines elided; context: "vm_pageout_cluster(vm_page_t m)")
mc[vm_pageout_page_count] = pb = ps = m; | mc[vm_pageout_page_count] = pb = ps = m; | ||||
pageout_count = 1; | pageout_count = 1; | ||||
page_base = vm_pageout_page_count; | page_base = vm_pageout_page_count; | ||||
ib = 1; | ib = 1; | ||||
is = 1; | is = 1; | ||||
/* | /* | ||||
* We can cluster only if the page is not clean, busy, or held, and | * We can cluster only if the page is not clean, busy, or held, and | ||||
* the page is inactive. | * the page is in the laundry queue. | ||||
* | * | ||||
* During heavy mmap/modification loads the pageout | * During heavy mmap/modification loads the pageout | ||||
* daemon can really fragment the underlying file | * daemon can really fragment the underlying file | ||||
* due to flushing pages out of order and not trying to | * due to flushing pages out of order and not trying to | ||||
* align the clusters (which leaves sporadic out-of-order | * align the clusters (which leaves sporadic out-of-order | ||||
* holes). To solve this problem we do the reverse scan | * holes). To solve this problem we do the reverse scan | ||||
* first and attempt to align our cluster, then do a | * first and attempt to align our cluster, then do a | ||||
* forward scan if room remains. | * forward scan if room remains. | ||||
(9 lines elided; context: "if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) {")
break; | break; | ||||
} | } | ||||
vm_page_test_dirty(p); | vm_page_test_dirty(p); | ||||
if (p->dirty == 0) { | if (p->dirty == 0) { | ||||
ib = 0; | ib = 0; | ||||
break; | break; | ||||
} | } | ||||
vm_page_lock(p); | vm_page_lock(p); | ||||
if (p->queue != PQ_INACTIVE || | if (!vm_page_in_laundry(p) || | ||||
p->hold_count != 0) { /* may be undergoing I/O */ | p->hold_count != 0) { /* may be undergoing I/O */ | ||||
vm_page_unlock(p); | vm_page_unlock(p); | ||||
ib = 0; | ib = 0; | ||||
break; | break; | ||||
} | } | ||||
vm_page_unlock(p); | vm_page_unlock(p); | ||||
mc[--page_base] = pb = p; | mc[--page_base] = pb = p; | ||||
++pageout_count; | ++pageout_count; | ||||
(9 lines elided; context: "more:")
while (pageout_count < vm_pageout_page_count && | while (pageout_count < vm_pageout_page_count && | ||||
pindex + is < object->size) { | pindex + is < object->size) { | ||||
if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) | if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) | ||||
break; | break; | ||||
vm_page_test_dirty(p); | vm_page_test_dirty(p); | ||||
if (p->dirty == 0) | if (p->dirty == 0) | ||||
break; | break; | ||||
vm_page_lock(p); | vm_page_lock(p); | ||||
if (p->queue != PQ_INACTIVE || | if (!vm_page_in_laundry(p) || | ||||
p->hold_count != 0) { /* may be undergoing I/O */ | p->hold_count != 0) { /* may be undergoing I/O */ | ||||
vm_page_unlock(p); | vm_page_unlock(p); | ||||
break; | break; | ||||
} | } | ||||
vm_page_unlock(p); | vm_page_unlock(p); | ||||
mc[page_base + pageout_count] = ps = p; | mc[page_base + pageout_count] = ps = p; | ||||
++pageout_count; | ++pageout_count; | ||||
++is; | ++is; | ||||
(63 lines elided; context: "vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,")
for (i = 0; i < count; i++) { | for (i = 0; i < count; i++) { | ||||
vm_page_t mt = mc[i]; | vm_page_t mt = mc[i]; | ||||
KASSERT(pageout_status[i] == VM_PAGER_PEND || | KASSERT(pageout_status[i] == VM_PAGER_PEND || | ||||
!pmap_page_is_write_mapped(mt), | !pmap_page_is_write_mapped(mt), | ||||
("vm_pageout_flush: page %p is not write protected", mt)); | ("vm_pageout_flush: page %p is not write protected", mt)); | ||||
switch (pageout_status[i]) { | switch (pageout_status[i]) { | ||||
case VM_PAGER_OK: | case VM_PAGER_OK: | ||||
vm_page_lock(mt); | |||||
if (vm_page_in_laundry(mt)) | |||||
vm_page_deactivate_noreuse(mt); | |||||
vm_page_unlock(mt); | |||||
/* FALLTHROUGH */ | |||||
case VM_PAGER_PEND: | case VM_PAGER_PEND: | ||||
numpagedout++; | numpagedout++; | ||||
break; | break; | ||||
case VM_PAGER_BAD: | case VM_PAGER_BAD: | ||||
/* | /* | ||||
* Page outside of range of object. Right now we | * The page is outside the object's range. We pretend | ||||
* essentially lose the changes by pretending it | * that the page out worked and clean the page, so the | ||||
* worked. | * changes will be lost if the page is reclaimed by | ||||
* the page daemon. | |||||
*/ | */ | ||||
vm_page_undirty(mt); | vm_page_undirty(mt); | ||||
vm_page_lock(mt); | |||||
if (vm_page_in_laundry(mt)) | |||||
vm_page_deactivate_noreuse(mt); | |||||
vm_page_unlock(mt); | |||||
break; | break; | ||||
case VM_PAGER_ERROR: | case VM_PAGER_ERROR: | ||||
case VM_PAGER_FAIL: | case VM_PAGER_FAIL: | ||||
/* | /* | ||||
* If page couldn't be paged out, then reactivate the | * If the page couldn't be paged out, then reactivate | ||||
* page so it doesn't clog the inactive list. (We | * it so that it doesn't clog the laundry and inactive | ||||
* will try paging out it again later). | * queues. (We will try paging it out again later). | ||||
*/ | */ | ||||
vm_page_lock(mt); | vm_page_lock(mt); | ||||
vm_page_activate(mt); | vm_page_activate(mt); | ||||
vm_page_unlock(mt); | vm_page_unlock(mt); | ||||
if (eio != NULL && i >= mreq && i - mreq < runlen) | if (eio != NULL && i >= mreq && i - mreq < runlen) | ||||
*eio = TRUE; | *eio = TRUE; | ||||
break; | break; | ||||
case VM_PAGER_AGAIN: | case VM_PAGER_AGAIN: | ||||
(65 lines elided; context: "TAILQ_FOREACH(p, &object->memq, listq) {")
continue; | continue; | ||||
} | } | ||||
act_delta = pmap_ts_referenced(p); | act_delta = pmap_ts_referenced(p); | ||||
if ((p->aflags & PGA_REFERENCED) != 0) { | if ((p->aflags & PGA_REFERENCED) != 0) { | ||||
if (act_delta == 0) | if (act_delta == 0) | ||||
act_delta = 1; | act_delta = 1; | ||||
vm_page_aflag_clear(p, PGA_REFERENCED); | vm_page_aflag_clear(p, PGA_REFERENCED); | ||||
} | } | ||||
if (p->queue != PQ_ACTIVE && act_delta != 0) { | if (!vm_page_active(p) && act_delta != 0) { | ||||
vm_page_activate(p); | vm_page_activate(p); | ||||
p->act_count += act_delta; | p->act_count += act_delta; | ||||
} else if (p->queue == PQ_ACTIVE) { | } else if (vm_page_active(p)) { | ||||
if (act_delta == 0) { | if (act_delta == 0) { | ||||
p->act_count -= min(p->act_count, | p->act_count -= min(p->act_count, | ||||
ACT_DECLINE); | ACT_DECLINE); | ||||
if (!remove_mode && p->act_count == 0) { | if (!remove_mode && p->act_count == 0) { | ||||
pmap_remove_all(p); | pmap_remove_all(p); | ||||
vm_page_deactivate(p); | vm_page_deactivate(p); | ||||
} else | } else | ||||
vm_page_requeue(p); | vm_page_requeue(p); | ||||
} else { | } else { | ||||
vm_page_activate(p); | vm_page_activate(p); | ||||
if (p->act_count < ACT_MAX - | if (p->act_count < ACT_MAX - | ||||
ACT_ADVANCE) | ACT_ADVANCE) | ||||
p->act_count += ACT_ADVANCE; | p->act_count += ACT_ADVANCE; | ||||
vm_page_requeue(p); | vm_page_requeue(p); | ||||
} | } | ||||
} else if (p->queue == PQ_INACTIVE) | } else if (vm_page_inactive(p)) | ||||
pmap_remove_all(p); | pmap_remove_all(p); | ||||
vm_page_unlock(p); | vm_page_unlock(p); | ||||
} | } | ||||
if ((backing_object = object->backing_object) == NULL) | if ((backing_object = object->backing_object) == NULL) | ||||
goto unlock_return; | goto unlock_return; | ||||
VM_OBJECT_RLOCK(backing_object); | VM_OBJECT_RLOCK(backing_object); | ||||
if (object != first_object) | if (object != first_object) | ||||
VM_OBJECT_RUNLOCK(object); | VM_OBJECT_RUNLOCK(object); | ||||
(86 lines elided)
* Attempt to acquire all of the necessary locks to launder a page and | * Attempt to acquire all of the necessary locks to launder a page and | ||||
* then call through the clustering layer to PUTPAGES. Wait a short | * then call through the clustering layer to PUTPAGES. Wait a short | ||||
* time for a vnode lock. | * time for a vnode lock. | ||||
* | * | ||||
* Requires the page and object lock on entry, releases both before return. | * Requires the page and object lock on entry, releases both before return. | ||||
* Returns 0 on success and an errno otherwise. | * Returns 0 on success and an errno otherwise. | ||||
*/ | */ | ||||
static int | static int | ||||
vm_pageout_clean(vm_page_t m) | vm_pageout_clean(vm_page_t m, int *numpagedout) | ||||
{ | { | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct mount *mp; | struct mount *mp; | ||||
vm_object_t object; | vm_object_t object; | ||||
vm_pindex_t pindex; | vm_pindex_t pindex; | ||||
int error, lockmode; | int error, lockmode; | ||||
vm_page_assert_locked(m); | vm_page_assert_locked(m); | ||||
(41 lines elided; context: "if (object->type == OBJT_VNODE) {")
/* | /* | ||||
* While the object and page were unlocked, the page | * While the object and page were unlocked, the page | ||||
* may have been: | * may have been: | ||||
* (1) moved to a different queue, | * (1) moved to a different queue, | ||||
* (2) reallocated to a different object, | * (2) reallocated to a different object, | ||||
* (3) reallocated to a different offset, or | * (3) reallocated to a different offset, or | ||||
* (4) cleaned. | * (4) cleaned. | ||||
*/ | */ | ||||
if (m->queue != PQ_INACTIVE || m->object != object || | if (!vm_page_in_laundry(m) || m->object != object || | ||||
m->pindex != pindex || m->dirty == 0) { | m->pindex != pindex || m->dirty == 0) { | ||||
vm_page_unlock(m); | vm_page_unlock(m); | ||||
error = ENXIO; | error = ENXIO; | ||||
goto unlock_all; | goto unlock_all; | ||||
} | } | ||||
/* | /* | ||||
* The page may have been busied or held while the object | * The page may have been busied or held while the object | ||||
* and page locks were released. | * and page locks were released. | ||||
*/ | */ | ||||
if (vm_page_busied(m) || m->hold_count != 0) { | if (vm_page_busied(m) || m->hold_count != 0) { | ||||
vm_page_unlock(m); | vm_page_unlock(m); | ||||
error = EBUSY; | error = EBUSY; | ||||
goto unlock_all; | goto unlock_all; | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* If a page is dirty, then it is either being washed | * If a page is dirty, then it is either being washed | ||||
* (but not yet cleaned) or it is still in the | * (but not yet cleaned) or it is still in the | ||||
* laundry. If it is still in the laundry, then we | * laundry. If it is still in the laundry, then we | ||||
* start the cleaning operation. | * start the cleaning operation. | ||||
*/ | */ | ||||
if (vm_pageout_cluster(m) == 0) | if ((*numpagedout = vm_pageout_cluster(m)) == 0) | ||||
error = EIO; | error = EIO; | ||||
unlock_all: | unlock_all: | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
unlock_mp: | unlock_mp: | ||||
vm_page_lock_assert(m, MA_NOTOWNED); | vm_page_lock_assert(m, MA_NOTOWNED); | ||||
if (mp != NULL) { | if (mp != NULL) { | ||||
if (vp != NULL) | if (vp != NULL) | ||||
vput(vp); | vput(vp); | ||||
vm_object_deallocate(object); | vm_object_deallocate(object); | ||||
vn_finished_write(mp); | vn_finished_write(mp); | ||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
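(Editorial aside: how a caller consumes the new numpagedout out-parameter; this mirrors the call site this change adds in vm_pageout_launder() below.)

int error, numpagedout;

error = vm_pageout_clean(m, &numpagedout);
if (error == 0) {
	/* Clustering may have laundered more than one page. */
	launder -= numpagedout;
} else if (error == EDEADLK) {
	/* The vnode lock wasn't acquired in time; record the miss. */
	pageout_lock_miss++;
	vnodes_skipped++;
}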
/* | /* | ||||
* Attempt to launder the specified number of pages. | |||||
* | |||||
* Returns the number of pages successfully laundered. | |||||
*/ | |||||
static int | |||||
vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) | |||||
{ | |||||
struct vm_pagequeue *pq; | |||||
vm_object_t object; | |||||
vm_page_t m, next; | |||||
int act_delta, error, maxscan, numpagedout, starting_target; | |||||
int vnodes_skipped; | |||||
bool pageout_ok, queue_locked; | |||||
starting_target = launder; | |||||
vnodes_skipped = 0; | |||||
/* | |||||
* Scan the laundry queue for pages eligible to be laundered. We stop | |||||
* once the target number of dirty pages have been laundered, or once | |||||
* we've reached the end of the queue. A single iteration of this loop | |||||
* may cause more than one page to be laundered because of clustering. | |||||
* | |||||
* maxscan ensures that we don't re-examine requeued pages. Any | |||||
* additional pages written as part of a cluster are subtracted from | |||||
* maxscan since they must be taken from the laundry queue. | |||||
*/ | |||||
pq = &vmd->vmd_pagequeues[PQ_LAUNDRY]; | |||||
maxscan = pq->pq_cnt; | |||||
vm_pagequeue_lock(pq); | |||||
queue_locked = true; | |||||
for (m = TAILQ_FIRST(&pq->pq_pl); | |||||
m != NULL && maxscan-- > 0 && launder > 0; | |||||
m = next) { | |||||
vm_pagequeue_assert_locked(pq); | |||||
KASSERT(queue_locked, ("unlocked laundry queue")); | |||||
KASSERT(vm_page_in_laundry(m), | |||||
("page %p has an inconsistent queue", m)); | |||||
next = TAILQ_NEXT(m, plinks.q); | |||||
if ((m->flags & PG_MARKER) != 0) | |||||
continue; | |||||
KASSERT((m->flags & PG_FICTITIOUS) == 0, | |||||
("PG_FICTITIOUS page %p cannot be in laundry queue", m)); | |||||
KASSERT((m->oflags & VPO_UNMANAGED) == 0, | |||||
("VPO_UNMANAGED page %p cannot be in laundry queue", m)); | |||||
if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) { | |||||
vm_page_unlock(m); | |||||
continue; | |||||
} | |||||
object = m->object; | |||||
if ((!VM_OBJECT_TRYWLOCK(object) && | |||||
(!vm_pageout_fallback_object_lock(m, &next) || | |||||
m->hold_count != 0)) || vm_page_busied(m)) { | |||||
VM_OBJECT_WUNLOCK(object); | |||||
vm_page_unlock(m); | |||||
continue; | |||||
} | |||||
/* | |||||
* Unlock the laundry queue, invalidating the 'next' pointer. | |||||
* Use a marker to remember our place in the laundry queue. | |||||
*/ | |||||
TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_laundry_marker, | |||||
plinks.q); | |||||
vm_pagequeue_unlock(pq); | |||||
queue_locked = false; | |||||
/* | |||||
* Invalid pages can be easily freed. They cannot be | |||||
* mapped; vm_page_free() asserts this. | |||||
*/ | |||||
if (m->valid == 0) | |||||
goto free_page; | |||||
/* | |||||
* If the page has been referenced and the object is not dead, | |||||
* reactivate or requeue the page depending on whether the | |||||
* object is mapped. | |||||
*/ | |||||
if ((m->aflags & PGA_REFERENCED) != 0) { | |||||
vm_page_aflag_clear(m, PGA_REFERENCED); | |||||
act_delta = 1; | |||||
} else | |||||
act_delta = 0; | |||||
if (object->ref_count != 0) | |||||
act_delta += pmap_ts_referenced(m); | |||||
else { | |||||
KASSERT(!pmap_page_is_mapped(m), | |||||
("page %p is mapped", m)); | |||||
} | |||||
if (act_delta != 0) { | |||||
if (object->ref_count != 0) { | |||||
PCPU_INC(cnt.v_reactivated); | |||||
vm_page_activate(m); | |||||
/* | |||||
* Increase the activation count if the page | |||||
* was referenced while in the laundry queue. | |||||
* This makes it less likely that the page will | |||||
* be returned prematurely to the inactive | |||||
* queue. | |||||
*/ | |||||
m->act_count += act_delta + ACT_ADVANCE; | |||||
/* | |||||
* If this was a background laundering, count | |||||
* activated pages towards our target. The | |||||
* purpose of background laundering is to ensure | |||||
* that pages are eventually cycled through the | |||||
* laundry queue, and an activation is a valid | |||||
* way out. | |||||
*/ | |||||
if (!in_shortfall) | |||||
launder--; | |||||
goto drop_page; | |||||
} else if ((object->flags & OBJ_DEAD) == 0) | |||||
goto requeue_page; | |||||
} | |||||
/* | |||||
* If the page appears to be clean at the machine-independent | |||||
* layer, then remove all of its mappings from the pmap in | |||||
* anticipation of freeing it. If, however, any of the page's | |||||
* mappings allow write access, then the page may still be | |||||
* modified until the last of those mappings are removed. | |||||
*/ | |||||
if (object->ref_count != 0) { | |||||
vm_page_test_dirty(m); | |||||
if (m->dirty == 0) | |||||
pmap_remove_all(m); | |||||
} | |||||
/* | |||||
* Clean pages are freed, and dirty pages are paged out unless | |||||
* they belong to a dead object. Requeueing dirty pages from | |||||
* dead objects is pointless, as they are being paged out and | |||||
* freed by the thread that destroyed the object. | |||||
*/ | |||||
if (m->dirty == 0) { | |||||
free_page: | |||||
vm_page_free(m); | |||||
PCPU_INC(cnt.v_dfree); | |||||
} else if ((object->flags & OBJ_DEAD) == 0) { | |||||
if (object->type != OBJT_SWAP && | |||||
object->type != OBJT_DEFAULT) | |||||
pageout_ok = true; | |||||
else if (disable_swap_pageouts) | |||||
pageout_ok = false; | |||||
else | |||||
pageout_ok = true; | |||||
if (!pageout_ok) { | |||||
requeue_page: | |||||
vm_pagequeue_lock(pq); | |||||
queue_locked = true; | |||||
vm_page_requeue_locked(m); | |||||
goto drop_page; | |||||
} | |||||
/* | |||||
* Form a cluster with adjacent, dirty pages from the | |||||
* same object, and page out that entire cluster. | |||||
* | |||||
* The adjacent, dirty pages must also be in the | |||||
* laundry. However, their mappings are not checked | |||||
* for new references. Consequently, a recently | |||||
* referenced page may be paged out. However, that | |||||
* page will not be prematurely reclaimed. After page | |||||
* out, the page will be placed in the inactive queue, | |||||
* where any new references will be detected and the | |||||
* page reactivated. | |||||
*/ | |||||
error = vm_pageout_clean(m, &numpagedout); | |||||
if (error == 0) { | |||||
launder -= numpagedout; | |||||
maxscan -= numpagedout - 1; | |||||
} else if (error == EDEADLK) { | |||||
pageout_lock_miss++; | |||||
vnodes_skipped++; | |||||
} | |||||
goto relock_queue; | |||||
} | |||||
drop_page: | |||||
vm_page_unlock(m); | |||||
VM_OBJECT_WUNLOCK(object); | |||||
relock_queue: | |||||
if (!queue_locked) { | |||||
vm_pagequeue_lock(pq); | |||||
queue_locked = true; | |||||
} | |||||
next = TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q); | |||||
TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q); | |||||
} | |||||
vm_pagequeue_unlock(pq); | |||||
/* | |||||
* Wakeup the sync daemon if we skipped a vnode in a writeable object | |||||
* and we didn't launder enough pages. | |||||
*/ | |||||
if (vnodes_skipped > 0 && launder > 0) | |||||
(void)speedup_syncer(); | |||||
return (starting_target - launder); | |||||
} | |||||
/* | |||||
* Compute the integer square root. | |||||
*/ | |||||
static u_int | |||||
isqrt(u_int num) | |||||
{ | |||||
u_int bit, root, tmp; | |||||
bit = 1u << ((NBBY * sizeof(u_int)) - 2); | |||||
while (bit > num) | |||||
bit >>= 2; | |||||
root = 0; | |||||
while (bit != 0) { | |||||
tmp = root + bit; | |||||
root >>= 1; | |||||
if (num >= tmp) { | |||||
num -= tmp; | |||||
root += bit; | |||||
} | |||||
bit >>= 2; | |||||
} | |||||
return (root); | |||||
} | |||||
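(Editorial aside: a standalone demonstration, with invented queue sizes, of how isqrt() paces the background laundering trigger used in vm_pageout_laundry_worker() below. Laundering starts once ndirty * isqrt(wakeups since the last laundering) reaches nclean, so a large clean reserve tolerates proportionally more wakeups before dirty pages are washed.)

#include <stdio.h>

/* Copy of the kernel isqrt() above, with NBBY spelled as 8. */
static unsigned
isqrt(unsigned num)
{
	unsigned bit, root, tmp;

	bit = 1u << (sizeof(unsigned) * 8 - 2);
	while (bit > num)
		bit >>= 2;
	root = 0;
	while (bit != 0) {
		tmp = root + bit;
		root >>= 1;
		if (num >= tmp) {
			num -= tmp;
			root += bit;
		}
		bit >>= 2;
	}
	return (root);
}

int
main(void)
{
	/* Invented counts: 1M clean pages vs. 128K dirty pages. */
	unsigned long long nclean = 1048576, ndirty = 131072;
	unsigned wakeups;

	/* The trigger fires once ndirty * isqrt(wakeups) >= nclean. */
	for (wakeups = 1; ndirty * isqrt(wakeups) < nclean; wakeups++)
		;
	printf("laundering would begin after %u wakeups\n", wakeups);
	/* Prints 64: isqrt(64) == 8 and 131072 * 8 == 1048576. */
	return (0);
}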
/* | |||||
* Perform the work of the laundry thread: periodically wake up and determine | |||||
* whether any pages need to be laundered. If so, determine the number of pages | |||||
* that need to be laundered, and launder them. | |||||
*/ | |||||
static void | |||||
vm_pageout_laundry_worker(void *arg) | |||||
{ | |||||
struct vm_domain *domain; | |||||
struct vm_pagequeue *pq; | |||||
uint64_t nclean, ndirty; | |||||
u_int last_launder, wakeups; | |||||
int domidx, last_target, launder, shortfall, shortfall_cycle, target; | |||||
bool in_shortfall; | |||||
domidx = (uintptr_t)arg; | |||||
domain = &vm_dom[domidx]; | |||||
pq = &domain->vmd_pagequeues[PQ_LAUNDRY]; | |||||
KASSERT(domain->vmd_segs != 0, ("domain without segments")); | |||||
vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY); | |||||
shortfall = 0; | |||||
in_shortfall = false; | |||||
shortfall_cycle = 0; | |||||
target = 0; | |||||
last_launder = 0; | |||||
/* | |||||
* The pageout laundry worker is never done, so loop forever. | |||||
*/ | |||||
for (;;) { | |||||
KASSERT(target >= 0, ("negative target %d", target)); | |||||
KASSERT(shortfall_cycle >= 0, | |||||
("negative cycle %d", shortfall_cycle)); | |||||
launder = 0; | |||||
wakeups = VM_METER_PCPU_CNT(v_pdwakeups); | |||||
/* | |||||
* First determine whether we need to launder pages to meet a | |||||
* shortage of free pages. | |||||
*/ | |||||
if (shortfall > 0) { | |||||
in_shortfall = true; | |||||
shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE; | |||||
target = shortfall; | |||||
} else if (!in_shortfall) | |||||
goto trybackground; | |||||
else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) { | |||||
/* | |||||
* We recently entered shortfall and began laundering | |||||
* pages. If we have completed that laundering run | |||||
* (and we are no longer in shortfall) or we have met | |||||
* our laundry target through other activity, then we | |||||
* can stop laundering pages. | |||||
*/ | |||||
in_shortfall = false; | |||||
target = 0; | |||||
goto trybackground; | |||||
} | |||||
last_launder = wakeups; | |||||
launder = target / shortfall_cycle--; | |||||
goto dolaundry; | |||||
/* | |||||
* There's no immediate need to launder any pages; see if we | |||||
* meet the conditions to perform background laundering: | |||||
* | |||||
* 1. The ratio of dirty to clean inactive pages exceeds the | |||||
* background laundering threshold and the pagedaemon has | |||||
* been woken up to reclaim pages since our last | |||||
* laundering, or | |||||
* 2. we haven't yet reached the target of the current | |||||
* background laundering run. | |||||
* | |||||
* The background laundering threshold is not a constant. | |||||
* Instead, it is a slowly growing function of the number of | |||||
* page daemon wakeups since the last laundering. Thus, as the | |||||
* ratio of dirty to clean inactive pages grows, the amount of | |||||
* memory pressure required to trigger laundering decreases. | |||||
*/ | |||||
trybackground: | |||||
nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count; | |||||
ndirty = vm_cnt.v_laundry_count; | |||||
if (target == 0 && wakeups != last_launder && | |||||
ndirty * isqrt(wakeups - last_launder) >= nclean) { | |||||
target = vm_background_launder_target; | |||||
} | |||||
/* | |||||
* We have a non-zero background laundering target. If we've | |||||
* laundered up to our maximum without observing a page daemon | |||||
* wakeup, just stop. This is a safety belt that ensures we | |||||
* don't launder an excessive amount if memory pressure is low | |||||
* and the ratio of dirty to clean pages is large. Otherwise, | |||||
* proceed at the background laundering rate. | |||||
*/ | |||||
if (target > 0) { | |||||
if (wakeups != last_launder) { | |||||
last_launder = wakeups; | |||||
last_target = target; | |||||
} else if (last_target - target >= | |||||
vm_background_launder_max * PAGE_SIZE / 1024) { | |||||
target = 0; | |||||
} | |||||
launder = vm_background_launder_rate * PAGE_SIZE / 1024; | |||||
launder /= VM_LAUNDER_RATE; | |||||
if (launder > target) | |||||
launder = target; | |||||
} | |||||
dolaundry: | |||||
if (launder > 0) { | |||||
/* | |||||
* Because of I/O clustering, the number of laundered | |||||
* pages could exceed "target" by the maximum size of | |||||
* a cluster minus one. | |||||
*/ | |||||
target -= min(vm_pageout_launder(domain, launder, | |||||
in_shortfall), target); | |||||
pause("laundp", hz / VM_LAUNDER_RATE); | |||||
} | |||||
/* | |||||
* If we're not currently laundering pages and the page daemon | |||||
* hasn't posted a new request, sleep until the page daemon | |||||
* kicks us. | |||||
*/ | |||||
vm_pagequeue_lock(pq); | |||||
if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE) | |||||
(void)mtx_sleep(&vm_laundry_request, | |||||
vm_pagequeue_lockptr(pq), PVM, "launds", 0); | |||||
/* | |||||
* If the pagedaemon has indicated that it's in shortfall, start | |||||
* a shortfall laundering unless we're already in the middle of | |||||
* one. This may preempt a background laundering. | |||||
*/ | |||||
if (vm_laundry_request == VM_LAUNDRY_SHORTFALL && | |||||
(!in_shortfall || shortfall_cycle == 0)) { | |||||
shortfall = vm_laundry_target() + vm_pageout_deficit; | |||||
target = 0; | |||||
} else | |||||
shortfall = 0; | |||||
if (target == 0) | |||||
vm_laundry_request = VM_LAUNDRY_IDLE; | |||||
vm_pagequeue_unlock(pq); | |||||
} | |||||
} | |||||
/* | |||||
* vm_pageout_scan does the dirty work for the pageout daemon. | * vm_pageout_scan does the dirty work for the pageout daemon. | ||||
* | * | ||||
* pass 0 - Update active LRU/deactivate pages | * pass == 0: Update active LRU/deactivate pages | ||||
* pass 1 - Free inactive pages | * pass >= 1: Free inactive pages | ||||
* pass 2 - Launder dirty pages | |||||
* | * | ||||
* Returns true if pass was zero or enough pages were freed by the inactive | * Returns true if pass was zero or enough pages were freed by the inactive | ||||
* queue scan to meet the target. | * queue scan to meet the target. | ||||
*/ | */ | ||||
static bool | static bool | ||||
vm_pageout_scan(struct vm_domain *vmd, int pass) | vm_pageout_scan(struct vm_domain *vmd, int pass) | ||||
{ | { | ||||
vm_page_t m, next; | vm_page_t m, next; | ||||
struct vm_pagequeue *pq; | struct vm_pagequeue *pq; | ||||
vm_object_t object; | vm_object_t object; | ||||
long min_scan; | long min_scan; | ||||
int act_delta, addl_page_shortage, deficit, error, inactq_shortage; | int act_delta, addl_page_shortage, deficit, inactq_shortage, maxscan; | ||||
int maxlaunder, maxscan, page_shortage, scan_tick, scanned; | int page_shortage, scan_tick, scanned, starting_page_shortage; | ||||
int starting_page_shortage, vnodes_skipped; | boolean_t queue_locked; | ||||
boolean_t pageout_ok, queue_locked; | |||||
/* | /* | ||||
* If we need to reclaim memory ask kernel caches to return | * If we need to reclaim memory ask kernel caches to return | ||||
* some. We rate limit to avoid thrashing. | * some. We rate limit to avoid thrashing. | ||||
*/ | */ | ||||
if (vmd == &vm_dom[0] && pass > 0 && | if (vmd == &vm_dom[0] && pass > 0 && | ||||
(time_uptime - lowmem_uptime) >= lowmem_period) { | (time_uptime - lowmem_uptime) >= lowmem_period) { | ||||
/* | /* | ||||
(25 lines elided; context: "vm_pageout_scan(struct vm_domain *vmd, int pass)")
if (pass > 0) { | if (pass > 0) { | ||||
deficit = atomic_readandclear_int(&vm_pageout_deficit); | deficit = atomic_readandclear_int(&vm_pageout_deficit); | ||||
page_shortage = vm_paging_target() + deficit; | page_shortage = vm_paging_target() + deficit; | ||||
} else | } else | ||||
page_shortage = deficit = 0; | page_shortage = deficit = 0; | ||||
starting_page_shortage = page_shortage; | starting_page_shortage = page_shortage; | ||||
/* | /* | ||||
* maxlaunder limits the number of dirty pages we flush per scan. | |||||
* For most systems a smaller value (16 or 32) is more robust under | |||||
* extreme memory and disk pressure because any unnecessary writes | |||||
* to disk can result in extreme performance degredation. However, | |||||
* systems with excessive dirty pages (especially when MAP_NOSYNC is | |||||
* used) will die horribly with limited laundering. If the pageout | |||||
* daemon cannot clean enough pages in the first pass, we let it go | |||||
* all out in succeeding passes. | |||||
*/ | |||||
if ((maxlaunder = vm_max_launder) <= 1) | |||||
maxlaunder = 1; | |||||
if (pass > 1) | |||||
maxlaunder = 10000; | |||||
vnodes_skipped = 0; | |||||
/* | |||||
* Start scanning the inactive queue for pages that we can free. The | * Start scanning the inactive queue for pages that we can free. The | ||||
* scan will stop when we reach the target or we have scanned the | * scan will stop when we reach the target or we have scanned the | ||||
* entire queue. (Note that m->act_count is not used to make | * entire queue. (Note that m->act_count is not used to make | ||||
* decisions for the inactive queue, only for the active queue.) | * decisions for the inactive queue, only for the active queue.) | ||||
*/ | */ | ||||
pq = &vmd->vmd_pagequeues[PQ_INACTIVE]; | pq = &vmd->vmd_pagequeues[PQ_INACTIVE]; | ||||
maxscan = pq->pq_cnt; | maxscan = pq->pq_cnt; | ||||
vm_pagequeue_lock(pq); | vm_pagequeue_lock(pq); | ||||
queue_locked = TRUE; | queue_locked = TRUE; | ||||
for (m = TAILQ_FIRST(&pq->pq_pl); | for (m = TAILQ_FIRST(&pq->pq_pl); | ||||
m != NULL && maxscan-- > 0 && page_shortage > 0; | m != NULL && maxscan-- > 0 && page_shortage > 0; | ||||
m = next) { | m = next) { | ||||
vm_pagequeue_assert_locked(pq); | vm_pagequeue_assert_locked(pq); | ||||
KASSERT(queue_locked, ("unlocked inactive queue")); | KASSERT(queue_locked, ("unlocked inactive queue")); | ||||
KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m)); | KASSERT(vm_page_inactive(m), ("Inactive queue %p", m)); | ||||
PCPU_INC(cnt.v_pdpages); | PCPU_INC(cnt.v_pdpages); | ||||
next = TAILQ_NEXT(m, plinks.q); | next = TAILQ_NEXT(m, plinks.q); | ||||
/* | /* | ||||
* skip marker pages | * skip marker pages | ||||
*/ | */ | ||||
if (m->flags & PG_MARKER) | if (m->flags & PG_MARKER) | ||||
(46 lines elided; context: "unlock_object:")
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
unlock_page: | unlock_page: | ||||
vm_page_unlock(m); | vm_page_unlock(m); | ||||
continue; | continue; | ||||
} | } | ||||
KASSERT(m->hold_count == 0, ("Held page %p", m)); | KASSERT(m->hold_count == 0, ("Held page %p", m)); | ||||
/* | /* | ||||
* We unlock the inactive page queue, invalidating the | * Dequeue the inactive page and unlock the inactive page | ||||
* 'next' pointer. Use our marker to remember our | * queue, invalidating the 'next' pointer. Dequeueing the | ||||
* place. | * page here avoids a later reacquisition (and release) of | ||||
* the inactive page queue lock when vm_page_activate(), | |||||
* vm_page_free(), or vm_page_launder() is called. Use a | |||||
* marker to remember our place in the inactive queue. | |||||
*/ | */ | ||||
TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q); | TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q); | ||||
vm_page_dequeue_locked(m); | |||||
vm_pagequeue_unlock(pq); | vm_pagequeue_unlock(pq); | ||||
queue_locked = FALSE; | queue_locked = FALSE; | ||||
/* | /* | ||||
* Invalid pages can be easily freed. They cannot be | * Invalid pages can be easily freed. They cannot be | ||||
* mapped, vm_page_free() asserts this. | * mapped, vm_page_free() asserts this. | ||||
*/ | */ | ||||
if (m->valid == 0) | if (m->valid == 0) | ||||
(12 lines elided; context: "unlock_page:")
if (object->ref_count != 0) { | if (object->ref_count != 0) { | ||||
act_delta += pmap_ts_referenced(m); | act_delta += pmap_ts_referenced(m); | ||||
} else { | } else { | ||||
KASSERT(!pmap_page_is_mapped(m), | KASSERT(!pmap_page_is_mapped(m), | ||||
("vm_pageout_scan: page %p is mapped", m)); | ("vm_pageout_scan: page %p is mapped", m)); | ||||
} | } | ||||
if (act_delta != 0) { | if (act_delta != 0) { | ||||
if (object->ref_count != 0) { | if (object->ref_count != 0) { | ||||
PCPU_INC(cnt.v_reactivated); | |||||
vm_page_activate(m); | vm_page_activate(m); | ||||
/* | /* | ||||
* Increase the activation count if the page | * Increase the activation count if the page | ||||
* was referenced while in the inactive queue. | * was referenced while in the inactive queue. | ||||
* This makes it less likely that the page will | * This makes it less likely that the page will | ||||
* be returned prematurely to the inactive | * be returned prematurely to the inactive | ||||
* queue. | * queue. | ||||
*/ | */ | ||||
m->act_count += act_delta + ACT_ADVANCE; | m->act_count += act_delta + ACT_ADVANCE; | ||||
goto drop_page; | goto drop_page; | ||||
} else if ((object->flags & OBJ_DEAD) == 0) | } else if ((object->flags & OBJ_DEAD) == 0) { | ||||
goto requeue_page; | vm_pagequeue_lock(pq); | ||||
queue_locked = TRUE; | |||||
m->queue = PQ_INACTIVE; | |||||
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); | |||||
vm_pagequeue_cnt_inc(pq); | |||||
goto drop_page; | |||||
} | } | ||||
} | |||||
/* | /* | ||||
* If the page appears to be clean at the machine-independent | * If the page appears to be clean at the machine-independent | ||||
* layer, then remove all of its mappings from the pmap in | * layer, then remove all of its mappings from the pmap in | ||||
* anticipation of freeing it. If, however, any of the page's | * anticipation of freeing it. If, however, any of the page's | ||||
* mappings allow write access, then the page may still be | * mappings allow write access, then the page may still be | ||||
* modified until the last of those mappings are removed. | * modified until the last of those mappings are removed. | ||||
*/ | */ | ||||
if (object->ref_count != 0) { | if (object->ref_count != 0) { | ||||
vm_page_test_dirty(m); | vm_page_test_dirty(m); | ||||
if (m->dirty == 0) | if (m->dirty == 0) | ||||
pmap_remove_all(m); | pmap_remove_all(m); | ||||
} | } | ||||
if (m->dirty == 0) { | |||||
/* | /* | ||||
* Clean pages can be freed. | * Clean pages can be freed, but dirty pages must be sent back | ||||
* to the laundry, unless they belong to a dead object. | |||||
* Requeueing dirty pages from dead objects is pointless, as | |||||
* they are being paged out and freed by the thread that | |||||
* destroyed the object. | |||||
*/ | */ | ||||
if (m->dirty == 0) { | |||||
free_page: | free_page: | ||||
vm_page_free(m); | vm_page_free(m); | ||||
PCPU_INC(cnt.v_dfree); | PCPU_INC(cnt.v_dfree); | ||||
--page_shortage; | --page_shortage; | ||||
} else if ((object->flags & OBJ_DEAD) != 0) { | } else if ((object->flags & OBJ_DEAD) == 0) | ||||
/* | vm_page_launder(m); | ||||
* Leave dirty pages from dead objects at the front of | |||||
* the queue. They are being paged out and freed by | |||||
* the thread that destroyed the object. They will | |||||
* leave the queue shortly after the scan finishes, so | |||||
* they should be discounted from the inactive count. | |||||
*/ | |||||
addl_page_shortage++; | |||||
} else if ((m->flags & PG_WINATCFLS) == 0 && pass < 2) { | |||||
/* | |||||
* Dirty pages need to be paged out, but flushing | |||||
* a page is extremely expensive versus freeing | |||||
* a clean page. Rather then artificially limiting | |||||
* the number of pages we can flush, we instead give | |||||
* dirty pages extra priority on the inactive queue | |||||
* by forcing them to be cycled through the queue | |||||
* twice before being flushed, after which the | |||||
* (now clean) page will cycle through once more | |||||
* before being freed. This significantly extends | |||||
* the thrash point for a heavily loaded machine. | |||||
*/ | |||||
m->flags |= PG_WINATCFLS; | |||||
requeue_page: | |||||
vm_pagequeue_lock(pq); | |||||
queue_locked = TRUE; | |||||
vm_page_requeue_locked(m); | |||||
} else if (maxlaunder > 0) { | |||||
/* | |||||
* We always want to try to flush some dirty pages if | |||||
* we encounter them, to keep the system stable. | |||||
* Normally this number is small, but under extreme | |||||
* pressure where there are insufficient clean pages | |||||
* on the inactive queue, we may have to go all out. | |||||
*/ | |||||
if (object->type != OBJT_SWAP && | |||||
object->type != OBJT_DEFAULT) | |||||
pageout_ok = TRUE; | |||||
else if (disable_swap_pageouts) | |||||
pageout_ok = FALSE; | |||||
else if (defer_swap_pageouts) | |||||
pageout_ok = vm_page_count_min(); | |||||
else | |||||
pageout_ok = TRUE; | |||||
if (!pageout_ok) | |||||
goto requeue_page; | |||||
error = vm_pageout_clean(m); | |||||
/* | |||||
* Decrement page_shortage on success to account for | |||||
* the (future) cleaned page. Otherwise we could wind | |||||
* up laundering or cleaning too many pages. | |||||
*/ | |||||
if (error == 0) { | |||||
page_shortage--; | |||||
maxlaunder--; | |||||
} else if (error == EDEADLK) { | |||||
pageout_lock_miss++; | |||||
vnodes_skipped++; | |||||
} else if (error == EBUSY) { | |||||
addl_page_shortage++; | |||||
} | |||||
vm_page_lock_assert(m, MA_NOTOWNED); | |||||
goto relock_queue; | |||||
} | |||||
drop_page: | drop_page: | ||||
vm_page_unlock(m); | vm_page_unlock(m); | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
relock_queue: | |||||
if (!queue_locked) { | if (!queue_locked) { | ||||
vm_pagequeue_lock(pq); | vm_pagequeue_lock(pq); | ||||
queue_locked = TRUE; | queue_locked = TRUE; | ||||
} | } | ||||
next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q); | next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q); | ||||
TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q); | TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q); | ||||
} | } | ||||
vm_pagequeue_unlock(pq); | vm_pagequeue_unlock(pq); | ||||
/* | |||||
* Wake up the laundry thread so that it can perform any needed | |||||
* laundering. If we didn't meet our target, we're in shortfall and | |||||
* need to launder more aggressively. | |||||
*/ | |||||
if (vm_laundry_request == VM_LAUNDRY_IDLE && | |||||
starting_page_shortage > 0) { | |||||
pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY]; | |||||
vm_pagequeue_lock(pq); | |||||
if (page_shortage > 0) { | |||||
vm_laundry_request = VM_LAUNDRY_SHORTFALL; | |||||
PCPU_INC(cnt.v_pdshortfalls); | |||||
} else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL) | |||||
vm_laundry_request = VM_LAUNDRY_BACKGROUND; | |||||
wakeup(&vm_laundry_request); | |||||
vm_pagequeue_unlock(pq); | |||||
} | |||||
#if !defined(NO_SWAPPING) | #if !defined(NO_SWAPPING) | ||||
/* | /* | ||||
* Wakeup the swapout daemon if we didn't free the targeted number of | * Wakeup the swapout daemon if we didn't free the targeted number of | ||||
* pages. | * pages. | ||||
*/ | */ | ||||
if (vm_swap_enabled && page_shortage > 0) | if (vm_swap_enabled && page_shortage > 0) | ||||
vm_req_vmdaemon(VM_SWAP_NORMAL); | vm_req_vmdaemon(VM_SWAP_NORMAL); | ||||
#endif | #endif | ||||
/* | /* | ||||
* Wakeup the sync daemon if we skipped a vnode in a writeable object | |||||
* and we didn't free enough pages. | |||||
*/ | |||||
if (vnodes_skipped > 0 && page_shortage > vm_cnt.v_free_target - | |||||
vm_cnt.v_free_min) | |||||
(void)speedup_syncer(); | |||||
/* | |||||
* If the inactive queue scan fails repeatedly to meet its | * If the inactive queue scan fails repeatedly to meet its | ||||
* target, kill the largest process. | * target, kill the largest process. | ||||
*/ | */ | ||||
vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage); | vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage); | ||||
/* | /* | ||||
* Compute the number of pages we want to try to move from the | * Compute the number of pages we want to try to move from the | ||||
* active queue to the inactive queue. | * active queue to either the inactive or laundry queue. | ||||
* | |||||
* When scanning active pages, we make clean pages count more heavily | |||||
* towards the page shortage than dirty pages. This is because dirty | |||||
* pages must be laundered before they can be reused and thus have less | |||||
* utility when attempting to quickly alleviate a shortage. However, | |||||
* this weighting also causes the scan to deactivate dirty pages | |||||
* more aggressively, improving the effectiveness of clustering and | |||||
* ensuring that they can eventually be reused. | |||||
*/ | */ | ||||
inactq_shortage = vm_cnt.v_inactive_target - vm_cnt.v_inactive_count + | inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count + | ||||
vm_cnt.v_laundry_count / act_scan_laundry_weight) + | |||||
vm_paging_target() + deficit + addl_page_shortage; | vm_paging_target() + deficit + addl_page_shortage; | ||||
page_shortage *= act_scan_laundry_weight; | |||||
pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; | pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; | ||||
vm_pagequeue_lock(pq); | vm_pagequeue_lock(pq); | ||||
maxscan = pq->pq_cnt; | maxscan = pq->pq_cnt; | ||||
/* | /* | ||||
* If we're just idle polling attempt to visit every | * If we're just idle polling attempt to visit every | ||||
* active page within 'update_period' seconds. | * active page within 'update_period' seconds. | ||||
(67 lines elided; context: "for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <")
if (act_delta != 0) { | if (act_delta != 0) { | ||||
m->act_count += ACT_ADVANCE + act_delta; | m->act_count += ACT_ADVANCE + act_delta; | ||||
if (m->act_count > ACT_MAX) | if (m->act_count > ACT_MAX) | ||||
m->act_count = ACT_MAX; | m->act_count = ACT_MAX; | ||||
} else | } else | ||||
m->act_count -= min(m->act_count, ACT_DECLINE); | m->act_count -= min(m->act_count, ACT_DECLINE); | ||||
/* | /* | ||||
* Move this page to the tail of the active or inactive | * Move this page to the tail of the active, inactive or laundry | ||||
* queue depending on usage. | * queue depending on usage. | ||||
*/ | */ | ||||
if (m->act_count == 0) { | if (m->act_count == 0) { | ||||
/* Dequeue to avoid later lock recursion. */ | /* Dequeue to avoid later lock recursion. */ | ||||
vm_page_dequeue_locked(m); | vm_page_dequeue_locked(m); | ||||
/* | |||||
* When not short for inactive pages, let dirty pages go | |||||
* through the inactive queue before moving to the | |||||
* laundry queues. This gives them some extra time to | |||||
* be reactivated, potentially avoiding an expensive | |||||
* pageout. During a page shortage, the inactive queue | |||||
* is necessarily small, so we may move dirty pages | |||||
* directly to the laundry queue. | |||||
*/ | |||||
if (inactq_shortage <= 0) | |||||
vm_page_deactivate(m); | vm_page_deactivate(m); | ||||
else { | |||||
/* | |||||
* Calling vm_page_test_dirty() here would | |||||
* require acquisition of the object's write | |||||
* lock. However, during a page shortage, | |||||
* directing dirty pages into the laundry | |||||
* queue is only an optimization and not a | |||||
* requirement. Therefore, we simply rely on | |||||
* the opportunistic updates to the page's | |||||
* dirty field by the pmap. | |||||
*/ | |||||
if (m->dirty == 0) { | |||||
vm_page_deactivate(m); | |||||
inactq_shortage -= | |||||
act_scan_laundry_weight; | |||||
} else { | |||||
vm_page_launder(m); | |||||
inactq_shortage--; | inactq_shortage--; | ||||
} | |||||
} | |||||
} else | } else | ||||
vm_page_requeue_locked(m); | vm_page_requeue_locked(m); | ||||
vm_page_unlock(m); | vm_page_unlock(m); | ||||
} | } | ||||
vm_pagequeue_unlock(pq); | vm_pagequeue_unlock(pq); | ||||
#if !defined(NO_SWAPPING) | #if !defined(NO_SWAPPING) | ||||
/* | /* | ||||
* Idle process swapout -- run once per second when we are reclaiming | * Idle process swapout -- run once per second when we are reclaiming | ||||
(292 lines elided; context: "while (TRUE) {")
* Might the page daemon receive a wakeup call? | * Might the page daemon receive a wakeup call? | ||||
*/ | */ | ||||
if (vm_pageout_wanted) { | if (vm_pageout_wanted) { | ||||
/* | /* | ||||
* No. Either vm_pageout_wanted was set by another | * No. Either vm_pageout_wanted was set by another | ||||
* thread during the previous scan, which must have | * thread during the previous scan, which must have | ||||
* been a level 0 scan, or vm_pageout_wanted was | * been a level 0 scan, or vm_pageout_wanted was | ||||
* already set and the scan failed to free enough | * already set and the scan failed to free enough | ||||
* pages. If we haven't yet performed a level >= 2 | * pages. If we haven't yet performed a level >= 1 | ||||
* scan (unlimited dirty cleaning), then upgrade the | * (page reclamation) scan, then increase the level | ||||
* level and scan again now. Otherwise, sleep a bit | * and scan again now. Otherwise, sleep a bit and | ||||
* and try again later. | * try again later. | ||||
*/ | */ | ||||
mtx_unlock(&vm_page_queue_free_mtx); | mtx_unlock(&vm_page_queue_free_mtx); | ||||
if (pass > 1) | if (pass >= 1) | ||||
pause("psleep", hz / 2); | pause("psleep", hz / VM_INACT_SCAN_RATE); | ||||
pass++; | pass++; | ||||
} else { | } else { | ||||
/* | /* | ||||
* Yes. Sleep until pages need to be reclaimed or | * Yes. Sleep until pages need to be reclaimed or | ||||
* have their reference stats updated. | * have their reference stats updated. | ||||
*/ | */ | ||||
if (mtx_sleep(&vm_pageout_wanted, | if (mtx_sleep(&vm_pageout_wanted, | ||||
&vm_page_queue_free_mtx, PDROP | PVM, "psleep", | &vm_page_queue_free_mtx, PDROP | PVM, "psleep", | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | vm_pageout_init(void) | ||||
* case paging behaviors with stale active LRU. | * case paging behaviors with stale active LRU. | ||||
*/ | */ | ||||
if (vm_pageout_update_period == 0) | if (vm_pageout_update_period == 0) | ||||
vm_pageout_update_period = 600; | vm_pageout_update_period = 600; | ||||
/* XXX does not really belong here */ | /* XXX does not really belong here */ | ||||
if (vm_page_max_wired == 0) | if (vm_page_max_wired == 0) | ||||
vm_page_max_wired = vm_cnt.v_free_count / 3; | vm_page_max_wired = vm_cnt.v_free_count / 3; | ||||
/* | |||||
* Target amount of memory to move out of the laundry queue during a | |||||
* background laundering. This is proportional to the amount of system | |||||
* memory. | |||||
*/ | |||||
vm_background_launder_target = (vm_cnt.v_free_target - | |||||
vm_cnt.v_free_min) / 10; | |||||
} | } | ||||
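(Editorial aside: a toy computation, with invented free-page thresholds, of the background laundering target initialized above; assumes 4 KB pages.)

#include <stdio.h>

int
main(void)
{
	/* Invented values for v_free_target and v_free_min, in pages. */
	unsigned free_target = 120000, free_min = 30000;
	unsigned target = (free_target - free_min) / 10;

	printf("background launder target: %u pages (~%u MB)\n",
	    target, target * 4096U / (1024 * 1024));
	return (0);
}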
/* | /* | ||||
* vm_pageout is the high level pageout daemon. | * vm_pageout is the high level pageout daemon. | ||||
*/ | */ | ||||
static void | static void | ||||
vm_pageout(void) | vm_pageout(void) | ||||
{ | { | ||||
int error; | int error; | ||||
#ifdef VM_NUMA_ALLOC | #ifdef VM_NUMA_ALLOC | ||||
int i; | int i; | ||||
#endif | #endif | ||||
swap_pager_swap_init(); | swap_pager_swap_init(); | ||||
error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL, | |||||
0, 0, "laundry: dom0"); | |||||
if (error != 0) | |||||
panic("starting laundry for domain 0, error %d", error); | |||||
#ifdef VM_NUMA_ALLOC | #ifdef VM_NUMA_ALLOC | ||||
for (i = 1; i < vm_ndomains; i++) { | for (i = 1; i < vm_ndomains; i++) { | ||||
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i, | error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i, | ||||
curproc, NULL, 0, 0, "dom%d", i); | curproc, NULL, 0, 0, "dom%d", i); | ||||
if (error != 0) { | if (error != 0) { | ||||
panic("starting pageout for domain %d, error %d\n", | panic("starting pageout for domain %d, error %d\n", | ||||
i, error); | i, error); | ||||
} | } | ||||
(185 lines elided)