sys/vm/vm_pageout.c
Show First 20 Lines • Show All 104 Lines • ▼ Show 20 Lines
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
| #include <vm/vm_pagequeue.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
/*
* System initialization
*/
Show All 21 Lines
SDT_PROVIDER_DEFINE(vm);
SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
/* Pagedaemon activity rates, in subdivisions of one second. */
#define VM_LAUNDER_RATE 10
#define VM_INACT_SCAN_RATE 2
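These two constants express frequencies as fractions of a second; the worker loops below turn them into sleep intervals by dividing hz. A quick sketch (assuming the common tick rate hz = 1000; the arithmetic scales with whatever hz the kernel runs at):

/* hz / VM_LAUNDER_RATE = 1000 / 10 = 100 ticks, so
 * pause("laundp", hz / VM_LAUNDER_RATE) yields ~10 laundering passes
 * per second; hz / VM_INACT_SCAN_RATE = 1000 / 2 = 500 ticks gives
 * ~2 inactive-queue scans per second. */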
int vm_pageout_deficit; /* Estimated number of pages deficit */ |
u_int vm_pageout_wakeup_thresh; |
static int vm_pageout_oom_seq = 12;
static bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */ |
bool vm_pages_needed; /* Are threads waiting for free pages? */ |
/* Pending request for dirty page laundering. */ |
static enum { |
VM_LAUNDRY_IDLE, |
VM_LAUNDRY_BACKGROUND, |
VM_LAUNDRY_SHORTFALL |
} vm_laundry_request = VM_LAUNDRY_IDLE; |
static int vm_inactq_scans; |
static int vm_pageout_update_period;
static int disable_swap_pageouts;
static int lowmem_period = 10;
static time_t lowmem_uptime;
static int swapdev_enabled;
static int vm_panic_on_oom = 0;
SYSCTL_INT(_vm, OID_AUTO, panic_on_oom,
CTLFLAG_RWTUN, &vm_panic_on_oom, 0,
"panic on out of memory instead of killing the largest process");
SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, |
CTLFLAG_RWTUN, &vm_pageout_wakeup_thresh, 0, |
"free page threshold for waking up the pageout daemon"); |
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
CTLFLAG_RWTUN, &vm_pageout_update_period, 0,
"Maximum active LRU update period");
SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RWTUN, &lowmem_period, 0,
"Low memory callback period");
SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
CTLFLAG_RWTUN, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
static int pageout_lock_miss;
SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout");
SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq,
CTLFLAG_RWTUN, &vm_pageout_oom_seq, 0,
"back-to-back calls to oom detector to start OOM");
static int act_scan_laundry_weight = 3;
SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RWTUN,
&act_scan_laundry_weight, 0,
"weight given to clean vs. dirty pages in active queue scans");
static u_int vm_background_launder_target; |
SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RWTUN, |
&vm_background_launder_target, 0, |
"background laundering target, in pages"); |
static u_int vm_background_launder_rate = 4096;
SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN,
&vm_background_launder_rate, 0,
"background laundering rate, in kilobytes per second");
static u_int vm_background_launder_max = 20 * 1024;
SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RWTUN,
&vm_background_launder_max, 0, "background laundering cap, in kilobytes");
▲ Show 20 Lines • Show All 738 Lines • ▼ Show 20 Lines
/*
* Perform the work of the laundry thread: periodically wake up and determine
* whether any pages need to be laundered. If so, determine the number of pages
* that need to be laundered, and launder them.
*/
static void
vm_pageout_laundry_worker(void *arg)
{
struct vm_domain *domain; | struct vm_domain *vmd;
struct vm_pagequeue *pq;
uint64_t nclean, ndirty;
u_int inactq_scans, last_launder;
int domidx, last_target, launder, shortfall, shortfall_cycle, target; | int domain, last_target, launder, shortfall, shortfall_cycle, target;
bool in_shortfall;
domidx = (uintptr_t)arg; | domain = (uintptr_t)arg;
domain = &vm_dom[domidx]; | vmd = VM_DOMAIN(domain);
pq = &domain->vmd_pagequeues[PQ_LAUNDRY]; | pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
KASSERT(domain->vmd_segs != 0, ("domain without segments")); | KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY); | vm_pageout_init_marker(&vmd->vmd_laundry_marker, PQ_LAUNDRY);
shortfall = 0;
in_shortfall = false;
shortfall_cycle = 0;
target = 0;
inactq_scans = 0;
last_launder = 0;
/*
* Calls to these handlers are serialized by the swap syscall lock.
*/
(void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, domain, | (void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, vmd,
EVENTHANDLER_PRI_ANY);
(void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, domain, | (void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, vmd,
EVENTHANDLER_PRI_ANY);
/*
* The pageout laundry worker is never done, so loop forever.
*/
for (;;) {
KASSERT(target >= 0, ("negative target %d", target));
KASSERT(shortfall_cycle >= 0,
("negative cycle %d", shortfall_cycle));
launder = 0;
/*
* First determine whether we need to launder pages to meet a
* shortage of free pages.
*/
if (shortfall > 0) {
in_shortfall = true;
shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE;
target = shortfall;
} else if (!in_shortfall)
goto trybackground;
else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) { | else if (shortfall_cycle == 0 || vm_laundry_target(vmd) <= 0) {
/*
* We recently entered shortfall and began laundering
* pages. If we have completed that laundering run
* (and we are no longer in shortfall) or we have met
* our laundry target through other activity, then we
* can stop laundering pages.
*/
in_shortfall = false;
Show All 17 Lines • for (;;) {
*
* The background laundering threshold is not a constant.
* Instead, it is a slowly growing function of the number of
* page daemon scans since the last laundering. Thus, as the
* ratio of dirty to clean inactive pages grows, the amount of
* memory pressure required to trigger laundering decreases.
*/
trybackground:
nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count; | nclean = vmd->vmd_free_count +
ndirty = vm_cnt.v_laundry_count; | vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt;
| ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
if (target == 0 && inactq_scans != last_launder &&
ndirty * isqrt(inactq_scans - last_launder) >= nclean) {
target = vm_background_launder_target; | target = vmd->vmd_background_launder_target;
}
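To make the isqrt() threshold concrete, here is a worked example with hypothetical queue sizes (isqrt() is the integer square root used in the condition above):

/*
 * Suppose a domain has nclean = 100000 free + inactive pages and
 * ndirty = 20000 pages in the laundry queue.  Laundering triggers when
 *
 *	ndirty * isqrt(scans_since_last_launder) >= nclean
 *
 *	scans:          1      4      9     16     25
 *	isqrt(scans):   1      2      3      4      5
 *	lhs:        20000  40000  60000  80000 100000
 *
 * so with dirty pages at 1/5 of clean, background laundering begins
 * after 25 inactive-queue scans; if the laundry queue held half as
 * many pages as the clean count (ndirty = 50000), it would begin
 * after only 4 scans.
 */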
/*
* We have a non-zero background laundering target. If we've
* laundered up to our maximum without observing a page daemon
* request, just stop. This is a safety belt that ensures we
* don't launder an excessive amount if memory pressure is low
* and the ratio of dirty to clean pages is large. Otherwise,
Show All 15 Lines
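The folded lines pace the run at vm_background_launder_rate. One plausible shape for that computation (an assumption about the elided code, using the unit conversions the sysctl descriptions imply):

/* Sketch: convert KB/s into pages laundered per laundry-thread wakeup.
 * With the defaults (rate = 4096 KB/s, 4 KB pages, VM_LAUNDER_RATE = 10)
 * this yields 4096 * 1024 / 4096 / 10 = 102 pages, i.e. ~400 KB per
 * 100 ms pass. */
launder = vm_background_launder_rate * 1024 / PAGE_SIZE;
launder /= VM_LAUNDER_RATE;
if (launder > target)
	launder = target;	/* never overshoot the background target */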
dolaundry:
if (launder > 0) {
/*
* Because of I/O clustering, the number of laundered
* pages could exceed "target" by the maximum size of
* a cluster minus one.
*/
target -= min(vm_pageout_launder(domain, launder, | target -= min(vm_pageout_launder(vmd, launder,
in_shortfall), target);
pause("laundp", hz / VM_LAUNDER_RATE);
}
/*
* If we're not currently laundering pages and the page daemon
* hasn't posted a new request, sleep until the page daemon
* kicks us.
*/
vm_pagequeue_lock(pq);
if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE) | if (target == 0 && vmd->vmd_laundry_request == VM_LAUNDRY_IDLE)
(void)mtx_sleep(&vm_laundry_request, | (void)mtx_sleep(&vmd->vmd_laundry_request,
vm_pagequeue_lockptr(pq), PVM, "launds", 0);
/*
* If the pagedaemon has indicated that it's in shortfall, start
* a shortfall laundering unless we're already in the middle of
* one. This may preempt a background laundering.
*/
if (vm_laundry_request == VM_LAUNDRY_SHORTFALL && | if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL &&
(!in_shortfall || shortfall_cycle == 0)) {
shortfall = vm_laundry_target() + vm_pageout_deficit; | shortfall = vm_laundry_target(vmd) +
| vmd->vmd_pageout_deficit;
target = 0;
} else
shortfall = 0;
if (target == 0)
vm_laundry_request = VM_LAUNDRY_IDLE; | vmd->vmd_laundry_request = VM_LAUNDRY_IDLE;
inactq_scans = vm_inactq_scans; | inactq_scans = vmd->vmd_inactq_scans;
vm_pagequeue_unlock(pq);
}
}
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
*
* pass == 0: Update active LRU/deactivate pages
Show All 12 Lines • vm_pageout_scan(struct vm_domain *vmd, int pass)
int act_delta, addl_page_shortage, deficit, inactq_shortage, maxscan;
int page_shortage, scan_tick, scanned, starting_page_shortage;
boolean_t queue_locked;
/*
* If we need to reclaim memory ask kernel caches to return
* some. We rate limit to avoid thrashing.
*/
if (vmd == &vm_dom[0] && pass > 0 && | if (vmd == VM_DOMAIN(0) && pass > 0 &&
(time_uptime - lowmem_uptime) >= lowmem_period) {
/*
* Decrease registered cache sizes.
*/
SDT_PROBE0(vm, , , vm__lowmem_scan);
EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_PAGES);
/*
* We do this explicitly after the caches have been
Show All 12 Lines • vm_pageout_scan(struct vm_domain *vmd, int pass)
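The vm_lowmem event invoked above is the hook by which subsystems with shrinkable caches hear about memory pressure; the page daemon fires it at most once per vm.lowmem_period seconds. A minimal consumer might look like the sketch below; mycache_trim() and the SYSINIT placement are assumptions for illustration, while the handler type and registration macros come from <sys/eventhandler.h>.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/eventhandler.h>

static eventhandler_tag mycache_lowmem_tag;

/* Invoked from the page daemon's lowmem scan; "flags" is VM_LOW_PAGES here. */
static void
mycache_lowmem(void *arg __unused, int flags __unused)
{
	mycache_trim();		/* hypothetical: release cached objects */
}

static void
mycache_init(void *arg __unused)
{
	mycache_lowmem_tag = EVENTHANDLER_REGISTER(vm_lowmem,
	    mycache_lowmem, NULL, EVENTHANDLER_PRI_FIRST);
}
SYSINIT(mycache_lowmem, SI_SUB_DRIVERS, SI_ORDER_ANY, mycache_init, NULL);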
addl_page_shortage = 0;
/*
* Calculate the number of pages that we want to free. This number
* can be negative if many pages are freed between the wakeup call to
* the page daemon and this calculation.
*/
if (pass > 0) {
deficit = atomic_readandclear_int(&vm_pageout_deficit); | deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
page_shortage = vm_paging_target() + deficit; | page_shortage = vm_paging_target(vmd) + deficit;
} else
page_shortage = deficit = 0;
starting_page_shortage = page_shortage;
/*
* Start scanning the inactive queue for pages that we can free. The
* scan will stop when we reach the target or we have scanned the
* entire queue. (Note that m->act_count is not used to make
▲ Show 20 Lines • Show All 176 Lines • ▼ Show 20 Lines • drop_page:
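The targets above come from small helpers that this change converts to take a struct vm_domain pointer. A sketch of their likely definitions, mirroring the pre-NUMA global versions (they live in a header, so this is inferred rather than quoted from the diff):

/* Pages the domain must free to reach its free-page target. */
static inline int
vm_paging_target(struct vm_domain *vmd)
{
	return (vmd->vmd_free_target - vmd->vmd_free_count);
}

/* Pages the laundry thread should clean to cover the same shortfall. */
static inline int
vm_laundry_target(struct vm_domain *vmd)
{
	return (vm_paging_target(vmd));
}

/* Is the domain below its minimum free-page reserve? */
static inline bool
vm_paging_min(struct vm_domain *vmd)
{
	return (vmd->vmd_free_min > vmd->vmd_free_count);
}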
* swap devices are configured, the laundry thread has no work to do, so
* don't bother waking it up.
*
* The laundry thread uses the number of inactive queue scans elapsed
* since the last laundering to determine whether to launder again, so
* keep count.
*/
if (starting_page_shortage > 0) {
pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY]; | pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
vm_pagequeue_lock(pq);
if (vm_laundry_request == VM_LAUNDRY_IDLE && | if (vmd->vmd_laundry_request == VM_LAUNDRY_IDLE &&
(pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) {
if (page_shortage > 0) {
vm_laundry_request = VM_LAUNDRY_SHORTFALL; | vmd->vmd_laundry_request = VM_LAUNDRY_SHORTFALL;
VM_CNT_INC(v_pdshortfalls);
} else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL) | } else if (vmd->vmd_laundry_request !=
vm_laundry_request = VM_LAUNDRY_BACKGROUND; | VM_LAUNDRY_SHORTFALL)
wakeup(&vm_laundry_request); | vmd->vmd_laundry_request =
| VM_LAUNDRY_BACKGROUND;
| wakeup(&vmd->vmd_laundry_request);
}
vm_inactq_scans++; | vmd->vmd_inactq_scans++;
vm_pagequeue_unlock(pq);
}
/*
* Wakeup the swapout daemon if we didn't free the targeted number of
* pages.
*/
if (page_shortage > 0)
Show All 12 Lines • drop_page:
/*
* When scanning active pages, we make clean pages count more heavily
* towards the page shortage than dirty pages. This is because dirty
* pages must be laundered before they can be reused and thus have less
* utility when attempting to quickly alleviate a shortage. However,
* this weighting also causes the scan to deactivate dirty pages more
* aggressively, improving the effectiveness of clustering and
* ensuring that they can eventually be reused.
*/
inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count + | inactq_shortage = vmd->vmd_inactive_target - (pq->pq_cnt +
vm_cnt.v_laundry_count / act_scan_laundry_weight) + | vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight) +
vm_paging_target() + deficit + addl_page_shortage; | vm_paging_target(vmd) + deficit + addl_page_shortage;
inactq_shortage *= act_scan_laundry_weight;
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
vm_pagequeue_lock(pq);
maxscan = pq->pq_cnt;
/*
* If we're just idle polling attempt to visit every
▲ Show 20 Lines • Show All 326 Lines • ▼ Show 20 Lines • if (size > bigsize) {
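A worked example of the weighting, with made-up per-domain numbers and the default act_scan_laundry_weight of 3:

/*
 * Suppose inactive_target = 30000, the inactive queue holds 18000
 * pages, the laundry queue 9000, and the paging target, deficit, and
 * addl_page_shortage are all 0.  Each dirty (laundry) page counts as
 * only 1/3 of a clean page:
 *
 *	inactq_shortage = 30000 - (18000 + 9000 / 3) = 9000
 *	inactq_shortage *= 3  ->  27000
 *
 * The final multiplication rescales the shortage for the active scan:
 * deactivating a clean page shrinks it by the weight (3), while
 * deactivating a dirty page (bound for the laundry queue) shrinks it
 * by 1.
 */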
bigproc = p;
bigsize = size;
} else {
PRELE(p);
}
}
sx_sunlock(&allproc_lock);
if (bigproc != NULL) {
| int i;
if (vm_panic_on_oom != 0)
panic("out of swap space");
PROC_LOCK(bigproc);
killproc(bigproc, "out of swap space");
sched_nice(bigproc, PRIO_MIN);
_PRELE(bigproc);
PROC_UNLOCK(bigproc);
wakeup(&vm_cnt.v_free_count); | for (i = 0; i < vm_ndomains; i++)
| wakeup(&VM_DOMAIN(i)->vmd_free_count);
}
jeff: I'm not sure this is even necessary. The code in vm_page_free_wakeup() should be sufficient.
}
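Context for jeff's note above: vm_page_free_wakeup() runs from the page-freeing path and performs the same check, so pages freed by the killed process should already rouse any sleepers. A sketch of the per-domain shape of that path, extrapolated from the pre-NUMA version in vm_page.c (the name and placement here are assumptions, not quotes from the patch):

/*
 * Called with the domain free lock held after pages are returned to
 * the domain's free lists.  Sketch only; extrapolated, not quoted.
 */
static void
vm_domain_free_wakeup(struct vm_domain *vmd)
{
	vm_domain_free_assert_locked(vmd);

	/*
	 * Wake threads sleeping in pagedaemon_wait() once the domain is
	 * back above its minimum free-page threshold.
	 */
	if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
		vmd->vmd_pages_needed = false;
		wakeup(&vmd->vmd_free_count);
	}
}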
static void
vm_pageout_worker(void *arg)
{
struct vm_domain *domain; | struct vm_domain *vmd;
int domidx, pass; | int domain, pass;
bool target_met;
domidx = (uintptr_t)arg; | domain = (uintptr_t)arg;
domain = &vm_dom[domidx]; | vmd = VM_DOMAIN(domain);
pass = 0;
target_met = true;
/*
* XXXKIB It could be useful to bind pageout daemon threads to
* the cores belonging to the domain, from which vm_page_array
* is allocated.
*/
KASSERT(domain->vmd_segs != 0, ("domain without segments")); | KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
domain->vmd_last_active_scan = ticks; | vmd->vmd_last_active_scan = ticks;
vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE); | vm_pageout_init_marker(&vmd->vmd_marker, PQ_INACTIVE);
vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE); | vm_pageout_init_marker(&vmd->vmd_inacthead, PQ_INACTIVE);
TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl, | TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_INACTIVE].pq_pl,
&domain->vmd_inacthead, plinks.q); | &vmd->vmd_inacthead, plinks.q);
/*
* The pageout daemon worker is never done, so loop forever.
*/
while (TRUE) {
mtx_lock(&vm_page_queue_free_mtx); | vm_domain_free_lock(vmd);
/*
* Generally, after a level >= 1 scan, if there are enough
* free pages to wakeup the waiters, then they are already
* awake. A call to vm_page_free() during the scan awakened
* them. However, in the following case, this wakeup serves
* to bound the amount of time that a thread might wait.
* Suppose a thread's call to vm_page_alloc() fails, but
* before that thread calls VM_WAIT, enough pages are freed by
* other threads to alleviate the free page shortage. The
* thread will, nonetheless, wait until another page is freed
* or this wakeup is performed.
*/
if (vm_pages_needed && !vm_page_count_min()) { | if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
vm_pages_needed = false; | vmd->vmd_pages_needed = false;
wakeup(&vm_cnt.v_free_count); | wakeup(&vmd->vmd_free_count);
}
/*
* Do not clear vm_pageout_wanted until we reach our free page | * Do not clear vmd_pageout_wanted until we reach our free page
* target. Otherwise, we may be awakened over and over again,
* wasting CPU time.
*/
if (vm_pageout_wanted && target_met) | if (vmd->vmd_pageout_wanted && target_met)
vm_pageout_wanted = false; | vmd->vmd_pageout_wanted = false;
/*
* Might the page daemon receive a wakeup call?
*/
if (vm_pageout_wanted) { | if (vmd->vmd_pageout_wanted) {
/*
* No. Either vm_pageout_wanted was set by another | * No. Either vmd_pageout_wanted was set by another
* thread during the previous scan, which must have
* been a level 0 scan, or vm_pageout_wanted was | * been a level 0 scan, or vmd_pageout_wanted was
* already set and the scan failed to free enough
* pages. If we haven't yet performed a level >= 1
* (page reclamation) scan, then increase the level
* and scan again now. Otherwise, sleep a bit and
* try again later.
*/
mtx_unlock(&vm_page_queue_free_mtx); | vm_domain_free_unlock(vmd);
if (pass >= 1)
pause("pwait", hz / VM_INACT_SCAN_RATE);
pass++;
} else {
/*
* Yes. If threads are still sleeping in VM_WAIT
* then we immediately start a new scan. Otherwise,
* sleep until the next wakeup or until pages need to
* have their reference stats updated.
*/
if (vm_pages_needed) { | if (vmd->vmd_pages_needed) {
mtx_unlock(&vm_page_queue_free_mtx); | vm_domain_free_unlock(vmd);
if (pass == 0)
pass++;
} else if (mtx_sleep(&vm_pageout_wanted, | } else if (mtx_sleep(&vmd->vmd_pageout_wanted,
&vm_page_queue_free_mtx, PDROP | PVM, "psleep", | vm_domain_free_lockptr(vmd), PDROP | PVM,
hz) == 0) { | "psleep", hz) == 0) {
VM_CNT_INC(v_pdwakeups);
pass = 1;
} else
pass = 0;
}
target_met = vm_pageout_scan(domain, pass); | target_met = vm_pageout_scan(vmd, pass);
}
}
/*
* vm_pageout_init initialises basic pageout daemon settings.
*/
static void
vm_pageout_init(void) | vm_pageout_init_domain(int domain)
{
/* | struct vm_domain *vmd;
* Initialize some paging parameters. |
*/ |
vm_cnt.v_interrupt_free_min = 2; |
if (vm_cnt.v_page_count < 2000) |
vm_pageout_page_count = 8; |
| vmd = VM_DOMAIN(domain);
| vmd->vmd_interrupt_free_min = 2;
jeff: interrupt is a misnomer. As far as I can tell it's only used by M_USE_RESERVE
markj: VM_ALLOC_INTERRUPT seems to be used in quite a few places? I agree that the name doesn't make sense.
/*
* v_free_reserved needs to include enough for the largest
* swap pager structures plus enough for any pv_entry structs
* when paging.
*/
if (vm_cnt.v_page_count > 1024) | if (vmd->vmd_page_count > 1024)
vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200; | vmd->vmd_free_min = 4 + (vmd->vmd_page_count - 1024) / 200;
else
vm_cnt.v_free_min = 4; | vmd->vmd_free_min = 4;
vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + | vmd->vmd_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
vm_cnt.v_interrupt_free_min; | vmd->vmd_interrupt_free_min;
vm_cnt.v_free_reserved = vm_pageout_page_count + | vmd->vmd_free_reserved = vm_pageout_page_count +
vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768); | vmd->vmd_pageout_free_min + (vmd->vmd_page_count / 768);
vm_cnt.v_free_severe = vm_cnt.v_free_min / 2; | vmd->vmd_free_severe = vmd->vmd_free_min / 2;
vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved; | vmd->vmd_free_target = 4 * vmd->vmd_free_min + vmd->vmd_free_reserved;
vm_cnt.v_free_min += vm_cnt.v_free_reserved; | vmd->vmd_free_min += vmd->vmd_free_reserved;
vm_cnt.v_free_severe += vm_cnt.v_free_reserved; | vmd->vmd_free_severe += vmd->vmd_free_reserved;
vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2; | vmd->vmd_inactive_target = (3 * vmd->vmd_free_target) / 2;
if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3) | if (vmd->vmd_inactive_target > vmd->vmd_free_count / 3)
vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3; | vmd->vmd_inactive_target = vmd->vmd_free_count / 3;
/*
* Set the default wakeup threshold to be 10% above the minimum
* page limit. This keeps the steady state out of shortfall.
*/
vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11; | vmd->vmd_pageout_wakeup_thresh = (vmd->vmd_free_min / 10) * 11;
/*
| * Target amount of memory to move out of the laundry queue during a
| * background laundering. This is proportional to the amount of system
| * memory.
| */
| vmd->vmd_background_launder_target = (vmd->vmd_free_target -
| vmd->vmd_free_min) / 10;
| }
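To see what these formulas produce, here is a worked example for a hypothetical 4 GB domain with 4 KB pages (page count and free count both 1,048,576), assuming MAXBSIZE = 65536 and the default vm_pageout_page_count of 16; all of those constants are assumptions for the exercise, not values from the diff:

#include <stdio.h>

/* Userland replay of the vm_pageout_init_domain() arithmetic. */
int
main(void)
{
	unsigned page_count = 1048576, free_count = 1048576;
	unsigned interrupt_free_min = 2;
	unsigned free_min = 4 + (page_count - 1024) / 200;	/* 5241 */
	unsigned pageout_free_min = (2 * 65536) / 4096 +
	    interrupt_free_min;					/* 34 */
	unsigned free_reserved = 16 + pageout_free_min +
	    page_count / 768;					/* 1415 */
	unsigned free_severe = free_min / 2;			/* 2620 */
	unsigned free_target = 4 * free_min + free_reserved;	/* 22379 */
	unsigned inactive_target, wakeup_thresh, launder_target;

	free_min += free_reserved;				/* 6656 */
	free_severe += free_reserved;				/* 4035 */
	inactive_target = (3 * free_target) / 2;		/* 33568 */
	if (inactive_target > free_count / 3)
		inactive_target = free_count / 3;
	wakeup_thresh = (free_min / 10) * 11;			/* 7315 */
	launder_target = (free_target - free_min) / 10;		/* 1572 */

	printf("min %u target %u severe %u reserved %u\n",
	    free_min, free_target, free_severe, free_reserved);
	printf("inactive %u wakeup %u launder %u\n",
	    inactive_target, wakeup_thresh, launder_target);
	return (0);
}

So a 4 GB domain would wake its page daemon below roughly 29 MB free (7315 pages) and aim to keep about 87 MB (22379 pages) free.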
| static void
| vm_pageout_init(void)
| {
| u_int freecount;
| int i;
| /*
| * Initialize some paging parameters.
| */
| if (vm_cnt.v_page_count < 2000)
| vm_pageout_page_count = 8;
| freecount = 0;
| for (i = 0; i < vm_ndomains; i++) {
| struct vm_domain *vmd;
| vm_pageout_init_domain(i);
| vmd = VM_DOMAIN(i);
| vm_cnt.v_free_reserved += vmd->vmd_free_reserved;
| vm_cnt.v_free_target += vmd->vmd_free_target;
| vm_cnt.v_free_min += vmd->vmd_free_min;
| vm_cnt.v_inactive_target += vmd->vmd_inactive_target;
| vm_cnt.v_pageout_free_min += vmd->vmd_pageout_free_min;
| vm_cnt.v_interrupt_free_min += vmd->vmd_interrupt_free_min;
| vm_cnt.v_free_severe += vmd->vmd_free_severe;
| freecount += vmd->vmd_free_count;
| }
| /*
* Set interval in seconds for active scan. We want to visit each
* page at least once every ten minutes. This is to prevent worst
* case paging behaviors with stale active LRU.
*/
if (vm_pageout_update_period == 0)
vm_pageout_update_period = 600;
/* XXX does not really belong here */ |
if (vm_page_max_wired == 0)
vm_page_max_wired = vm_cnt.v_free_count / 3; | vm_page_max_wired = freecount / 3;
/* |
* Target amount of memory to move out of the laundry queue during a |
* background laundering. This is proportional to the amount of system |
* memory. |
*/ |
vm_background_launder_target = (vm_cnt.v_free_target - |
markj: This should be per-domain too.
vm_cnt.v_free_min) / 10; |
}
/*
* vm_pageout is the high level pageout daemon.
*/
static void
vm_pageout(void)
{
int error;
int i;
swap_pager_swap_init();
error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
0, 0, "laundry: dom0");
if (error != 0)
panic("starting laundry for domain 0, error %d", error);
for (i = 1; i < vm_ndomains; i++) {
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
curproc, NULL, 0, 0, "dom%d", i);
if (error != 0) {
panic("starting pageout for domain %d, error %d\n",
i, error);
}
| error = kthread_add(vm_pageout_laundry_worker,
| (void *)(uintptr_t)i, curproc, NULL, 0, 0,
| "laundry: dom%d", i);
| if (error != 0)
| panic("starting laundry for domain %d, error %d",
| i, error);
}
error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL,
0, 0, "uma");
if (error != 0)
panic("starting uma_reclaim helper, error %d\n", error);
vm_pageout_worker((void *)(uintptr_t)0);
}
/*
* Perform an advisory wakeup of the page daemon.
*/
void
pagedaemon_wakeup(void) | pagedaemon_wakeup(int domain)
{
| struct vm_domain *vmd;
mtx_assert(&vm_page_queue_free_mtx, MA_NOTOWNED); | vmd = VM_DOMAIN(domain);
| vm_domain_free_assert_unlocked(vmd);
if (!vm_pageout_wanted && curthread->td_proc != pageproc) { | if (!vmd->vmd_pageout_wanted && curthread->td_proc != pageproc) {
vm_pageout_wanted = true; | vmd->vmd_pageout_wanted = true;
wakeup(&vm_pageout_wanted); | wakeup(&vmd->vmd_pageout_wanted);
}
}
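Callers decide whether to poke a domain's daemon by comparing its free count against the wakeup threshold computed in vm_pageout_init_domain(). A sketch of the check and a hypothetical allocation-path caller (vm_paging_needed() mirrors the pre-NUMA global helper, and the vmd_domain index field is an assumption; the real call sites are outside this file):

/* Has the domain dipped below its pageout wakeup threshold? */
static inline bool
vm_paging_needed(struct vm_domain *vmd, u_int free_count)
{
	return (free_count < vmd->vmd_pageout_wakeup_thresh);
}

/* Hypothetical allocation path: after taking pages from a domain,
 * nudge that domain's page daemon if free memory is getting low. */
if (vm_paging_needed(vmd, vmd->vmd_free_count))
	pagedaemon_wakeup(vmd->vmd_domain);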
/*
* Wake up the page daemon and wait for it to reclaim free pages.
*
* This function returns with the free queues mutex unlocked.
*/
void
pagedaemon_wait(int pri, const char *wmesg) | pagedaemon_wait(int domain, int pri, const char *wmesg)
{
| struct vm_domain *vmd;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); | vmd = VM_DOMAIN(domain);
| vm_domain_free_assert_locked(vmd);
/*
* vm_pageout_wanted may have been set by an advisory wakeup, but if the | * vmd_pageout_wanted may have been set by an advisory wakeup, but if
* page daemon is running on a CPU, the wakeup will have been lost. | * the page daemon is running on a CPU, the wakeup will have been lost.
* Thus, deliver a potentially spurious wakeup to ensure that the page
* daemon has been notified of the shortage.
*/
if (!vm_pageout_wanted || !vm_pages_needed) { | if (!vmd->vmd_pageout_wanted || !vmd->vmd_pages_needed) {
vm_pageout_wanted = true; | vmd->vmd_pageout_wanted = true;
wakeup(&vm_pageout_wanted); | wakeup(&vmd->vmd_pageout_wanted);
}
vm_pages_needed = true; | vmd->vmd_pages_needed = true;
msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | pri, | vmd->vmd_waiters++;
| msleep(&vmd->vmd_free_count, vm_domain_free_lockptr(vmd), PDROP | pri,
wmesg, 0);
| vmd->vmd_waiters--;
}
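A sketch of how a blocked allocator might use this entry point, as a hypothetical per-domain variant of the old VM_WAIT path (the function name is an assumption, not a quote from the patch):

/* Sleep until the domain's page daemon has reclaimed some memory. */
static void
vm_wait_domain_sketch(int domain)
{
	struct vm_domain *vmd;

	vmd = VM_DOMAIN(domain);
	vm_domain_free_lock(vmd);
	/* pagedaemon_wait() drops the free lock (PDROP) before returning. */
	pagedaemon_wait(domain, PVM, "vmwait");
}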