sys/vm/vm_pageout.c
Show First 20 Lines • Show All 104 Lines • ▼ Show 20 Lines
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
| #include <vm/vm_pagequeue.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
/*
* System initialization
*/
Show All 21 Lines
SDT_PROVIDER_DEFINE(vm);
SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
/* Pagedaemon activity rates, in subdivisions of one second. */
#define VM_LAUNDER_RATE 10
#define VM_INACT_SCAN_RATE 2
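These two constants express frequencies as fractions of a second; the worker loops below turn them into sleep intervals by dividing hz. A quick sketch (assuming the common tick rate hz = 1000; the arithmetic scales with whatever hz the kernel runs at):

/* hz / VM_LAUNDER_RATE = 1000 / 10 = 100 ticks, so
 * pause("laundp", hz / VM_LAUNDER_RATE) yields ~10 laundering passes
 * per second; hz / VM_INACT_SCAN_RATE = 1000 / 2 = 500 ticks gives
 * ~2 inactive-queue scans per second. */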
int vm_pageout_deficit; /* Estimated number of pages deficit */ |
u_int vm_pageout_wakeup_thresh; |
static int vm_pageout_oom_seq = 12;
static bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */ |
bool vm_pages_needed; /* Are threads waiting for free pages? */ |
/* Pending request for dirty page laundering. */ |
static enum { |
VM_LAUNDRY_IDLE, |
VM_LAUNDRY_BACKGROUND, |
VM_LAUNDRY_SHORTFALL |
} vm_laundry_request = VM_LAUNDRY_IDLE; |
static int vm_inactq_scans; |
static int vm_pageout_update_period;
static int disable_swap_pageouts;
static int lowmem_period = 10;
static time_t lowmem_uptime;
static int swapdev_enabled;
static int vm_panic_on_oom = 0;
SYSCTL_INT(_vm, OID_AUTO, panic_on_oom,
CTLFLAG_RWTUN, &vm_panic_on_oom, 0,
"panic on out of memory instead of killing the largest process");
SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, |
CTLFLAG_RWTUN, &vm_pageout_wakeup_thresh, 0, |
"free page threshold for waking up the pageout daemon"); |
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
CTLFLAG_RWTUN, &vm_pageout_update_period, 0,
"Maximum active LRU update period");
SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RWTUN, &lowmem_period, 0,
"Low memory callback period");
SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
CTLFLAG_RWTUN, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
static int pageout_lock_miss;
SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout");
SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq,
CTLFLAG_RWTUN, &vm_pageout_oom_seq, 0,
"back-to-back calls to oom detector to start OOM");
static int act_scan_laundry_weight = 3;
SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RWTUN,
&act_scan_laundry_weight, 0,
"weight given to clean vs. dirty pages in active queue scans");
static u_int vm_background_launder_target; |
SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RWTUN, |
&vm_background_launder_target, 0, |
"background laundering target, in pages"); |
static u_int vm_background_launder_rate = 4096;
SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN,
&vm_background_launder_rate, 0,
"background laundering rate, in kilobytes per second");
static u_int vm_background_launder_max = 20 * 1024;
SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RWTUN,
&vm_background_launder_max, 0, "background laundering cap, in kilobytes");
▲ Show 20 Lines • Show All 738 Lines • ▼ Show 20 Lines
/*
* Perform the work of the laundry thread: periodically wake up and determine
* whether any pages need to be laundered. If so, determine the number of pages
* that need to be laundered, and launder them.
*/
static void
vm_pageout_laundry_worker(void *arg)
{
struct vm_domain *domain; | struct vm_domain *vmd;
struct vm_pagequeue *pq;
uint64_t nclean, ndirty;
u_int inactq_scans, last_launder;
int domidx, last_target, launder, shortfall, shortfall_cycle, target; | int domain, last_target, launder, shortfall, shortfall_cycle, target;
bool in_shortfall;
domidx = (uintptr_t)arg; | domain = (uintptr_t)arg;
domain = &vm_dom[domidx]; | vmd = VM_DOMAIN(domain);
pq = &domain->vmd_pagequeues[PQ_LAUNDRY]; | pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
KASSERT(domain->vmd_segs != 0, ("domain without segments")); | KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY); | vm_pageout_init_marker(&vmd->vmd_laundry_marker, PQ_LAUNDRY);
shortfall = 0;
in_shortfall = false;
shortfall_cycle = 0;
target = 0;
inactq_scans = 0;
last_launder = 0;
/*
* Calls to these handlers are serialized by the swap syscall lock.
*/
(void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, domain, | (void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, vmd,
EVENTHANDLER_PRI_ANY);
(void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, domain, | (void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, vmd,
EVENTHANDLER_PRI_ANY);
/*
* The pageout laundry worker is never done, so loop forever.
*/
for (;;) {
KASSERT(target >= 0, ("negative target %d", target));
KASSERT(shortfall_cycle >= 0,
("negative cycle %d", shortfall_cycle));
launder = 0;
/*
* First determine whether we need to launder pages to meet a
* shortage of free pages.
*/
if (shortfall > 0) {
in_shortfall = true;
shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE;
target = shortfall;
} else if (!in_shortfall)
goto trybackground;
else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) { | else if (shortfall_cycle == 0 || vm_laundry_target(vmd) <= 0) {
/*
* We recently entered shortfall and began laundering
* pages. If we have completed that laundering run
* (and we are no longer in shortfall) or we have met
* our laundry target through other activity, then we
* can stop laundering pages.
*/
in_shortfall = false;
Show All 17 Lines • for (;;) {
*
* The background laundering threshold is not a constant.
* Instead, it is a slowly growing function of the number of
* page daemon scans since the last laundering. Thus, as the
* ratio of dirty to clean inactive pages grows, the amount of
* memory pressure required to trigger laundering decreases.
*/
trybackground:
nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count; | nclean = vmd->vmd_free_count +
ndirty = vm_cnt.v_laundry_count; | vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt;
| ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
if (target == 0 && inactq_scans != last_launder &&
ndirty * isqrt(inactq_scans - last_launder) >= nclean) {
target = vm_background_launder_target; | target = vmd->vmd_background_launder_target;
}
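To make the isqrt() threshold concrete, here is a worked example with hypothetical queue sizes (isqrt() is the integer square root used in the condition above):

/*
 * Suppose a domain has nclean = 100000 free + inactive pages and
 * ndirty = 20000 pages in the laundry queue.  Laundering triggers when
 *
 *	ndirty * isqrt(scans_since_last_launder) >= nclean
 *
 *	scans:          1      4      9     16     25
 *	isqrt(scans):   1      2      3      4      5
 *	lhs:        20000  40000  60000  80000 100000
 *
 * so with dirty pages at 1/5 of clean, background laundering begins
 * after 25 inactive-queue scans; if the laundry queue held half as
 * many pages as the clean count (ndirty = 50000), it would begin
 * after only 4 scans.
 */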
/*
* We have a non-zero background laundering target. If we've
* laundered up to our maximum without observing a page daemon
* request, just stop. This is a safety belt that ensures we
* don't launder an excessive amount if memory pressure is low
* and the ratio of dirty to clean pages is large. Otherwise,
Show All 15 Lines
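The folded lines pace the run at vm_background_launder_rate. One plausible shape for that computation (an assumption about the elided code, using the unit conversions the sysctl descriptions imply):

/* Sketch: convert KB/s into pages laundered per laundry-thread wakeup.
 * With the defaults (rate = 4096 KB/s, 4 KB pages, VM_LAUNDER_RATE = 10)
 * this yields 4096 * 1024 / 4096 / 10 = 102 pages, i.e. ~400 KB per
 * 100 ms pass. */
launder = vm_background_launder_rate * 1024 / PAGE_SIZE;
launder /= VM_LAUNDER_RATE;
if (launder > target)
	launder = target;	/* never overshoot the background target */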
dolaundry:
if (launder > 0) {
/*
* Because of I/O clustering, the number of laundered
* pages could exceed "target" by the maximum size of
* a cluster minus one.
*/
target -= min(vm_pageout_launder(domain, launder, | target -= min(vm_pageout_launder(vmd, launder,
in_shortfall), target);
pause("laundp", hz / VM_LAUNDER_RATE);
}
/*
* If we're not currently laundering pages and the page daemon
* hasn't posted a new request, sleep until the page daemon
* kicks us.
*/
vm_pagequeue_lock(pq);
if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE) | if (target == 0 && vmd->vmd_laundry_request == VM_LAUNDRY_IDLE)
(void)mtx_sleep(&vm_laundry_request, | (void)mtx_sleep(&vmd->vmd_laundry_request,
vm_pagequeue_lockptr(pq), PVM, "launds", 0);
/*
* If the pagedaemon has indicated that it's in shortfall, start
* a shortfall laundering unless we're already in the middle of
* one. This may preempt a background laundering.
*/
if (vm_laundry_request == VM_LAUNDRY_SHORTFALL && | if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL &&
(!in_shortfall || shortfall_cycle == 0)) {
shortfall = vm_laundry_target() + vm_pageout_deficit; | shortfall = vm_laundry_target(vmd) +
| vmd->vmd_pageout_deficit;
target = 0;
} else
shortfall = 0;
if (target == 0)
vm_laundry_request = VM_LAUNDRY_IDLE; | vmd->vmd_laundry_request = VM_LAUNDRY_IDLE;
inactq_scans = vm_inactq_scans; | inactq_scans = vmd->vmd_inactq_scans;
vm_pagequeue_unlock(pq);
}
}
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
*
* pass == 0: Update active LRU/deactivate pages
Show All 12 Lines • vm_pageout_scan(struct vm_domain *vmd, int pass)
int act_delta, addl_page_shortage, deficit, inactq_shortage, maxscan;
int page_shortage, scan_tick, scanned, starting_page_shortage;
boolean_t queue_locked;
/*
* If we need to reclaim memory ask kernel caches to return
* some. We rate limit to avoid thrashing.
*/
if (vmd == &vm_dom[0] && pass > 0 && | if (vmd == VM_DOMAIN(0) && pass > 0 &&
(time_uptime - lowmem_uptime) >= lowmem_period) {
/*
* Decrease registered cache sizes.
*/
SDT_PROBE0(vm, , , vm__lowmem_scan);
EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_PAGES);
/*
* We do this explicitly after the caches have been
Show All 12 Lines • vm_pageout_scan(struct vm_domain *vmd, int pass)
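The vm_lowmem event invoked above is the hook by which subsystems with shrinkable caches hear about memory pressure; the page daemon fires it at most once per vm.lowmem_period seconds. A minimal consumer might look like the sketch below; mycache_trim() and the SYSINIT placement are assumptions for illustration, while the handler type and registration macros come from <sys/eventhandler.h>.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/eventhandler.h>

static eventhandler_tag mycache_lowmem_tag;

/* Invoked from the page daemon's lowmem scan; "flags" is VM_LOW_PAGES here. */
static void
mycache_lowmem(void *arg __unused, int flags __unused)
{
	mycache_trim();		/* hypothetical: release cached objects */
}

static void
mycache_init(void *arg __unused)
{
	mycache_lowmem_tag = EVENTHANDLER_REGISTER(vm_lowmem,
	    mycache_lowmem, NULL, EVENTHANDLER_PRI_FIRST);
}
SYSINIT(mycache_lowmem, SI_SUB_DRIVERS, SI_ORDER_ANY, mycache_init, NULL);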
addl_page_shortage = 0;
/*
* Calculate the number of pages that we want to free. This number
* can be negative if many pages are freed between the wakeup call to
* the page daemon and this calculation.
*/
if (pass > 0) {
deficit = atomic_readandclear_int(&vm_pageout_deficit); | deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
page_shortage = vm_paging_target() + deficit; | page_shortage = vm_paging_target(vmd) + deficit;
} else
page_shortage = deficit = 0;
starting_page_shortage = page_shortage;
/*
* Start scanning the inactive queue for pages that we can free. The
* scan will stop when we reach the target or we have scanned the
* entire queue. (Note that m->act_count is not used to make
▲ Show 20 Lines • Show All 176 Lines • ▼ Show 20 Lines • drop_page:
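The targets above come from small helpers that this change converts to take a struct vm_domain pointer. A sketch of their likely definitions, mirroring the pre-NUMA global versions (they live in a header, so this is inferred rather than quoted from the diff):

/* Pages the domain must free to reach its free-page target. */
static inline int
vm_paging_target(struct vm_domain *vmd)
{
	return (vmd->vmd_free_target - vmd->vmd_free_count);
}

/* Pages the laundry thread should clean to cover the same shortfall. */
static inline int
vm_laundry_target(struct vm_domain *vmd)
{
	return (vm_paging_target(vmd));
}

/* Is the domain below its minimum free-page reserve? */
static inline bool
vm_paging_min(struct vm_domain *vmd)
{
	return (vmd->vmd_free_min > vmd->vmd_free_count);
}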
* swap devices are configured, the laundry thread has no work to do, so
* don't bother waking it up.
*
* The laundry thread uses the number of inactive queue scans elapsed
* since the last laundering to determine whether to launder again, so
* keep count.
*/
if (starting_page_shortage > 0) {
pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY]; | pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
vm_pagequeue_lock(pq);
if (vm_laundry_request == VM_LAUNDRY_IDLE && | if (vmd->vmd_laundry_request == VM_LAUNDRY_IDLE &&
(pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) {
if (page_shortage > 0) {
vm_laundry_request = VM_LAUNDRY_SHORTFALL; | vmd->vmd_laundry_request = VM_LAUNDRY_SHORTFALL;
VM_CNT_INC(v_pdshortfalls);
} else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL) | } else if (vmd->vmd_laundry_request !=
vm_laundry_request = VM_LAUNDRY_BACKGROUND; | VM_LAUNDRY_SHORTFALL)
wakeup(&vm_laundry_request); | vmd->vmd_laundry_request =
| VM_LAUNDRY_BACKGROUND;
| wakeup(&vmd->vmd_laundry_request);
}
vm_inactq_scans++; | vmd->vmd_inactq_scans++;
vm_pagequeue_unlock(pq);
}
/*
* Wakeup the swapout daemon if we didn't free the targeted number of
* pages.
*/
if (page_shortage > 0)
Show All 12 Lines • drop_page:
/*
* When scanning active pages, we make clean pages count more heavily
* towards the page shortage than dirty pages. This is because dirty
* pages must be laundered before they can be reused and thus have less
* utility when attempting to quickly alleviate a shortage. However,
* this weighting also causes the scan to deactivate dirty pages more
* aggressively, improving the effectiveness of clustering and
* ensuring that they can eventually be reused.
*/
inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count + | inactq_shortage = vmd->vmd_inactive_target - (pq->pq_cnt +
vm_cnt.v_laundry_count / act_scan_laundry_weight) + | vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight) +
vm_paging_target() + deficit + addl_page_shortage; | vm_paging_target(vmd) + deficit + addl_page_shortage;
inactq_shortage *= act_scan_laundry_weight;
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
vm_pagequeue_lock(pq);
maxscan = pq->pq_cnt;
/*
* If we're just idle polling attempt to visit every
▲ Show 20 Lines • Show All 326 Lines • ▼ Show 20 Lines • if (size > bigsize) {
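A worked example of the weighting, with made-up per-domain numbers and the default act_scan_laundry_weight of 3:

/*
 * Suppose inactive_target = 30000, the inactive queue holds 18000
 * pages, the laundry queue 9000, and the paging target, deficit, and
 * addl_page_shortage are all 0.  Each dirty (laundry) page counts as
 * only 1/3 of a clean page:
 *
 *	inactq_shortage = 30000 - (18000 + 9000 / 3) = 9000
 *	inactq_shortage *= 3  ->  27000
 *
 * The final multiplication rescales the shortage for the active scan:
 * deactivating a clean page shrinks it by the weight (3), while
 * deactivating a dirty page (bound for the laundry queue) shrinks it
 * by 1.
 */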
bigproc = p;
bigsize = size;
} else {
PRELE(p);
}
}
sx_sunlock(&allproc_lock);
if (bigproc != NULL) {
| int i;
if (vm_panic_on_oom != 0)
panic("out of swap space");
PROC_LOCK(bigproc);
killproc(bigproc, "out of swap space");
sched_nice(bigproc, PRIO_MIN);
_PRELE(bigproc);
PROC_UNLOCK(bigproc);
wakeup(&vm_cnt.v_free_count); | for (i = 0; i < vm_ndomains; i++)
| wakeup(&VM_DOMAIN(i)->vmd_free_count);
}
jeff: I'm not sure this is even necessary. The code in vm_page_free_wakeup() should be sufficient.
}
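Context for jeff's note above: vm_page_free_wakeup() runs from the page-freeing path and performs the same check, so pages freed by the killed process should already rouse any sleepers. A sketch of the per-domain shape of that path, extrapolated from the pre-NUMA version in vm_page.c (the name and placement here are assumptions, not quotes from the patch):

/*
 * Called with the domain free lock held after pages are returned to
 * the domain's free lists.  Sketch only; extrapolated, not quoted.
 */
static void
vm_domain_free_wakeup(struct vm_domain *vmd)
{
	vm_domain_free_assert_locked(vmd);

	/*
	 * Wake threads sleeping in pagedaemon_wait() once the domain is
	 * back above its minimum free-page threshold.
	 */
	if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
		vmd->vmd_pages_needed = false;
		wakeup(&vmd->vmd_free_count);
	}
}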
static void
vm_pageout_worker(void *arg)
{
struct vm_domain *domain; | struct vm_domain *vmd;
int domidx, pass; | int domain, pass;
bool target_met;
domidx = (uintptr_t)arg; | domain = (uintptr_t)arg;
domain = &vm_dom[domidx]; | vmd = VM_DOMAIN(domain);
pass = 0;
target_met = true;
/*
* XXXKIB It could be useful to bind pageout daemon threads to
* the cores belonging to the domain, from which vm_page_array
* is allocated.
*/
KASSERT(domain->vmd_segs != 0, ("domain without segments")); | KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
domain->vmd_last_active_scan = ticks; | vmd->vmd_last_active_scan = ticks;
vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE); | vm_pageout_init_marker(&vmd->vmd_marker, PQ_INACTIVE);
vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE); | vm_pageout_init_marker(&vmd->vmd_inacthead, PQ_INACTIVE);
TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl, | TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_INACTIVE].pq_pl,
&domain->vmd_inacthead, plinks.q); | &vmd->vmd_inacthead, plinks.q);
/*
* The pageout daemon worker is never done, so loop forever.
*/
while (TRUE) {
mtx_lock(&vm_page_queue_free_mtx); | vm_domain_free_lock(vmd);
/*
* Generally, after a level >= 1 scan, if there are enough
* free pages to wakeup the waiters, then they are already
* awake. A call to vm_page_free() during the scan awakened
* them. However, in the following case, this wakeup serves
* to bound the amount of time that a thread might wait.
* Suppose a thread's call to vm_page_alloc() fails, but
* before that thread calls VM_WAIT, enough pages are freed by
* other threads to alleviate the free page shortage. The
* thread will, nonetheless, wait until another page is freed
* or this wakeup is performed.
*/
if (vm_pages_needed && !vm_page_count_min()) { | if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
vm_pages_needed = false; | vmd->vmd_pages_needed = false;
wakeup(&vm_cnt.v_free_count); | wakeup(&vmd->vmd_free_count);
}
/*
* Do not clear vm_pageout_wanted until we reach our free page | * Do not clear vmd_pageout_wanted until we reach our free page
* target. Otherwise, we may be awakened over and over again,
* wasting CPU time.
*/
if (vm_pageout_wanted && target_met) | if (vmd->vmd_pageout_wanted && target_met)
vm_pageout_wanted = false; | vmd->vmd_pageout_wanted = false;
/*
* Might the page daemon receive a wakeup call?
*/
if (vm_pageout_wanted) { | if (vmd->vmd_pageout_wanted) {
/*
* No. Either vm_pageout_wanted was set by another | * No. Either vmd_pageout_wanted was set by another
* thread during the previous scan, which must have
* been a level 0 scan, or vm_pageout_wanted was | * been a level 0 scan, or vmd_pageout_wanted was
* already set and the scan failed to free enough
* pages. If we haven't yet performed a level >= 1
* (page reclamation) scan, then increase the level
* and scan again now. Otherwise, sleep a bit and
* try again later.
*/
mtx_unlock(&vm_page_queue_free_mtx); | vm_domain_free_unlock(vmd);
if (pass >= 1)
pause("pwait", hz / VM_INACT_SCAN_RATE);
pass++;
} else {
/*
* Yes. If threads are still sleeping in VM_WAIT
* then we immediately start a new scan. Otherwise,
* sleep until the next wakeup or until pages need to
* have their reference stats updated.
*/
if (vm_pages_needed) { | if (vmd->vmd_pages_needed) {
mtx_unlock(&vm_page_queue_free_mtx); | vm_domain_free_unlock(vmd);
if (pass == 0)
pass++;
} else if (mtx_sleep(&vm_pageout_wanted, | } else if (mtx_sleep(&vmd->vmd_pageout_wanted,
&vm_page_queue_free_mtx, PDROP | PVM, "psleep", | vm_domain_free_lockptr(vmd), PDROP | PVM,
hz) == 0) { | "psleep", hz) == 0) {
VM_CNT_INC(v_pdwakeups);
pass = 1;
} else
pass = 0;
}
target_met = vm_pageout_scan(domain, pass); | target_met = vm_pageout_scan(vmd, pass);
}
}
/*
* vm_pageout_init initialises basic pageout daemon settings.
*/
static void
vm_pageout_init(void) | vm_pageout_init_domain(int domain)
{
/* | struct vm_domain *vmd;
* Initialize some paging parameters. |
*/ |
vm_cnt.v_interrupt_free_min = 2; |
if (vm_cnt.v_page_count < 2000) |
vm_pageout_page_count = 8; |
| vmd = VM_DOMAIN(domain);
| vmd->vmd_interrupt_free_min = 2;
jeff: interrupt is a misnomer. As far as I can tell it's only used by M_USE_RESERVE
markj: VM_ALLOC_INTERRUPT seems to be used in quite a few places? I agree that the name doesn't make sense.
/*
* v_free_reserved needs to include enough for the largest
* swap pager structures plus enough for any pv_entry structs
* when paging.
*/
if (vm_cnt.v_page_count > 1024) | if (vmd->vmd_page_count > 1024)
vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200; | vmd->vmd_free_min = 4 + (vmd->vmd_page_count - 1024) / 200;
else
vm_cnt.v_free_min = 4; | vmd->vmd_free_min = 4;
vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + | vmd->vmd_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
vm_cnt.v_interrupt_free_min; | vmd->vmd_interrupt_free_min;
vm_cnt.v_free_reserved = vm_pageout_page_count + | vmd->vmd_free_reserved = vm_pageout_page_count +
vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768); | vmd->vmd_pageout_free_min + (vmd->vmd_page_count / 768);
vm_cnt.v_free_severe = vm_cnt.v_free_min / 2; | vmd->vmd_free_severe = vmd->vmd_free_min / 2;
vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved; | vmd->vmd_free_target = 4 * vmd->vmd_free_min + vmd->vmd_free_reserved;
vm_cnt.v_free_min += vm_cnt.v_free_reserved; | vmd->vmd_free_min += vmd->vmd_free_reserved;
vm_cnt.v_free_severe += vm_cnt.v_free_reserved; | vmd->vmd_free_severe += vmd->vmd_free_reserved;
vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2; | vmd->vmd_inactive_target = (3 * vmd->vmd_free_target) / 2;
if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3) | if (vmd->vmd_inactive_target > vmd->vmd_free_count / 3)
vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3; | vmd->vmd_inactive_target = vmd->vmd_free_count / 3;
/*
* Set the default wakeup threshold to be 10% above the minimum
* page limit. This keeps the steady state out of shortfall.
*/
vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11; | vmd->vmd_pageout_wakeup_thresh = (vmd->vmd_free_min / 10) * 11;
/*
| * Target amount of memory to move out of the laundry queue during a
| * background laundering. This is proportional to the amount of system
| * memory.
| */
| vmd->vmd_background_launder_target = (vmd->vmd_free_target -
| vmd->vmd_free_min) / 10;
| }
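To see what these formulas produce, here is a worked example for a hypothetical 4 GB domain with 4 KB pages (page count and free count both 1,048,576), assuming MAXBSIZE = 65536 and the default vm_pageout_page_count of 16; all of those constants are assumptions for the exercise, not values from the diff:

#include <stdio.h>

/* Userland replay of the vm_pageout_init_domain() arithmetic. */
int
main(void)
{
	unsigned page_count = 1048576, free_count = 1048576;
	unsigned interrupt_free_min = 2;
	unsigned free_min = 4 + (page_count - 1024) / 200;	/* 5241 */
	unsigned pageout_free_min = (2 * 65536) / 4096 +
	    interrupt_free_min;					/* 34 */
	unsigned free_reserved = 16 + pageout_free_min +
	    page_count / 768;					/* 1415 */
	unsigned free_severe = free_min / 2;			/* 2620 */
	unsigned free_target = 4 * free_min + free_reserved;	/* 22379 */
	unsigned inactive_target, wakeup_thresh, launder_target;

	free_min += free_reserved;				/* 6656 */
	free_severe += free_reserved;				/* 4035 */
	inactive_target = (3 * free_target) / 2;		/* 33568 */
	if (inactive_target > free_count / 3)
		inactive_target = free_count / 3;
	wakeup_thresh = (free_min / 10) * 11;			/* 7315 */
	launder_target = (free_target - free_min) / 10;		/* 1572 */

	printf("min %u target %u severe %u reserved %u\n",
	    free_min, free_target, free_severe, free_reserved);
	printf("inactive %u wakeup %u launder %u\n",
	    inactive_target, wakeup_thresh, launder_target);
	return (0);
}

So a 4 GB domain would wake its page daemon below roughly 29 MB free (7315 pages) and aim to keep about 87 MB (22379 pages) free.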
| static void
| vm_pageout_init(void)
| {
| u_int freecount;
| int i;
| /*
| * Initialize some paging parameters.
| */
| if (vm_cnt.v_page_count < 2000)
| vm_pageout_page_count = 8;
| freecount = 0;
| for (i = 0; i < vm_ndomains; i++) {
| struct vm_domain *vmd;
| vm_pageout_init_domain(i);
| vmd = VM_DOMAIN(i);
| vm_cnt.v_free_reserved += vmd->vmd_free_reserved;
| vm_cnt.v_free_target += vmd->vmd_free_target;
| vm_cnt.v_free_min += vmd->vmd_free_min;
| vm_cnt.v_inactive_target += vmd->vmd_inactive_target;
| vm_cnt.v_pageout_free_min += vmd->vmd_pageout_free_min;
| vm_cnt.v_interrupt_free_min += vmd->vmd_interrupt_free_min;
| vm_cnt.v_free_severe += vmd->vmd_free_severe;
| freecount += vmd->vmd_free_count;
| }
| /*
* Set interval in seconds for active scan. We want to visit each
* page at least once every ten minutes. This is to prevent worst
* case paging behaviors with stale active LRU.
*/
if (vm_pageout_update_period == 0)
vm_pageout_update_period = 600;
/* XXX does not really belong here */ |
if (vm_page_max_wired == 0)
vm_page_max_wired = vm_cnt.v_free_count / 3; | vm_page_max_wired = freecount / 3;
/* |
* Target amount of memory to move out of the laundry queue during a |
* background laundering. This is proportional to the amount of system |
* memory. |
*/ |
vm_background_launder_target = (vm_cnt.v_free_target - |
markj: This should be per-domain too.
vm_cnt.v_free_min) / 10; |
}
/*
* vm_pageout is the high level pageout daemon.
*/
static void
vm_pageout(void)
{
int error;
int i;
swap_pager_swap_init();
error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
0, 0, "laundry: dom0");
if (error != 0)
panic("starting laundry for domain 0, error %d", error);
for (i = 1; i < vm_ndomains; i++) {
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
curproc, NULL, 0, 0, "dom%d", i);
if (error != 0) {
panic("starting pageout for domain %d, error %d\n",
i, error);
}
| error = kthread_add(vm_pageout_laundry_worker,
| (void *)(uintptr_t)i, curproc, NULL, 0, 0,
| "laundry: dom%d", i);
| if (error != 0)
| panic("starting laundry for domain %d, error %d",
| i, error);
}
error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL,
0, 0, "uma");
if (error != 0)
panic("starting uma_reclaim helper, error %d\n", error);
vm_pageout_worker((void *)(uintptr_t)0);
}
/*
* Perform an advisory wakeup of the page daemon.
*/
void
pagedaemon_wakeup(void) | pagedaemon_wakeup(int domain)
{
| struct vm_domain *vmd;
mtx_assert(&vm_page_queue_free_mtx, MA_NOTOWNED); | vmd = VM_DOMAIN(domain);
| vm_domain_free_assert_unlocked(vmd);
if (!vm_pageout_wanted && curthread->td_proc != pageproc) { | if (!vmd->vmd_pageout_wanted && curthread->td_proc != pageproc) {
vm_pageout_wanted = true; | vmd->vmd_pageout_wanted = true;
wakeup(&vm_pageout_wanted); | wakeup(&vmd->vmd_pageout_wanted);
}
}
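Callers decide whether to poke a domain's daemon by comparing its free count against the wakeup threshold computed in vm_pageout_init_domain(). A sketch of the check and a hypothetical allocation-path caller (vm_paging_needed() mirrors the pre-NUMA global helper, and the vmd_domain index field is an assumption; the real call sites are outside this file):

/* Has the domain dipped below its pageout wakeup threshold? */
static inline bool
vm_paging_needed(struct vm_domain *vmd, u_int free_count)
{
	return (free_count < vmd->vmd_pageout_wakeup_thresh);
}

/* Hypothetical allocation path: after taking pages from a domain,
 * nudge that domain's page daemon if free memory is getting low. */
if (vm_paging_needed(vmd, vmd->vmd_free_count))
	pagedaemon_wakeup(vmd->vmd_domain);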
/*
* Wake up the page daemon and wait for it to reclaim free pages.
*
* This function returns with the free queues mutex unlocked.
*/
void
pagedaemon_wait(int pri, const char *wmesg) | pagedaemon_wait(int domain, int pri, const char *wmesg)
{
| struct vm_domain *vmd;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); | vmd = VM_DOMAIN(domain);
| vm_domain_free_assert_locked(vmd);
/*
* vm_pageout_wanted may have been set by an advisory wakeup, but if the | * vmd_pageout_wanted may have been set by an advisory wakeup, but if
* page daemon is running on a CPU, the wakeup will have been lost. | * the page daemon is running on a CPU, the wakeup will have been lost.
* Thus, deliver a potentially spurious wakeup to ensure that the page
* daemon has been notified of the shortage.
*/
if (!vm_pageout_wanted || !vm_pages_needed) { | if (!vmd->vmd_pageout_wanted || !vmd->vmd_pages_needed) {
vm_pageout_wanted = true; | vmd->vmd_pageout_wanted = true;
wakeup(&vm_pageout_wanted); | wakeup(&vmd->vmd_pageout_wanted);
}
vm_pages_needed = true; | vmd->vmd_pages_needed = true;
msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | pri, | vmd->vmd_waiters++;
| msleep(&vmd->vmd_free_count, vm_domain_free_lockptr(vmd), PDROP | pri,
wmesg, 0);
| vmd->vmd_waiters--;
}
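A sketch of how a blocked allocator might use this entry point, as a hypothetical per-domain variant of the old VM_WAIT path (the function name is an assumption, not a quote from the patch):

/* Sleep until the domain's page daemon has reclaimed some memory. */
static void
vm_wait_domain_sketch(int domain)
{
	struct vm_domain *vmd;

	vmd = VM_DOMAIN(domain);
	vm_domain_free_lock(vmd);
	/* pagedaemon_wait() drops the free lock (PDROP) before returning. */
	pagedaemon_wait(domain, PVM, "vmwait");
}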