head/sys/vm/vm_pageout.c
(113 lines elided)
/* | /* | ||||
* System initialization | * System initialization | ||||
*/ | */ | ||||
/* the kernel process "vm_pageout"*/ | /* the kernel process "vm_pageout"*/ | ||||
static void vm_pageout(void); | static void vm_pageout(void); | ||||
static void vm_pageout_init(void); | static void vm_pageout_init(void); | ||||
static int vm_pageout_clean(vm_page_t m); | static int vm_pageout_clean(vm_page_t m, int *numpagedout); | ||||
static int vm_pageout_cluster(vm_page_t m); | static int vm_pageout_cluster(vm_page_t m); | ||||
static bool vm_pageout_scan(struct vm_domain *vmd, int pass); | static bool vm_pageout_scan(struct vm_domain *vmd, int pass); | ||||
static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, | static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, | ||||
int starting_page_shortage); | int starting_page_shortage); | ||||
SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, | SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, | ||||
NULL); | NULL); | ||||
(18 lines elided)
static struct kproc_desc vm_kp = { | static struct kproc_desc vm_kp = { | ||||
"vmdaemon", | "vmdaemon", | ||||
vm_daemon, | vm_daemon, | ||||
&vmproc | &vmproc | ||||
}; | }; | ||||
SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); | SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); | ||||
#endif | #endif | ||||
/* Pagedaemon activity rates, in subdivisions of one second. */ | |||||
#define VM_LAUNDER_RATE 10 | |||||
#define VM_INACT_SCAN_RATE 2 | |||||
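(Editorial aside, not part of the change: a standalone sketch of how these rate macros divide one second. The hz value below is an assumption; it is a kernel tunable, commonly 1000. The arithmetic mirrors the pause() calls and the shortfall_cycle computation added further down in this diff.)

/* Standalone illustration; compiles with any C compiler. */
#include <stdio.h>

#define VM_LAUNDER_RATE		10	/* laundering batches per second */
#define VM_INACT_SCAN_RATE	2	/* inactive queue scans per second */

int
main(void)
{
	int hz = 1000;	/* assumed ticks per second; a kernel tunable */

	/* The laundry thread sleeps this long between batches. */
	printf("laundering batch interval: %d ticks (0.1 s)\n",
	    hz / VM_LAUNDER_RATE);
	/* The page daemon re-runs a shortfall scan at this interval. */
	printf("inactive scan interval: %d ticks (0.5 s)\n",
	    hz / VM_INACT_SCAN_RATE);
	/* A shortfall target is spread over this many laundering batches. */
	printf("batches per shortfall cycle: %d\n",
	    VM_LAUNDER_RATE / VM_INACT_SCAN_RATE);
	return (0);
}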
int vm_pageout_deficit; /* Estimated number of pages deficit */ | int vm_pageout_deficit; /* Estimated number of pages deficit */ | ||||
u_int vm_pageout_wakeup_thresh; | u_int vm_pageout_wakeup_thresh; | ||||
static int vm_pageout_oom_seq = 12; | static int vm_pageout_oom_seq = 12; | ||||
bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */ | bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */ | ||||
bool vm_pages_needed; /* Are threads waiting for free pages? */ | bool vm_pages_needed; /* Are threads waiting for free pages? */ | ||||
/* Pending request for dirty page laundering. */ | |||||
static enum { | |||||
VM_LAUNDRY_IDLE, | |||||
VM_LAUNDRY_BACKGROUND, | |||||
VM_LAUNDRY_SHORTFALL | |||||
} vm_laundry_request = VM_LAUNDRY_IDLE; | |||||
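(Editorial aside: the request/acknowledge handshake around this variable, condensed from the code this change adds to vm_pageout_scan() and vm_pageout_laundry_worker() below. A sketch only; see those functions for the full logic, including the shortfall computation.)

/* Page daemon side: post a request and wake the laundry thread. */
if (vm_laundry_request == VM_LAUNDRY_IDLE && starting_page_shortage > 0) {
	vm_pagequeue_lock(pq);
	vm_laundry_request = (page_shortage > 0) ?
	    VM_LAUNDRY_SHORTFALL : VM_LAUNDRY_BACKGROUND;
	wakeup(&vm_laundry_request);
	vm_pagequeue_unlock(pq);
}

/* Laundry thread side: sleep until a request arrives, then reset it. */
vm_pagequeue_lock(pq);
if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE)
	(void)mtx_sleep(&vm_laundry_request, vm_pagequeue_lockptr(pq),
	    PVM, "launds", 0);
if (target == 0)
	vm_laundry_request = VM_LAUNDRY_IDLE;
vm_pagequeue_unlock(pq);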
#if !defined(NO_SWAPPING) | #if !defined(NO_SWAPPING) | ||||
static int vm_pageout_req_swapout; /* XXX */ | static int vm_pageout_req_swapout; /* XXX */ | ||||
static int vm_daemon_needed; | static int vm_daemon_needed; | ||||
static struct mtx vm_daemon_mtx; | static struct mtx vm_daemon_mtx; | ||||
/* Allow for use by vm_pageout before vm_daemon is initialized. */ | /* Allow for use by vm_pageout before vm_daemon is initialized. */ | ||||
MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); | MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); | ||||
#endif | #endif | ||||
static int vm_max_launder = 32; | |||||
static int vm_pageout_update_period; | static int vm_pageout_update_period; | ||||
static int defer_swap_pageouts; | |||||
static int disable_swap_pageouts; | static int disable_swap_pageouts; | ||||
static int lowmem_period = 10; | static int lowmem_period = 10; | ||||
static time_t lowmem_uptime; | static time_t lowmem_uptime; | ||||
#if defined(NO_SWAPPING) | #if defined(NO_SWAPPING) | ||||
static int vm_swap_enabled = 0; | static int vm_swap_enabled = 0; | ||||
static int vm_swap_idle_enabled = 0; | static int vm_swap_idle_enabled = 0; | ||||
#else | #else | ||||
static int vm_swap_enabled = 1; | static int vm_swap_enabled = 1; | ||||
static int vm_swap_idle_enabled = 0; | static int vm_swap_idle_enabled = 0; | ||||
#endif | #endif | ||||
static int vm_panic_on_oom = 0; | static int vm_panic_on_oom = 0; | ||||
SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, | SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, | ||||
CTLFLAG_RWTUN, &vm_panic_on_oom, 0, | CTLFLAG_RWTUN, &vm_panic_on_oom, 0, | ||||
"panic on out of memory instead of killing the largest process"); | "panic on out of memory instead of killing the largest process"); | ||||
SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, | SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, | ||||
CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0, | CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0, | ||||
"free page threshold for waking up the pageout daemon"); | "free page threshold for waking up the pageout daemon"); | ||||
SYSCTL_INT(_vm, OID_AUTO, max_launder, | |||||
CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout"); | |||||
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, | SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, | ||||
CTLFLAG_RW, &vm_pageout_update_period, 0, | CTLFLAG_RW, &vm_pageout_update_period, 0, | ||||
"Maximum active LRU update period"); | "Maximum active LRU update period"); | ||||
SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0, | SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0, | ||||
"Low memory callback period"); | "Low memory callback period"); | ||||
#if defined(NO_SWAPPING) | #if defined(NO_SWAPPING) | ||||
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, | SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, | ||||
CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); | CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); | ||||
SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, | SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, | ||||
CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); | CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); | ||||
#else | #else | ||||
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, | SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, | ||||
CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); | CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); | ||||
SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, | SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, | ||||
CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); | CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); | ||||
#endif | #endif | ||||
SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts, | |||||
CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem"); | |||||
SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, | SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, | ||||
CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); | CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); | ||||
static int pageout_lock_miss; | static int pageout_lock_miss; | ||||
SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, | SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, | ||||
CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); | CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); | ||||
SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, | SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, | ||||
CTLFLAG_RW, &vm_pageout_oom_seq, 0, | CTLFLAG_RW, &vm_pageout_oom_seq, 0, | ||||
"back-to-back calls to oom detector to start OOM"); | "back-to-back calls to oom detector to start OOM"); | ||||
static int act_scan_laundry_weight = 3; | |||||
SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RW, | |||||
&act_scan_laundry_weight, 0, | |||||
"weight given to clean vs. dirty pages in active queue scans"); | |||||
static u_int vm_background_launder_target; | |||||
SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RW, | |||||
&vm_background_launder_target, 0, | |||||
"background laundering target, in pages"); | |||||
static u_int vm_background_launder_rate = 4096; | |||||
SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RW, | |||||
&vm_background_launder_rate, 0, | |||||
"background laundering rate, in kilobytes per second"); | |||||
static u_int vm_background_launder_max = 20 * 1024; | |||||
SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RW, | |||||
&vm_background_launder_max, 0, "background laundering cap, in kilobytes"); | |||||
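(Editorial aside: the defaults above restated in more familiar units. This is pure unit conversion on the values hard-coded in this diff; nothing is read from a running kernel.)

#include <stdio.h>

int
main(void)
{
	unsigned rate_kb = 4096;	/* vm_background_launder_rate, KB/s */
	unsigned max_kb = 20 * 1024;	/* vm_background_launder_max, KB */

	printf("background laundering rate: %u MB/s\n", rate_kb / 1024);
	printf("cap per run without a page daemon wakeup: %u MB\n",
	    max_kb / 1024);
	/* At the default rate, the cap is reached after max_kb / rate_kb
	 * seconds of uninterrupted background laundering. */
	printf("seconds to reach the cap: %u\n", max_kb / rate_kb);
	return (0);
}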
#define VM_PAGEOUT_PAGE_COUNT 16 | #define VM_PAGEOUT_PAGE_COUNT 16 | ||||
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; | int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; | ||||
int vm_page_max_wired; /* XXX max # of wired pages system-wide */ | int vm_page_max_wired; /* XXX max # of wired pages system-wide */ | ||||
SYSCTL_INT(_vm, OID_AUTO, max_wired, | SYSCTL_INT(_vm, OID_AUTO, max_wired, | ||||
CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); | CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); | ||||
static u_int isqrt(u_int num); | |||||
static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); | static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); | ||||
static int vm_pageout_launder(struct vm_domain *vmd, int launder, | |||||
bool in_shortfall); | |||||
static void vm_pageout_laundry_worker(void *arg); | |||||
#if !defined(NO_SWAPPING) | #if !defined(NO_SWAPPING) | ||||
static void vm_pageout_map_deactivate_pages(vm_map_t, long); | static void vm_pageout_map_deactivate_pages(vm_map_t, long); | ||||
static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long); | static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long); | ||||
static void vm_req_vmdaemon(int req); | static void vm_req_vmdaemon(int req); | ||||
#endif | #endif | ||||
static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); | static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); | ||||
/* | /* | ||||
(134 lines elided; context: "vm_pageout_cluster(vm_page_t m)")
mc[vm_pageout_page_count] = pb = ps = m; | mc[vm_pageout_page_count] = pb = ps = m; | ||||
pageout_count = 1; | pageout_count = 1; | ||||
page_base = vm_pageout_page_count; | page_base = vm_pageout_page_count; | ||||
ib = 1; | ib = 1; | ||||
is = 1; | is = 1; | ||||
/* | /* | ||||
* We can cluster only if the page is not clean, busy, or held, and | * We can cluster only if the page is not clean, busy, or held, and | ||||
* the page is inactive. | * the page is in the laundry queue. | ||||
* | * | ||||
* During heavy mmap/modification loads the pageout | * During heavy mmap/modification loads the pageout | ||||
* daemon can really fragment the underlying file | * daemon can really fragment the underlying file | ||||
* due to flushing pages out of order and not trying to | * due to flushing pages out of order and not trying to | ||||
* align the clusters (which leaves sporadic out-of-order | * align the clusters (which leaves sporadic out-of-order | ||||
* holes). To solve this problem we do the reverse scan | * holes). To solve this problem we do the reverse scan | ||||
* first and attempt to align our cluster, then do a | * first and attempt to align our cluster, then do a | ||||
* forward scan if room remains. | * forward scan if room remains. | ||||
(9 lines elided; context: "if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) {")
break; | break; | ||||
} | } | ||||
vm_page_test_dirty(p); | vm_page_test_dirty(p); | ||||
if (p->dirty == 0) { | if (p->dirty == 0) { | ||||
ib = 0; | ib = 0; | ||||
break; | break; | ||||
} | } | ||||
vm_page_lock(p); | vm_page_lock(p); | ||||
if (p->queue != PQ_INACTIVE || | if (!vm_page_in_laundry(p) || | ||||
p->hold_count != 0) { /* may be undergoing I/O */ | p->hold_count != 0) { /* may be undergoing I/O */ | ||||
vm_page_unlock(p); | vm_page_unlock(p); | ||||
ib = 0; | ib = 0; | ||||
break; | break; | ||||
} | } | ||||
vm_page_unlock(p); | vm_page_unlock(p); | ||||
mc[--page_base] = pb = p; | mc[--page_base] = pb = p; | ||||
++pageout_count; | ++pageout_count; | ||||
(9 lines elided; context: "more:")
while (pageout_count < vm_pageout_page_count && | while (pageout_count < vm_pageout_page_count && | ||||
pindex + is < object->size) { | pindex + is < object->size) { | ||||
if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) | if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) | ||||
break; | break; | ||||
vm_page_test_dirty(p); | vm_page_test_dirty(p); | ||||
if (p->dirty == 0) | if (p->dirty == 0) | ||||
break; | break; | ||||
vm_page_lock(p); | vm_page_lock(p); | ||||
if (p->queue != PQ_INACTIVE || | if (!vm_page_in_laundry(p) || | ||||
p->hold_count != 0) { /* may be undergoing I/O */ | p->hold_count != 0) { /* may be undergoing I/O */ | ||||
vm_page_unlock(p); | vm_page_unlock(p); | ||||
break; | break; | ||||
} | } | ||||
vm_page_unlock(p); | vm_page_unlock(p); | ||||
mc[page_base + pageout_count] = ps = p; | mc[page_base + pageout_count] = ps = p; | ||||
++pageout_count; | ++pageout_count; | ||||
++is; | ++is; | ||||
(63 lines elided; context: "vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,")
for (i = 0; i < count; i++) { | for (i = 0; i < count; i++) { | ||||
vm_page_t mt = mc[i]; | vm_page_t mt = mc[i]; | ||||
KASSERT(pageout_status[i] == VM_PAGER_PEND || | KASSERT(pageout_status[i] == VM_PAGER_PEND || | ||||
!pmap_page_is_write_mapped(mt), | !pmap_page_is_write_mapped(mt), | ||||
("vm_pageout_flush: page %p is not write protected", mt)); | ("vm_pageout_flush: page %p is not write protected", mt)); | ||||
switch (pageout_status[i]) { | switch (pageout_status[i]) { | ||||
case VM_PAGER_OK: | case VM_PAGER_OK: | ||||
vm_page_lock(mt); | |||||
if (vm_page_in_laundry(mt)) | |||||
vm_page_deactivate_noreuse(mt); | |||||
vm_page_unlock(mt); | |||||
/* FALLTHROUGH */ | |||||
case VM_PAGER_PEND: | case VM_PAGER_PEND: | ||||
numpagedout++; | numpagedout++; | ||||
break; | break; | ||||
case VM_PAGER_BAD: | case VM_PAGER_BAD: | ||||
/* | /* | ||||
* Page outside of range of object. Right now we | * The page is outside the object's range. We pretend | ||||
* essentially lose the changes by pretending it | * that the page out worked and clean the page, so the | ||||
* worked. | * changes will be lost if the page is reclaimed by | ||||
* the page daemon. | |||||
*/ | */ | ||||
vm_page_undirty(mt); | vm_page_undirty(mt); | ||||
vm_page_lock(mt); | |||||
if (vm_page_in_laundry(mt)) | |||||
vm_page_deactivate_noreuse(mt); | |||||
vm_page_unlock(mt); | |||||
break; | break; | ||||
case VM_PAGER_ERROR: | case VM_PAGER_ERROR: | ||||
case VM_PAGER_FAIL: | case VM_PAGER_FAIL: | ||||
/* | /* | ||||
* If page couldn't be paged out, then reactivate the | * If the page couldn't be paged out, then reactivate | ||||
* page so it doesn't clog the inactive list. (We | * it so that it doesn't clog the laundry and inactive | ||||
* will try paging out it again later). | * queues. (We will try paging it out again later). | ||||
*/ | */ | ||||
vm_page_lock(mt); | vm_page_lock(mt); | ||||
vm_page_activate(mt); | vm_page_activate(mt); | ||||
vm_page_unlock(mt); | vm_page_unlock(mt); | ||||
if (eio != NULL && i >= mreq && i - mreq < runlen) | if (eio != NULL && i >= mreq && i - mreq < runlen) | ||||
*eio = TRUE; | *eio = TRUE; | ||||
break; | break; | ||||
case VM_PAGER_AGAIN: | case VM_PAGER_AGAIN: | ||||
(65 lines elided; context: "TAILQ_FOREACH(p, &object->memq, listq) {")
continue; | continue; | ||||
} | } | ||||
act_delta = pmap_ts_referenced(p); | act_delta = pmap_ts_referenced(p); | ||||
if ((p->aflags & PGA_REFERENCED) != 0) { | if ((p->aflags & PGA_REFERENCED) != 0) { | ||||
if (act_delta == 0) | if (act_delta == 0) | ||||
act_delta = 1; | act_delta = 1; | ||||
vm_page_aflag_clear(p, PGA_REFERENCED); | vm_page_aflag_clear(p, PGA_REFERENCED); | ||||
} | } | ||||
if (p->queue != PQ_ACTIVE && act_delta != 0) { | if (!vm_page_active(p) && act_delta != 0) { | ||||
vm_page_activate(p); | vm_page_activate(p); | ||||
p->act_count += act_delta; | p->act_count += act_delta; | ||||
} else if (p->queue == PQ_ACTIVE) { | } else if (vm_page_active(p)) { | ||||
if (act_delta == 0) { | if (act_delta == 0) { | ||||
p->act_count -= min(p->act_count, | p->act_count -= min(p->act_count, | ||||
ACT_DECLINE); | ACT_DECLINE); | ||||
if (!remove_mode && p->act_count == 0) { | if (!remove_mode && p->act_count == 0) { | ||||
pmap_remove_all(p); | pmap_remove_all(p); | ||||
vm_page_deactivate(p); | vm_page_deactivate(p); | ||||
} else | } else | ||||
vm_page_requeue(p); | vm_page_requeue(p); | ||||
} else { | } else { | ||||
vm_page_activate(p); | vm_page_activate(p); | ||||
if (p->act_count < ACT_MAX - | if (p->act_count < ACT_MAX - | ||||
ACT_ADVANCE) | ACT_ADVANCE) | ||||
p->act_count += ACT_ADVANCE; | p->act_count += ACT_ADVANCE; | ||||
vm_page_requeue(p); | vm_page_requeue(p); | ||||
} | } | ||||
} else if (p->queue == PQ_INACTIVE) | } else if (vm_page_inactive(p)) | ||||
pmap_remove_all(p); | pmap_remove_all(p); | ||||
vm_page_unlock(p); | vm_page_unlock(p); | ||||
} | } | ||||
if ((backing_object = object->backing_object) == NULL) | if ((backing_object = object->backing_object) == NULL) | ||||
goto unlock_return; | goto unlock_return; | ||||
VM_OBJECT_RLOCK(backing_object); | VM_OBJECT_RLOCK(backing_object); | ||||
if (object != first_object) | if (object != first_object) | ||||
VM_OBJECT_RUNLOCK(object); | VM_OBJECT_RUNLOCK(object); | ||||
(86 lines elided)
* Attempt to acquire all of the necessary locks to launder a page and | * Attempt to acquire all of the necessary locks to launder a page and | ||||
* then call through the clustering layer to PUTPAGES. Wait a short | * then call through the clustering layer to PUTPAGES. Wait a short | ||||
* time for a vnode lock. | * time for a vnode lock. | ||||
* | * | ||||
* Requires the page and object lock on entry, releases both before return. | * Requires the page and object lock on entry, releases both before return. | ||||
* Returns 0 on success and an errno otherwise. | * Returns 0 on success and an errno otherwise. | ||||
*/ | */ | ||||
static int | static int | ||||
vm_pageout_clean(vm_page_t m) | vm_pageout_clean(vm_page_t m, int *numpagedout) | ||||
{ | { | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct mount *mp; | struct mount *mp; | ||||
vm_object_t object; | vm_object_t object; | ||||
vm_pindex_t pindex; | vm_pindex_t pindex; | ||||
int error, lockmode; | int error, lockmode; | ||||
vm_page_assert_locked(m); | vm_page_assert_locked(m); | ||||
(41 lines elided; context: "if (object->type == OBJT_VNODE) {")
/* | /* | ||||
* While the object and page were unlocked, the page | * While the object and page were unlocked, the page | ||||
* may have been: | * may have been: | ||||
* (1) moved to a different queue, | * (1) moved to a different queue, | ||||
* (2) reallocated to a different object, | * (2) reallocated to a different object, | ||||
* (3) reallocated to a different offset, or | * (3) reallocated to a different offset, or | ||||
* (4) cleaned. | * (4) cleaned. | ||||
*/ | */ | ||||
if (m->queue != PQ_INACTIVE || m->object != object || | if (!vm_page_in_laundry(m) || m->object != object || | ||||
m->pindex != pindex || m->dirty == 0) { | m->pindex != pindex || m->dirty == 0) { | ||||
vm_page_unlock(m); | vm_page_unlock(m); | ||||
error = ENXIO; | error = ENXIO; | ||||
goto unlock_all; | goto unlock_all; | ||||
} | } | ||||
/* | /* | ||||
* The page may have been busied or held while the object | * The page may have been busied or held while the object | ||||
* and page locks were released. | * and page locks were released. | ||||
*/ | */ | ||||
if (vm_page_busied(m) || m->hold_count != 0) { | if (vm_page_busied(m) || m->hold_count != 0) { | ||||
vm_page_unlock(m); | vm_page_unlock(m); | ||||
error = EBUSY; | error = EBUSY; | ||||
goto unlock_all; | goto unlock_all; | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* If a page is dirty, then it is either being washed | * If a page is dirty, then it is either being washed | ||||
* (but not yet cleaned) or it is still in the | * (but not yet cleaned) or it is still in the | ||||
* laundry. If it is still in the laundry, then we | * laundry. If it is still in the laundry, then we | ||||
* start the cleaning operation. | * start the cleaning operation. | ||||
*/ | */ | ||||
if (vm_pageout_cluster(m) == 0) | if ((*numpagedout = vm_pageout_cluster(m)) == 0) | ||||
error = EIO; | error = EIO; | ||||
unlock_all: | unlock_all: | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
unlock_mp: | unlock_mp: | ||||
vm_page_lock_assert(m, MA_NOTOWNED); | vm_page_lock_assert(m, MA_NOTOWNED); | ||||
if (mp != NULL) { | if (mp != NULL) { | ||||
if (vp != NULL) | if (vp != NULL) | ||||
vput(vp); | vput(vp); | ||||
vm_object_deallocate(object); | vm_object_deallocate(object); | ||||
vn_finished_write(mp); | vn_finished_write(mp); | ||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
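(Editorial aside: how a caller consumes the new numpagedout out-parameter; this mirrors the call site this change adds in vm_pageout_launder() below.)

int error, numpagedout;

error = vm_pageout_clean(m, &numpagedout);
if (error == 0) {
	/* Clustering may have laundered more than one page. */
	launder -= numpagedout;
} else if (error == EDEADLK) {
	/* The vnode lock wasn't acquired in time; record the miss. */
	pageout_lock_miss++;
	vnodes_skipped++;
}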
/* | /* | ||||
* Attempt to launder the specified number of pages. | |||||
* | |||||
* Returns the number of pages successfully laundered. | |||||
*/ | |||||
static int | |||||
vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) | |||||
{ | |||||
struct vm_pagequeue *pq; | |||||
vm_object_t object; | |||||
vm_page_t m, next; | |||||
int act_delta, error, maxscan, numpagedout, starting_target; | |||||
int vnodes_skipped; | |||||
bool pageout_ok, queue_locked; | |||||
starting_target = launder; | |||||
vnodes_skipped = 0; | |||||
/* | |||||
* Scan the laundry queue for pages eligible to be laundered. We stop | |||||
* once the target number of dirty pages have been laundered, or once | |||||
* we've reached the end of the queue. A single iteration of this loop | |||||
* may cause more than one page to be laundered because of clustering. | |||||
* | |||||
* maxscan ensures that we don't re-examine requeued pages. Any | |||||
* additional pages written as part of a cluster are subtracted from | |||||
* maxscan since they must be taken from the laundry queue. | |||||
*/ | |||||
pq = &vmd->vmd_pagequeues[PQ_LAUNDRY]; | |||||
maxscan = pq->pq_cnt; | |||||
vm_pagequeue_lock(pq); | |||||
queue_locked = true; | |||||
for (m = TAILQ_FIRST(&pq->pq_pl); | |||||
m != NULL && maxscan-- > 0 && launder > 0; | |||||
m = next) { | |||||
vm_pagequeue_assert_locked(pq); | |||||
KASSERT(queue_locked, ("unlocked laundry queue")); | |||||
KASSERT(vm_page_in_laundry(m), | |||||
("page %p has an inconsistent queue", m)); | |||||
next = TAILQ_NEXT(m, plinks.q); | |||||
if ((m->flags & PG_MARKER) != 0) | |||||
continue; | |||||
KASSERT((m->flags & PG_FICTITIOUS) == 0, | |||||
("PG_FICTITIOUS page %p cannot be in laundry queue", m)); | |||||
KASSERT((m->oflags & VPO_UNMANAGED) == 0, | |||||
("VPO_UNMANAGED page %p cannot be in laundry queue", m)); | |||||
if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) { | |||||
vm_page_unlock(m); | |||||
continue; | |||||
} | |||||
object = m->object; | |||||
if ((!VM_OBJECT_TRYWLOCK(object) && | |||||
(!vm_pageout_fallback_object_lock(m, &next) || | |||||
m->hold_count != 0)) || vm_page_busied(m)) { | |||||
VM_OBJECT_WUNLOCK(object); | |||||
vm_page_unlock(m); | |||||
continue; | |||||
} | |||||
/* | |||||
* Unlock the laundry queue, invalidating the 'next' pointer. | |||||
* Use a marker to remember our place in the laundry queue. | |||||
*/ | |||||
TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_laundry_marker, | |||||
plinks.q); | |||||
vm_pagequeue_unlock(pq); | |||||
queue_locked = false; | |||||
/* | |||||
* Invalid pages can be easily freed. They cannot be | |||||
* mapped; vm_page_free() asserts this. | |||||
*/ | |||||
if (m->valid == 0) | |||||
goto free_page; | |||||
/* | |||||
* If the page has been referenced and the object is not dead, | |||||
* reactivate or requeue the page depending on whether the | |||||
* object is mapped. | |||||
*/ | |||||
if ((m->aflags & PGA_REFERENCED) != 0) { | |||||
vm_page_aflag_clear(m, PGA_REFERENCED); | |||||
act_delta = 1; | |||||
} else | |||||
act_delta = 0; | |||||
if (object->ref_count != 0) | |||||
act_delta += pmap_ts_referenced(m); | |||||
else { | |||||
KASSERT(!pmap_page_is_mapped(m), | |||||
("page %p is mapped", m)); | |||||
} | |||||
if (act_delta != 0) { | |||||
if (object->ref_count != 0) { | |||||
PCPU_INC(cnt.v_reactivated); | |||||
vm_page_activate(m); | |||||
/* | |||||
* Increase the activation count if the page | |||||
* was referenced while in the laundry queue. | |||||
* This makes it less likely that the page will | |||||
* be returned prematurely to the inactive | |||||
* queue. | |||||
*/ | |||||
m->act_count += act_delta + ACT_ADVANCE; | |||||
/* | |||||
* If this was a background laundering, count | |||||
* activated pages towards our target. The | |||||
* purpose of background laundering is to ensure | |||||
* that pages are eventually cycled through the | |||||
* laundry queue, and an activation is a valid | |||||
* way out. | |||||
*/ | |||||
if (!in_shortfall) | |||||
launder--; | |||||
goto drop_page; | |||||
} else if ((object->flags & OBJ_DEAD) == 0) | |||||
goto requeue_page; | |||||
} | |||||
/* | |||||
* If the page appears to be clean at the machine-independent | |||||
* layer, then remove all of its mappings from the pmap in | |||||
* anticipation of freeing it. If, however, any of the page's | |||||
* mappings allow write access, then the page may still be | |||||
* modified until the last of those mappings are removed. | |||||
*/ | |||||
if (object->ref_count != 0) { | |||||
vm_page_test_dirty(m); | |||||
if (m->dirty == 0) | |||||
pmap_remove_all(m); | |||||
} | |||||
/* | |||||
* Clean pages are freed, and dirty pages are paged out unless | |||||
* they belong to a dead object. Requeueing dirty pages from | |||||
* dead objects is pointless, as they are being paged out and | |||||
* freed by the thread that destroyed the object. | |||||
*/ | |||||
if (m->dirty == 0) { | |||||
free_page: | |||||
vm_page_free(m); | |||||
PCPU_INC(cnt.v_dfree); | |||||
} else if ((object->flags & OBJ_DEAD) == 0) { | |||||
if (object->type != OBJT_SWAP && | |||||
object->type != OBJT_DEFAULT) | |||||
pageout_ok = true; | |||||
else if (disable_swap_pageouts) | |||||
pageout_ok = false; | |||||
else | |||||
pageout_ok = true; | |||||
if (!pageout_ok) { | |||||
requeue_page: | |||||
vm_pagequeue_lock(pq); | |||||
queue_locked = true; | |||||
vm_page_requeue_locked(m); | |||||
goto drop_page; | |||||
} | |||||
/* | |||||
* Form a cluster with adjacent, dirty pages from the | |||||
* same object, and page out that entire cluster. | |||||
* | |||||
* The adjacent, dirty pages must also be in the | |||||
* laundry. However, their mappings are not checked | |||||
* for new references. Consequently, a recently | |||||
* referenced page may be paged out. However, that | |||||
* page will not be prematurely reclaimed. After page | |||||
* out, the page will be placed in the inactive queue, | |||||
* where any new references will be detected and the | |||||
* page reactivated. | |||||
*/ | |||||
error = vm_pageout_clean(m, &numpagedout); | |||||
if (error == 0) { | |||||
launder -= numpagedout; | |||||
maxscan -= numpagedout - 1; | |||||
} else if (error == EDEADLK) { | |||||
pageout_lock_miss++; | |||||
vnodes_skipped++; | |||||
} | |||||
goto relock_queue; | |||||
} | |||||
drop_page: | |||||
vm_page_unlock(m); | |||||
VM_OBJECT_WUNLOCK(object); | |||||
relock_queue: | |||||
if (!queue_locked) { | |||||
vm_pagequeue_lock(pq); | |||||
queue_locked = true; | |||||
} | |||||
next = TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q); | |||||
TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q); | |||||
} | |||||
vm_pagequeue_unlock(pq); | |||||
/* | |||||
* Wakeup the sync daemon if we skipped a vnode in a writeable object | |||||
* and we didn't launder enough pages. | |||||
*/ | |||||
if (vnodes_skipped > 0 && launder > 0) | |||||
(void)speedup_syncer(); | |||||
return (starting_target - launder); | |||||
} | |||||
/* | |||||
* Compute the integer square root. | |||||
*/ | |||||
static u_int | |||||
isqrt(u_int num) | |||||
{ | |||||
u_int bit, root, tmp; | |||||
bit = 1u << ((NBBY * sizeof(u_int)) - 2); | |||||
while (bit > num) | |||||
bit >>= 2; | |||||
root = 0; | |||||
while (bit != 0) { | |||||
tmp = root + bit; | |||||
root >>= 1; | |||||
if (num >= tmp) { | |||||
num -= tmp; | |||||
root += bit; | |||||
} | |||||
bit >>= 2; | |||||
} | |||||
return (root); | |||||
} | |||||
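(Editorial aside: a standalone demonstration, with invented queue sizes, of how isqrt() paces the background laundering trigger used in vm_pageout_laundry_worker() below. Laundering starts once ndirty * isqrt(wakeups since the last laundering) reaches nclean, so a large clean reserve tolerates proportionally more wakeups before dirty pages are washed.)

#include <stdio.h>

/* Copy of the kernel isqrt() above, with NBBY spelled as 8. */
static unsigned
isqrt(unsigned num)
{
	unsigned bit, root, tmp;

	bit = 1u << (sizeof(unsigned) * 8 - 2);
	while (bit > num)
		bit >>= 2;
	root = 0;
	while (bit != 0) {
		tmp = root + bit;
		root >>= 1;
		if (num >= tmp) {
			num -= tmp;
			root += bit;
		}
		bit >>= 2;
	}
	return (root);
}

int
main(void)
{
	/* Invented counts: 1M clean pages vs. 128K dirty pages. */
	unsigned long long nclean = 1048576, ndirty = 131072;
	unsigned wakeups;

	/* The trigger fires once ndirty * isqrt(wakeups) >= nclean. */
	for (wakeups = 1; ndirty * isqrt(wakeups) < nclean; wakeups++)
		;
	printf("laundering would begin after %u wakeups\n", wakeups);
	/* Prints 64: isqrt(64) == 8 and 131072 * 8 == 1048576. */
	return (0);
}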
/* | |||||
* Perform the work of the laundry thread: periodically wake up and determine | |||||
* whether any pages need to be laundered. If so, determine the number of pages | |||||
* that need to be laundered, and launder them. | |||||
*/ | |||||
static void | |||||
vm_pageout_laundry_worker(void *arg) | |||||
{ | |||||
struct vm_domain *domain; | |||||
struct vm_pagequeue *pq; | |||||
uint64_t nclean, ndirty; | |||||
u_int last_launder, wakeups; | |||||
int domidx, last_target, launder, shortfall, shortfall_cycle, target; | |||||
bool in_shortfall; | |||||
domidx = (uintptr_t)arg; | |||||
domain = &vm_dom[domidx]; | |||||
pq = &domain->vmd_pagequeues[PQ_LAUNDRY]; | |||||
KASSERT(domain->vmd_segs != 0, ("domain without segments")); | |||||
vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY); | |||||
shortfall = 0; | |||||
in_shortfall = false; | |||||
shortfall_cycle = 0; | |||||
target = 0; | |||||
last_launder = 0; | |||||
/* | |||||
* The pageout laundry worker is never done, so loop forever. | |||||
*/ | |||||
for (;;) { | |||||
KASSERT(target >= 0, ("negative target %d", target)); | |||||
KASSERT(shortfall_cycle >= 0, | |||||
("negative cycle %d", shortfall_cycle)); | |||||
launder = 0; | |||||
wakeups = VM_METER_PCPU_CNT(v_pdwakeups); | |||||
/* | |||||
* First determine whether we need to launder pages to meet a | |||||
* shortage of free pages. | |||||
*/ | |||||
if (shortfall > 0) { | |||||
in_shortfall = true; | |||||
shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE; | |||||
target = shortfall; | |||||
} else if (!in_shortfall) | |||||
goto trybackground; | |||||
else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) { | |||||
/* | |||||
* We recently entered shortfall and began laundering | |||||
* pages. If we have completed that laundering run | |||||
* (and we are no longer in shortfall) or we have met | |||||
* our laundry target through other activity, then we | |||||
* can stop laundering pages. | |||||
*/ | |||||
in_shortfall = false; | |||||
target = 0; | |||||
goto trybackground; | |||||
} | |||||
last_launder = wakeups; | |||||
launder = target / shortfall_cycle--; | |||||
goto dolaundry; | |||||
/* | |||||
* There's no immediate need to launder any pages; see if we | |||||
* meet the conditions to perform background laundering: | |||||
* | |||||
* 1. The ratio of dirty to clean inactive pages exceeds the | |||||
* background laundering threshold and the pagedaemon has | |||||
* been woken up to reclaim pages since our last | |||||
* laundering, or | |||||
* 2. we haven't yet reached the target of the current | |||||
* background laundering run. | |||||
* | |||||
* The background laundering threshold is not a constant. | |||||
* Instead, it is a slowly growing function of the number of | |||||
* page daemon wakeups since the last laundering. Thus, as the | |||||
* ratio of dirty to clean inactive pages grows, the amount of | |||||
* memory pressure required to trigger laundering decreases. | |||||
*/ | |||||
trybackground: | |||||
nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count; | |||||
ndirty = vm_cnt.v_laundry_count; | |||||
if (target == 0 && wakeups != last_launder && | |||||
ndirty * isqrt(wakeups - last_launder) >= nclean) { | |||||
target = vm_background_launder_target; | |||||
} | |||||
/* | |||||
* We have a non-zero background laundering target. If we've | |||||
* laundered up to our maximum without observing a page daemon | |||||
* wakeup, just stop. This is a safety belt that ensures we | |||||
* don't launder an excessive amount if memory pressure is low | |||||
* and the ratio of dirty to clean pages is large. Otherwise, | |||||
* proceed at the background laundering rate. | |||||
*/ | |||||
if (target > 0) { | |||||
if (wakeups != last_launder) { | |||||
last_launder = wakeups; | |||||
last_target = target; | |||||
} else if (last_target - target >= | |||||
vm_background_launder_max * PAGE_SIZE / 1024) { | |||||
target = 0; | |||||
} | |||||
launder = vm_background_launder_rate * PAGE_SIZE / 1024; | |||||
launder /= VM_LAUNDER_RATE; | |||||
if (launder > target) | |||||
launder = target; | |||||
} | |||||
dolaundry: | |||||
if (launder > 0) { | |||||
/* | |||||
* Because of I/O clustering, the number of laundered | |||||
* pages could exceed "target" by the maximum size of | |||||
* a cluster minus one. | |||||
*/ | |||||
target -= min(vm_pageout_launder(domain, launder, | |||||
in_shortfall), target); | |||||
pause("laundp", hz / VM_LAUNDER_RATE); | |||||
} | |||||
/* | |||||
* If we're not currently laundering pages and the page daemon | |||||
* hasn't posted a new request, sleep until the page daemon | |||||
* kicks us. | |||||
*/ | |||||
vm_pagequeue_lock(pq); | |||||
if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE) | |||||
(void)mtx_sleep(&vm_laundry_request, | |||||
vm_pagequeue_lockptr(pq), PVM, "launds", 0); | |||||
/* | |||||
* If the pagedaemon has indicated that it's in shortfall, start | |||||
* a shortfall laundering unless we're already in the middle of | |||||
* one. This may preempt a background laundering. | |||||
*/ | |||||
if (vm_laundry_request == VM_LAUNDRY_SHORTFALL && | |||||
(!in_shortfall || shortfall_cycle == 0)) { | |||||
shortfall = vm_laundry_target() + vm_pageout_deficit; | |||||
target = 0; | |||||
} else | |||||
shortfall = 0; | |||||
if (target == 0) | |||||
vm_laundry_request = VM_LAUNDRY_IDLE; | |||||
vm_pagequeue_unlock(pq); | |||||
} | |||||
} | |||||
/* | |||||
* vm_pageout_scan does the dirty work for the pageout daemon. | * vm_pageout_scan does the dirty work for the pageout daemon. | ||||
* | * | ||||
* pass 0 - Update active LRU/deactivate pages | * pass == 0: Update active LRU/deactivate pages | ||||
* pass 1 - Free inactive pages | * pass >= 1: Free inactive pages | ||||
* pass 2 - Launder dirty pages | |||||
* | * | ||||
* Returns true if pass was zero or enough pages were freed by the inactive | * Returns true if pass was zero or enough pages were freed by the inactive | ||||
* queue scan to meet the target. | * queue scan to meet the target. | ||||
*/ | */ | ||||
static bool | static bool | ||||
vm_pageout_scan(struct vm_domain *vmd, int pass) | vm_pageout_scan(struct vm_domain *vmd, int pass) | ||||
{ | { | ||||
vm_page_t m, next; | vm_page_t m, next; | ||||
struct vm_pagequeue *pq; | struct vm_pagequeue *pq; | ||||
vm_object_t object; | vm_object_t object; | ||||
long min_scan; | long min_scan; | ||||
int act_delta, addl_page_shortage, deficit, error, inactq_shortage; | int act_delta, addl_page_shortage, deficit, inactq_shortage, maxscan; | ||||
int maxlaunder, maxscan, page_shortage, scan_tick, scanned; | int page_shortage, scan_tick, scanned, starting_page_shortage; | ||||
int starting_page_shortage, vnodes_skipped; | boolean_t queue_locked; | ||||
boolean_t pageout_ok, queue_locked; | |||||
/* | /* | ||||
* If we need to reclaim memory ask kernel caches to return | * If we need to reclaim memory ask kernel caches to return | ||||
* some. We rate limit to avoid thrashing. | * some. We rate limit to avoid thrashing. | ||||
*/ | */ | ||||
if (vmd == &vm_dom[0] && pass > 0 && | if (vmd == &vm_dom[0] && pass > 0 && | ||||
(time_uptime - lowmem_uptime) >= lowmem_period) { | (time_uptime - lowmem_uptime) >= lowmem_period) { | ||||
/* | /* | ||||
(25 lines elided; context: "vm_pageout_scan(struct vm_domain *vmd, int pass)")
if (pass > 0) { | if (pass > 0) { | ||||
deficit = atomic_readandclear_int(&vm_pageout_deficit); | deficit = atomic_readandclear_int(&vm_pageout_deficit); | ||||
page_shortage = vm_paging_target() + deficit; | page_shortage = vm_paging_target() + deficit; | ||||
} else | } else | ||||
page_shortage = deficit = 0; | page_shortage = deficit = 0; | ||||
starting_page_shortage = page_shortage; | starting_page_shortage = page_shortage; | ||||
/* | /* | ||||
* maxlaunder limits the number of dirty pages we flush per scan. | |||||
* For most systems a smaller value (16 or 32) is more robust under | |||||
* extreme memory and disk pressure because any unnecessary writes | |||||
* to disk can result in extreme performance degredation. However, | |||||
* systems with excessive dirty pages (especially when MAP_NOSYNC is | |||||
* used) will die horribly with limited laundering. If the pageout | |||||
* daemon cannot clean enough pages in the first pass, we let it go | |||||
* all out in succeeding passes. | |||||
*/ | |||||
if ((maxlaunder = vm_max_launder) <= 1) | |||||
maxlaunder = 1; | |||||
if (pass > 1) | |||||
maxlaunder = 10000; | |||||
vnodes_skipped = 0; | |||||
/* | |||||
* Start scanning the inactive queue for pages that we can free. The | * Start scanning the inactive queue for pages that we can free. The | ||||
* scan will stop when we reach the target or we have scanned the | * scan will stop when we reach the target or we have scanned the | ||||
* entire queue. (Note that m->act_count is not used to make | * entire queue. (Note that m->act_count is not used to make | ||||
* decisions for the inactive queue, only for the active queue.) | * decisions for the inactive queue, only for the active queue.) | ||||
*/ | */ | ||||
pq = &vmd->vmd_pagequeues[PQ_INACTIVE]; | pq = &vmd->vmd_pagequeues[PQ_INACTIVE]; | ||||
maxscan = pq->pq_cnt; | maxscan = pq->pq_cnt; | ||||
vm_pagequeue_lock(pq); | vm_pagequeue_lock(pq); | ||||
queue_locked = TRUE; | queue_locked = TRUE; | ||||
for (m = TAILQ_FIRST(&pq->pq_pl); | for (m = TAILQ_FIRST(&pq->pq_pl); | ||||
m != NULL && maxscan-- > 0 && page_shortage > 0; | m != NULL && maxscan-- > 0 && page_shortage > 0; | ||||
m = next) { | m = next) { | ||||
vm_pagequeue_assert_locked(pq); | vm_pagequeue_assert_locked(pq); | ||||
KASSERT(queue_locked, ("unlocked inactive queue")); | KASSERT(queue_locked, ("unlocked inactive queue")); | ||||
KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m)); | KASSERT(vm_page_inactive(m), ("Inactive queue %p", m)); | ||||
PCPU_INC(cnt.v_pdpages); | PCPU_INC(cnt.v_pdpages); | ||||
next = TAILQ_NEXT(m, plinks.q); | next = TAILQ_NEXT(m, plinks.q); | ||||
/* | /* | ||||
* skip marker pages | * skip marker pages | ||||
*/ | */ | ||||
if (m->flags & PG_MARKER) | if (m->flags & PG_MARKER) | ||||
(46 lines elided; context: "unlock_object:")
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
unlock_page: | unlock_page: | ||||
vm_page_unlock(m); | vm_page_unlock(m); | ||||
continue; | continue; | ||||
} | } | ||||
KASSERT(m->hold_count == 0, ("Held page %p", m)); | KASSERT(m->hold_count == 0, ("Held page %p", m)); | ||||
/* | /* | ||||
* We unlock the inactive page queue, invalidating the | * Dequeue the inactive page and unlock the inactive page | ||||
* 'next' pointer. Use our marker to remember our | * queue, invalidating the 'next' pointer. Dequeueing the | ||||
* place. | * page here avoids a later reacquisition (and release) of | ||||
* the inactive page queue lock when vm_page_activate(), | |||||
* vm_page_free(), or vm_page_launder() is called. Use a | |||||
* marker to remember our place in the inactive queue. | |||||
*/ | */ | ||||
TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q); | TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q); | ||||
vm_page_dequeue_locked(m); | |||||
vm_pagequeue_unlock(pq); | vm_pagequeue_unlock(pq); | ||||
queue_locked = FALSE; | queue_locked = FALSE; | ||||
/* | /* | ||||
* Invalid pages can be easily freed. They cannot be | * Invalid pages can be easily freed. They cannot be | ||||
* mapped, vm_page_free() asserts this. | * mapped, vm_page_free() asserts this. | ||||
*/ | */ | ||||
if (m->valid == 0) | if (m->valid == 0) | ||||
(12 lines elided; context: "unlock_page:")
if (object->ref_count != 0) { | if (object->ref_count != 0) { | ||||
act_delta += pmap_ts_referenced(m); | act_delta += pmap_ts_referenced(m); | ||||
} else { | } else { | ||||
KASSERT(!pmap_page_is_mapped(m), | KASSERT(!pmap_page_is_mapped(m), | ||||
("vm_pageout_scan: page %p is mapped", m)); | ("vm_pageout_scan: page %p is mapped", m)); | ||||
} | } | ||||
if (act_delta != 0) { | if (act_delta != 0) { | ||||
if (object->ref_count != 0) { | if (object->ref_count != 0) { | ||||
PCPU_INC(cnt.v_reactivated); | |||||
vm_page_activate(m); | vm_page_activate(m); | ||||
/* | /* | ||||
* Increase the activation count if the page | * Increase the activation count if the page | ||||
* was referenced while in the inactive queue. | * was referenced while in the inactive queue. | ||||
* This makes it less likely that the page will | * This makes it less likely that the page will | ||||
* be returned prematurely to the inactive | * be returned prematurely to the inactive | ||||
* queue. | * queue. | ||||
*/ | */ | ||||
m->act_count += act_delta + ACT_ADVANCE; | m->act_count += act_delta + ACT_ADVANCE; | ||||
goto drop_page; | goto drop_page; | ||||
} else if ((object->flags & OBJ_DEAD) == 0) | } else if ((object->flags & OBJ_DEAD) == 0) { | ||||
goto requeue_page; | vm_pagequeue_lock(pq); | ||||
queue_locked = TRUE; | |||||
m->queue = PQ_INACTIVE; | |||||
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); | |||||
vm_pagequeue_cnt_inc(pq); | |||||
goto drop_page; | |||||
} | } | ||||
} | |||||
/* | /* | ||||
* If the page appears to be clean at the machine-independent | * If the page appears to be clean at the machine-independent | ||||
* layer, then remove all of its mappings from the pmap in | * layer, then remove all of its mappings from the pmap in | ||||
* anticipation of freeing it. If, however, any of the page's | * anticipation of freeing it. If, however, any of the page's | ||||
* mappings allow write access, then the page may still be | * mappings allow write access, then the page may still be | ||||
* modified until the last of those mappings are removed. | * modified until the last of those mappings are removed. | ||||
*/ | */ | ||||
if (object->ref_count != 0) { | if (object->ref_count != 0) { | ||||
vm_page_test_dirty(m); | vm_page_test_dirty(m); | ||||
if (m->dirty == 0) | if (m->dirty == 0) | ||||
pmap_remove_all(m); | pmap_remove_all(m); | ||||
} | } | ||||
if (m->dirty == 0) { | |||||
/* | /* | ||||
* Clean pages can be freed. | * Clean pages can be freed, but dirty pages must be sent back | ||||
* to the laundry, unless they belong to a dead object. | |||||
* Requeueing dirty pages from dead objects is pointless, as | |||||
* they are being paged out and freed by the thread that | |||||
* destroyed the object. | |||||
*/ | */ | ||||
if (m->dirty == 0) { | |||||
free_page: | free_page: | ||||
vm_page_free(m); | vm_page_free(m); | ||||
PCPU_INC(cnt.v_dfree); | PCPU_INC(cnt.v_dfree); | ||||
--page_shortage; | --page_shortage; | ||||
} else if ((object->flags & OBJ_DEAD) != 0) { | } else if ((object->flags & OBJ_DEAD) == 0) | ||||
/* | vm_page_launder(m); | ||||
* Leave dirty pages from dead objects at the front of | |||||
* the queue. They are being paged out and freed by | |||||
* the thread that destroyed the object. They will | |||||
* leave the queue shortly after the scan finishes, so | |||||
* they should be discounted from the inactive count. | |||||
*/ | |||||
addl_page_shortage++; | |||||
} else if ((m->flags & PG_WINATCFLS) == 0 && pass < 2) { | |||||
/* | |||||
* Dirty pages need to be paged out, but flushing | |||||
* a page is extremely expensive versus freeing | |||||
* a clean page. Rather then artificially limiting | |||||
* the number of pages we can flush, we instead give | |||||
* dirty pages extra priority on the inactive queue | |||||
* by forcing them to be cycled through the queue | |||||
* twice before being flushed, after which the | |||||
* (now clean) page will cycle through once more | |||||
* before being freed. This significantly extends | |||||
* the thrash point for a heavily loaded machine. | |||||
*/ | |||||
m->flags |= PG_WINATCFLS; | |||||
requeue_page: | |||||
vm_pagequeue_lock(pq); | |||||
queue_locked = TRUE; | |||||
vm_page_requeue_locked(m); | |||||
} else if (maxlaunder > 0) { | |||||
/* | |||||
* We always want to try to flush some dirty pages if | |||||
* we encounter them, to keep the system stable. | |||||
* Normally this number is small, but under extreme | |||||
* pressure where there are insufficient clean pages | |||||
* on the inactive queue, we may have to go all out. | |||||
*/ | |||||
if (object->type != OBJT_SWAP && | |||||
object->type != OBJT_DEFAULT) | |||||
pageout_ok = TRUE; | |||||
else if (disable_swap_pageouts) | |||||
pageout_ok = FALSE; | |||||
else if (defer_swap_pageouts) | |||||
pageout_ok = vm_page_count_min(); | |||||
else | |||||
pageout_ok = TRUE; | |||||
if (!pageout_ok) | |||||
goto requeue_page; | |||||
error = vm_pageout_clean(m); | |||||
/* | |||||
* Decrement page_shortage on success to account for | |||||
* the (future) cleaned page. Otherwise we could wind | |||||
* up laundering or cleaning too many pages. | |||||
*/ | |||||
if (error == 0) { | |||||
page_shortage--; | |||||
maxlaunder--; | |||||
} else if (error == EDEADLK) { | |||||
pageout_lock_miss++; | |||||
vnodes_skipped++; | |||||
} else if (error == EBUSY) { | |||||
addl_page_shortage++; | |||||
} | |||||
vm_page_lock_assert(m, MA_NOTOWNED); | |||||
goto relock_queue; | |||||
} | |||||
drop_page: | drop_page: | ||||
vm_page_unlock(m); | vm_page_unlock(m); | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
relock_queue: | |||||
if (!queue_locked) { | if (!queue_locked) { | ||||
vm_pagequeue_lock(pq); | vm_pagequeue_lock(pq); | ||||
queue_locked = TRUE; | queue_locked = TRUE; | ||||
} | } | ||||
next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q); | next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q); | ||||
TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q); | TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q); | ||||
} | } | ||||
vm_pagequeue_unlock(pq); | vm_pagequeue_unlock(pq); | ||||
/* | |||||
* Wake up the laundry thread so that it can perform any needed | |||||
* laundering. If we didn't meet our target, we're in shortfall and | |||||
* need to launder more aggressively. | |||||
*/ | |||||
if (vm_laundry_request == VM_LAUNDRY_IDLE && | |||||
starting_page_shortage > 0) { | |||||
pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY]; | |||||
vm_pagequeue_lock(pq); | |||||
if (page_shortage > 0) { | |||||
vm_laundry_request = VM_LAUNDRY_SHORTFALL; | |||||
PCPU_INC(cnt.v_pdshortfalls); | |||||
} else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL) | |||||
vm_laundry_request = VM_LAUNDRY_BACKGROUND; | |||||
wakeup(&vm_laundry_request); | |||||
vm_pagequeue_unlock(pq); | |||||
} | |||||
#if !defined(NO_SWAPPING) | #if !defined(NO_SWAPPING) | ||||
/* | /* | ||||
* Wakeup the swapout daemon if we didn't free the targeted number of | * Wakeup the swapout daemon if we didn't free the targeted number of | ||||
* pages. | * pages. | ||||
*/ | */ | ||||
if (vm_swap_enabled && page_shortage > 0) | if (vm_swap_enabled && page_shortage > 0) | ||||
vm_req_vmdaemon(VM_SWAP_NORMAL); | vm_req_vmdaemon(VM_SWAP_NORMAL); | ||||
#endif | #endif | ||||
/* | /* | ||||
* Wakeup the sync daemon if we skipped a vnode in a writeable object | |||||
* and we didn't free enough pages. | |||||
*/ | |||||
if (vnodes_skipped > 0 && page_shortage > vm_cnt.v_free_target - | |||||
vm_cnt.v_free_min) | |||||
(void)speedup_syncer(); | |||||
/* | |||||
* If the inactive queue scan fails repeatedly to meet its | * If the inactive queue scan fails repeatedly to meet its | ||||
* target, kill the largest process. | * target, kill the largest process. | ||||
*/ | */ | ||||
vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage); | vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage); | ||||
/* | /* | ||||
* Compute the number of pages we want to try to move from the | * Compute the number of pages we want to try to move from the | ||||
* active queue to the inactive queue. | * active queue to either the inactive or laundry queue. | ||||
* | |||||
* When scanning active pages, we make clean pages count more heavily | |||||
* towards the page shortage than dirty pages. This is because dirty | |||||
* pages must be laundered before they can be reused and thus have less | |||||
* utility when attempting to quickly alleviate a shortage. However, | |||||
* this weighting also causes the scan to deactivate dirty pages | |||||
* more aggressively, improving the effectiveness of clustering and | |||||
* ensuring that they can eventually be reused. | |||||
*/ | */ | ||||
inactq_shortage = vm_cnt.v_inactive_target - vm_cnt.v_inactive_count + | inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count + | ||||
vm_cnt.v_laundry_count / act_scan_laundry_weight) + | |||||
vm_paging_target() + deficit + addl_page_shortage; | vm_paging_target() + deficit + addl_page_shortage; | ||||
page_shortage *= act_scan_laundry_weight; | |||||
pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; | pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; | ||||
vm_pagequeue_lock(pq); | vm_pagequeue_lock(pq); | ||||
maxscan = pq->pq_cnt; | maxscan = pq->pq_cnt; | ||||
/* | /* | ||||
* If we're just idle polling attempt to visit every | * If we're just idle polling attempt to visit every | ||||
* active page within 'update_period' seconds. | * active page within 'update_period' seconds. | ||||
(67 lines elided; context: "for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <")
if (act_delta != 0) { | if (act_delta != 0) { | ||||
m->act_count += ACT_ADVANCE + act_delta; | m->act_count += ACT_ADVANCE + act_delta; | ||||
if (m->act_count > ACT_MAX) | if (m->act_count > ACT_MAX) | ||||
m->act_count = ACT_MAX; | m->act_count = ACT_MAX; | ||||
} else | } else | ||||
m->act_count -= min(m->act_count, ACT_DECLINE); | m->act_count -= min(m->act_count, ACT_DECLINE); | ||||
/* | /* | ||||
* Move this page to the tail of the active or inactive | * Move this page to the tail of the active, inactive or laundry | ||||
* queue depending on usage. | * queue depending on usage. | ||||
*/ | */ | ||||
if (m->act_count == 0) { | if (m->act_count == 0) { | ||||
/* Dequeue to avoid later lock recursion. */ | /* Dequeue to avoid later lock recursion. */ | ||||
vm_page_dequeue_locked(m); | vm_page_dequeue_locked(m); | ||||
/* | |||||
* When not short for inactive pages, let dirty pages go | |||||
* through the inactive queue before moving to the | |||||
* laundry queues. This gives them some extra time to | |||||
* be reactivated, potentially avoiding an expensive | |||||
* pageout. During a page shortage, the inactive queue | |||||
* is necessarily small, so we may move dirty pages | |||||
* directly to the laundry queue. | |||||
*/ | |||||
if (inactq_shortage <= 0) | |||||
vm_page_deactivate(m); | vm_page_deactivate(m); | ||||
else { | |||||
/* | |||||
* Calling vm_page_test_dirty() here would | |||||
* require acquisition of the object's write | |||||
* lock. However, during a page shortage, | |||||
* directing dirty pages into the laundry | |||||
* queue is only an optimization and not a | |||||
* requirement. Therefore, we simply rely on | |||||
* the opportunistic updates to the page's | |||||
* dirty field by the pmap. | |||||
*/ | |||||
if (m->dirty == 0) { | |||||
vm_page_deactivate(m); | |||||
inactq_shortage -= | |||||
act_scan_laundry_weight; | |||||
} else { | |||||
vm_page_launder(m); | |||||
inactq_shortage--; | inactq_shortage--; | ||||
} | |||||
} | |||||
} else | } else | ||||
vm_page_requeue_locked(m); | vm_page_requeue_locked(m); | ||||
vm_page_unlock(m); | vm_page_unlock(m); | ||||
} | } | ||||
vm_pagequeue_unlock(pq); | vm_pagequeue_unlock(pq); | ||||
#if !defined(NO_SWAPPING) | #if !defined(NO_SWAPPING) | ||||
/* | /* | ||||
* Idle process swapout -- run once per second when we are reclaiming | * Idle process swapout -- run once per second when we are reclaiming | ||||
(292 lines elided; context: "while (TRUE) {")
* Might the page daemon receive a wakeup call? | * Might the page daemon receive a wakeup call? | ||||
*/ | */ | ||||
if (vm_pageout_wanted) { | if (vm_pageout_wanted) { | ||||
/* | /* | ||||
* No. Either vm_pageout_wanted was set by another | * No. Either vm_pageout_wanted was set by another | ||||
* thread during the previous scan, which must have | * thread during the previous scan, which must have | ||||
* been a level 0 scan, or vm_pageout_wanted was | * been a level 0 scan, or vm_pageout_wanted was | ||||
* already set and the scan failed to free enough | * already set and the scan failed to free enough | ||||
* pages. If we haven't yet performed a level >= 2 | * pages. If we haven't yet performed a level >= 1 | ||||
* scan (unlimited dirty cleaning), then upgrade the | * (page reclamation) scan, then increase the level | ||||
* level and scan again now. Otherwise, sleep a bit | * and scan again now. Otherwise, sleep a bit and | ||||
* and try again later. | * try again later. | ||||
*/ | */ | ||||
mtx_unlock(&vm_page_queue_free_mtx); | mtx_unlock(&vm_page_queue_free_mtx); | ||||
if (pass > 1) | if (pass >= 1) | ||||
pause("psleep", hz / 2); | pause("psleep", hz / VM_INACT_SCAN_RATE); | ||||
pass++; | pass++; | ||||
} else { | } else { | ||||
/* | /* | ||||
* Yes. Sleep until pages need to be reclaimed or | * Yes. Sleep until pages need to be reclaimed or | ||||
* have their reference stats updated. | * have their reference stats updated. | ||||
*/ | */ | ||||
if (mtx_sleep(&vm_pageout_wanted, | if (mtx_sleep(&vm_pageout_wanted, | ||||
&vm_page_queue_free_mtx, PDROP | PVM, "psleep", | &vm_page_queue_free_mtx, PDROP | PVM, "psleep", | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | vm_pageout_init(void) | ||||
* case paging behaviors with stale active LRU. | * case paging behaviors with stale active LRU. | ||||
*/ | */ | ||||
if (vm_pageout_update_period == 0) | if (vm_pageout_update_period == 0) | ||||
vm_pageout_update_period = 600; | vm_pageout_update_period = 600; | ||||
/* XXX does not really belong here */ | /* XXX does not really belong here */ | ||||
if (vm_page_max_wired == 0) | if (vm_page_max_wired == 0) | ||||
vm_page_max_wired = vm_cnt.v_free_count / 3; | vm_page_max_wired = vm_cnt.v_free_count / 3; | ||||
/* | |||||
* Target amount of memory to move out of the laundry queue during a | |||||
* background laundering. This is proportional to the amount of system | |||||
* memory. | |||||
*/ | |||||
vm_background_launder_target = (vm_cnt.v_free_target - | |||||
vm_cnt.v_free_min) / 10; | |||||
} | } | ||||
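(Editorial aside: a toy computation, with invented free-page thresholds, of the background laundering target initialized above; assumes 4 KB pages.)

#include <stdio.h>

int
main(void)
{
	/* Invented values for v_free_target and v_free_min, in pages. */
	unsigned free_target = 120000, free_min = 30000;
	unsigned target = (free_target - free_min) / 10;

	printf("background launder target: %u pages (~%u MB)\n",
	    target, target * 4096U / (1024 * 1024));
	return (0);
}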
/* | /* | ||||
* vm_pageout is the high level pageout daemon. | * vm_pageout is the high level pageout daemon. | ||||
*/ | */ | ||||
static void | static void | ||||
vm_pageout(void) | vm_pageout(void) | ||||
{ | { | ||||
int error; | int error; | ||||
#ifdef VM_NUMA_ALLOC | #ifdef VM_NUMA_ALLOC | ||||
int i; | int i; | ||||
#endif | #endif | ||||
swap_pager_swap_init(); | swap_pager_swap_init(); | ||||
error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL, | |||||
0, 0, "laundry: dom0"); | |||||
if (error != 0) | |||||
panic("starting laundry for domain 0, error %d", error); | |||||
#ifdef VM_NUMA_ALLOC | #ifdef VM_NUMA_ALLOC | ||||
for (i = 1; i < vm_ndomains; i++) { | for (i = 1; i < vm_ndomains; i++) { | ||||
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i, | error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i, | ||||
curproc, NULL, 0, 0, "dom%d", i); | curproc, NULL, 0, 0, "dom%d", i); | ||||
if (error != 0) { | if (error != 0) { | ||||
panic("starting pageout for domain %d, error %d\n", | panic("starting pageout for domain %d, error %d\n", | ||||
i, error); | i, error); | ||||
} | } | ||||
(185 lines elided)