head/sys/vm/vm_fault.c

@@ ... @@
 static int vm_pfault_oom_wait = 10;
 SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN,
     &vm_pfault_oom_wait, 0,
     "Number of seconds to wait for free pages before retrying "
     "the page fault handler");
 
 static inline void
-release_page(struct faultstate *fs)
+fault_page_release(vm_page_t *mp)
 {
+	vm_page_t m;
 
-	if (fs->m != NULL) {
+	m = *mp;
+	if (m != NULL) {
 		/*
-		 * fs->m's object lock might not be held, so the page must be
-		 * kept busy until we are done with it.
+		 * We are likely to loop around again and attempt to busy
+		 * this page.  Deactivating it leaves it available for
+		 * pageout while optimizing fault restarts.
 		 */
-		vm_page_lock(fs->m);
-		vm_page_deactivate(fs->m);
-		vm_page_unlock(fs->m);
-		vm_page_xunbusy(fs->m);
-		fs->m = NULL;
+		vm_page_lock(m);
+		vm_page_deactivate(m);
+		vm_page_unlock(m);
+		vm_page_xunbusy(m);
+		*mp = NULL;
 	}
 }
 
 static inline void
+fault_page_free(vm_page_t *mp)
+{
+	vm_page_t m;
+
+	m = *mp;
+	if (m != NULL) {
+		VM_OBJECT_ASSERT_WLOCKED(m->object);
+		if (!vm_page_wired(m))
+			vm_page_free(m);
+		else
+			vm_page_xunbusy(m);
+		*mp = NULL;
+	}
+}
+
+static inline void
 unlock_map(struct faultstate *fs)
 {
 
 	if (fs->lookup_still_valid) {
 		vm_map_lookup_done(fs->map, fs->entry);
 		fs->lookup_still_valid = false;
 	}
 }
 
 static void
 unlock_vp(struct faultstate *fs)
 {
 
 	if (fs->vp != NULL) {
 		vput(fs->vp);
 		fs->vp = NULL;
 	}
 }
 
 static void
 fault_deallocate(struct faultstate *fs)
 {
 
+	fault_page_release(&fs->m);
 	vm_object_pip_wakeup(fs->object);
 	if (fs->object != fs->first_object) {
 		VM_OBJECT_WLOCK(fs->first_object);
-		vm_page_free(fs->first_m);
-		vm_object_pip_wakeup(fs->first_object);
+		fault_page_free(&fs->first_m);
 		VM_OBJECT_WUNLOCK(fs->first_object);
-		fs->first_m = NULL;
+		vm_object_pip_wakeup(fs->first_object);
 	}
 	vm_object_deallocate(fs->first_object);
 	unlock_map(fs);
 	unlock_vp(fs);
 }
 
 static void
 unlock_and_deallocate(struct faultstate *fs)
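
Note: both new helpers take a vm_page_t * rather than the whole faultstate, and they always NULL the caller's pointer after disposing of the page. That makes every release idempotent, which is what lets fault_deallocate() call fault_page_release(&fs->m) unconditionally on the common cleanup path. Below is a minimal userland model of that pointer-consuming pattern (an illustration only; struct page, page_release() and the busy field are invented stand-ins, not kernel API):

#include <stdio.h>
#include <stdlib.h>

struct page {
	int busy;		/* stand-in for the xbusy state */
};

static void
page_release(struct page **mp)
{
	struct page *m;

	m = *mp;
	if (m != NULL) {
		m->busy = 0;	/* vm_page_xunbusy() analogue */
		free(m);
		*mp = NULL;	/* consume the caller's reference */
	}
}

int
main(void)
{
	struct page *m;

	m = calloc(1, sizeof(*m));
	m->busy = 1;
	page_release(&m);	/* disposes of the page and NULLs m */
	page_release(&m);	/* second call is a harmless no-op */
	printf("m == NULL: %d\n", m == NULL);
	return (0);
}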
@@ ... @@ vm_fault_lock_vnode(struct faultstate *fs)
 	 */
 	error = vget(vp, locked | LK_CANRECURSE | LK_NOWAIT, curthread);
 	if (error == 0) {
 		fs->vp = vp;
 		return (KERN_SUCCESS);
 	}
 	vhold(vp);
-	release_page(fs);
 	unlock_and_deallocate(fs);
 	error = vget(vp, locked | LK_RETRY | LK_CANRECURSE, curthread);
 	vdrop(vp);
 	fs->vp = vp;
 	KASSERT(error == 0, ("vm_fault: vget failed %d", error));
 	return (KERN_RESOURCE_SHORTAGE);
 }
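
Note: the explicit release_page(fs) before unlock_and_deallocate(fs) is dropped here (and again at the map-revalidation sites below) because fault_deallocate() now releases fs->m itself. The vget() logic is unchanged: try a non-sleeping acquire while all fault state is still held, and only after dropping everything sleep for the lock and force a restart. A rough userland pthread model of that two-step acquire (illustrative only; lock_vnode_model() and the mutex are invented stand-ins for vget() with LK_NOWAIT):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t vnode_lock = PTHREAD_MUTEX_INITIALIZER;

static int
lock_vnode_model(void)
{
	/* Cheap path: take the lock without sleeping (LK_NOWAIT). */
	if (pthread_mutex_trylock(&vnode_lock) == 0)
		return (0);		/* KERN_SUCCESS analogue */
	/* ... drop all fault state here (unlock_and_deallocate) ... */
	pthread_mutex_lock(&vnode_lock);	/* now safe to sleep */
	return (EAGAIN);	/* KERN_RESOURCE_SHORTAGE: restart the fault */
}

int
main(void)
{
	printf("lock result: %d\n", lock_vnode_model());
	pthread_mutex_unlock(&vnode_lock);
	return (0);
}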
@@ ... @@ RetryFault_oom:
 	 * Bump the paging-in-progress count to prevent size changes (e.g. 
 	 * truncation operations) during I/O.
 	 */
 	vm_object_reference_locked(fs.first_object);
 	vm_object_pip_add(fs.first_object, 1);
 
 	fs.lookup_still_valid = true;
 
-	fs.first_m = NULL;
+	fs.m = fs.first_m = NULL;
 
 	/*
 	 * Search for the page at object/offset.
 	 */
 	fs.object = fs.first_object;
 	fs.pindex = fs.first_pindex;
 	while (TRUE) {
+		KASSERT(fs.m == NULL,
+		    ("page still set %p at loop start", fs.m));
 		/*
 		 * If the object is marked for imminent termination,
 		 * we retry here, since the collapse pass has raced
 		 * with us.  Otherwise, if we see terminally dead
 		 * object, return fail.
 		 */
 		if ((fs.object->flags & OBJ_DEAD) != 0) {
 			dead = fs.object->type == OBJT_DEAD;
@@ ... @@ if (fs.m != NULL) {
 			if (vm_page_tryxbusy(fs.m) == 0) {
 				/*
 				 * Reference the page before unlocking and
 				 * sleeping so that the page daemon is less
 				 * likely to reclaim it.
 				 */
 				vm_page_aflag_set(fs.m, PGA_REFERENCED);
 				if (fs.object != fs.first_object) {
-					if (!VM_OBJECT_TRYWLOCK(
-					    fs.first_object)) {
-						VM_OBJECT_WUNLOCK(fs.object);
-						VM_OBJECT_WLOCK(fs.first_object);
-						VM_OBJECT_WLOCK(fs.object);
-					}
-					vm_page_free(fs.first_m);
+					fault_page_release(&fs.first_m);
 					vm_object_pip_wakeup(fs.first_object);
-					VM_OBJECT_WUNLOCK(fs.first_object);
-					fs.first_m = NULL;
 				}
 				unlock_map(&fs);
+				vm_object_pip_wakeup(fs.object);
 				if (fs.m == vm_page_lookup(fs.object,
 				    fs.pindex)) {
 					vm_page_sleep_if_busy(fs.m, "vmpfw");
 				}
-				vm_object_pip_wakeup(fs.object);
 				VM_OBJECT_WUNLOCK(fs.object);
 				VM_CNT_INC(v_intrans);
 				vm_object_deallocate(fs.first_object);
 				goto RetryFault;
 			}
 
 			/*
 			 * The page is marked busy for other processes and the
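
Note: the deleted VM_OBJECT_TRYWLOCK() dance existed only because vm_page_free(fs.first_m) requires first_object's write lock, which could not be taken while holding fs.object's lock without risking a lock-order reversal. fault_page_release() needs no object lock (the old release_page() comment made the same point), so the whole back-off sequence can go; first_m is now deactivated and unbusied rather than freed, and the new helper's comment notes the retried fault is likely to re-busy it. A userland pthread model of the removed back-off pattern (illustrative only; the mutex names are stand-ins for the object write locks):

#include <pthread.h>

static pthread_mutex_t object_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t first_object_lock = PTHREAD_MUTEX_INITIALIZER;

static void
lock_first_object_holding_object(void)
{
	/* Entered with object_lock held. */
	if (pthread_mutex_trylock(&first_object_lock) != 0) {
		/* Blocking here could deadlock; back off and reorder. */
		pthread_mutex_unlock(&object_lock);
		pthread_mutex_lock(&first_object_lock);
		pthread_mutex_lock(&object_lock);
	}
}

int
main(void)
{
	pthread_mutex_lock(&object_lock);
	lock_first_object_holding_object();
	pthread_mutex_unlock(&first_object_lock);
	pthread_mutex_unlock(&object_lock);
	return (0);
}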
@@ ... @@ if (fs.object->type != OBJT_DEFAULT) {
 				    curproc->p_pid, curproc->p_comm);
 			/*
 			 * If an I/O error occurred or the requested page was
 			 * outside the range of the pager, clean up and return
 			 * an error.
 			 */
 			if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) {
-				if (!vm_page_wired(fs.m))
-					vm_page_free(fs.m);
-				else
-					vm_page_xunbusy(fs.m);
-				fs.m = NULL;
+				fault_page_free(&fs.m);
 				unlock_and_deallocate(&fs);
 				return (KERN_OUT_OF_BOUNDS);
 			}
+		}
 
 		/*
 		 * The requested page does not exist at this object/
 		 * offset.  Remove the invalid page from the object,
 		 * waking up anyone waiting for it, and continue on to
 		 * the next object.  However, if this is the top-level
 		 * object, we must leave the busy page in place to
 		 * prevent another process from rushing past us, and
 		 * inserting the page in that object at the same time
 		 * that we are.
 		 */
-		if (fs.object != fs.first_object) {
-			if (!vm_page_wired(fs.m))
-				vm_page_free(fs.m);
-			else
-				vm_page_xunbusy(fs.m);
+		if (fs.object == fs.first_object) {
+			fs.first_m = fs.m;
 			fs.m = NULL;
-		}
-		}
-
-		/*
-		 * We get here if the object has default pager (or unwiring)
-		 * or the pager doesn't have the page.
-		 */
-		if (fs.object == fs.first_object)
-			fs.first_m = fs.m;
+		} else
+			fault_page_free(&fs.m);
 
 		/*
 		 * Move on to the next object.  Lock the next object before
 		 * unlocking the current one.
 		 */
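
Note: fault_page_free() centralizes the "free unless wired" policy that was previously open-coded at both of these sites: an invalid page may only be freed if nothing holds a wiring on it; otherwise it is merely unbusied and left to the wiring holder. Minimal userland model of that policy (illustrative only; struct page and page_free_unless_wired() are invented stand-ins):

#include <stdio.h>
#include <stdlib.h>

struct page {
	int wire_count;
	int busy;
};

static void
page_free_unless_wired(struct page **mp)
{
	struct page *m;

	m = *mp;
	if (m != NULL) {
		if (m->wire_count == 0)
			free(m);	/* vm_page_free() analogue */
		else
			m->busy = 0;	/* vm_page_xunbusy() analogue */
		*mp = NULL;	/* caller's reference is gone either way */
	}
}

int
main(void)
{
	struct page *pg, *wiring_holder;

	pg = calloc(1, sizeof(*pg));
	pg->busy = 1;
	pg->wire_count = 1;	/* wired: page survives, only unbusied */
	wiring_holder = pg;
	page_free_unless_wired(&pg);
	printf("pg == NULL: %d, busy: %d\n", pg == NULL, wiring_holder->busy);
	free(wiring_holder);
	return (0);
}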
 		next_object = fs.object->backing_object;
 		if (next_object == NULL) {
 			/*
 			 * If there's no object left, fill the page in the top
 			 * object with zeros.
 			 */
 			if (fs.object != fs.first_object) {
 				vm_object_pip_wakeup(fs.object);
 				VM_OBJECT_WUNLOCK(fs.object);
 
 				fs.object = fs.first_object;
 				fs.pindex = fs.first_pindex;
-				fs.m = fs.first_m;
 				VM_OBJECT_WLOCK(fs.object);
 			}
+			MPASS(fs.first_m != NULL);
+			MPASS(fs.m == NULL);
+			fs.m = fs.first_m;
 			fs.first_m = NULL;
 
 			/*
 			 * Zero the page if necessary and mark it valid.
 			 */
 			if ((fs.m->flags & PG_ZERO) == 0) {
 				pmap_zero_page(fs.m);
 			} else {
 				VM_CNT_INC(v_ozfod);
 			}
 			VM_CNT_INC(v_zfod);
 			vm_page_valid(fs.m);
 			/* Don't try to prefault neighboring pages. */
 			faultcount = 1;
 			break;	/* break to PAGE HAS BEEN FOUND */
 		} else {
+			MPASS(fs.first_m != NULL);
 			KASSERT(fs.object != next_object,
 			    ("object loop %p", next_object));
 			VM_OBJECT_WLOCK(next_object);
 			vm_object_pip_add(next_object, 1);
 			if (fs.object != fs.first_object)
 				vm_object_pip_wakeup(fs.object);
 			fs.pindex +=
 			    OFF_TO_IDX(fs.object->backing_object_offset);
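
Note: when descending to a backing object, the faulting index must be rebased, since a shadow object maps its backing object starting at backing_object_offset; the index therefore grows by that byte offset converted to pages, and OFF_TO_IDX() is the byte-offset-to-page-index shift. A small worked example (4 KiB pages assumed; the constants are made up for illustration):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12	/* 4 KiB pages */
#define OFF_TO_IDX(off)	((uintmax_t)(off) >> PAGE_SHIFT)

int
main(void)
{
	uintmax_t pindex = 5;			/* index in the shadow object */
	uintmax_t backing_offset = 0x4000;	/* shadow starts 16 KiB into backing */

	pindex += OFF_TO_IDX(backing_offset);
	printf("backing object pindex: %ju\n", pindex);	/* 5 + 4 = 9 */
	return (0);
}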
@@ ... @@ #endif
 				if (wired && (fault_flags &
 				    VM_FAULT_WIRE) == 0) {
 					vm_page_wire(fs.first_m);
 					vm_page_unwire(fs.m, PQ_INACTIVE);
 				}
 				/*
 				 * We no longer need the old page or object.
 				 */
-				release_page(&fs);
+				fault_page_release(&fs.m);
 			}
 			/*
 			 * fs.object != fs.first_object due to above
 			 * conditional
 			 */
 			vm_object_pip_wakeup(fs.object);
 			/*
@@ ... @@ #endif
 	}
 
 	/*
 	 * We must verify that the maps have not changed since our last
 	 * lookup.
 	 */
 	if (!fs.lookup_still_valid) {
 		if (!vm_map_trylock_read(fs.map)) {
-			release_page(&fs);
 			unlock_and_deallocate(&fs);
 			goto RetryFault;
 		}
 		fs.lookup_still_valid = true;
 		if (fs.map->timestamp != fs.map_generation) {
 			result = vm_map_lookup_locked(&fs.map, vaddr, fault_type,
 			    &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired);
 
 			/*
 			 * If we don't need the page any longer, put it on the inactive
 			 * list (the easiest thing to do here).  If no one needs it,
 			 * pageout will grab it eventually.
 			 */
 			if (result != KERN_SUCCESS) {
-				release_page(&fs);
 				unlock_and_deallocate(&fs);
 
 				/*
 				 * If retry of map lookup would have blocked then
 				 * retry fault from start.
 				 */
 				if (result == KERN_FAILURE)
 					goto RetryFault;
 				return (result);
 			}
 			if ((retry_object != fs.first_object) ||
 			    (retry_pindex != fs.first_pindex)) {
-				release_page(&fs);
 				unlock_and_deallocate(&fs);
 				goto RetryFault;
 			}
 
 			/*
 			 * Check whether the protection has changed or the object has
 			 * been copied while we left the map unlocked. Changing from
 			 * read to write permission is OK - we leave the page
 			 * write-protected, and catch the write fault. Changing from
 			 * write to read permission means that we can't mark the page
 			 * write-enabled after all.
 			 */
 			prot &= retry_prot;
 			fault_type &= retry_prot;
 			if (prot == 0) {
-				release_page(&fs);
 				unlock_and_deallocate(&fs);
 				goto RetryFault;
 			}
 
 			/* Reassert because wired may have changed. */
 			KASSERT(wired || (fault_flags & VM_FAULT_WIRE) == 0,
 			    ("!wired && VM_FAULT_WIRE"));
 		}
 	}
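
Note: this block revalidates a map lookup that may have gone stale while the fault slept: the generation number saved at lookup time is compared against the map's current timestamp, and any mismatch forces a fresh lookup (and possibly a full restart). The release_page() calls disappear for the same reason as at the vnode-lock site. A userland sketch of generation-based revalidation (illustrative only; struct map and lookup_still_current() are invented stand-ins):

#include <stdbool.h>
#include <stdio.h>

struct map {
	unsigned timestamp;	/* bumped on every map modification */
};

static bool
lookup_still_current(const struct map *map, unsigned map_generation)
{
	return (map->timestamp == map_generation);
}

int
main(void)
{
	struct map m = { .timestamp = 7 };
	unsigned gen = m.timestamp;	/* saved at vm_map_lookup() time */

	m.timestamp++;			/* map was modified while unlocked */
	printf("still valid: %d\n", lookup_still_current(&m, gen));
	return (0);
}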
@@ ... @@ if ((fault_flags & VM_FAULT_WIRE) != 0) {
 		vm_page_activate(fs.m);
 		vm_page_unlock(fs.m);
 	}
 	if (m_hold != NULL) {
 		*m_hold = fs.m;
 		vm_page_wire(fs.m);
 	}
 	vm_page_xunbusy(fs.m);
+	fs.m = NULL;
 
 	/*
 	 * Unlock everything, and return
 	 */
 	fault_deallocate(&fs);
 	if (hardfault) {
 		VM_CNT_INC(v_io_faults);
 		curthread->td_ru.ru_majflt++;
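
Note: clearing fs.m right after the final vm_page_xunbusy() is what makes the unconditional fault_page_release(&fs->m) in fault_deallocate() safe: the page just handed to the caller (and possibly wired through *m_hold) is no longer reachable through the faultstate, so the cleanup path cannot deactivate it. A compact userland model of that handoff (illustrative only; the struct layouts are invented):

#include <assert.h>
#include <stddef.h>

struct page {
	int busy;
};

struct faultstate {
	struct page *m;
};

static void
fault_page_release_model(struct page **mp)
{
	if (*mp != NULL) {
		(*mp)->busy = 0;
		*mp = NULL;
	}
}

int
main(void)
{
	struct page pg = { .busy = 1 };
	struct faultstate fs = { .m = &pg };
	struct page *m_hold;

	m_hold = fs.m;			/* hand the page to the caller */
	fs.m->busy = 0;			/* vm_page_xunbusy() analogue */
	fs.m = NULL;			/* detach it from the faultstate */
	fault_page_release_model(&fs.m);	/* cleanup is now a no-op */
	assert(m_hold == &pg && pg.busy == 0);
	return (0);
}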