Changeset View
Standalone View
sys/vm/vm_fault.c
Show First 20 Lines • Show All 128 Lines • ▼ Show 20 Lines | struct faultstate { | ||||
struct vnode *vp; | struct vnode *vp; | ||||
}; | }; | ||||
static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, | static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, | ||||
int ahead); | int ahead); | ||||
static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, | static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, | ||||
int backward, int forward, bool obj_locked); | int backward, int forward, bool obj_locked); | ||||
static int vm_pfault_oom_attempts = 3; | |||||
SYSCTL_INT(_vm, OID_AUTO, pfault_oom_attempts, CTLFLAG_RWTUN, | |||||
&vm_pfault_oom_attempts, 0, | |||||
""); | |||||
markj: "Number of page allocation attempts before the page fault handler triggers OOM handling"? | |||||
static int vm_pfault_oom_wait = 10; | |||||
SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN, | |||||
&vm_pfault_oom_wait, 0, | |||||
""); | |||||
Not Done Inline Actions"Number of seconds to wait for free pages before retrying the page fault handler"? markj: "Number of seconds to wait for free pages before retrying the page fault handler"? | |||||
static inline void | static inline void | ||||
release_page(struct faultstate *fs) | release_page(struct faultstate *fs) | ||||
{ | { | ||||
vm_page_xunbusy(fs->m); | vm_page_xunbusy(fs->m); | ||||
vm_page_lock(fs->m); | vm_page_lock(fs->m); | ||||
vm_page_deactivate(fs->m); | vm_page_deactivate(fs->m); | ||||
vm_page_unlock(fs->m); | vm_page_unlock(fs->m); | ||||
▲ Show 20 Lines • Show All 402 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
struct faultstate fs; | struct faultstate fs; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
vm_object_t next_object, retry_object; | vm_object_t next_object, retry_object; | ||||
vm_offset_t e_end, e_start; | vm_offset_t e_end, e_start; | ||||
vm_pindex_t retry_pindex; | vm_pindex_t retry_pindex; | ||||
vm_prot_t prot, retry_prot; | vm_prot_t prot, retry_prot; | ||||
int ahead, alloc_req, behind, cluster_offset, error, era, faultcount; | int ahead, alloc_req, behind, cluster_offset, error, era, faultcount; | ||||
int locked, nera, result, rv; | int locked, nera, oom, result, rv; | ||||
u_char behavior; | u_char behavior; | ||||
boolean_t wired; /* Passed by reference. */ | boolean_t wired; /* Passed by reference. */ | ||||
bool dead, hardfault, is_first_object_locked; | bool dead, hardfault, is_first_object_locked; | ||||
VM_CNT_INC(v_vm_faults); | VM_CNT_INC(v_vm_faults); | ||||
fs.vp = NULL; | fs.vp = NULL; | ||||
faultcount = 0; | faultcount = 0; | ||||
nera = -1; | nera = -1; | ||||
hardfault = false; | hardfault = false; | ||||
RetryFault:; | RetryFault: | ||||
oom = 0; | |||||
RetryFault_oom: | |||||
/* | /* | ||||
* Find the backing store object and offset into it to begin the | * Find the backing store object and offset into it to begin the | ||||
* search. | * search. | ||||
*/ | */ | ||||
fs.map = map; | fs.map = map; | ||||
result = vm_map_lookup(&fs.map, vaddr, fault_type | | result = vm_map_lookup(&fs.map, vaddr, fault_type | | ||||
VM_PROT_FAULT_LOOKUP, &fs.entry, &fs.first_object, | VM_PROT_FAULT_LOOKUP, &fs.entry, &fs.first_object, | ||||
▲ Show 20 Lines • Show All 225 Lines • ▼ Show 20 Lines | #endif | ||||
if (fs.object->type != OBJT_VNODE && | if (fs.object->type != OBJT_VNODE && | ||||
fs.object->backing_object == NULL) | fs.object->backing_object == NULL) | ||||
alloc_req |= VM_ALLOC_ZERO; | alloc_req |= VM_ALLOC_ZERO; | ||||
fs.m = vm_page_alloc(fs.object, fs.pindex, | fs.m = vm_page_alloc(fs.object, fs.pindex, | ||||
alloc_req); | alloc_req); | ||||
} | } | ||||
if (fs.m == NULL) { | if (fs.m == NULL) { | ||||
unlock_and_deallocate(&fs); | unlock_and_deallocate(&fs); | ||||
vm_waitpfault(); | if (vm_pfault_oom_attempts < 0 || | ||||
oom < vm_pfault_oom_attempts) { | |||||
oom++; | |||||
vm_waitpfault(vm_pfault_oom_wait * hz); | |||||
goto RetryFault_oom; | |||||
Not Done Inline ActionsI want to ask a high-level question. I see the point of adding a timeout to vm_waitpfault(). Otherwise, the calling thread will sleep until the current memory shortage is resolved, and killing this process may be the preferred way to address that shortage. However, I want to ask why simply introducing the timeout doesn't suffice? Won't the existing OOM code identify this process as problematic and attempt to kill it? And, if all of the faulting, sleeping threads eventually wake up, won't the existing code in vm_fault() for handling faults by killed processes allow for the process to be terminated? alc: I want to ask a high-level question. I see the point of adding a timeout to vm_waitpfault(). | |||||
Done Inline Actions: Theoretically the OOM P_KILLED check should be enough, but in practice it was not in the situation which prompted me to write the patch. The pagedaemon can sometimes make very small progress (units of pages), which is enough for the OOM killer to reset the OOM sequence, but not enough for the system to make real progress. Basically, any random vm_page_free() sabotages OOM. In my case I had a bug introduced into the build system which caused many instances of the parallel make to consume a lot of anon memory. There were several dozen processes, each eating several GBs, all non-killable. The machine could sit several hours in this state until I hit reset. Adding the timeout to the paging allocation allowed it to recover on its own. Simply introducing the timeout is not enough, for the reason stated above: OOM really did not kill anything. kib: Theoretically the OOM P_KILLED check should be enough, but practically it was not in my situation… | |||||
} | |||||
if (bootverbose) | |||||
printf( | |||||
"proc %d (%s) failed to alloc page on fault, starting OOM\n", | |||||
curproc->p_pid, curproc->p_comm); | |||||
vm_pageout_oom(VM_OOM_MEM_PF); | |||||
goto RetryFault; | goto RetryFault; | ||||
} | } | ||||
} | } | ||||
readrest: | readrest: | ||||
/* | /* | ||||
* At this point, we have either allocated a new page or found | * At this point, we have either allocated a new page or found | ||||
* an existing page that is only partially valid. | * an existing page that is only partially valid. | ||||
▲ Show 20 Lines • Show All 897 Lines • ▼ Show 20 Lines | if (object != dst_object) { | ||||
* Allocate a page in the destination object. | * Allocate a page in the destination object. | ||||
*/ | */ | ||||
dst_m = vm_page_alloc(dst_object, (src_object == | dst_m = vm_page_alloc(dst_object, (src_object == | ||||
dst_object ? src_pindex : 0) + dst_pindex, | dst_object ? src_pindex : 0) + dst_pindex, | ||||
VM_ALLOC_NORMAL); | VM_ALLOC_NORMAL); | ||||
if (dst_m == NULL) { | if (dst_m == NULL) { | ||||
VM_OBJECT_WUNLOCK(dst_object); | VM_OBJECT_WUNLOCK(dst_object); | ||||
VM_OBJECT_RUNLOCK(object); | VM_OBJECT_RUNLOCK(object); | ||||
vm_wait(dst_object); | vm_wait(dst_object, 0); | ||||
VM_OBJECT_WLOCK(dst_object); | VM_OBJECT_WLOCK(dst_object); | ||||
goto again; | goto again; | ||||
} | } | ||||
pmap_copy_page(src_m, dst_m); | pmap_copy_page(src_m, dst_m); | ||||
VM_OBJECT_RUNLOCK(object); | VM_OBJECT_RUNLOCK(object); | ||||
dst_m->valid = VM_PAGE_BITS_ALL; | dst_m->valid = VM_PAGE_BITS_ALL; | ||||
dst_m->dirty = VM_PAGE_BITS_ALL; | dst_m->dirty = VM_PAGE_BITS_ALL; | ||||
} else { | } else { | ||||
▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines |
"Number of page allocation attempts before the page fault handler triggers OOM handling"?