Changeset View
Standalone View
sys/vm/vm_fault.c
Show First 20 Lines • Show All 1,041 Lines • ▼ Show 20 Lines | #endif | ||||
*/ | */ | ||||
fs->object = fs->first_object; | fs->object = fs->first_object; | ||||
fs->pindex = fs->first_pindex; | fs->pindex = fs->first_pindex; | ||||
fs->m = fs->first_m; | fs->m = fs->first_m; | ||||
VM_CNT_INC(v_cow_faults); | VM_CNT_INC(v_cow_faults); | ||||
curthread->td_cow++; | curthread->td_cow++; | ||||
} | } | ||||
enum object_locked { UNLOCKED, RLOCK, WLOCK }; | |||||
static void | |||||
vm_fault_object_unlock(struct faultstate *fs, enum object_locked objlocked) | |||||
markj: The lock state should go into faultstate. | |||||
{ | |||||
switch (objlocked) { | |||||
case RLOCK: | |||||
VM_OBJECT_RUNLOCK(fs->object); | |||||
return; | |||||
case WLOCK: | |||||
VM_OBJECT_WUNLOCK(fs->object); | |||||
return; | |||||
case UNLOCKED: | |||||
panic("object %p already unlocked", fs->object); | |||||
} | |||||
__assert_unreachable(); | |||||
} | |||||
static bool | static bool | ||||
vm_fault_next(struct faultstate *fs) | vm_fault_object_upgrade_cond(struct faultstate *fs, enum object_locked *objlocked) | ||||
{ | { | ||||
switch (*objlocked) { | |||||
case WLOCK: | |||||
VM_OBJECT_ASSERT_WLOCKED(fs->object); | |||||
return (true); | |||||
case RLOCK: | |||||
*objlocked = WLOCK; | |||||
if (VM_OBJECT_TRYUPGRADE(fs->object)) | |||||
return (true); | |||||
VM_OBJECT_RUNLOCK(fs->object); | |||||
VM_OBJECT_WLOCK(fs->object); | |||||
return (false); | |||||
case UNLOCKED: | |||||
panic("object %p not locked", fs->object); | |||||
} | |||||
__assert_unreachable(); | |||||
} | |||||
static void | |||||
vm_fault_object_lock_assert(struct faultstate *fs, enum object_locked objlocked) | |||||
{ | |||||
switch (objlocked) { | |||||
case WLOCK: | |||||
VM_OBJECT_ASSERT_WLOCKED(fs->object); | |||||
return; | |||||
case RLOCK: | |||||
VM_OBJECT_ASSERT_RLOCKED(fs->object); | |||||
return; | |||||
case UNLOCKED: | |||||
panic("object %p not locked", fs->object); | |||||
} | |||||
__assert_unreachable(); | |||||
} | |||||
static bool | |||||
vm_fault_next(struct faultstate *fs, enum object_locked *objlocked) | |||||
{ | |||||
vm_object_t next_object; | vm_object_t next_object; | ||||
vm_fault_object_lock_assert(fs, *objlocked); | |||||
Done Inline Actionsi moved this and the same assert in vm_fault_object up, i dn't understand why they were pushed below. this can be a separate commit. mjg: i moved this and the same assert in vm_fault_object up, i dn't understand why they were pushed… | |||||
/* | /* | ||||
* The requested page does not exist at this object/ | * The requested page does not exist at this object/ | ||||
* offset. Remove the invalid page from the object, | * offset. Remove the invalid page from the object, | ||||
* waking up anyone waiting for it, and continue on to | * waking up anyone waiting for it, and continue on to | ||||
* the next object. However, if this is the top-level | * the next object. However, if this is the top-level | ||||
* object, we must leave the busy page in place to | * object, we must leave the busy page in place to | ||||
* prevent another process from rushing past us, and | * prevent another process from rushing past us, and | ||||
* inserting the page in that object at the same time | * inserting the page in that object at the same time | ||||
* that we are. | * that we are. | ||||
*/ | */ | ||||
if (fs->object == fs->first_object) { | if (fs->object == fs->first_object) { | ||||
fs->first_m = fs->m; | fs->first_m = fs->m; | ||||
fs->m = NULL; | fs->m = NULL; | ||||
} else | } else | ||||
fault_page_free(&fs->m); | fault_page_free(&fs->m); | ||||
/* | /* | ||||
* Move on to the next object. Lock the next object before | * Move on to the next object. Lock the next object before | ||||
* unlocking the current one. | * unlocking the current one. | ||||
*/ | */ | ||||
VM_OBJECT_ASSERT_WLOCKED(fs->object); | |||||
next_object = fs->object->backing_object; | next_object = fs->object->backing_object; | ||||
if (next_object == NULL) | if (next_object == NULL) | ||||
return (false); | return (false); | ||||
MPASS(fs->first_m != NULL); | MPASS(fs->first_m != NULL); | ||||
KASSERT(fs->object != next_object, ("object loop %p", next_object)); | KASSERT(fs->object != next_object, ("object loop %p", next_object)); | ||||
VM_OBJECT_WLOCK(next_object); | VM_OBJECT_RLOCK(next_object); | ||||
Not Done Inline ActionsYou can do lockless lookup for the page (and may be checking its validity), only to select rlock vs. wlock there, probably eliminating almost all lock upgrades. kib: You can do lockless lookup for the page (and may be checking its validity), only to select… | |||||
Done Inline Actionswhat is rlock for in this case if the page was found and validated locklessly? how about something of this sort, with failing case wlocking the object static enum fault_status vm_fault_object_unlocked(struct faultstate *fs) { VM_OBJECT_ASSERT_UNLOCKED(fs->object); fs->m = vm_page_lookup_unlocked(fs->object, fs->pindex); if (fs->m == NULL) return (FAULT_CONTINUE); if (!vm_page_all_valid(fs->m)) return (FAULT_CONTINUE); if (!vm_page_tryxbusy(fs->m)) { vm_fault_busy_sleep_unlocked(fs); return (FAULT_RESTART); } if (fs->m->object != fs->object || fs->m->pindex != fs->pindex || !vm_page_all_valid(fs->m)) { vm_page_xunbusy(fs->m); fs->m = NULL; return (FAULT_CONTINUE); } /* * Finally make sure the object is still alive. */ if ((fs->object->flags & OBJ_DEAD) != 0) { vm_page_xunbusy(fs->m); fs->m = NULL; return (FAULT_CONTINUE); } return (FAULT_SOFT); } mjg: what is rlock for in this case if the page was found and validated locklessly?
how about… | |||||
vm_object_pip_add(next_object, 1); | vm_object_pip_add(next_object, 1); | ||||
if (fs->object != fs->first_object) | if (fs->object != fs->first_object) | ||||
vm_object_pip_wakeup(fs->object); | vm_object_pip_wakeup(fs->object); | ||||
fs->pindex += OFF_TO_IDX(fs->object->backing_object_offset); | fs->pindex += OFF_TO_IDX(fs->object->backing_object_offset); | ||||
VM_OBJECT_WUNLOCK(fs->object); | vm_fault_object_unlock(fs, *objlocked); | ||||
fs->object = next_object; | fs->object = next_object; | ||||
*objlocked = RLOCK; | |||||
return (true); | return (true); | ||||
} | } | ||||
static void | static void | ||||
vm_fault_zerofill(struct faultstate *fs) | vm_fault_zerofill(struct faultstate *fs) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 247 Lines • ▼ Show 20 Lines | |||||
* | * | ||||
* We can theoretically allow the busy case on a read fault if the page | * We can theoretically allow the busy case on a read fault if the page | ||||
* is marked valid, but since such pages are typically already pmap'd, | * is marked valid, but since such pages are typically already pmap'd, | ||||
 * putting that special case in might be more effort than it is worth. | * putting that special case in might be more effort than it is worth. | ||||
* We cannot under any circumstances mess around with a shared busied | * We cannot under any circumstances mess around with a shared busied | ||||
* page except, perhaps, to pmap it. | * page except, perhaps, to pmap it. | ||||
*/ | */ | ||||
static void | static void | ||||
vm_fault_busy_sleep(struct faultstate *fs) | vm_fault_busy_sleep(struct faultstate *fs, enum object_locked objlocked) | ||||
{ | { | ||||
/* | /* | ||||
* Reference the page before unlocking and | * Reference the page before unlocking and | ||||
* sleeping so that the page daemon is less | * sleeping so that the page daemon is less | ||||
* likely to reclaim it. | * likely to reclaim it. | ||||
*/ | */ | ||||
vm_page_aflag_set(fs->m, PGA_REFERENCED); | vm_page_aflag_set(fs->m, PGA_REFERENCED); | ||||
if (fs->object != fs->first_object) { | if (fs->object != fs->first_object) { | ||||
fault_page_release(&fs->first_m); | fault_page_release(&fs->first_m); | ||||
vm_object_pip_wakeup(fs->first_object); | vm_object_pip_wakeup(fs->first_object); | ||||
} | } | ||||
vm_object_pip_wakeup(fs->object); | vm_object_pip_wakeup(fs->object); | ||||
unlock_map(fs); | unlock_map(fs); | ||||
if (fs->m != vm_page_lookup(fs->object, fs->pindex) || | if (fs->m != vm_page_lookup(fs->object, fs->pindex) || | ||||
!vm_page_busy_sleep(fs->m, "vmpfw", 0)) | !vm_page_busy_sleep(fs->m, "vmpfw", 0)) | ||||
VM_OBJECT_WUNLOCK(fs->object); | vm_fault_object_unlock(fs, objlocked); | ||||
VM_CNT_INC(v_intrans); | VM_CNT_INC(v_intrans); | ||||
vm_object_deallocate(fs->first_object); | vm_object_deallocate(fs->first_object); | ||||
} | } | ||||
Not Done Inline ActionsIs it time to finally have VM_OBJECT_UNLOCK() ? kib: Is it time to finally have VM_OBJECT_UNLOCK() ? | |||||
Not Done Inline ActionsWe already have VM_OBJECT_DROP(), BTW. I think VM_OBJECT_UNLOCK() makes more sense as a name though. markj: We already have VM_OBJECT_DROP(), BTW. I think VM_OBJECT_UNLOCK() makes more sense as a name… | |||||
Not Done Inline Actions_DROP() calls the lock class method, which at least gives more overhead than plain rw_unlock(). It also intended to be used in pair with _PICKUP. kib: _DROP() calls the lock class method, which at least gives more overhead than plain rw_unlock(). | |||||
Done Inline ActionsThis would make things slower multi-threaded. write unlock does not pre-read the lock and adding that would require an extra transaction. read unlock happens to read upfront, but that's only a current artifact of the implementation mjg: This would make things slower multi-threaded. write unlock does not pre-read the lock and… | |||||
/* | /* | ||||
* Handle page lookup, populate, allocate, page-in for the current | * Handle page lookup, populate, allocate, page-in for the current | ||||
* object. | * object. | ||||
* | * | ||||
* The object is locked on entry and will remain locked with a return | * The object is locked on entry and will remain locked with a return | ||||
* code of FAULT_CONTINUE so that fault may follow the shadow chain. | * code of FAULT_CONTINUE so that fault may follow the shadow chain. | ||||
* Otherwise, the object will be unlocked upon return. | * Otherwise, the object will be unlocked upon return. | ||||
*/ | */ | ||||
static enum fault_status | static enum fault_status | ||||
Done Inline ActionsI don't think this bit is pretty, but given unlocked operation later I suspect this will require a dedicated variant anyway. mjg: I don't think this bit is pretty, but given unlocked operation later I suspect this will… | |||||
vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp) | vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp, | ||||
enum object_locked *objlocked) | |||||
{ | { | ||||
enum fault_status res; | enum fault_status res; | ||||
bool dead; | bool dead; | ||||
VM_OBJECT_ASSERT_LOCKED(fs->object); | |||||
again: | |||||
/* | /* | ||||
* If the object is marked for imminent termination, we retry | * If the object is marked for imminent termination, we retry | ||||
* here, since the collapse pass has raced with us. Otherwise, | * here, since the collapse pass has raced with us. Otherwise, | ||||
* if we see terminally dead object, return fail. | * if we see terminally dead object, return fail. | ||||
*/ | */ | ||||
if ((fs->object->flags & OBJ_DEAD) != 0) { | if (__predict_false((fs->object->flags & OBJ_DEAD) != 0)) { | ||||
dead = fs->object->type == OBJT_DEAD; | dead = fs->object->type == OBJT_DEAD; | ||||
unlock_and_deallocate(fs); | vm_fault_object_unlock(fs, *objlocked); | ||||
fault_deallocate(fs); | |||||
Not Done Inline ActionsIt's better to handle this in unlock_and_deallocate(). markj: It's better to handle this in unlock_and_deallocate(). | |||||
Done Inline Actionsthis would require patching all other callers of the routine, or reworking it internally to wrap a variant which takes an objlocked argument. i explicitly did not do that to avoid extra changes in the area given the entire thing will need to be refactored down the road. mjg: this would require patching all other callers of the routine, or reworking it internally to… | |||||
Not Done Inline ActionsI suggested to put the objlock state in the faultstate structure and handle it without passing around an extra parameter. Why not do it? That way, vm_fault_lock_vnode() can be simplified as well, and you don't have to modify every caller. markj: I suggested to put the objlock state in the faultstate structure and handle it without passing… | |||||
Done Inline ActionsThe patch as implemented confines rlock to the loop, without affecting other code. Most notably everything outside of it will only ever see a write lock, just like without the patch. Moving the lock state to the faultstate struct would require patching *all* places which unlock and it's just a lot of churn for no benefit that I see. mjg: The patch as implemented confines rlock to the loop, without affecting other code. Most notably… | |||||
if (dead) | if (dead) | ||||
return (FAULT_PROTECTION_FAILURE); | return (FAULT_PROTECTION_FAILURE); | ||||
pause("vmf_de", 1); | pause("vmf_de", 1); | ||||
return (FAULT_RESTART); | return (FAULT_RESTART); | ||||
} | } | ||||
/* | /* | ||||
* See if the page is resident. | * See if the page is resident. | ||||
*/ | */ | ||||
fs->m = vm_page_lookup(fs->object, fs->pindex); | fs->m = vm_page_lookup(fs->object, fs->pindex); | ||||
if (fs->m != NULL) { | if (fs->m != NULL) { | ||||
if (!vm_page_tryxbusy(fs->m)) { | if (!vm_page_tryxbusy(fs->m)) { | ||||
vm_fault_busy_sleep(fs); | vm_fault_busy_sleep(fs, *objlocked); | ||||
return (FAULT_RESTART); | return (FAULT_RESTART); | ||||
} | } | ||||
/* | /* | ||||
* The page is marked busy for other processes and the | * The page is marked busy for other processes and the | ||||
* pagedaemon. If it is still completely valid we are | * pagedaemon. If it is still completely valid we are | ||||
* done. | * done. | ||||
*/ | */ | ||||
if (vm_page_all_valid(fs->m)) { | if (vm_page_all_valid(fs->m)) { | ||||
VM_OBJECT_WUNLOCK(fs->object); | vm_fault_object_unlock(fs, *objlocked); | ||||
return (FAULT_SOFT); | return (FAULT_SOFT); | ||||
} | } | ||||
} | } | ||||
if (!vm_fault_object_upgrade_cond(fs, objlocked)) | |||||
goto again; | |||||
VM_OBJECT_ASSERT_WLOCKED(fs->object); | VM_OBJECT_ASSERT_WLOCKED(fs->object); | ||||
/* | /* | ||||
* Page is not resident. If the pager might contain the page | * Page is not resident. If the pager might contain the page | ||||
* or this is the beginning of the search, allocate a new | * or this is the beginning of the search, allocate a new | ||||
* page. | * page. | ||||
*/ | */ | ||||
if (fs->m == NULL && (fault_object_needs_getpages(fs->object) || | if (fs->m == NULL && (fault_object_needs_getpages(fs->object) || | ||||
Show All 30 Lines | again: | ||||
return (res); | return (res); | ||||
} | } | ||||
int | int | ||||
vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, | vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, | ||||
int fault_flags, vm_page_t *m_hold) | int fault_flags, vm_page_t *m_hold) | ||||
{ | { | ||||
struct faultstate fs; | struct faultstate fs; | ||||
int ahead, behind, faultcount, rv; | int ahead, behind, faultcount, rv; | ||||
Not Done Inline ActionsWhy did you omitted the checks for dead object flag and state? kib: Why did you omitted the checks for dead object flag and state? | |||||
enum fault_status res; | enum fault_status res; | ||||
enum object_locked objlocked; | |||||
bool hardfault; | bool hardfault; | ||||
VM_CNT_INC(v_vm_faults); | VM_CNT_INC(v_vm_faults); | ||||
if ((curthread->td_pflags & TDP_NOFAULTING) != 0) | if ((curthread->td_pflags & TDP_NOFAULTING) != 0) | ||||
return (KERN_PROTECTION_FAILURE); | return (KERN_PROTECTION_FAILURE); | ||||
fs.vp = NULL; | fs.vp = NULL; | ||||
fs.vaddr = vaddr; | fs.vaddr = vaddr; | ||||
fs.m_hold = m_hold; | fs.m_hold = m_hold; | ||||
fs.fault_flags = fault_flags; | fs.fault_flags = fault_flags; | ||||
fs.map = map; | fs.map = map; | ||||
fs.lookup_still_valid = false; | fs.lookup_still_valid = false; | ||||
fs.oom_started = false; | fs.oom_started = false; | ||||
fs.nera = -1; | fs.nera = -1; | ||||
Done Inline ActionsI don't expect this to ever be a real problem -- the page has to go invalid in-between the initial check and xbusy, which I strongly suspect will be rare. leaving the code like this avoids modifying vm_fault_object. mjg: I don't expect this to ever be a real problem -- the page has to go invalid in-between the… | |||||
faultcount = 0; | faultcount = 0; | ||||
hardfault = false; | hardfault = false; | ||||
RetryFault: | RetryFault: | ||||
fs.fault_type = fault_type; | fs.fault_type = fault_type; | ||||
/* | /* | ||||
* Find the backing store object and offset into it to begin the | * Find the backing store object and offset into it to begin the | ||||
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | case FAULT_OUT_OF_BOUNDS: | ||||
return (KERN_OUT_OF_BOUNDS); | return (KERN_OUT_OF_BOUNDS); | ||||
case FAULT_CONTINUE: | case FAULT_CONTINUE: | ||||
break; | break; | ||||
default: | default: | ||||
panic("vm_fault: Unhandled status %d", res); | panic("vm_fault: Unhandled status %d", res); | ||||
} | } | ||||
} | } | ||||
objlocked = WLOCK; | |||||
while (TRUE) { | while (TRUE) { | ||||
KASSERT(fs.m == NULL, | KASSERT(fs.m == NULL, | ||||
("page still set %p at loop start", fs.m)); | ("page still set %p at loop start", fs.m)); | ||||
res = vm_fault_object(&fs, &behind, &ahead); | res = vm_fault_object(&fs, &behind, &ahead, &objlocked); | ||||
switch (res) { | switch (res) { | ||||
case FAULT_SOFT: | case FAULT_SOFT: | ||||
goto found; | goto found; | ||||
case FAULT_HARD: | case FAULT_HARD: | ||||
faultcount = behind + 1 + ahead; | faultcount = behind + 1 + ahead; | ||||
hardfault = true; | hardfault = true; | ||||
goto found; | goto found; | ||||
case FAULT_RESTART: | case FAULT_RESTART: | ||||
goto RetryFault; | goto RetryFault; | ||||
case FAULT_SUCCESS: | case FAULT_SUCCESS: | ||||
return (KERN_SUCCESS); | return (KERN_SUCCESS); | ||||
case FAULT_FAILURE: | case FAULT_FAILURE: | ||||
return (KERN_FAILURE); | return (KERN_FAILURE); | ||||
case FAULT_OUT_OF_BOUNDS: | case FAULT_OUT_OF_BOUNDS: | ||||
Not Done Inline ActionsIf the lock upgrade fails, then we just retry the lookup. Why not keep vm_fault_object() intact and retry the lookup within that function? markj: If the lock upgrade fails, then we just retry the lookup. Why not keep vm_fault_object() intact… | |||||
return (KERN_OUT_OF_BOUNDS); | return (KERN_OUT_OF_BOUNDS); | ||||
case FAULT_PROTECTION_FAILURE: | case FAULT_PROTECTION_FAILURE: | ||||
return (KERN_PROTECTION_FAILURE); | return (KERN_PROTECTION_FAILURE); | ||||
case FAULT_CONTINUE: | case FAULT_CONTINUE: | ||||
break; | break; | ||||
default: | default: | ||||
panic("vm_fault: Unhandled status %d", res); | panic("vm_fault: Unhandled status %d", res); | ||||
Not Done Inline ActionsThis should be a separate commit. kib: This should be a separate commit. | |||||
} | } | ||||
/* | /* | ||||
* The page was not found in the current object. Try to | * The page was not found in the current object. Try to | ||||
* traverse into a backing object or zero fill if none is | * traverse into a backing object or zero fill if none is | ||||
* found. | * found. | ||||
*/ | */ | ||||
if (vm_fault_next(&fs)) | if (vm_fault_next(&fs, &objlocked)) | ||||
continue; | continue; | ||||
if ((fs.fault_flags & VM_FAULT_NOFILL) != 0) { | if ((fs.fault_flags & VM_FAULT_NOFILL) != 0) { | ||||
if (fs.first_object == fs.object) | if (fs.first_object == fs.object) | ||||
fault_page_free(&fs.first_m); | fault_page_free(&fs.first_m); | ||||
unlock_and_deallocate(&fs); | vm_fault_object_unlock(&fs, objlocked); | ||||
fault_deallocate(&fs); | |||||
return (KERN_OUT_OF_BOUNDS); | return (KERN_OUT_OF_BOUNDS); | ||||
} | } | ||||
VM_OBJECT_WUNLOCK(fs.object); | vm_fault_object_unlock(&fs, objlocked); | ||||
vm_fault_zerofill(&fs); | vm_fault_zerofill(&fs); | ||||
/* Don't try to prefault neighboring pages. */ | /* Don't try to prefault neighboring pages. */ | ||||
faultcount = 1; | faultcount = 1; | ||||
break; | break; | ||||
} | } | ||||
found: | found: | ||||
/* | /* | ||||
* A valid page has been found and exclusively busied. The | * A valid page has been found and exclusively busied. The | ||||
* object lock must no longer be held. | * object lock must no longer be held. | ||||
*/ | */ | ||||
vm_page_assert_xbusied(fs.m); | vm_page_assert_xbusied(fs.m); | ||||
Not Done Inline ActionsIs this change of the comment style needed? kib: Is this change of the comment style needed? | |||||
Done Inline Actionsno, but it matches other comments in the file and since I'm moving the code around I don't see why not mjg: no, but it matches other comments in the file and since I'm moving the code around I don't see… | |||||
VM_OBJECT_ASSERT_UNLOCKED(fs.object); | VM_OBJECT_ASSERT_UNLOCKED(fs.object); | ||||
/* | /* | ||||
* If the page is being written, but isn't already owned by the | * If the page is being written, but isn't already owned by the | ||||
* top-level object, we have to copy it into a new page owned by the | * top-level object, we have to copy it into a new page owned by the | ||||
* top-level object. | * top-level object. | ||||
*/ | */ | ||||
if (fs.object != fs.first_object) { | if (fs.object != fs.first_object) { | ||||
▲ Show 20 Lines • Show All 574 Lines • Show Last 20 Lines |
The lock state should go into faultstate.