Changeset View
Standalone View
sys/vm/vm_fault.c
Show First 20 Lines • Show All 1,041 Lines • ▼ Show 20 Lines | #endif | ||||
*/ | */ | ||||
fs->object = fs->first_object; | fs->object = fs->first_object; | ||||
fs->pindex = fs->first_pindex; | fs->pindex = fs->first_pindex; | ||||
fs->m = fs->first_m; | fs->m = fs->first_m; | ||||
VM_CNT_INC(v_cow_faults); | VM_CNT_INC(v_cow_faults); | ||||
curthread->td_cow++; | curthread->td_cow++; | ||||
} | } | ||||
enum object_locked { UNLOCKED, RLOCK, WLOCK }; | |||||
static void | |||||
vm_fault_object_unlock(struct faultstate *fs, enum object_locked objlocked) | |||||
markj: The lock state should go into faultstate. | |||||
{ | |||||
switch (objlocked) { | |||||
case RLOCK: | |||||
VM_OBJECT_RUNLOCK(fs->object); | |||||
return; | |||||
case WLOCK: | |||||
VM_OBJECT_WUNLOCK(fs->object); | |||||
return; | |||||
case UNLOCKED: | |||||
panic("object %p already unlocked", fs->object); | |||||
} | |||||
__assert_unreachable(); | |||||
} | |||||
static bool | static bool | ||||
vm_fault_next(struct faultstate *fs) | vm_fault_object_upgrade_cond(struct faultstate *fs, enum object_locked *objlocked) | ||||
{ | { | ||||
switch (*objlocked) { | |||||
case WLOCK: | |||||
VM_OBJECT_ASSERT_WLOCKED(fs->object); | |||||
return (true); | |||||
case RLOCK: | |||||
*objlocked = WLOCK; | |||||
if (VM_OBJECT_TRYUPGRADE(fs->object)) | |||||
return (true); | |||||
VM_OBJECT_RUNLOCK(fs->object); | |||||
VM_OBJECT_WLOCK(fs->object); | |||||
return (false); | |||||
case UNLOCKED: | |||||
panic("object %p not locked", fs->object); | |||||
} | |||||
__assert_unreachable(); | |||||
} | |||||
static void | |||||
vm_fault_object_lock_assert(struct faultstate *fs, enum object_locked objlocked) | |||||
{ | |||||
switch (objlocked) { | |||||
case WLOCK: | |||||
VM_OBJECT_ASSERT_WLOCKED(fs->object); | |||||
return; | |||||
case RLOCK: | |||||
VM_OBJECT_ASSERT_RLOCKED(fs->object); | |||||
return; | |||||
case UNLOCKED: | |||||
panic("object %p not locked", fs->object); | |||||
} | |||||
__assert_unreachable(); | |||||
} | |||||
static bool | |||||
vm_fault_next(struct faultstate *fs, enum object_locked *objlocked) | |||||
{ | |||||
vm_object_t next_object; | vm_object_t next_object; | ||||
vm_fault_object_lock_assert(fs, *objlocked); | |||||
Done Inline Actionsi moved this and the same assert in vm_fault_object up, i dn't understand why they were pushed below. this can be a separate commit. mjg: i moved this and the same assert in vm_fault_object up, i dn't understand why they were pushed… | |||||
/* | /* | ||||
* The requested page does not exist at this object/ | * The requested page does not exist at this object/ | ||||
* offset. Remove the invalid page from the object, | * offset. Remove the invalid page from the object, | ||||
* waking up anyone waiting for it, and continue on to | * waking up anyone waiting for it, and continue on to | ||||
* the next object. However, if this is the top-level | * the next object. However, if this is the top-level | ||||
* object, we must leave the busy page in place to | * object, we must leave the busy page in place to | ||||
* prevent another process from rushing past us, and | * prevent another process from rushing past us, and | ||||
* inserting the page in that object at the same time | * inserting the page in that object at the same time | ||||
* that we are. | * that we are. | ||||
*/ | */ | ||||
if (fs->object == fs->first_object) { | if (fs->object == fs->first_object) { | ||||
fs->first_m = fs->m; | fs->first_m = fs->m; | ||||
fs->m = NULL; | fs->m = NULL; | ||||
} else | } else | ||||
fault_page_free(&fs->m); | fault_page_free(&fs->m); | ||||
/* | /* | ||||
* Move on to the next object. Lock the next object before | * Move on to the next object. Lock the next object before | ||||
* unlocking the current one. | * unlocking the current one. | ||||
*/ | */ | ||||
VM_OBJECT_ASSERT_WLOCKED(fs->object); | |||||
next_object = fs->object->backing_object; | next_object = fs->object->backing_object; | ||||
if (next_object == NULL) | if (next_object == NULL) | ||||
return (false); | return (false); | ||||
MPASS(fs->first_m != NULL); | MPASS(fs->first_m != NULL); | ||||
KASSERT(fs->object != next_object, ("object loop %p", next_object)); | KASSERT(fs->object != next_object, ("object loop %p", next_object)); | ||||
VM_OBJECT_WLOCK(next_object); | VM_OBJECT_RLOCK(next_object); | ||||
Not Done Inline ActionsYou can do lockless lookup for the page (and may be checking its validity), only to select rlock vs. wlock there, probably eliminating almost all lock upgrades. kib: You can do lockless lookup for the page (and may be checking its validity), only to select… | |||||
Done Inline Actionswhat is rlock for in this case if the page was found and validated locklessly? how about something of this sort, with failing case wlocking the object static enum fault_status vm_fault_object_unlocked(struct faultstate *fs) { VM_OBJECT_ASSERT_UNLOCKED(fs->object); fs->m = vm_page_lookup_unlocked(fs->object, fs->pindex); if (fs->m == NULL) return (FAULT_CONTINUE); if (!vm_page_all_valid(fs->m)) return (FAULT_CONTINUE); if (!vm_page_tryxbusy(fs->m)) { vm_fault_busy_sleep_unlocked(fs); return (FAULT_RESTART); } if (fs->m->object != fs->object || fs->m->pindex != fs->pindex || !vm_page_all_valid(fs->m)) { vm_page_xunbusy(fs->m); fs->m = NULL; return (FAULT_CONTINUE); } /* * Finally make sure the object is still alive. */ if ((fs->object->flags & OBJ_DEAD) != 0) { vm_page_xunbusy(fs->m); fs->m = NULL; return (FAULT_CONTINUE); } return (FAULT_SOFT); } mjg: what is rlock for in this case if the page was found and validated locklessly?
how about… | |||||
vm_object_pip_add(next_object, 1); | vm_object_pip_add(next_object, 1); | ||||
if (fs->object != fs->first_object) | if (fs->object != fs->first_object) | ||||
vm_object_pip_wakeup(fs->object); | vm_object_pip_wakeup(fs->object); | ||||
fs->pindex += OFF_TO_IDX(fs->object->backing_object_offset); | fs->pindex += OFF_TO_IDX(fs->object->backing_object_offset); | ||||
VM_OBJECT_WUNLOCK(fs->object); | vm_fault_object_unlock(fs, *objlocked); | ||||
fs->object = next_object; | fs->object = next_object; | ||||
*objlocked = RLOCK; | |||||
return (true); | return (true); | ||||
} | } | ||||
static void | static void | ||||
vm_fault_zerofill(struct faultstate *fs) | vm_fault_zerofill(struct faultstate *fs) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 247 Lines • ▼ Show 20 Lines | |||||
* | * | ||||
* We can theoretically allow the busy case on a read fault if the page | * We can theoretically allow the busy case on a read fault if the page | ||||
* is marked valid, but since such pages are typically already pmap'd, | * is marked valid, but since such pages are typically already pmap'd, | ||||
 * putting that special case in might be more effort than it is worth. | * putting that special case in might be more effort than it is worth. | ||||
* We cannot under any circumstances mess around with a shared busied | * We cannot under any circumstances mess around with a shared busied | ||||
* page except, perhaps, to pmap it. | * page except, perhaps, to pmap it. | ||||
*/ | */ | ||||
static void | static void | ||||
vm_fault_busy_sleep(struct faultstate *fs) | vm_fault_busy_sleep(struct faultstate *fs, enum object_locked objlocked) | ||||
{ | { | ||||
/* | /* | ||||
* Reference the page before unlocking and | * Reference the page before unlocking and | ||||
* sleeping so that the page daemon is less | * sleeping so that the page daemon is less | ||||
* likely to reclaim it. | * likely to reclaim it. | ||||
*/ | */ | ||||
vm_page_aflag_set(fs->m, PGA_REFERENCED); | vm_page_aflag_set(fs->m, PGA_REFERENCED); | ||||
if (fs->object != fs->first_object) { | if (fs->object != fs->first_object) { | ||||
fault_page_release(&fs->first_m); | fault_page_release(&fs->first_m); | ||||
vm_object_pip_wakeup(fs->first_object); | vm_object_pip_wakeup(fs->first_object); | ||||
} | } | ||||
vm_object_pip_wakeup(fs->object); | vm_object_pip_wakeup(fs->object); | ||||
unlock_map(fs); | unlock_map(fs); | ||||
if (fs->m != vm_page_lookup(fs->object, fs->pindex) || | if (fs->m != vm_page_lookup(fs->object, fs->pindex) || | ||||
!vm_page_busy_sleep(fs->m, "vmpfw", 0)) | !vm_page_busy_sleep(fs->m, "vmpfw", 0)) | ||||
VM_OBJECT_WUNLOCK(fs->object); | vm_fault_object_unlock(fs, objlocked); | ||||
VM_CNT_INC(v_intrans); | VM_CNT_INC(v_intrans); | ||||
vm_object_deallocate(fs->first_object); | vm_object_deallocate(fs->first_object); | ||||
} | } | ||||
Not Done Inline ActionsIs it time to finally have VM_OBJECT_UNLOCK() ? kib: Is it time to finally have VM_OBJECT_UNLOCK() ? | |||||
Not Done Inline ActionsWe already have VM_OBJECT_DROP(), BTW. I think VM_OBJECT_UNLOCK() makes more sense as a name though. markj: We already have VM_OBJECT_DROP(), BTW. I think VM_OBJECT_UNLOCK() makes more sense as a name… | |||||
Not Done Inline Actions_DROP() calls the lock class method, which at least gives more overhead than plain rw_unlock(). It also intended to be used in pair with _PICKUP. kib: _DROP() calls the lock class method, which at least gives more overhead than plain rw_unlock(). | |||||
Done Inline ActionsThis would make things slower multi-threaded. write unlock does not pre-read the lock and adding that would require an extra transaction. read unlock happens to read upfront, but that's only a current artifact of the implementation mjg: This would make things slower multi-threaded. write unlock does not pre-read the lock and… | |||||
/* | /* | ||||
* Handle page lookup, populate, allocate, page-in for the current | * Handle page lookup, populate, allocate, page-in for the current | ||||
* object. | * object. | ||||
* | * | ||||
* The object is locked on entry and will remain locked with a return | * The object is locked on entry and will remain locked with a return | ||||
* code of FAULT_CONTINUE so that fault may follow the shadow chain. | * code of FAULT_CONTINUE so that fault may follow the shadow chain. | ||||
* Otherwise, the object will be unlocked upon return. | * Otherwise, the object will be unlocked upon return. | ||||
*/ | */ | ||||
static enum fault_status | static enum fault_status | ||||
Done Inline ActionsI don't think this bit is pretty, but given unlocked operation later I suspect this will require a dedicated variant anyway. mjg: I don't think this bit is pretty, but given unlocked operation later I suspect this will… | |||||
vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp) | vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp, | ||||
enum object_locked *objlocked) | |||||
{ | { | ||||
enum fault_status res; | enum fault_status res; | ||||
bool dead; | bool dead; | ||||
VM_OBJECT_ASSERT_LOCKED(fs->object); | |||||
again: | |||||
/* | /* | ||||
* If the object is marked for imminent termination, we retry | * If the object is marked for imminent termination, we retry | ||||
* here, since the collapse pass has raced with us. Otherwise, | * here, since the collapse pass has raced with us. Otherwise, | ||||
* if we see terminally dead object, return fail. | * if we see terminally dead object, return fail. | ||||
*/ | */ | ||||
if ((fs->object->flags & OBJ_DEAD) != 0) { | if (__predict_false((fs->object->flags & OBJ_DEAD) != 0)) { | ||||
dead = fs->object->type == OBJT_DEAD; | dead = fs->object->type == OBJT_DEAD; | ||||
unlock_and_deallocate(fs); | vm_fault_object_unlock(fs, *objlocked); | ||||
fault_deallocate(fs); | |||||
Not Done Inline ActionsIt's better to handle this in unlock_and_deallocate(). markj: It's better to handle this in unlock_and_deallocate(). | |||||
Done Inline Actionsthis would require patching all other callers of the routine, or reworking it internally to wrap a variant which takes an objlocked argument. i explicitly did not do that to avoid extra changes in the area given the entire thing will need to be refactored down the road. mjg: this would require patching all other callers of the routine, or reworking it internally to… | |||||
Not Done Inline ActionsI suggested to put the objlock state in the faultstate structure and handle it without passing around an extra parameter. Why not do it? That way, vm_fault_lock_vnode() can be simplified as well, and you don't have to modify every caller. markj: I suggested to put the objlock state in the faultstate structure and handle it without passing… | |||||
Done Inline ActionsThe patch as implemented confines rlock to the loop, without affecting other code. Most notably everything outside of it will only ever see a write lock, just like without the patch. Moving the lock state to the faultstate struct would require patching *all* places which unlock and it's just a lot of churn for no benefit that I see. mjg: The patch as implemented confines rlock to the loop, without affecting other code. Most notably… | |||||
if (dead) | if (dead) | ||||
return (FAULT_PROTECTION_FAILURE); | return (FAULT_PROTECTION_FAILURE); | ||||
pause("vmf_de", 1); | pause("vmf_de", 1); | ||||
return (FAULT_RESTART); | return (FAULT_RESTART); | ||||
} | } | ||||
/* | /* | ||||
* See if the page is resident. | * See if the page is resident. | ||||
*/ | */ | ||||
fs->m = vm_page_lookup(fs->object, fs->pindex); | fs->m = vm_page_lookup(fs->object, fs->pindex); | ||||
if (fs->m != NULL) { | if (fs->m != NULL) { | ||||
if (!vm_page_tryxbusy(fs->m)) { | if (!vm_page_tryxbusy(fs->m)) { | ||||
vm_fault_busy_sleep(fs); | vm_fault_busy_sleep(fs, *objlocked); | ||||
return (FAULT_RESTART); | return (FAULT_RESTART); | ||||
} | } | ||||
/* | /* | ||||
* The page is marked busy for other processes and the | * The page is marked busy for other processes and the | ||||
* pagedaemon. If it is still completely valid we are | * pagedaemon. If it is still completely valid we are | ||||
* done. | * done. | ||||
*/ | */ | ||||
if (vm_page_all_valid(fs->m)) { | if (vm_page_all_valid(fs->m)) { | ||||
VM_OBJECT_WUNLOCK(fs->object); | vm_fault_object_unlock(fs, *objlocked); | ||||
return (FAULT_SOFT); | return (FAULT_SOFT); | ||||
} | } | ||||
} | } | ||||
if (!vm_fault_object_upgrade_cond(fs, objlocked)) | |||||
goto again; | |||||
VM_OBJECT_ASSERT_WLOCKED(fs->object); | VM_OBJECT_ASSERT_WLOCKED(fs->object); | ||||
/* | /* | ||||
* Page is not resident. If the pager might contain the page | * Page is not resident. If the pager might contain the page | ||||
* or this is the beginning of the search, allocate a new | * or this is the beginning of the search, allocate a new | ||||
* page. | * page. | ||||
*/ | */ | ||||
if (fs->m == NULL && (fault_object_needs_getpages(fs->object) || | if (fs->m == NULL && (fault_object_needs_getpages(fs->object) || | ||||
Show All 30 Lines | again: | ||||
return (res); | return (res); | ||||
} | } | ||||
int | int | ||||
vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, | vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, | ||||
int fault_flags, vm_page_t *m_hold) | int fault_flags, vm_page_t *m_hold) | ||||
{ | { | ||||
struct faultstate fs; | struct faultstate fs; | ||||
int ahead, behind, faultcount, rv; | int ahead, behind, faultcount, rv; | ||||
Not Done Inline ActionsWhy did you omitted the checks for dead object flag and state? kib: Why did you omitted the checks for dead object flag and state? | |||||
enum fault_status res; | enum fault_status res; | ||||
enum object_locked objlocked; | |||||
bool hardfault; | bool hardfault; | ||||
VM_CNT_INC(v_vm_faults); | VM_CNT_INC(v_vm_faults); | ||||
if ((curthread->td_pflags & TDP_NOFAULTING) != 0) | if ((curthread->td_pflags & TDP_NOFAULTING) != 0) | ||||
return (KERN_PROTECTION_FAILURE); | return (KERN_PROTECTION_FAILURE); | ||||
fs.vp = NULL; | fs.vp = NULL; | ||||
fs.vaddr = vaddr; | fs.vaddr = vaddr; | ||||
fs.m_hold = m_hold; | fs.m_hold = m_hold; | ||||
fs.fault_flags = fault_flags; | fs.fault_flags = fault_flags; | ||||
fs.map = map; | fs.map = map; | ||||
fs.lookup_still_valid = false; | fs.lookup_still_valid = false; | ||||
fs.oom_started = false; | fs.oom_started = false; | ||||
fs.nera = -1; | fs.nera = -1; | ||||
Done Inline ActionsI don't expect this to ever be a real problem -- the page has to go invalid in-between the initial check and xbusy, which I strongly suspect will be rare. leaving the code like this avoids modifying vm_fault_object. mjg: I don't expect this to ever be a real problem -- the page has to go invalid in-between the… | |||||
faultcount = 0; | faultcount = 0; | ||||
hardfault = false; | hardfault = false; | ||||
RetryFault: | RetryFault: | ||||
fs.fault_type = fault_type; | fs.fault_type = fault_type; | ||||
/* | /* | ||||
* Find the backing store object and offset into it to begin the | * Find the backing store object and offset into it to begin the | ||||
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | case FAULT_OUT_OF_BOUNDS: | ||||
return (KERN_OUT_OF_BOUNDS); | return (KERN_OUT_OF_BOUNDS); | ||||
case FAULT_CONTINUE: | case FAULT_CONTINUE: | ||||
break; | break; | ||||
default: | default: | ||||
panic("vm_fault: Unhandled status %d", res); | panic("vm_fault: Unhandled status %d", res); | ||||
} | } | ||||
} | } | ||||
objlocked = WLOCK; | |||||
while (TRUE) { | while (TRUE) { | ||||
KASSERT(fs.m == NULL, | KASSERT(fs.m == NULL, | ||||
("page still set %p at loop start", fs.m)); | ("page still set %p at loop start", fs.m)); | ||||
res = vm_fault_object(&fs, &behind, &ahead); | res = vm_fault_object(&fs, &behind, &ahead, &objlocked); | ||||
switch (res) { | switch (res) { | ||||
case FAULT_SOFT: | case FAULT_SOFT: | ||||
goto found; | goto found; | ||||
case FAULT_HARD: | case FAULT_HARD: | ||||
faultcount = behind + 1 + ahead; | faultcount = behind + 1 + ahead; | ||||
hardfault = true; | hardfault = true; | ||||
goto found; | goto found; | ||||
case FAULT_RESTART: | case FAULT_RESTART: | ||||
goto RetryFault; | goto RetryFault; | ||||
case FAULT_SUCCESS: | case FAULT_SUCCESS: | ||||
return (KERN_SUCCESS); | return (KERN_SUCCESS); | ||||
case FAULT_FAILURE: | case FAULT_FAILURE: | ||||
return (KERN_FAILURE); | return (KERN_FAILURE); | ||||
case FAULT_OUT_OF_BOUNDS: | case FAULT_OUT_OF_BOUNDS: | ||||
Not Done Inline ActionsIf the lock upgrade fails, then we just retry the lookup. Why not keep vm_fault_object() intact and retry the lookup within that function? markj: If the lock upgrade fails, then we just retry the lookup. Why not keep vm_fault_object() intact… | |||||
return (KERN_OUT_OF_BOUNDS); | return (KERN_OUT_OF_BOUNDS); | ||||
case FAULT_PROTECTION_FAILURE: | case FAULT_PROTECTION_FAILURE: | ||||
return (KERN_PROTECTION_FAILURE); | return (KERN_PROTECTION_FAILURE); | ||||
case FAULT_CONTINUE: | case FAULT_CONTINUE: | ||||
break; | break; | ||||
default: | default: | ||||
panic("vm_fault: Unhandled status %d", res); | panic("vm_fault: Unhandled status %d", res); | ||||
Not Done Inline ActionsThis should be a separate commit. kib: This should be a separate commit. | |||||
} | } | ||||
/* | /* | ||||
* The page was not found in the current object. Try to | * The page was not found in the current object. Try to | ||||
* traverse into a backing object or zero fill if none is | * traverse into a backing object or zero fill if none is | ||||
* found. | * found. | ||||
*/ | */ | ||||
if (vm_fault_next(&fs)) | if (vm_fault_next(&fs, &objlocked)) | ||||
continue; | continue; | ||||
if ((fs.fault_flags & VM_FAULT_NOFILL) != 0) { | if ((fs.fault_flags & VM_FAULT_NOFILL) != 0) { | ||||
if (fs.first_object == fs.object) | if (fs.first_object == fs.object) | ||||
fault_page_free(&fs.first_m); | fault_page_free(&fs.first_m); | ||||
unlock_and_deallocate(&fs); | vm_fault_object_unlock(&fs, objlocked); | ||||
fault_deallocate(&fs); | |||||
return (KERN_OUT_OF_BOUNDS); | return (KERN_OUT_OF_BOUNDS); | ||||
} | } | ||||
VM_OBJECT_WUNLOCK(fs.object); | vm_fault_object_unlock(&fs, objlocked); | ||||
vm_fault_zerofill(&fs); | vm_fault_zerofill(&fs); | ||||
/* Don't try to prefault neighboring pages. */ | /* Don't try to prefault neighboring pages. */ | ||||
faultcount = 1; | faultcount = 1; | ||||
break; | break; | ||||
} | } | ||||
found: | found: | ||||
/* | /* | ||||
* A valid page has been found and exclusively busied. The | * A valid page has been found and exclusively busied. The | ||||
* object lock must no longer be held. | * object lock must no longer be held. | ||||
*/ | */ | ||||
vm_page_assert_xbusied(fs.m); | vm_page_assert_xbusied(fs.m); | ||||
Not Done Inline ActionsIs this change of the comment style needed? kib: Is this change of the comment style needed? | |||||
Done Inline Actionsno, but it matches other comments in the file and since I'm moving the code around I don't see why not mjg: no, but it matches other comments in the file and since I'm moving the code around I don't see… | |||||
VM_OBJECT_ASSERT_UNLOCKED(fs.object); | VM_OBJECT_ASSERT_UNLOCKED(fs.object); | ||||
/* | /* | ||||
* If the page is being written, but isn't already owned by the | * If the page is being written, but isn't already owned by the | ||||
* top-level object, we have to copy it into a new page owned by the | * top-level object, we have to copy it into a new page owned by the | ||||
* top-level object. | * top-level object. | ||||
*/ | */ | ||||
if (fs.object != fs.first_object) { | if (fs.object != fs.first_object) { | ||||
▲ Show 20 Lines • Show All 574 Lines • Show Last 20 Lines |
The lock state should go into faultstate.