Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -1047,11 +1047,74 @@
 	curthread->td_cow++;
 }
 
+enum object_locked { UNLOCKED, RLOCK, WLOCK };
+
+static void
+vm_fault_object_unlock(struct faultstate *fs, enum object_locked objlocked)
+{
+
+	switch (objlocked) {
+	case RLOCK:
+		VM_OBJECT_RUNLOCK(fs->object);
+		return;
+	case WLOCK:
+		VM_OBJECT_WUNLOCK(fs->object);
+		return;
+	case UNLOCKED:
+		panic("object %p already unlocked", fs->object);
+	}
+
+	__assert_unreachable();
+}
+
+static bool
+vm_fault_object_upgrade_cond(struct faultstate *fs, enum object_locked *objlocked)
+{
+
+	switch (*objlocked) {
+	case WLOCK:
+		VM_OBJECT_ASSERT_WLOCKED(fs->object);
+		return (true);
+	case RLOCK:
+		*objlocked = WLOCK;
+		if (VM_OBJECT_TRYUPGRADE(fs->object))
+			return (true);
+
+		VM_OBJECT_RUNLOCK(fs->object);
+		VM_OBJECT_WLOCK(fs->object);
+		return (false);
+	case UNLOCKED:
+		panic("object %p not locked", fs->object);
+	}
+
+	__assert_unreachable();
+}
+
+static void
+vm_fault_object_lock_assert(struct faultstate *fs, enum object_locked objlocked)
+{
+
+	switch (objlocked) {
+	case WLOCK:
+		VM_OBJECT_ASSERT_WLOCKED(fs->object);
+		return;
+	case RLOCK:
+		VM_OBJECT_ASSERT_RLOCKED(fs->object);
+		return;
+	case UNLOCKED:
+		panic("object %p not locked", fs->object);
+	}
+
+	__assert_unreachable();
+}
+
 static bool
-vm_fault_next(struct faultstate *fs)
+vm_fault_next(struct faultstate *fs, enum object_locked *objlocked)
 {
 	vm_object_t next_object;
 
+	vm_fault_object_lock_assert(fs, *objlocked);
+
 	/*
 	 * The requested page does not exist at this object/
 	 * offset.  Remove the invalid page from the object,
@@ -1072,19 +1135,19 @@
 	 * Move on to the next object.  Lock the next object before
 	 * unlocking the current one.
 	 */
-	VM_OBJECT_ASSERT_WLOCKED(fs->object);
 	next_object = fs->object->backing_object;
 	if (next_object == NULL)
 		return (false);
 	MPASS(fs->first_m != NULL);
 	KASSERT(fs->object != next_object,
 	    ("object loop %p", next_object));
-	VM_OBJECT_WLOCK(next_object);
+	VM_OBJECT_RLOCK(next_object);
 	vm_object_pip_add(next_object, 1);
 	if (fs->object != fs->first_object)
 		vm_object_pip_wakeup(fs->object);
 	fs->pindex += OFF_TO_IDX(fs->object->backing_object_offset);
-	VM_OBJECT_WUNLOCK(fs->object);
+	vm_fault_object_unlock(fs, *objlocked);
 	fs->object = next_object;
+	*objlocked = RLOCK;
 	return (true);
 }
 
@@ -1348,7 +1411,7 @@
  * page except, perhaps, to pmap it.
  */
 static void
-vm_fault_busy_sleep(struct faultstate *fs)
+vm_fault_busy_sleep(struct faultstate *fs, enum object_locked objlocked)
 {
 	/*
 	 * Reference the page before unlocking and
@@ -1364,7 +1427,7 @@
 	unlock_map(fs);
 	if (fs->m != vm_page_lookup(fs->object, fs->pindex) ||
 	    !vm_page_busy_sleep(fs->m, "vmpfw", 0))
-		VM_OBJECT_WUNLOCK(fs->object);
+		vm_fault_object_unlock(fs, objlocked);
 	VM_CNT_INC(v_intrans);
 	vm_object_deallocate(fs->first_object);
 }
@@ -1378,19 +1441,24 @@
  * Otherwise, the object will be unlocked upon return.
  */
 static enum fault_status
-vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp)
+vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp,
+    enum object_locked *objlocked)
 {
 	enum fault_status res;
 	bool dead;
 
+	VM_OBJECT_ASSERT_LOCKED(fs->object);
+
+again:
 	/*
 	 * If the object is marked for imminent termination, we retry
 	 * here, since the collapse pass has raced with us.  Otherwise,
 	 * if we see terminally dead object, return fail.
 	 */
-	if ((fs->object->flags & OBJ_DEAD) != 0) {
+	if (__predict_false((fs->object->flags & OBJ_DEAD) != 0)) {
 		dead = fs->object->type == OBJT_DEAD;
-		unlock_and_deallocate(fs);
+		vm_fault_object_unlock(fs, *objlocked);
+		fault_deallocate(fs);
 		if (dead)
 			return (FAULT_PROTECTION_FAILURE);
 		pause("vmf_de", 1);
@@ -1403,7 +1471,7 @@
 	fs->m = vm_page_lookup(fs->object, fs->pindex);
 	if (fs->m != NULL) {
 		if (!vm_page_tryxbusy(fs->m)) {
-			vm_fault_busy_sleep(fs);
+			vm_fault_busy_sleep(fs, *objlocked);
 			return (FAULT_RESTART);
 		}
 
@@ -1413,10 +1481,14 @@
 		 * done.
 		 */
 		if (vm_page_all_valid(fs->m)) {
-			VM_OBJECT_WUNLOCK(fs->object);
+			vm_fault_object_unlock(fs, *objlocked);
 			return (FAULT_SOFT);
 		}
 	}
+
+	if (!vm_fault_object_upgrade_cond(fs, objlocked))
+		goto again;
+
 	VM_OBJECT_ASSERT_WLOCKED(fs->object);
 
 	/*
@@ -1465,6 +1537,7 @@
 	struct faultstate fs;
 	int ahead, behind, faultcount, rv;
 	enum fault_status res;
+	enum object_locked objlocked;
 	bool hardfault;
 
 	VM_CNT_INC(v_vm_faults);
@@ -1557,11 +1630,12 @@
 		}
 	}
 
+	objlocked = WLOCK;
 	while (TRUE) {
 		KASSERT(fs.m == NULL,
 		    ("page still set %p at loop start", fs.m));
 
-		res = vm_fault_object(&fs, &behind, &ahead);
+		res = vm_fault_object(&fs, &behind, &ahead, &objlocked);
 		switch (res) {
 		case FAULT_SOFT:
 			goto found;
@@ -1590,15 +1664,16 @@
 		 * traverse into a backing object or zero fill if none is
 		 * found.
 		 */
-		if (vm_fault_next(&fs))
+		if (vm_fault_next(&fs, &objlocked))
 			continue;
 		if ((fs.fault_flags & VM_FAULT_NOFILL) != 0) {
 			if (fs.first_object == fs.object)
 				fault_page_free(&fs.first_m);
-			unlock_and_deallocate(&fs);
+			vm_fault_object_unlock(&fs, objlocked);
+			fault_deallocate(&fs);
 			return (KERN_OUT_OF_BOUNDS);
 		}
-		VM_OBJECT_WUNLOCK(fs.object);
+		vm_fault_object_unlock(&fs, objlocked);
 		vm_fault_zerofill(&fs);
 		/* Don't try to prefault neighboring pages. */
 		faultcount = 1;
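
A note for reviewers on the locking discipline above: vm_fault_object_upgrade_cond()
tries VM_OBJECT_TRYUPGRADE() and, on failure, drops the read lock and takes the write
lock from scratch, returning false so that vm_fault_object() revalidates via "goto
again", since the object may have changed in the unlocked window. Below is a minimal
userspace sketch of the same upgrade-or-retry pattern, using POSIX rwlocks (which have
no try-upgrade primitive, so the drop-and-reacquire slow path is always taken, and a
generation counter stands in for the page re-lookup). Every name in the sketch
(struct obj, obj_access, gen, needs_fixup) is hypothetical and not part of this patch.

#include <pthread.h>

/* Hypothetical object protected by a reader-writer lock. */
struct obj {
	pthread_rwlock_t lock;
	unsigned long	 gen;		/* bumped under the write lock */
	int		 needs_fixup;	/* state that forces the slow path */
};

static void
obj_access(struct obj *o)
{
	unsigned long gen;
	int wlocked = 0;

	pthread_rwlock_rdlock(&o->lock);
again:
	if (!o->needs_fixup) {
		/* Fast path: the read (or already-upgraded) lock sufficed. */
		pthread_rwlock_unlock(&o->lock);
		return;
	}
	if (!wlocked) {
		/*
		 * "Upgrade": POSIX rwlocks cannot upgrade in place, so
		 * drop the read lock and reacquire for writing.  Other
		 * threads may run in the window, hence the revalidation
		 * below, mirroring the "return (false)" path of
		 * vm_fault_object_upgrade_cond().
		 */
		gen = o->gen;
		pthread_rwlock_unlock(&o->lock);
		pthread_rwlock_wrlock(&o->lock);
		wlocked = 1;
		if (o->gen != gen)
			goto again;	/* object changed while unlocked */
	}
	o->needs_fixup = 0;		/* the mutation itself */
	o->gen++;
	pthread_rwlock_unlock(&o->lock);
}

The design point the sketch makes is the same one the patch relies on: any path that
loses the lock, even momentarily, must treat its prior lookups as stale and redo them,
which is why vm_fault_object() restarts at "again:" rather than continuing.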