diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -127,6 +127,7 @@
 	vm_prot_t	prot;
 	int		fault_flags;
 	boolean_t	wired;
+	bool		can_read_lock;
 
 	/* Control state. */
 	struct timeval	oom_start_time;
@@ -170,6 +171,12 @@
 	FAULT_PROTECTION_FAILURE, /* Invalid access. */
 };
 
+enum fault_next_status {
+	FAULT_NEXT_GOTOBJ = 1,
+	FAULT_NEXT_NOOBJ,
+	FAULT_NEXT_RESTART,
+};
+
 static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr,
 	    int ahead);
 static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
@@ -278,7 +285,7 @@
 
 static void
 unlock_and_deallocate(struct faultstate *fs)
 {
-	VM_OBJECT_WUNLOCK(fs->object);
+	VM_OBJECT_UNLOCK(fs->object);
 	fault_deallocate(fs);
 }
@@ -736,6 +743,26 @@
 	return (result);
 }
 
+static bool
+vm_fault_object_ensure_wlocked(struct faultstate *fs)
+{
+	if (fs->object == fs->first_object)
+		VM_OBJECT_ASSERT_WLOCKED(fs->object);
+
+	if (!fs->can_read_lock) {
+		VM_OBJECT_ASSERT_WLOCKED(fs->object);
+		return (true);
+	}
+
+	if (VM_OBJECT_WOWNED(fs->object))
+		return (true);
+
+	if (VM_OBJECT_TRYUPGRADE(fs->object))
+		return (true);
+
+	return (false);
+}
+
 static enum fault_status
 vm_fault_lock_vnode(struct faultstate *fs, bool objlocked)
 {
@@ -1042,11 +1069,16 @@
 	curthread->td_cow++;
 }
 
-static bool
+static enum fault_next_status
 vm_fault_next(struct faultstate *fs)
 {
 	vm_object_t next_object;
 
+	if (fs->object == fs->first_object || !fs->can_read_lock)
+		VM_OBJECT_ASSERT_WLOCKED(fs->object);
+	else
+		VM_OBJECT_ASSERT_LOCKED(fs->object);
+
 	/*
 	 * The requested page does not exist at this object/
 	 * offset.  Remove the invalid page from the object,
@@ -1060,28 +1092,36 @@
 	if (fs->object == fs->first_object) {
 		fs->first_m = fs->m;
 		fs->m = NULL;
-	} else
+	} else {
+		if (!vm_fault_object_ensure_wlocked(fs)) {
+			fs->can_read_lock = false;
+			unlock_and_deallocate(fs);
+			return (FAULT_NEXT_RESTART);
+		}
 		fault_page_free(&fs->m);
+	}
 
 	/*
 	 * Move on to the next object.  Lock the next object before
 	 * unlocking the current one.
	 */
-	VM_OBJECT_ASSERT_WLOCKED(fs->object);
 	next_object = fs->object->backing_object;
 	if (next_object == NULL)
-		return (false);
+		return (FAULT_NEXT_NOOBJ);
 	MPASS(fs->first_m != NULL);
 	KASSERT(fs->object != next_object,
 	    ("object loop %p", next_object));
-	VM_OBJECT_WLOCK(next_object);
+	if (fs->can_read_lock)
+		VM_OBJECT_RLOCK(next_object);
+	else
+		VM_OBJECT_WLOCK(next_object);
 	vm_object_pip_add(next_object, 1);
 	if (fs->object != fs->first_object)
 		vm_object_pip_wakeup(fs->object);
 	fs->pindex += OFF_TO_IDX(fs->object->backing_object_offset);
-	VM_OBJECT_WUNLOCK(fs->object);
+	VM_OBJECT_UNLOCK(fs->object);
 	fs->object = next_object;
-	return (true);
+	return (FAULT_NEXT_GOTOBJ);
 }
 
 static void
@@ -1363,7 +1403,7 @@
 	unlock_map(fs);
 	if (fs->m != vm_page_lookup(fs->object, fs->pindex) ||
 	    !vm_page_busy_sleep(fs->m, "vmpfw", 0))
-		VM_OBJECT_WUNLOCK(fs->object);
+		VM_OBJECT_UNLOCK(fs->object);
 	VM_CNT_INC(v_intrans);
 	vm_object_deallocate(fs->first_object);
 }
@@ -1382,6 +1422,11 @@
 	enum fault_status res;
 	bool dead;
 
+	if (fs->object == fs->first_object || !fs->can_read_lock)
+		VM_OBJECT_ASSERT_WLOCKED(fs->object);
+	else
+		VM_OBJECT_ASSERT_LOCKED(fs->object);
+
 	/*
 	 * If the object is marked for imminent termination, we retry
 	 * here, since the collapse pass has raced with us.  Otherwise,
@@ -1412,11 +1457,10 @@
 		 * done.
		 */
 		if (vm_page_all_valid(fs->m)) {
-			VM_OBJECT_WUNLOCK(fs->object);
+			VM_OBJECT_UNLOCK(fs->object);
 			return (FAULT_SOFT);
 		}
 	}
-	VM_OBJECT_ASSERT_WLOCKED(fs->object);
 
 	/*
 	 * Page is not resident.  If the pager might contain the page
@@ -1425,6 +1469,11 @@
	 */
 	if (fs->m == NULL && (fault_object_needs_getpages(fs->object) ||
 	    fs->object == fs->first_object)) {
+		if (!vm_fault_object_ensure_wlocked(fs)) {
+			fs->can_read_lock = false;
+			unlock_and_deallocate(fs);
+			return (FAULT_RESTART);
+		}
 		res = vm_fault_allocate(fs);
 		if (res != FAULT_CONTINUE)
 			return (res);
@@ -1447,7 +1496,7 @@
 	 * prevents simultaneous faults and collapses while
 	 * the object lock is dropped.
	 */
-	VM_OBJECT_WUNLOCK(fs->object);
+	VM_OBJECT_UNLOCK(fs->object);
 	res = vm_fault_getpages(fs, behindp, aheadp);
 	if (res == FAULT_CONTINUE)
 		VM_OBJECT_WLOCK(fs->object);
@@ -1464,6 +1513,7 @@
 	struct faultstate fs;
 	int ahead, behind, faultcount, rv;
 	enum fault_status res;
+	enum fault_next_status res_next;
 	bool hardfault;
 
 	VM_CNT_INC(v_vm_faults);
@@ -1479,6 +1529,7 @@
 	fs.lookup_still_valid = false;
 	fs.oom_started = false;
 	fs.nera = -1;
+	fs.can_read_lock = true;
 	faultcount = 0;
 	hardfault = false;
 
@@ -1589,15 +1640,19 @@
 		 * traverse into a backing object or zero fill if none is
 		 * found.
		 */
-		if (vm_fault_next(&fs))
+		res_next = vm_fault_next(&fs);
+		if (res_next == FAULT_NEXT_RESTART)
+			goto RetryFault;
+		else if (res_next == FAULT_NEXT_GOTOBJ)
 			continue;
+		MPASS(res_next == FAULT_NEXT_NOOBJ);
 		if ((fs.fault_flags & VM_FAULT_NOFILL) != 0) {
 			if (fs.first_object == fs.object)
 				fault_page_free(&fs.first_m);
 			unlock_and_deallocate(&fs);
 			return (KERN_OUT_OF_BOUNDS);
 		}
-		VM_OBJECT_WUNLOCK(fs.object);
+		VM_OBJECT_UNLOCK(fs.object);
 		vm_fault_zerofill(&fs);
 		/* Don't try to prefault neighboring pages. */
 		faultcount = 1;
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -267,6 +267,8 @@
 	rw_wowned(&(object)->lock)
 #define	VM_OBJECT_WUNLOCK(object)					\
 	rw_wunlock(&(object)->lock)
+#define	VM_OBJECT_UNLOCK(object)					\
+	rw_unlock(&(object)->lock)
 #define	VM_OBJECT_DROP(object)						\
 	lock_class_rw.lc_unlock(&(object)->lock.lock_object)
 #define	VM_OBJECT_PICKUP(object, state)					\
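Note, not part of the patch: VM_OBJECT_UNLOCK() expands to rw_unlock(9), which releases the lock in whichever mode the caller holds it, read or write; that is what lets the converted paths above run with the object only read-locked. The following is a minimal sketch of the optimistic upgrade-or-restart pattern the patch applies, written against the stock rwlock(9) KPI; all *_example names are hypothetical and exist only for this illustration.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>

struct state_example {
	struct rwlock	lock;
	int		value;
};

static void
state_init_example(struct state_example *st)
{
	rw_init(&st->lock, "state_example");
	st->value = 0;
}

/*
 * Analogue of vm_fault_object_ensure_wlocked(): returns true if the
 * caller holds (or just obtained) the write lock, false if the
 * read-to-write upgrade lost a race and the caller must restart.
 */
static bool
ensure_wlocked_example(struct state_example *st, bool can_read_lock)
{
	if (!can_read_lock) {
		rw_assert(&st->lock, RA_WLOCKED);
		return (true);
	}
	if (rw_wowned(&st->lock))
		return (true);
	/* The upgrade fails if another thread also holds the read lock. */
	return (rw_try_upgrade(&st->lock) != 0);
}

static void
update_example(struct state_example *st)
{
	bool can_read_lock = true;	/* per-operation, like fs.can_read_lock */

restart:
	if (can_read_lock)
		rw_rlock(&st->lock);
	else
		rw_wlock(&st->lock);

	/* ... read-mostly work under the shared lock ... */

	if (!ensure_wlocked_example(st, can_read_lock)) {
		/*
		 * Upgrade failed: drop the lock and retry the whole
		 * operation write-locked, as the RetryFault path does.
		 */
		can_read_lock = false;
		rw_unlock(&st->lock);
		goto restart;
	}
	st->value++;			/* mutation, now write-locked */
	rw_unlock(&st->lock);
}

Like the patch, the sketch only falls back to the exclusive lock after an upgrade actually fails, so the common read-mostly case never blocks other readers.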