diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -71,11 +71,9 @@
  * Page fault handling module.
  */
 
-#include 
 #include "opt_ktrace.h"
 #include "opt_vm.h"
 
-#include 
 #include 
 #include 
 #include 
@@ -204,7 +202,10 @@
 		 * pageout while optimizing fault restarts.
 		 */
 		vm_page_deactivate(m);
-		vm_page_xunbusy(m);
+		if (vm_page_xbusied(m))
+			vm_page_xunbusy(m);
+		else
+			vm_page_sunbusy(m);
 		*mp = NULL;
 	}
 }
@@ -329,6 +330,20 @@
 }
 
+static bool
+vm_fault_is_read(const struct faultstate *fs)
+{
+	return ((fs->prot & VM_PROT_WRITE) == 0 &&
+	    (fs->fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) == 0);
+}
+
+static bool
+vm_fault_fo_is_onemapping(const struct faultstate *fs)
+{
+	return (fs->object != fs->first_object &&
+	    (fs->first_object->flags & OBJ_ONEMAPPING) != 0);
+}
+
 /*
  * Unlocks fs.first_object and fs.map on success.
  */
@@ -1002,10 +1017,20 @@
 	return (KERN_SUCCESS);
 }
 
-static void
-vm_fault_cow(struct faultstate *fs)
+static bool
+vm_fault_can_cow_rename(struct faultstate *fs)
 {
-	bool is_first_object_locked;
+	return (
+	    /* Only one shadow object and no other refs. */
+	    fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
+	    /* No other ways to look the object up. */
+	    fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0);
+}
+
+static void
+vm_fault_cow(struct faultstate *fs, int res)
+{
+	bool is_first_object_locked, rename_cow;
 
 	KASSERT(fs->object != fs->first_object,
 	    ("source and target COW objects are identical"));
@@ -1019,21 +1044,23 @@
 	 * object so that it will go out to swap when needed.
 	 */
 	is_first_object_locked = false;
-	if (
-	    /*
-	     * Only one shadow object and no other refs.
-	     */
-	    fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
-	    /*
-	     * No other ways to look the object up
-	     */
-	    fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0 &&
-	    /*
-	     * We don't chase down the shadow chain and we can acquire locks.
-	     */
-	    (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object)) &&
-	    fs->object == fs->first_object->backing_object &&
-	    VM_OBJECT_TRYWLOCK(fs->object)) {
+	rename_cow = false;
+
+	if (vm_fault_can_cow_rename(fs)) {
+		/*
+		 * Check that we don't chase down the shadow chain and
+		 * we can acquire locks.
+		 */
+		is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object);
+		if (is_first_object_locked &&
+		    fs->object == fs->first_object->backing_object &&
+		    vm_page_xbusied(fs->m))
+			rename_cow = VM_OBJECT_TRYWLOCK(fs->object);
+	}
+
+	if (rename_cow) {
+		vm_page_assert_xbusied(fs->m);
+
 		/*
 		 * Remove but keep xbusy for replace.  fs->m is moved into
 		 * fs->first_object and left busy while fs->first_m is
@@ -1090,9 +1117,15 @@
 		 * address space.  If OBJ_ONEMAPPING is set after the check,
 		 * removing mappings will at worse trigger some unnecessary page
 		 * faults.
+		 *
+		 * In the fs->m shared busy case, the xbusy state of
+		 * fs->first_m prevents new mappings of fs->m from
+		 * being created because a parallel fault on this
+		 * shadow chain should wait for xbusy on fs->first_m.
 		 */
-		vm_page_assert_xbusied(fs->m_cow);
-		if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
+		if (!vm_page_xbusied(fs->m_cow))
+			VM_OBJECT_UNLOCK(fs->object);
+		else if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
 			pmap_remove_all(fs->m_cow);
 	}
 
@@ -1427,7 +1460,7 @@
  * page except, perhaps, to pmap it.
  */
 static void
-vm_fault_busy_sleep(struct faultstate *fs)
+vm_fault_busy_sleep(struct faultstate *fs, int allocflags)
 {
 	/*
 	 * Reference the page before unlocking and
@@ -1441,7 +1474,7 @@
 	}
 	vm_object_pip_wakeup(fs->object);
 	vm_fault_unlock_map(fs);
-	if (!vm_page_busy_sleep(fs->m, "vmpfw", 0))
+	if (!vm_page_busy_sleep(fs->m, "vmpfw", allocflags))
 		VM_OBJECT_UNLOCK(fs->object);
 	VM_CNT_INC(v_intrans);
 	vm_object_deallocate(fs->first_object);
@@ -1487,8 +1520,49 @@
 	vm_page_iter_init(&pages, fs->object);
 	fs->m = vm_radix_iter_lookup(&pages, fs->pindex);
 	if (fs->m != NULL) {
+		/*
+		 * If the found page is valid, will either be shadowed
+		 * or mapped for read, and would not be renamed, then
+		 * busy it in shared mode.  This allows other faults
+		 * needing this page to proceed in parallel.
+		 *
+		 * Unlocked check for validity, rechecked after busy
+		 * is obtained.
+		 */
+		if (vm_page_all_valid(fs->m) &&
+		    /*
+		     * No write permissions for the new fs->m mapping,
+		     * or the first object has only one mapping, so
+		     * other writeable CoW mappings of fs->m cannot
+		     * appear under us.
+		     */
+		    (vm_fault_is_read(fs) || vm_fault_fo_is_onemapping(fs)) &&
+		    /* fs->m cannot be renamed from object to first_object. */
+		    (!vm_fault_can_cow_rename(fs) ||
+		    fs->object != fs->first_object->backing_object)) {
+			if (!vm_page_trysbusy(fs->m)) {
+				vm_fault_busy_sleep(fs, VM_ALLOC_SBUSY);
+				return (FAULT_RESTART);
+			}
+
+			/*
+			 * Now make sure that the racily checked
+			 * conditions are still valid, in particular,
+			 * that the CoW OBJ_ONEMAPPING check is not
+			 * occurring.  Keep fs->object locked until
+			 * vm_fault_cow() is done.
+			 */
+			if (__predict_true(vm_page_all_valid(fs->m)) &&
+			    (vm_fault_is_read(fs) ||
+			    vm_fault_fo_is_onemapping(fs))) {
+				return (FAULT_SOFT);
+			}
+
+			vm_page_sunbusy(fs->m);
+		}
+
 		if (!vm_page_tryxbusy(fs->m)) {
-			vm_fault_busy_sleep(fs);
+			vm_fault_busy_sleep(fs, 0);
 			return (FAULT_RESTART);
 		}
@@ -1555,7 +1629,7 @@
 	int ahead, behind, faultcount, rv;
 	enum fault_status res;
 	enum fault_next_status res_next;
-	bool hardfault;
+	bool hardfault, object_locked;
 
 	VM_CNT_INC(v_vm_faults);
@@ -1701,11 +1775,24 @@
 found:
 	/*
-	 * A valid page has been found and exclusively busied.  The
-	 * object lock must no longer be held.
+	 * A valid page has been found and busied.  The object lock
+	 * must no longer be held if the page was xbusied, but is kept
+	 * around until CoW is done for the sbusied case.
+	 *
+	 * Regardless of the busy state of fs.m, fs.first_m is always
+	 * exclusively busied after the first iteration of the loop
+	 * calling vm_fault_object().  This is an ordering point for
+	 * the parallel faults occurring in our vm_map on the same
+	 * page.
 	 */
-	vm_page_assert_xbusied(fs.m);
-	VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+	if (vm_page_xbusied(fs.m)) {
+		object_locked = false;
+		VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+	} else {
+		vm_page_assert_busied(fs.m);
+		object_locked = true;
+		VM_OBJECT_ASSERT_LOCKED(fs.object);
+	}
 
 	/*
 	 * If the page is being written, but isn't already owned by the
@@ -1717,7 +1804,9 @@
 		 * We only really need to copy if we want to write it.
 		 */
 		if ((fs.fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
-			vm_fault_cow(&fs);
+			vm_fault_cow(&fs, res);
+			object_locked = false;
+
 			/*
 			 * We only try to prefault read-only mappings to the
 			 * neighboring pages when this copy-on-write fault is
@@ -1731,6 +1820,8 @@
 			fs.prot &= ~VM_PROT_WRITE;
 		}
 	}
+	if (object_locked)
+		VM_OBJECT_UNLOCK(fs.object);
 
 	/*
 	 * We must verify that the maps have not changed since our last
@@ -1773,7 +1864,7 @@
 	 * Page must be completely valid or it is not fit to
 	 * map into user space.  vm_pager_get_pages() ensures this.
 	 */
-	vm_page_assert_xbusied(fs.m);
+	vm_page_assert_busied(fs.m);
 	KASSERT(vm_page_all_valid(fs.m),
 	    ("vm_fault: page %p partially invalid", fs.m));
 
@@ -1805,7 +1896,13 @@
 		(*fs.m_hold) = fs.m;
 		vm_page_wire(fs.m);
 	}
-	vm_page_xunbusy(fs.m);
+
+	KASSERT(fs.first_object == fs.object || vm_page_xbusied(fs.first_m),
+	    ("first_m must be xbusy"));
+	if (vm_page_xbusied(fs.m))
+		vm_page_xunbusy(fs.m);
+	else
+		vm_page_sunbusy(fs.m);
 	fs.m = NULL;
 
 	/*