diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -130,6 +130,8 @@
 	bool oom_started;
 	int nera;
 	bool can_read_lock;
+	bool can_sbusy;
+	bool first_object_rlocked;
 
 	/* Page reference for cow. */
 	vm_page_t m_cow;
@@ -204,7 +206,10 @@
 		 * pageout while optimizing fault restarts.
 		 */
		vm_page_deactivate(m);
-		vm_page_xunbusy(m);
+		if (vm_page_xbusied(m))
+			vm_page_xunbusy(m);
+		else
+			vm_page_sunbusy(m);
 		*mp = NULL;
 	}
 }
@@ -283,6 +288,8 @@
 {
 	VM_OBJECT_UNLOCK(fs->object);
+	if (fs->first_object_rlocked)
+		VM_OBJECT_RUNLOCK(fs->first_object);
 	vm_fault_deallocate(fs);
 }
 
@@ -329,6 +336,20 @@
 }
 
+static bool
+vm_fault_is_read(const struct faultstate *fs)
+{
+	return ((fs->prot & VM_PROT_WRITE) == 0 &&
+	    (fs->fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) == 0);
+}
+
+static bool
+vm_fault_fo_is_onemapping(const struct faultstate *fs)
+{
+	return (fs->object != fs->first_object &&
+	    (fs->first_object->flags & OBJ_ONEMAPPING) != 0);
+}
+
 /*
  * Unlocks fs.first_object and fs.map on success.
  */
@@ -1002,10 +1023,20 @@
 	return (KERN_SUCCESS);
 }
 
-static void
-vm_fault_cow(struct faultstate *fs)
+static bool
+vm_fault_can_cow_rename(struct faultstate *fs)
 {
-	bool is_first_object_locked;
+	return (
+	    /* Only one shadow object and no other refs. */
+	    fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
+	    /* No other ways to look the object up. */
+	    fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0);
+}
+
+static void
+vm_fault_cow(struct faultstate *fs, int res)
+{
+	bool is_first_object_locked, rename_cow;
 
 	KASSERT(fs->object != fs->first_object,
 	    ("source and target COW objects are identical"));
@@ -1019,21 +1050,23 @@
 	 * object so that it will go out to swap when needed.
 	 */
 	is_first_object_locked = false;
-	if (
-	    /*
-	     * Only one shadow object and no other refs.
-	     */
-	    fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
-	    /*
-	     * No other ways to look the object up
-	     */
-	    fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0 &&
-	    /*
-	     * We don't chase down the shadow chain and we can acquire locks.
-	     */
-	    (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object)) &&
-	    fs->object == fs->first_object->backing_object &&
-	    VM_OBJECT_TRYWLOCK(fs->object)) {
+	rename_cow = false;
+
+	if (vm_fault_can_cow_rename(fs)) {
+		/*
+		 * Check that we don't chase down the shadow chain and
+		 * we can acquire locks.
+		 */
+		is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object);
+		if (is_first_object_locked &&
+		    fs->object == fs->first_object->backing_object &&
+		    vm_page_xbusied(fs->m))
+			rename_cow = VM_OBJECT_TRYWLOCK(fs->object);
+	}
+
+	if (rename_cow) {
+		vm_page_assert_xbusied(fs->m);
+
 		/*
 		 * Remove but keep xbusy for replace.  fs->m is moved into
 		 * fs->first_object and left busy while fs->first_m is
@@ -1090,9 +1123,15 @@
 	 * address space.  If OBJ_ONEMAPPING is set after the check,
 	 * removing mappings will at worst trigger some unnecessary page
 	 * faults.
+	 *
+	 * Note that the fs->m_cow shared busy case is only
+	 * possible when the shadow object is read-mapped or
+	 * has a single mapping, so this workaround is not
+	 * required then.
 	 */
-	vm_page_assert_xbusied(fs->m_cow);
-	if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
+	if (!vm_page_xbusied(fs->m_cow))
+		VM_OBJECT_UNLOCK(fs->object);
+	else if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
 		pmap_remove_all(fs->m_cow);
 }
@@ -1487,6 +1526,56 @@
 	vm_page_iter_init(&pages, fs->object);
 	fs->m = vm_radix_iter_lookup(&pages, fs->pindex);
 	if (fs->m != NULL) {
+		/*
+		 * If the found page is valid, will be either shadowed
+		 * or mapped for read, and will not be renamed, then
+		 * busy it in shared mode.  This allows other faults
+		 * needing this page to proceed in parallel.
+		 *
+		 * Unlocked check for validity, rechecked after busy
+		 * is obtained.
+		 */
+		if (vm_page_all_valid(fs->m) && fs->can_sbusy &&
+		    /*
+		     * No write permissions for the new fs->m mapping,
+		     * or the first object has only one mapping, so
+		     * other writeable CoW mappings of fs->m cannot
+		     * appear under us.
+		     */
+		    (vm_fault_is_read(fs) || vm_fault_fo_is_onemapping(fs)) &&
+		    /* fs->m cannot be renamed from object to first_object. */
+		    (!vm_fault_can_cow_rename(fs) ||
+		    fs->object != fs->first_object->backing_object)) {
+			if (!vm_page_trysbusy(fs->m)) {
+				fs->can_sbusy = false;
+				vm_fault_busy_sleep(fs);
+				return (FAULT_RESTART);
+			}
+			if (__predict_false(!vm_page_all_valid(fs->m)))
+				goto failed_sbusy;
+
+			/*
+			 * Now make sure that the racily checked
+			 * conditions are still valid, in particular,
+			 * that the CoW OBJ_ONEMAPPING condition
+			 * still holds.  Keep fs->object and possibly
+			 * fs->first_object locked until
+			 * vm_fault_cow() is done.
+			 */
+			if (vm_fault_is_read(fs))
+				return (FAULT_SOFT);
+
+			if (!VM_OBJECT_TRYRLOCK(fs->first_object))
+				goto failed_sbusy;
+			if (vm_fault_fo_is_onemapping(fs)) {
+				fs->first_object_rlocked = true;
+				return (FAULT_SOFT);
+			}
+			VM_OBJECT_RUNLOCK(fs->first_object);
+failed_sbusy:
+			vm_page_sunbusy(fs->m);
+		}
+
 		if (!vm_page_tryxbusy(fs->m)) {
 			vm_fault_busy_sleep(fs);
 			return (FAULT_RESTART);
@@ -1555,7 +1644,7 @@
 	int ahead, behind, faultcount, rv;
 	enum fault_status res;
 	enum fault_next_status res_next;
-	bool hardfault;
+	bool hardfault, object_locked;
 
 	VM_CNT_INC(v_vm_faults);
 
@@ -1571,6 +1660,8 @@
 	fs.oom_started = false;
 	fs.nera = -1;
 	fs.can_read_lock = true;
+	fs.can_sbusy = true;
+	fs.first_object_rlocked = false;
 	faultcount = 0;
 	hardfault = false;
 
@@ -1701,11 +1792,18 @@
 
 found:
 	/*
-	 * A valid page has been found and exclusively busied.  The
-	 * object lock must no longer be held.
+	 * A valid page has been found and busied.  The object lock
+	 * must no longer be held if the page was xbusied, but is kept
+	 * around until CoW is done in the sbusied case.
 	 */
-	vm_page_assert_xbusied(fs.m);
-	VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+	if (vm_page_xbusied(fs.m)) {
+		object_locked = false;
+		VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+	} else {
+		vm_page_assert_busied(fs.m);
+		object_locked = true;
+		VM_OBJECT_ASSERT_LOCKED(fs.object);
+	}
 
 	/*
 	 * If the page is being written, but isn't already owned by the
@@ -1717,7 +1815,9 @@
 	 * We only really need to copy if we want to write it.
 	 */
 	if ((fs.fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
-		vm_fault_cow(&fs);
+		vm_fault_cow(&fs, res);
+		object_locked = false;
+
 		/*
 		 * We only try to prefault read-only mappings to the
 		 * neighboring pages when this copy-on-write fault is
@@ -1731,6 +1831,12 @@
 			fs.prot &= ~VM_PROT_WRITE;
 		}
 	}
+	if (object_locked)
+		VM_OBJECT_UNLOCK(fs.object);
+	if (fs.first_object_rlocked) {
+		VM_OBJECT_RUNLOCK(fs.first_object);
+		fs.first_object_rlocked = false;
+	}
 
 	/*
 	 * We must verify that the maps have not changed since our last
@@ -1773,7 +1879,7 @@
 	 * Page must be completely valid or it is not fit to
 	 * map into user space.  vm_pager_get_pages() ensures this.
 	 */
-	vm_page_assert_xbusied(fs.m);
+	vm_page_assert_busied(fs.m);
 	KASSERT(vm_page_all_valid(fs.m),
 	    ("vm_fault: page %p partially invalid", fs.m));
 
@@ -1805,7 +1911,10 @@
 		(*fs.m_hold) = fs.m;
 		vm_page_wire(fs.m);
 	}
-	vm_page_xunbusy(fs.m);
+	if (vm_page_xbusied(fs.m))
+		vm_page_xunbusy(fs.m);
+	else
+		vm_page_sunbusy(fs.m);
 	fs.m = NULL;
 
 	/*
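
Note: the busy-mode-dependent release pattern introduced by this patch appears twice, in vm_fault_page_release() (the @@ -204 hunk) and at the tail of vm_fault() (the @@ -1805 hunk). A small helper could keep that branching in one place. The sketch below is hypothetical, not part of the patch, and assumes only the vm_page busy primitives already used in the diff:

/*
 * Hypothetical helper, not in this patch: release a page that the
 * fault handler holds either exclusive- or shared-busy.  Mirrors the
 * two open-coded sites in the diff above.
 */
static inline void
vm_fault_page_unbusy(vm_page_t m)
{
	if (vm_page_xbusied(m))
		vm_page_xunbusy(m);
	else
		vm_page_sunbusy(m);
}

Both call sites would then reduce to vm_fault_page_unbusy(m) and vm_fault_page_unbusy(fs.m), so any future exit path added to the shared-busy fast path cannot reintroduce the branch inconsistently by hand.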