diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -130,6 +130,7 @@
 	bool	oom_started;
 	int	nera;
 	bool	can_read_lock;
+	bool	can_sbusy;
 
 	/* Page reference for cow. */
 	vm_page_t m_cow;
@@ -165,6 +166,7 @@
 	FAULT_OUT_OF_BOUNDS,	/* Invalid address for pager. */
 	FAULT_HARD,		/* Performed I/O. */
 	FAULT_SOFT,		/* Found valid page. */
+	FAULT_SOFT_MSHAREDBUSY,	/* Found valid page and busied it shared. */
 	FAULT_PROTECTION_FAILURE, /* Invalid access. */
 };
 
@@ -204,7 +206,10 @@
 		 * pageout while optimizing fault restarts.
 		 */
 		vm_page_deactivate(m);
-		vm_page_xunbusy(m);
+		if (vm_page_xbusied(m))
+			vm_page_xunbusy(m);
+		else
+			vm_page_sunbusy(m);
 		*mp = NULL;
 	}
 }
@@ -354,7 +359,7 @@
 	 */
 	m = vm_page_lookup_unlocked(fs->first_object, fs->first_pindex);
 	if (m == NULL || !vm_page_all_valid(m) ||
-	    ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_busied(m))) {
+	    ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_xbusied(m))) {
 		VM_OBJECT_WLOCK(fs->first_object);
 		return (FAULT_FAILURE);
 	}
@@ -374,7 +379,7 @@
 	vm_object_busy(fs->first_object);
 
 	if (!vm_page_all_valid(m) ||
-	    ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_busied(m)))
+	    ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_xbusied(m)))
 		goto fail_busy;
 
 	m_map = m;
@@ -1002,10 +1007,20 @@
 	return (KERN_SUCCESS);
 }
 
-static void
-vm_fault_cow(struct faultstate *fs)
+static bool
+vm_fault_can_cow_rename(struct faultstate *fs)
 {
-	bool is_first_object_locked;
+	return (
+	    /* Only one shadow object and no other refs. */
+	    fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
+	    /* No other ways to look the object up. */
+	    fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0);
+}
+
+static void
+vm_fault_cow(struct faultstate *fs, int res)
+{
+	bool is_first_object_locked, fast_cow;
 
 	KASSERT(fs->object != fs->first_object,
 	    ("source and target COW objects are identical"));
@@ -1019,21 +1034,28 @@
 	 * object so that it will go out to swap when needed.
 	 */
 	is_first_object_locked = false;
-	if (
-	    /*
-	     * Only one shadow object and no other refs.
-	     */
-	    fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
-	    /*
-	     * No other ways to look the object up
-	     */
-	    fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0 &&
-	    /*
-	     * We don't chase down the shadow chain and we can acquire locks.
-	     */
-	    (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object)) &&
-	    fs->object == fs->first_object->backing_object &&
-	    VM_OBJECT_TRYWLOCK(fs->object)) {
+	fast_cow = false;
+
+	if (vm_fault_can_cow_rename(fs)) {
+		/*
+		 * Check that we don't chase down the shadow chain and
+		 * we can acquire locks.
+		 */
+		is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object);
+		if (is_first_object_locked &&
+		    fs->object == fs->first_object->backing_object) {
+			if (res == FAULT_SOFT_MSHAREDBUSY) {
+				fast_cow = VM_OBJECT_WOWNED(fs->object) ||
+				    VM_OBJECT_TRYUPGRADE(fs->object);
+			} else {
+				fast_cow = VM_OBJECT_TRYWLOCK(fs->object);
+			}
+		}
+	}
+
+	if (fast_cow) {
+		vm_page_assert_xbusied(fs->m);
+
 		/*
 		 * Remove but keep xbusy for replace.  fs->m is moved into
 		 * fs->first_object and left busy while fs->first_m is
@@ -1090,9 +1112,18 @@
 		 * address space.  If OBJ_ONEMAPPING is set after the check,
 		 * removing mappings will at worse trigger some unnecessary page
 		 * faults.
+		 *
+		 * Note that the FAULT_SOFT_MSHAREDBUSY case is only
+		 * possible when the shadow object is read-mapped, so
+		 * this workaround is not required then.
 		 */
-		vm_page_assert_xbusied(fs->m_cow);
-		if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
+		if (res == FAULT_SOFT_MSHAREDBUSY)
+			vm_page_assert_busied(fs->m_cow);
+		else
+			vm_page_assert_xbusied(fs->m_cow);
+		if (res == FAULT_SOFT_MSHAREDBUSY)
+			VM_OBJECT_UNLOCK(fs->object);
+		else if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
 			pmap_remove_all(fs->m_cow);
 	}
 
@@ -1487,6 +1518,40 @@
 	vm_page_iter_init(&pages, fs->object);
 	fs->m = vm_radix_iter_lookup(&pages, fs->pindex);
 	if (fs->m != NULL) {
+		/*
+		 * If the found page is valid, will be either shadowed
+		 * or mapped for read, and would not be renamed, then
+		 * busy it in shared mode.  This allows other faults
+		 * needing this page to proceed in parallel.
+		 *
+		 * Unlocked check for validity, rechecked after busy
+		 * is obtained.
+		 */
+		if (vm_page_all_valid(fs->m) && fs->can_sbusy &&
+		    /* No write permissions for the new fs->m mapping. */
+		    (((fs->prot & VM_PROT_WRITE) == 0 &&
+		    (fs->fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) == 0) ||
+		    fs->object != fs->first_object) &&
+		    /* fs->m cannot be renamed from object to first_object. */
+		    (!vm_fault_can_cow_rename(fs) ||
+		    fs->object != fs->first_object->backing_object)) {
+			if (!vm_page_trysbusy(fs->m)) {
+restart:
+				fs->can_sbusy = false;
+				vm_fault_busy_sleep(fs);
+				return (FAULT_RESTART);
+			}
+			if (!vm_page_all_valid(fs->m)) {
+				vm_page_sunbusy(fs->m);
+				goto restart;
+			}
+			/*
+			 * Keep fs->object locked for validity of the
+			 * CoW checks.
+			 */
+			return (FAULT_SOFT_MSHAREDBUSY);
+		}
+
 		if (!vm_page_tryxbusy(fs->m)) {
 			vm_fault_busy_sleep(fs);
 			return (FAULT_RESTART);
@@ -1546,8 +1611,8 @@
 	return (res);
 }
 
-int
-vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+static int
+vm_fault_rangelocked(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold)
 {
 	struct pctrie_iter pages;
@@ -1555,12 +1620,7 @@
 	int ahead, behind, faultcount, rv;
 	enum fault_status res;
 	enum fault_next_status res_next;
-	bool hardfault;
-
-	VM_CNT_INC(v_vm_faults);
-
-	if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
-		return (KERN_PROTECTION_FAILURE);
+	bool hardfault, unlock_object;
 
 	fs.vp = NULL;
 	fs.vaddr = vaddr;
@@ -1571,6 +1631,7 @@
 	fs.oom_started = false;
 	fs.nera = -1;
 	fs.can_read_lock = true;
+	fs.can_sbusy = true;
 	faultcount = 0;
 	hardfault = false;
 
@@ -1654,6 +1715,7 @@
 		res = vm_fault_object(&fs, &behind, &ahead);
 		switch (res) {
 		case FAULT_SOFT:
+		case FAULT_SOFT_MSHAREDBUSY:
 			goto found;
 		case FAULT_HARD:
 			faultcount = behind + 1 + ahead;
@@ -1701,11 +1763,17 @@
 
 found:
 	/*
-	 * A valid page has been found and exclusively busied.  The
-	 * object lock must no longer be held.
+	 * A valid page has been found and busied.  The object lock
+	 * must no longer be held.
 	 */
-	vm_page_assert_xbusied(fs.m);
-	VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+	vm_page_assert_busied(fs.m);
+	if (res != FAULT_SOFT_MSHAREDBUSY) {
+		unlock_object = false;
+		VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+	} else {
+		unlock_object = true;
+		VM_OBJECT_ASSERT_LOCKED(fs.object);
+	}
 
 	/*
 	 * If the page is being written, but isn't already owned by the
@@ -1717,7 +1785,9 @@
 		 * We only really need to copy if we want to write it.
 		 */
 		if ((fs.fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
-			vm_fault_cow(&fs);
+			vm_fault_cow(&fs, res);
+			unlock_object = false;
+
 			/*
 			 * We only try to prefault read-only mappings to the
 			 * neighboring pages when this copy-on-write fault is
@@ -1731,6 +1801,8 @@
 			fs.prot &= ~VM_PROT_WRITE;
 		}
 	}
+	if (unlock_object)
+		VM_OBJECT_UNLOCK(fs.object);
 
 	/*
 	 * We must verify that the maps have not changed since our last
@@ -1773,7 +1845,7 @@
 	 * Page must be completely valid or it is not fit to
 	 * map into user space.  vm_pager_get_pages() ensures this.
 	 */
-	vm_page_assert_xbusied(fs.m);
+	vm_page_assert_busied(fs.m);
 	KASSERT(vm_page_all_valid(fs.m),
 	    ("vm_fault: page %p partially invalid", fs.m));
 
@@ -1805,7 +1877,10 @@
 		(*fs.m_hold) = fs.m;
 		vm_page_wire(fs.m);
 	}
-	vm_page_xunbusy(fs.m);
+	if (vm_page_xbusied(fs.m))
+		vm_page_xunbusy(fs.m);
+	else
+		vm_page_sunbusy(fs.m);
 	fs.m = NULL;
 
 	/*
@@ -1836,6 +1911,24 @@
 	return (KERN_SUCCESS);
 }
 
+int
+vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+    int fault_flags, vm_page_t *m_hold)
+{
+	void *cookie;
+	int rv;
+
+	VM_CNT_INC(v_vm_faults);
+
+	if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
+		return (KERN_PROTECTION_FAILURE);
+
+	cookie = rangelock_wlock(&map->fltlock, vaddr, vaddr + PAGE_SIZE);
+	rv = vm_fault_rangelocked(map, vaddr, fault_type, fault_flags, m_hold);
+	rangelock_unlock(&map->fltlock, cookie);
+	return (rv);
+}
+
 /*
  * Speed up the reclamation of pages that precede the faulting pindex within
  * the first object of the shadow chain.  Essentially, perform the equivalent
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -65,6 +65,7 @@
 #define	_VM_MAP_
 
 #include <sys/lock.h>
+#include <sys/rangelock.h>
 #include <sys/sx.h>
 #include <sys/_mutex.h>
 
@@ -206,6 +207,7 @@
 		struct sx lock;		/* Lock for map data */
 		struct mtx system_mtx;
 	};
+	struct rangelock fltlock;
 	int nentries;			/* Number of entries */
 	vm_size_t size;			/* virtual size */
 	u_int timestamp;		/* Version number */
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -896,6 +896,7 @@
 	map->timestamp = 0;
 	map->busy = 0;
 	map->anon_loc = 0;
+	rangelock_init(&map->fltlock);
#ifdef DIAGNOSTIC
 	map->nupdates = 0;
#endif
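
A possible follow-up, not part of the patch: the xbusied-or-sunbusy release sequence now appears twice, in vm_fault_page_release() and at the end of vm_fault_rangelocked(). A small helper could keep the two sites in sync. The sketch below assumes the page is always busied in exactly one of the two modes at these points; the name vm_fault_page_unbusy() is illustrative only.

static void
vm_fault_page_unbusy(vm_page_t m)
{
	/*
	 * The page was busied either exclusively (hard fault, COW
	 * rename) or shared (FAULT_SOFT_MSHAREDBUSY); release
	 * whichever busy state is held.
	 */
	if (vm_page_xbusied(m))
		vm_page_xunbusy(m);
	else
		vm_page_sunbusy(m);
}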