diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -130,6 +130,7 @@
 	bool oom_started;
 	int nera;
 	bool can_read_lock;
+	bool can_sbusy;

 	/* Page reference for cow. */
 	vm_page_t m_cow;
@@ -204,7 +205,10 @@
 		 * pageout while optimizing fault restarts.
 		 */
		vm_page_deactivate(m);
-		vm_page_xunbusy(m);
+		if (vm_page_xbusied(m))
+			vm_page_xunbusy(m);
+		else
+			vm_page_sunbusy(m);
 		*mp = NULL;
 	}
 }
@@ -1034,6 +1038,8 @@
 	    (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object)) &&
 	    fs->object == fs->first_object->backing_object &&
 	    VM_OBJECT_TRYWLOCK(fs->object)) {
+		vm_page_assert_xbusied(fs->m);
+
 		/*
 		 * Remove but keep xbusy for replace.  fs->m is moved into
 		 * fs->first_object and left busy while fs->first_m is
@@ -1091,7 +1097,7 @@
 		 * removing mappings will at worse trigger some unnecessary page
 		 * faults.
 		 */
-		vm_page_assert_xbusied(fs->m_cow);
+		vm_page_assert_busied(fs->m_cow);
 		if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
 			pmap_remove_all(fs->m_cow);
 	}
@@ -1487,6 +1493,32 @@
 	vm_page_iter_init(&pages, fs->object);
 	fs->m = vm_radix_iter_lookup(&pages, fs->pindex);
 	if (fs->m != NULL) {
+		/*
+		 * Unlocked check for validity, rechecked after busy
+		 * is obtained.
+		 */
+		if (vm_page_all_valid(fs->m) && fs->can_sbusy &&
+		    ((fs->fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) == 0 ||
+		    fs->object != fs->first_object) &&
+		    (fs->object->shadow_count != 1 ||
+		    fs->object->ref_count != 1 ||
+		    fs->object->handle != NULL ||
+		    fs->object != fs->first_object->backing_object ||
+		    (fs->object->flags & OBJ_ANON) == 0)) {
+			if (!vm_page_trysbusy(fs->m)) {
+restart:
+				fs->can_sbusy = false;
+				vm_fault_busy_sleep(fs);
+				return (FAULT_RESTART);
+			}
+			if (!vm_page_all_valid(fs->m)) {
+				vm_page_sunbusy(fs->m);
+				goto restart;
+			}
+			VM_OBJECT_UNLOCK(fs->object);
+			return (FAULT_SOFT);
+		}
+
 		if (!vm_page_tryxbusy(fs->m)) {
 			vm_fault_busy_sleep(fs);
 			return (FAULT_RESTART);
@@ -1546,8 +1578,8 @@
 	return (res);
 }

-int
-vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+static int
+vm_fault_rglocked(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold)
 {
 	struct pctrie_iter pages;
@@ -1557,11 +1589,6 @@
 	enum fault_next_status res_next;
 	bool hardfault;

-	VM_CNT_INC(v_vm_faults);
-
-	if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
-		return (KERN_PROTECTION_FAILURE);
-
 	fs.vp = NULL;
 	fs.vaddr = vaddr;
 	fs.m_hold = m_hold;
@@ -1571,6 +1598,7 @@
 	fs.oom_started = false;
 	fs.nera = -1;
 	fs.can_read_lock = true;
+	fs.can_sbusy = true;

 	faultcount = 0;
 	hardfault = false;
@@ -1704,7 +1732,7 @@
-	 * A valid page has been found and exclusively busied.  The
-	 * object lock must no longer be held.
+	 * A valid page has been found and busied.  The object lock
+	 * must no longer be held.
 	 */
-	vm_page_assert_xbusied(fs.m);
+	vm_page_assert_busied(fs.m);
 	VM_OBJECT_ASSERT_UNLOCKED(fs.object);

 	/*
@@ -1718,6 +1746,7 @@
 	 */
 	if ((fs.fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
 		vm_fault_cow(&fs);
+
 		/*
 		 * We only try to prefault read-only mappings to the
 		 * neighboring pages when this copy-on-write fault is
@@ -1773,7 +1802,7 @@
 	 * Page must be completely valid or it is not fit to
 	 * map into user space.  vm_pager_get_pages() ensures this.
 	 */
-	vm_page_assert_xbusied(fs.m);
+	vm_page_assert_busied(fs.m);
 	KASSERT(vm_page_all_valid(fs.m),
 	    ("vm_fault: page %p partially invalid", fs.m));

@@ -1805,7 +1834,10 @@
 		(*fs.m_hold) = fs.m;
 		vm_page_wire(fs.m);
 	}
-	vm_page_xunbusy(fs.m);
+	if (vm_page_xbusied(fs.m))
+		vm_page_xunbusy(fs.m);
+	else
+		vm_page_sunbusy(fs.m);
 	fs.m = NULL;

 	/*
@@ -1836,6 +1868,24 @@
 	return (KERN_SUCCESS);
 }

+int
+vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+    int fault_flags, vm_page_t *m_hold)
+{
+	void *cookie;
+	int rv;
+
+	VM_CNT_INC(v_vm_faults);
+
+	if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
+		return (KERN_PROTECTION_FAILURE);
+
+	cookie = rangelock_wlock(&map->fltlock, vaddr, vaddr + PAGE_SIZE);
+	rv = vm_fault_rglocked(map, vaddr, fault_type, fault_flags, m_hold);
+	rangelock_unlock(&map->fltlock, cookie);
+	return (rv);
+}
+
 /*
  * Speed up the reclamation of pages that precede the faulting pindex within
  * the first object of the shadow chain.  Essentially, perform the equivalent
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -65,6 +65,7 @@
 #define	_VM_MAP_

 #include <sys/lock.h>
+#include <sys/rangelock.h>
 #include <sys/_mutex.h>
 #include <sys/sx.h>

@@ -206,6 +207,7 @@
 		struct sx lock;		/* Lock for map data */
 		struct mtx system_mtx;
 	};
+	struct rangelock fltlock;
 	int nentries;			/* Number of entries */
 	vm_size_t size;			/* virtual size */
 	u_int timestamp;		/* Version number */
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -896,6 +896,7 @@
 	map->timestamp = 0;
 	map->busy = 0;
 	map->anon_loc = 0;
+	rangelock_init(&map->fltlock);
 #ifdef DIAGNOSTIC
 	map->nupdates = 0;
 #endif
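
Note on the busy-state handling above: after this change fs.m can reach
the common release paths either shared- or exclusive-busied, which is
why both former vm_page_xunbusy() call sites now test vm_page_xbusied()
first.  A minimal sketch of that pattern as a helper (hypothetical name,
not part of the patch):

	static inline void
	fault_page_unbusy(vm_page_t m)
	{
		/* Drop whichever busy state the fault path acquired. */
		if (vm_page_xbusied(m))
			vm_page_xunbusy(m);
		else
			vm_page_sunbusy(m);
	}

Folding the two call sites into one such helper would keep the
shared/exclusive distinction in a single place if more release paths
grow the same check.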