Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -422,10 +422,7 @@ * likely to reclaim it. */ vm_page_reference(pp); - vm_page_lock(pp); - zfs_vmobject_wunlock(obj); - vm_page_busy_sleep(pp, "zfsmwb", true); - zfs_vmobject_wlock(obj); + vm_page_sleep_if_xbusy(pp, "zfsmwb"); continue; } vm_page_sbusy(pp); @@ -473,10 +470,7 @@ * likely to reclaim it. */ vm_page_reference(pp); - vm_page_lock(pp); - zfs_vmobject_wunlock(obj); - vm_page_busy_sleep(pp, "zfsmwb", true); - zfs_vmobject_wlock(obj); + vm_page_sleep_if_xbusy(pp, "zfsmwb"); continue; } Index: sys/dev/drm2/ttm/ttm_bo_vm.c =================================================================== --- sys/dev/drm2/ttm/ttm_bo_vm.c +++ sys/dev/drm2/ttm/ttm_bo_vm.c @@ -234,10 +234,7 @@ VM_OBJECT_WLOCK(vm_obj); if (vm_page_busied(m)) { - vm_page_lock(m); - VM_OBJECT_WUNLOCK(vm_obj); - vm_page_busy_sleep(m, "ttmpbs", false); - VM_OBJECT_WLOCK(vm_obj); + vm_page_sleep_if_busy(m, "ttmpbs"); ttm_mem_io_unlock(man); ttm_bo_unreserve(bo); goto retry; Index: sys/kern/vfs_bio.c =================================================================== --- sys/kern/vfs_bio.c +++ sys/kern/vfs_bio.c @@ -2936,12 +2936,8 @@ presid = resid > (PAGE_SIZE - poffset) ? 
(PAGE_SIZE - poffset) : resid; KASSERT(presid >= 0, ("brelse: extra page")); - while (vm_page_xbusied(m)) { - vm_page_lock(m); - VM_OBJECT_WUNLOCK(obj); - vm_page_busy_sleep(m, "mbncsh", true); - VM_OBJECT_WLOCK(obj); - } + while (vm_page_xbusied(m)) + vm_page_sleep_if_xbusy(m, "mbncsh"); if (pmap_page_wired_mappings(m) == 0) vm_page_set_invalid(m, poffset, presid); vm_page_release_locked(m, flags); @@ -4575,10 +4571,7 @@ for (; last_busied < i; last_busied++) vm_page_sbusy(bp->b_pages[last_busied]); while (vm_page_xbusied(m)) { - vm_page_lock(m); - VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); - vm_page_busy_sleep(m, "vbpage", true); - VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); + vm_page_sleep_if_xbusy(m, "vbpage"); } } } Index: sys/vm/phys_pager.c =================================================================== --- sys/vm/phys_pager.c +++ sys/vm/phys_pager.c @@ -219,10 +219,7 @@ pmap_zero_page(m); m->valid = VM_PAGE_BITS_ALL; } else if (vm_page_xbusied(m)) { - vm_page_lock(m); - VM_OBJECT_WUNLOCK(object); - vm_page_busy_sleep(m, "physb", true); - VM_OBJECT_WLOCK(object); + vm_page_sleep_if_xbusy(m, "physb"); goto retry; } else { vm_page_xbusy(m); Index: sys/vm/vm_fault.c =================================================================== --- sys/vm/vm_fault.c +++ sys/vm/vm_fault.c @@ -520,7 +520,7 @@ *m_hold = &m[i]; vm_page_wire(&m[i]); } - vm_page_xunbusy_maybelocked(&m[i]); + vm_page_xunbusy(&m[i]); } if (m_mtx != NULL) mtx_unlock(m_mtx); @@ -1034,7 +1034,7 @@ if (!vm_page_wired(fs.m)) vm_page_free(fs.m); else - vm_page_xunbusy_maybelocked(fs.m); + vm_page_xunbusy(fs.m); vm_page_unlock(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); @@ -1057,7 +1057,7 @@ if (!vm_page_wired(fs.m)) vm_page_free(fs.m); else - vm_page_xunbusy_maybelocked(fs.m); + vm_page_xunbusy(fs.m); vm_page_unlock(fs.m); fs.m = NULL; } Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c +++ sys/vm/vm_object.c @@ -1216,8 +1216,7 @@ 
("vm_object_madvise: page %p is not managed", tm)); if (vm_page_busied(tm)) { if (object != tobject) - VM_OBJECT_WUNLOCK(tobject); - VM_OBJECT_WUNLOCK(object); + VM_OBJECT_WUNLOCK(object); if (advice == MADV_WILLNEED) { /* * Reference the page before unlocking and @@ -1226,6 +1225,7 @@ */ vm_page_aflag_set(tm, PGA_REFERENCED); } + vm_page_unlock(tm); vm_page_busy_sleep(tm, "madvpo", false); goto relookup; } @@ -1399,10 +1399,7 @@ */ if (vm_page_busied(m)) { VM_OBJECT_WUNLOCK(new_object); - vm_page_lock(m); - VM_OBJECT_WUNLOCK(orig_object); - vm_page_busy_sleep(m, "spltwt", false); - VM_OBJECT_WLOCK(orig_object); + vm_page_sleep_if_busy(m, "spltwt"); VM_OBJECT_WLOCK(new_object); goto retry; } @@ -1469,15 +1466,16 @@ ("invalid ownership %p %p %p", p, object, backing_object)); if ((op & OBSC_COLLAPSE_NOWAIT) != 0) return (next); - if (p != NULL) - vm_page_lock(p); - VM_OBJECT_WUNLOCK(object); - VM_OBJECT_WUNLOCK(backing_object); /* The page is only NULL when rename fails. */ - if (p == NULL) + if (p == NULL) { vm_radix_wait(); - else + } else { + if (p->object == object) + VM_OBJECT_WUNLOCK(backing_object); + else + VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(p, "vmocol", false); + } VM_OBJECT_WLOCK(object); VM_OBJECT_WLOCK(backing_object); return (TAILQ_FIRST(&backing_object->memq)); @@ -1929,9 +1927,8 @@ */ vm_page_change_lock(p, &mtx); if (vm_page_xbusied(p)) { - VM_OBJECT_WUNLOCK(object); - vm_page_busy_sleep(p, "vmopax", true); - VM_OBJECT_WLOCK(object); + mtx_unlock(mtx); + vm_page_sleep_if_xbusy(p, "vmopax"); goto again; } if (vm_page_wired(p)) { @@ -1945,9 +1942,8 @@ continue; } if (vm_page_busied(p)) { - VM_OBJECT_WUNLOCK(object); - vm_page_busy_sleep(p, "vmopar", false); - VM_OBJECT_WLOCK(object); + mtx_unlock(mtx); + vm_page_sleep_if_busy(p, "vmopar"); goto again; } KASSERT((p->flags & PG_FICTITIOUS) == 0, @@ -2250,9 +2246,8 @@ tm = m; m = TAILQ_NEXT(m, listq); } - vm_page_lock(tm); if (vm_page_xbusied(tm)) { - for (tobject = object; locked_depth >= 1; + 
for (tobject = object; locked_depth > 1; locked_depth--) { t1object = tobject->backing_object; VM_OBJECT_RUNLOCK(tobject); @@ -2261,6 +2256,7 @@ vm_page_busy_sleep(tm, "unwbo", true); goto again; } + vm_page_lock(tm); vm_page_unwire(tm, queue); vm_page_unlock(tm); next_page: Index: sys/vm/vm_page.h =================================================================== --- sys/vm/vm_page.h +++ sys/vm/vm_page.h @@ -513,7 +513,6 @@ void vm_page_busy_downgrade(vm_page_t m); void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared); -void vm_page_flash(vm_page_t m); void vm_page_free(vm_page_t m); void vm_page_free_zero(vm_page_t m); @@ -576,6 +575,7 @@ vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options); void vm_page_set_valid_range(vm_page_t m, int base, int size); int vm_page_sleep_if_busy(vm_page_t m, const char *msg); +int vm_page_sleep_if_xbusy(vm_page_t m, const char *msg); vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_sunbusy(vm_page_t m); int vm_page_trysbusy(vm_page_t m); @@ -586,7 +586,6 @@ void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_wire (vm_page_t); void vm_page_xunbusy_hard(vm_page_t m); -void vm_page_xunbusy_maybelocked(vm_page_t m); void vm_page_set_validclean (vm_page_t, int, int); void vm_page_clear_dirty (vm_page_t, int, int); void vm_page_set_invalid (vm_page_t, int, int); Index: sys/vm/vm_page.c =================================================================== --- sys/vm/vm_page.c +++ sys/vm/vm_page.c @@ -84,6 +84,7 @@ #include #include #include +#include <sys/sleepqueue.h> #include #include #include @@ -876,27 +877,17 @@ vm_page_busy_downgrade(vm_page_t m) { u_int x; - bool locked; vm_page_assert_xbusied(m); - locked = mtx_owned(vm_page_lockptr(m)); + x = m->busy_lock; for (;;) { - x = m->busy_lock; - x &= VPB_BIT_WAITERS; - if (x != 0 && !locked) - vm_page_lock(m); - if (atomic_cmpset_rel_int(&m->busy_lock, - VPB_SINGLE_EXCLUSIVER | x, VPB_SHARERS_WORD(1))) + if 
(atomic_fcmpset_rel_int(&m->busy_lock, + &x, VPB_SHARERS_WORD(1))) break; - if (x != 0 && !locked) - vm_page_unlock(m); } - if (x != 0) { + if ((x & VPB_BIT_WAITERS) != 0) wakeup(m); - if (!locked) - vm_page_unlock(m); - } } /* @@ -923,35 +914,23 @@ { u_int x; - vm_page_lock_assert(m, MA_NOTOWNED); vm_page_assert_sbusied(m); + x = m->busy_lock; for (;;) { - x = m->busy_lock; if (VPB_SHARERS(x) > 1) { - if (atomic_cmpset_int(&m->busy_lock, x, + if (atomic_fcmpset_int(&m->busy_lock, &x, x - VPB_ONE_SHARER)) break; continue; } - if ((x & VPB_BIT_WAITERS) == 0) { - KASSERT(x == VPB_SHARERS_WORD(1), - ("vm_page_sunbusy: invalid lock state")); - if (atomic_cmpset_int(&m->busy_lock, - VPB_SHARERS_WORD(1), VPB_UNBUSIED)) - break; - continue; - } - KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS), - ("vm_page_sunbusy: invalid lock state for waiters")); - - vm_page_lock(m); - if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) { - vm_page_unlock(m); + KASSERT((x & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1), + ("vm_page_sunbusy: invalid lock state")); + if (!atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_UNBUSIED)) continue; - } + if ((x & VPB_BIT_WAITERS) == 0) + break; wakeup(m); - vm_page_unlock(m); break; } } @@ -959,28 +938,35 @@ /* * vm_page_busy_sleep: * - * Sleep and release the page lock, using the page pointer as wchan. + * Sleep if the page is busy, using the page pointer as wchan. * This is used to implement the hard-path of busying mechanism. * - * The given page must be locked. - * * If nonshared is true, sleep only if the page is xbusy. + * + * The object lock must be held on entry and will be released on exit. 
*/ void vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared) { + vm_object_t obj; u_int x; - vm_page_assert_locked(m); + obj = m->object; + vm_page_lock_assert(m, MA_NOTOWNED); + VM_OBJECT_ASSERT_LOCKED(obj); + sleepq_lock(m); x = m->busy_lock; if (x == VPB_UNBUSIED || (nonshared && (x & VPB_BIT_SHARED) != 0) || ((x & VPB_BIT_WAITERS) == 0 && !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS))) { - vm_page_unlock(m); + VM_OBJECT_DROP(obj); + sleepq_release(m); return; } - msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0); + VM_OBJECT_DROP(obj); + sleepq_add(m, NULL, wmesg, 0, 0); + sleepq_wait(m, PVM); } /* @@ -995,55 +981,20 @@ { u_int x; + x = m->busy_lock; for (;;) { - x = m->busy_lock; if ((x & VPB_BIT_SHARED) == 0) return (0); - if (atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_SHARER)) + if (atomic_fcmpset_acq_int(&m->busy_lock, &x, + x + VPB_ONE_SHARER)) return (1); } } -static void -vm_page_xunbusy_locked(vm_page_t m) -{ - - vm_page_assert_xbusied(m); - vm_page_assert_locked(m); - - atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); - /* There is a waiter, do wakeup() instead of vm_page_flash(). */ - wakeup(m); -} - -void -vm_page_xunbusy_maybelocked(vm_page_t m) -{ - bool lockacq; - - vm_page_assert_xbusied(m); - - /* - * Fast path for unbusy. If it succeeds, we know that there - * are no waiters, so we do not need a wakeup. - */ - if (atomic_cmpset_rel_int(&m->busy_lock, VPB_SINGLE_EXCLUSIVER, - VPB_UNBUSIED)) - return; - - lockacq = !mtx_owned(vm_page_lockptr(m)); - if (lockacq) - vm_page_lock(m); - vm_page_xunbusy_locked(m); - if (lockacq) - vm_page_unlock(m); -} - /* * vm_page_xunbusy_hard: * - * Called after the first try the exclusive unbusy of a page failed. - * It is assumed that the waiters bit is on. + * Called when unbusy has failed because there is a waiter. 
*/ void vm_page_xunbusy_hard(vm_page_t m) @@ -1051,34 +1002,10 @@ vm_page_assert_xbusied(m); - vm_page_lock(m); - vm_page_xunbusy_locked(m); - vm_page_unlock(m); -} - -/* - * vm_page_flash: - * - * Wakeup anyone waiting for the page. - * The ownership bits do not change. - * - * The given page must be locked. - */ -void -vm_page_flash(vm_page_t m) -{ - u_int x; - - vm_page_lock_assert(m, MA_OWNED); - - for (;;) { - x = m->busy_lock; - if ((x & VPB_BIT_WAITERS) == 0) - return; - if (atomic_cmpset_int(&m->busy_lock, x, - x & (~VPB_BIT_WAITERS))) - break; - } + /* + * Wake the waiter. + */ + atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); wakeup(m); } @@ -1272,7 +1199,7 @@ /* * vm_page_sleep_if_busy: * - * Sleep and release the page queues lock if the page is busied. + * Sleep and release the object lock if the page is busied. * Returns TRUE if the thread slept. * * The given page must be unlocked and object containing it must @@ -1295,8 +1222,6 @@ * held by the callers. */ obj = m->object; - vm_page_lock(m); - VM_OBJECT_WUNLOCK(obj); vm_page_busy_sleep(m, msg, false); VM_OBJECT_WLOCK(obj); return (TRUE); @@ -1304,6 +1229,39 @@ return (FALSE); } +/* + * vm_page_sleep_if_xbusy: + * + * Sleep and release the object lock if the page is xbusied. + * Returns TRUE if the thread slept. + * + * The given page must be unlocked and object containing it must + * be locked. + */ +int +vm_page_sleep_if_xbusy(vm_page_t m, const char *msg) +{ + vm_object_t obj; + + vm_page_lock_assert(m, MA_NOTOWNED); + VM_OBJECT_ASSERT_WLOCKED(m->object); + + if (vm_page_xbusied(m)) { + /* + * The page-specific object must be cached because page + * identity can change during the sleep, causing the + * re-lock of a different object. + * It is assumed that a reference to the object is already + * held by the callers. 
+ */ + obj = m->object; + vm_page_busy_sleep(m, msg, true); + VM_OBJECT_WLOCK(obj); + return (TRUE); + } + return (FALSE); +} + /* * vm_page_dirty_KBI: [ internal use only ] * @@ -1462,7 +1420,7 @@ vm_page_assert_locked(m); VM_OBJECT_ASSERT_WLOCKED(object); if (vm_page_xbusied(m)) - vm_page_xunbusy_maybelocked(m); + vm_page_xunbusy(m); mrem = vm_radix_remove(&object->rtree, m->pindex); KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m)); @@ -1595,7 +1553,7 @@ TAILQ_REMOVE(&object->memq, mold, listq); mold->object = NULL; - vm_page_xunbusy_maybelocked(mold); + vm_page_xunbusy(mold); /* * The object's resident_page_count does not change because we have @@ -3929,8 +3887,6 @@ * likely to reclaim it. */ vm_page_aflag_set(m, PGA_REFERENCED); - vm_page_lock(m); - VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(m, "pgrbwt", (allocflags & VM_ALLOC_IGN_SBUSY) != 0); VM_OBJECT_WLOCK(object); @@ -4031,8 +3987,6 @@ * likely to reclaim it. */ vm_page_aflag_set(m, PGA_REFERENCED); - vm_page_lock(m); - VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(m, "grbmaw", (allocflags & VM_ALLOC_IGN_SBUSY) != 0); VM_OBJECT_WLOCK(object);