Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c @@ -1761,12 +1761,10 @@ bcopy((char *)db->db_data + bufoff, va, PAGESIZE); zfs_unmap_page(sf); vm_page_valid(m); - vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); - vm_page_unlock(m); vm_page_sunbusy(m); } *rbehind = i; @@ -1884,12 +1882,10 @@ } zfs_unmap_page(sf); vm_page_valid(m); - vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); - vm_page_unlock(m); vm_page_sunbusy(m); } *rahead = i; Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -545,9 +545,7 @@ zfs_vmobject_wlock(obj); if (error == 0) { vm_page_valid(pp); - vm_page_lock(pp); vm_page_activate(pp); - vm_page_unlock(pp); } vm_page_sunbusy(pp); if (error != 0 && !vm_page_wired(pp) && Index: head/sys/dev/md/md.c =================================================================== --- head/sys/dev/md/md.c +++ head/sys/dev/md/md.c @@ -1145,12 +1145,7 @@ } if (m != NULL) { vm_page_xunbusy(m); - vm_page_lock(m); - if (vm_page_active(m)) - vm_page_reference(m); - else - vm_page_activate(m); - vm_page_unlock(m); + vm_page_reference(m); } /* Actions on further pages start at offset 0 */ Index: head/sys/fs/tmpfs/tmpfs_subr.c =================================================================== --- head/sys/fs/tmpfs/tmpfs_subr.c +++ head/sys/fs/tmpfs/tmpfs_subr.c @@ -1490,9 +1490,7 @@ * current operation is not regarded * as an access. */ - vm_page_lock(m); vm_page_launder(m); - vm_page_unlock(m); } else { vm_page_free(m); if (ignerr) Index: head/sys/kern/uipc_shm.c =================================================================== --- head/sys/kern/uipc_shm.c +++ head/sys/kern/uipc_shm.c @@ -209,9 +209,7 @@ error = uiomove_fromphys(&m, offset, tlen, uio); if (uio->uio_rw == UIO_WRITE && error == 0) vm_page_set_dirty(m); - vm_page_lock(m); vm_page_activate(m); - vm_page_unlock(m); vm_page_sunbusy(m); return (error); Index: head/sys/vm/swap_pager.c =================================================================== --- head/sys/vm/swap_pager.c +++ head/sys/vm/swap_pager.c @@ -1630,10 +1630,9 @@ * then finish the I/O. */ MPASS(m->dirty == VM_PAGE_BITS_ALL); + /* PQ_UNSWAPPABLE? 
*/ - vm_page_lock(m); vm_page_activate(m); - vm_page_unlock(m); vm_page_sunbusy(m); } } else if (bp->b_iocmd == BIO_READ) { @@ -1668,9 +1667,7 @@ ("swp_pager_async_iodone: page %p is not write" " protected", m)); vm_page_undirty(m); - vm_page_lock(m); vm_page_deactivate_noreuse(m); - vm_page_unlock(m); vm_page_sunbusy(m); } } @@ -1718,10 +1715,8 @@ vm_page_dirty(m); #ifdef INVARIANTS - vm_page_lock(m); if (!vm_page_wired(m) && m->a.queue == PQ_NONE) panic("page %p is neither wired nor queued", m); - vm_page_unlock(m); #endif vm_page_xunbusy(m); swap_pager_unswapped(m); @@ -1732,9 +1727,7 @@ { vm_page_dirty(m); - vm_page_lock(m); vm_page_launder(m); - vm_page_unlock(m); vm_page_xunbusy(m); swap_pager_unswapped(m); } Index: head/sys/vm/vm_fault.c =================================================================== --- head/sys/vm/vm_fault.c +++ head/sys/vm/vm_fault.c @@ -162,9 +162,7 @@ * this page. Deactivating it leaves it available for * pageout while optimizing fault restarts. */ - vm_page_lock(m); vm_page_deactivate(m); - vm_page_unlock(m); vm_page_xunbusy(m); *mp = NULL; } @@ -395,9 +393,7 @@ for (pidx = first, m = vm_page_lookup(object, pidx); pidx <= last; pidx++, m = vm_page_next(m)) { vm_fault_populate_check_page(m); - vm_page_lock(m); vm_page_deactivate(m); - vm_page_unlock(m); vm_page_xunbusy(m); } } @@ -406,7 +402,6 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold) { - struct mtx *m_mtx; vm_offset_t vaddr; vm_page_t m; vm_pindex_t map_first, map_last, pager_first, pager_last, pidx; @@ -518,22 +513,17 @@ MPASS(rv == KERN_SUCCESS); #endif VM_OBJECT_WLOCK(fs->first_object); - m_mtx = NULL; for (i = 0; i < npages; i++) { - if ((fault_flags & VM_FAULT_WIRE) != 0) { + if ((fault_flags & VM_FAULT_WIRE) != 0) vm_page_wire(&m[i]); - } else { - vm_page_change_lock(&m[i], &m_mtx); + else vm_page_activate(&m[i]); - } if (m_hold != NULL && m[i].pindex == fs->first_pindex) { *m_hold = &m[i]; vm_page_wire(&m[i]); } vm_page_xunbusy(&m[i]); } - if (m_mtx != NULL) - mtx_unlock(m_mtx); } curthread->td_ru.ru_majflt++; return (KERN_SUCCESS); @@ -1393,13 +1383,10 @@ * If the page is not wired down, then put it where the pageout daemon * can find it. */ - if ((fault_flags & VM_FAULT_WIRE) != 0) { + if ((fault_flags & VM_FAULT_WIRE) != 0) vm_page_wire(fs.m); - } else { - vm_page_lock(fs.m); + else vm_page_activate(fs.m); - vm_page_unlock(fs.m); - } if (m_hold != NULL) { *m_hold = fs.m; vm_page_wire(fs.m); @@ -1499,12 +1486,13 @@ * Typically, at this point, prefetched pages * are still in the inactive queue. Only * pages that triggered page faults are in the - * active queue. + * active queue. The test for whether the page + * is in the inactive queue is racy; in the + * worst case we will requeue the page + * unnecessarily. 
*/ - vm_page_lock(m); if (!vm_page_inactive(m)) vm_page_deactivate(m); - vm_page_unlock(m); } } } @@ -1879,9 +1867,7 @@ ("dst_m %p is not wired", dst_m)); } } else { - vm_page_lock(dst_m); vm_page_activate(dst_m); - vm_page_unlock(dst_m); } vm_page_xunbusy(dst_m); } Index: head/sys/vm/vm_object.c =================================================================== --- head/sys/vm/vm_object.c +++ head/sys/vm/vm_object.c @@ -1293,9 +1293,7 @@ vm_page_busy_sleep(tm, "madvpo", false); goto relookup; } - vm_page_lock(tm); vm_page_advise(tm, advice); - vm_page_unlock(tm); vm_page_xunbusy(tm); vm_object_madvise_freespace(tobject, advice, tm->pindex, 1); next_pindex: @@ -2059,7 +2057,6 @@ void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { - struct mtx *mtx; vm_page_t p, next; VM_OBJECT_ASSERT_LOCKED(object); @@ -2073,14 +2070,10 @@ * Here, the variable "p" is either (1) the page with the least pindex * greater than or equal to the parameter "start" or (2) NULL. */ - mtx = NULL; for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); - vm_page_change_lock(p, &mtx); vm_page_deactivate_noreuse(p); } - if (mtx != NULL) - mtx_unlock(mtx); } /* Index: head/sys/vm/vm_page.h =================================================================== --- head/sys/vm/vm_page.h +++ head/sys/vm/vm_page.h @@ -808,12 +808,6 @@ uint32_t *addr, val; /* - * The PGA_REFERENCED flag can only be cleared if the page is locked. - */ - if ((bits & PGA_REFERENCED) != 0) - vm_page_assert_locked(m); - - /* * Access the whole 32-bit word containing the aflags field with an * atomic update. Parallel non-atomic updates to the other fields * within this word are handled properly by the atomic update. @@ -920,8 +914,6 @@ static inline uint8_t vm_page_queue(vm_page_t m) { - - vm_page_assert_locked(m); return (_vm_page_queue(vm_page_astate_load(m))); } Index: head/sys/vm/vm_page.c =================================================================== --- head/sys/vm/vm_page.c +++ head/sys/vm/vm_page.c @@ -1371,12 +1371,10 @@ * have shown that deactivating the page is usually the best choice, * unless the page is wanted by another thread. */ - vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); - vm_page_unlock(m); vm_page_xunbusy_unchecked(m); } @@ -3651,7 +3649,6 @@ vm_page_enqueue(vm_page_t m, uint8_t queue) { - vm_page_assert_locked(m); KASSERT(m->a.queue == PQ_NONE && (m->a.flags & PGA_QUEUE_STATE_MASK) == 0, ("%s: page %p is already enqueued", __func__, m)); @@ -4124,7 +4121,6 @@ vm_page_unswappable(vm_page_t m) { - vm_page_assert_locked(m); KASSERT(!vm_page_wired(m) && (m->oflags & VPO_UNMANAGED) == 0, ("page %p already unswappable", m)); @@ -4222,9 +4218,7 @@ m->dirty == 0 && vm_page_tryxbusy(m)) { vm_page_free(m); } else { - vm_page_lock(m); vm_page_release_toq(m, PQ_INACTIVE, flags != 0); - vm_page_unlock(m); } } } @@ -4293,7 +4287,6 @@ vm_page_advise(vm_page_t m, int advice) { - vm_page_assert_locked(m); VM_OBJECT_ASSERT_WLOCKED(m->object); if (advice == MADV_FREE) /* @@ -4309,14 +4302,14 @@ return; } + if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m)) + vm_page_dirty(m); + /* * Clear any references to the page. Otherwise, the page daemon will * immediately reactivate the page. 
*/ vm_page_aflag_clear(m, PGA_REFERENCED); - - if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m)) - vm_page_dirty(m); /* * Place clean pages near the head of the inactive queue rather than Index: head/sys/vm/vm_pageout.c =================================================================== --- head/sys/vm/vm_pageout.c +++ head/sys/vm/vm_pageout.c @@ -396,14 +396,11 @@ vm_page_xunbusy(p); break; } - vm_page_lock(p); if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { - vm_page_unlock(p); vm_page_xunbusy(p); ib = 0; break; } - vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; ++ib; @@ -429,13 +426,10 @@ vm_page_xunbusy(p); break; } - vm_page_lock(p); if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { - vm_page_unlock(p); vm_page_xunbusy(p); break; } - vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; ++is; @@ -511,10 +505,12 @@ ("vm_pageout_flush: page %p is not write protected", mt)); switch (pageout_status[i]) { case VM_PAGER_OK: - vm_page_lock(mt); + /* + * The page may have moved since laundering started, in + * which case it should be left alone. + */ if (vm_page_in_laundry(mt)) vm_page_deactivate_noreuse(mt); - vm_page_unlock(mt); /* FALLTHROUGH */ case VM_PAGER_PEND: numpagedout++; @@ -527,10 +523,8 @@ * the page daemon. */ vm_page_undirty(mt); - vm_page_lock(mt); if (vm_page_in_laundry(mt)) vm_page_deactivate_noreuse(mt); - vm_page_unlock(mt); break; case VM_PAGER_ERROR: case VM_PAGER_FAIL: @@ -546,14 +540,12 @@ * clog the laundry and inactive queues. (We will try * paging it out again later.) */ - vm_page_lock(mt); if (object->type == OBJT_SWAP && pageout_status[i] == VM_PAGER_FAIL) { vm_page_unswappable(mt); numpagedout++; } else vm_page_activate(mt); - vm_page_unlock(mt); if (eio != NULL && i >= mreq && i - mreq < runlen) *eio = TRUE; break; @@ -611,7 +603,6 @@ vm_pindex_t pindex; int error, lockmode; - vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); error = 0; @@ -631,7 +622,6 @@ * of time. */ if (object->type == OBJT_VNODE) { - vm_page_unlock(m); vm_page_xunbusy(m); vp = object->handle; if (vp->v_type == VREG && @@ -662,11 +652,9 @@ error = ENOENT; goto unlock_all; } - vm_page_lock(m); /* - * While the object and page were unlocked, the page - * may have been: + * While the object was unlocked, the page may have been: * (1) moved to a different queue, * (2) reallocated to a different object, * (3) reallocated to a different offset, or @@ -674,17 +662,15 @@ */ if (!vm_page_in_laundry(m) || m->object != object || m->pindex != pindex || m->dirty == 0) { - vm_page_unlock(m); error = ENXIO; goto unlock_all; } /* - * The page may have been busied while the object and page - * locks were released. + * The page may have been busied while the object lock was + * released. 
*/ if (vm_page_tryxbusy(m) == 0) { - vm_page_unlock(m); error = EBUSY; goto unlock_all; } @@ -695,11 +681,9 @@ */ if (!vm_page_try_remove_write(m)) { vm_page_xunbusy(m); - vm_page_unlock(m); error = EBUSY; goto unlock_all; } - vm_page_unlock(m); /* * If a page is dirty, then it is either being washed @@ -714,7 +698,6 @@ VM_OBJECT_WUNLOCK(object); unlock_mp: - vm_page_lock_assert(m, MA_NOTOWNED); if (mp != NULL) { if (vp != NULL) vput(vp); @@ -735,7 +718,6 @@ { struct scan_state ss; struct vm_pagequeue *pq; - struct mtx *mtx; vm_object_t object; vm_page_t m, marker; vm_page_astate_t new, old; @@ -743,7 +725,6 @@ int vnodes_skipped; bool pageout_ok; - mtx = NULL; object = NULL; starting_target = launder; vnodes_skipped = 0; @@ -771,9 +752,6 @@ if (__predict_false((m->flags & PG_MARKER) != 0)) continue; - vm_page_change_lock(m, &mtx); - -recheck: /* * Don't touch a page that was removed from the queue after the * page queue lock was released. Otherwise, ensure that any @@ -783,46 +761,39 @@ if (vm_pageout_defer(m, queue, true)) continue; - if (object != m->object) { + /* + * Lock the page's object. + */ + if (object == NULL || object != m->object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); - - /* - * A page's object pointer may be set to NULL before - * the object lock is acquired. - */ object = (vm_object_t)atomic_load_ptr(&m->object); - if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) { - mtx_unlock(mtx); - /* Depends on type-stability. */ - VM_OBJECT_WLOCK(object); - mtx_lock(mtx); - goto recheck; + if (__predict_false(object == NULL)) + /* The page is being freed by another thread. */ + continue; + + /* Depends on type-stability. */ + VM_OBJECT_WLOCK(object); + if (__predict_false(m->object != object)) { + VM_OBJECT_WUNLOCK(object); + object = NULL; + continue; } } - if (__predict_false(m->object == NULL)) - /* - * The page has been removed from its object. - */ - continue; - KASSERT(m->object == object, ("page %p does not belong to %p", - m, object)); if (vm_page_tryxbusy(m) == 0) continue; /* * Check for wirings now that we hold the object lock and have - * verified that the page is unbusied. If the page is mapped, - * it may still be wired by pmap lookups. The call to + * exclusively busied the page. If the page is mapped, it may + * still be wired by pmap lookups. The call to * vm_page_try_remove_all() below atomically checks for such * wirings and removes mappings. If the page is unmapped, the - * wire count is guaranteed not to increase. + * wire count is guaranteed not to increase after this check. */ - if (__predict_false(vm_page_wired(m))) { - vm_page_dequeue_deferred(m); + if (__predict_false(vm_page_wired(m))) goto skip_page; - } /* * Invalid pages can be easily freed. They cannot be @@ -898,10 +869,8 @@ */ if (object->ref_count != 0) { vm_page_test_dirty(m); - if (m->dirty == 0 && !vm_page_try_remove_all(m)) { - vm_page_dequeue_deferred(m); + if (m->dirty == 0 && !vm_page_try_remove_all(m)) goto skip_page; - } } /* @@ -912,6 +881,13 @@ */ if (m->dirty == 0) { free_page: + /* + * Now we are guaranteed that no other threads are + * manipulating the page, check for a last-second + * reference. 
+ */ + if (vm_pageout_defer(m, queue, true)) + goto skip_page; vm_page_free(m); VM_CNT_INC(v_dfree); } else if ((object->flags & OBJ_DEAD) == 0) { @@ -948,17 +924,12 @@ pageout_lock_miss++; vnodes_skipped++; } - mtx = NULL; object = NULL; } else { skip_page: vm_page_xunbusy(m); } } - if (mtx != NULL) { - mtx_unlock(mtx); - mtx = NULL; - } if (object != NULL) { VM_OBJECT_WUNLOCK(object); object = NULL; @@ -1195,7 +1166,6 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage) { struct scan_state ss; - struct mtx *mtx; vm_object_t object; vm_page_t m, marker; struct vm_pagequeue *pq; @@ -1236,7 +1206,6 @@ * and scanning resumes. */ max_scan = page_shortage > 0 ? pq->pq_cnt : min_scan; - mtx = NULL; act_scan: vm_pageout_init_scan(&ss, pq, marker, &vmd->vmd_clock[0], max_scan); while ((m = vm_pageout_next(&ss, false)) != NULL) { @@ -1255,8 +1224,6 @@ if (__predict_false((m->flags & PG_MARKER) != 0)) continue; - vm_page_change_lock(m, &mtx); - /* * Don't touch a page that was removed from the queue after the * page queue lock was released. Otherwise, ensure that any @@ -1390,10 +1357,6 @@ page_shortage -= ps_delta; } - if (mtx != NULL) { - mtx_unlock(mtx); - mtx = NULL; - } vm_pagequeue_lock(pq); TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q); TAILQ_INSERT_AFTER(&pq->pq_pl, marker, &vmd->vmd_clock[0], plinks.q); @@ -1459,7 +1422,6 @@ { struct scan_state ss; struct vm_batchqueue rq; - struct mtx *mtx; vm_page_t m, marker; struct vm_pagequeue *pq; vm_object_t object; @@ -1484,7 +1446,6 @@ deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit); starting_page_shortage = page_shortage = shortage + deficit; - mtx = NULL; object = NULL; vm_batchqueue_init(&rq); @@ -1502,9 +1463,6 @@ KASSERT((m->flags & PG_MARKER) == 0, ("marker page %p was dequeued", m)); - vm_page_change_lock(m, &mtx); - -recheck: /* * Don't touch a page that was removed from the queue after the * page queue lock was released. Otherwise, ensure that any @@ -1514,30 +1472,25 @@ if (vm_pageout_defer(m, PQ_INACTIVE, false)) continue; - if (object != m->object) { + /* + * Lock the page's object. + */ + if (object == NULL || object != m->object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); - - /* - * A page's object pointer may be set to NULL before - * the object lock is acquired. - */ object = (vm_object_t)atomic_load_ptr(&m->object); - if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) { - mtx_unlock(mtx); - /* Depends on type-stability. */ - VM_OBJECT_WLOCK(object); - mtx_lock(mtx); - goto recheck; + if (__predict_false(object == NULL)) + /* The page is being freed by another thread. */ + continue; + + /* Depends on type-stability. */ + VM_OBJECT_WLOCK(object); + if (__predict_false(m->object != object)) { + VM_OBJECT_WUNLOCK(object); + object = NULL; + goto reinsert; } } - if (__predict_false(m->object == NULL)) - /* - * The page has been removed from its object. - */ - continue; - KASSERT(m->object == object, ("page %p does not belong to %p", - m, object)); if (vm_page_tryxbusy(m) == 0) { /* @@ -1557,17 +1510,15 @@ vm_pager_page_unswapped(m); /* - * Re-check for wirings now that we hold the object lock and - * have verified that the page is unbusied. If the page is - * mapped, it may still be wired by pmap lookups. The call to + * Check for wirings now that we hold the object lock and have + * exclusively busied the page. If the page is mapped, it may + * still be wired by pmap lookups. The call to * vm_page_try_remove_all() below atomically checks for such * wirings and removes mappings. 
If the page is unmapped, the - * wire count is guaranteed not to increase. + * wire count is guaranteed not to increase after this check. */ - if (__predict_false(vm_page_wired(m))) { - vm_page_dequeue_deferred(m); + if (__predict_false(vm_page_wired(m))) goto skip_page; - } /* * Invalid pages can be easily freed. They cannot be @@ -1635,10 +1586,8 @@ */ if (object->ref_count != 0) { vm_page_test_dirty(m); - if (m->dirty == 0 && !vm_page_try_remove_all(m)) { - vm_page_dequeue_deferred(m); + if (m->dirty == 0 && !vm_page_try_remove_all(m)) goto skip_page; - } } /* @@ -1651,13 +1600,19 @@ if (m->dirty == 0) { free_page: /* - * Because we dequeued the page and have already - * checked for concurrent dequeue and enqueue - * requests, we can safely disassociate the page - * from the inactive queue. + * Now we are guaranteed that no other threads are + * manipulating the page, check for a last-second + * reference that would save it from doom. */ - KASSERT((m->a.flags & PGA_QUEUE_STATE_MASK) == 0, - ("page %p has queue state", m)); + if (vm_pageout_defer(m, PQ_INACTIVE, false)) + goto skip_page; + + /* + * Because we dequeued the page and have already checked + * for pending dequeue and enqueue requests, we can + * safely disassociate the page from the inactive queue + * without holding the queue lock. + */ m->a.queue = PQ_NONE; vm_page_free(m); page_shortage--; @@ -1671,8 +1626,6 @@ reinsert: vm_pageout_reinsert_inactive(&ss, &rq, m); } - if (mtx != NULL) - mtx_unlock(mtx); if (object != NULL) VM_OBJECT_WUNLOCK(object); vm_pageout_reinsert_inactive(&ss, &rq, NULL); Index: head/sys/vm/vm_swapout.c =================================================================== --- head/sys/vm/vm_swapout.c +++ head/sys/vm/vm_swapout.c @@ -186,12 +186,10 @@ return; } if (!pmap_is_referenced(m)) { - vm_page_lock(m); if (!vm_page_active(m)) (void)vm_page_try_remove_all(m); else if (unmap && vm_page_try_remove_all(m)) vm_page_deactivate(m); - vm_page_unlock(m); } vm_page_xunbusy(m); }
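
The dmu.c readahead paths and vm_page_readahead_finish() above share one pattern: the queue transition is now made while the page is still busied, with no page lock taken. Below is a minimal sketch of that pattern, assuming the caller holds a shared-busy reference on a managed page; the helper name is illustrative and not part of the change.

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_page.h>

static void
readahead_finish_sketch(vm_page_t m)
{
	/*
	 * The busy lock, not the page lock, serializes against threads
	 * waiting for the page.  If a waiter exists, the page is wanted
	 * and goes to the active queue; otherwise it is left where the
	 * page daemon can reclaim it cheaply.
	 */
	if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
		vm_page_activate(m);
	else
		vm_page_deactivate(m);
	vm_page_sunbusy(m);
}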
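The inactive and laundry scans in vm_pageout.c no longer hold a page lock while acquiring the object lock; instead they load the object pointer atomically and re-validate it after locking. The following sketch isolates that validation step for a managed page taken from a pagequeue scan; the function name is invented for illustration.

#include <sys/param.h>
#include <machine/atomic.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

static vm_object_t
scan_lock_object_sketch(vm_page_t m)
{
	vm_object_t object;

	/*
	 * The object pointer may be cleared by a racing free, so load it
	 * atomically rather than dereferencing m->object directly.
	 */
	object = (vm_object_t)atomic_load_ptr(&m->object);
	if (object == NULL)
		return (NULL);		/* The page is being freed. */

	/*
	 * VM objects are type-stable, so the lock may be taken even if
	 * the object is concurrently freed; the re-check below detects a
	 * page that was freed or renamed while we slept on the lock.
	 */
	VM_OBJECT_WLOCK(object);
	if (m->object != object) {
		VM_OBJECT_WUNLOCK(object);
		return (NULL);
	}
	return (object);
}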
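Finally, the free_page: paths in both scans replace the old deferred dequeue with a last-second vm_pageout_defer() check made after the page has been exclusively busied, unmapped, and found clean. Sketched in isolation below, with the outcome reduced to a boolean; this is an illustrative helper that assumes the vm_pageout.c context (vm_pageout_defer() is internal to that file), not code from the change itself.

static bool
scan_try_free_sketch(vm_page_t m)
{
	/*
	 * At this point the page is xbusied, unmapped, and clean, so no
	 * new wiring or dirtying can occur.  A racing thread may still
	 * have referenced or requeued the page; vm_pageout_defer()
	 * catches that case and the page is left alone.
	 */
	if (vm_pageout_defer(m, PQ_INACTIVE, false))
		return (false);

	/*
	 * Dequeue and enqueue requests were already checked, so the page
	 * can be disassociated from the inactive queue without taking
	 * the queue lock.
	 */
	m->a.queue = PQ_NONE;
	vm_page_free(m);
	return (true);
}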