Index: sys/amd64/amd64/pmap.c =================================================================== --- sys/amd64/amd64/pmap.c +++ sys/amd64/amd64/pmap.c @@ -2931,31 +2931,23 @@ m = NULL; PG_RW = pmap_rw_bit(pmap); PG_V = pmap_valid_bit(pmap); + PMAP_LOCK(pmap); -retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { - if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - if (vm_page_pa_tryrelock(pmap, (pde & - PG_PS_FRAME) | (va & PDRMASK), &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + if ((pde & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0) + m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | + (va & PDRMASK)); } else { pte = *pmap_pde_to_pte(pdep, va); - if ((pte & PG_V) && - ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, - &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + if ((pte & PG_V) != 0 && + ((pte & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0)) + m = PHYS_TO_VM_PAGE(pte & PG_FRAME); } - if (m != NULL) - vm_page_wire(m); + if (m != NULL && !vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -4081,7 +4073,7 @@ /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } Index: sys/amd64/sgx/sgx.c =================================================================== --- sys/amd64/sgx/sgx.c +++ sys/amd64/sgx/sgx.c @@ -358,7 +358,7 @@ uint64_t offs; vm_page_lock(p); - vm_page_remove(p); + (void)vm_page_remove(p); vm_page_unlock(p); dprintf("%s: p->pidx %ld\n", __func__, p->pindex); Index: sys/arm/arm/pmap-v6.c =================================================================== --- sys/arm/arm/pmap-v6.c +++ sys/arm/arm/pmap-v6.c @@ -2973,7 +2973,7 @@ /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc); } Index: sys/arm/nvidia/drm2/tegra_bo.c =================================================================== --- sys/arm/nvidia/drm2/tegra_bo.c +++ sys/arm/nvidia/drm2/tegra_bo.c @@ -67,7 +67,7 @@ cdev_pager_free_page(bo->cdev_pager, m); vm_page_lock(m); m->flags &= ~PG_FICTITIOUS; - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); vm_page_unlock(m); } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -481,9 +481,7 @@ } ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); - vm_page_lock(pp); vm_page_wire(pp); - vm_page_unlock(pp); } else pp = NULL; break; Index: sys/compat/linuxkpi/common/include/linux/mm.h =================================================================== --- sys/compat/linuxkpi/common/include/linux/mm.h +++ sys/compat/linuxkpi/common/include/linux/mm.h @@ -227,9 +227,7 @@ static inline void get_page(struct vm_page *page) { - vm_page_lock(page); vm_page_wire(page); - vm_page_unlock(page); } extern long @@ -251,8 +249,7 @@ put_page(struct vm_page *page) { vm_page_lock(page); - if (vm_page_unwire(page, PQ_ACTIVE) && page->object == NULL) - vm_page_free(page); + vm_page_unwire(page, PQ_ACTIVE); vm_page_unlock(page); } Index: sys/compat/linuxkpi/common/src/linux_page.c 
=================================================================== --- sys/compat/linuxkpi/common/src/linux_page.c +++ sys/compat/linuxkpi/common/src/linux_page.c @@ -91,9 +91,10 @@ if (PMAP_HAS_DMAP) { unsigned long npages = 1UL << order; - int req = (flags & M_ZERO) ? (VM_ALLOC_ZERO | VM_ALLOC_NOOBJ | - VM_ALLOC_NORMAL) : (VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL); + int req = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; + if ((flags & M_ZERO) != 0) + req |= VM_ALLOC_ZERO; if (order == 0 && (flags & GFP_DMA32) == 0) { page = vm_page_alloc(NULL, 0, req); if (page == NULL) @@ -153,9 +154,8 @@ for (x = 0; x != npages; x++) { vm_page_t pgo = page + x; - vm_page_lock(pgo); - vm_page_free(pgo); - vm_page_unlock(pgo); + if (vm_page_unwire_noq(pgo)) + vm_page_free(pgo); } } else { vm_offset_t vaddr; @@ -294,7 +294,7 @@ rv = vm_pager_get_pages(obj, &page, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(page); - vm_page_unwire(page, PQ_NONE); + vm_page_unwire_noq(page); vm_page_free(page); vm_page_unlock(page); VM_OBJECT_WUNLOCK(obj); Index: sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c =================================================================== --- sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c +++ sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c @@ -378,8 +378,7 @@ pagelist_page_free(vm_page_t pp) { vm_page_lock(pp); - if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL) - vm_page_free(pp); + vm_page_unwire(pp, PQ_INACTIVE); vm_page_unlock(pp); } Index: sys/dev/drm2/ttm/ttm_page_alloc.c =================================================================== --- sys/dev/drm2/ttm/ttm_page_alloc.c +++ sys/dev/drm2/ttm/ttm_page_alloc.c @@ -137,7 +137,7 @@ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("ttm got unmanaged %p", m)); m->flags &= ~PG_FICTITIOUS; m->oflags |= VPO_UNMANAGED; - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } Index: sys/dev/ti/if_ti.c =================================================================== --- sys/dev/ti/if_ti.c +++ sys/dev/ti/if_ti.c @@ -1623,7 +1623,7 @@ } sf[i] = sf_buf_alloc(frame, SFB_NOWAIT); if (sf[i] == NULL) { - vm_page_unwire(frame, PQ_NONE); + vm_page_unwire_noq(frame); vm_page_free(frame); device_printf(sc->ti_dev, "buffer allocation " "failed -- packet dropped!\n"); Index: sys/dev/xen/gntdev/gntdev.c =================================================================== --- sys/dev/xen/gntdev/gntdev.c +++ sys/dev/xen/gntdev/gntdev.c @@ -278,7 +278,7 @@ continue; gnttab_free_grant_reference(gref->gref_id); } - vm_page_unwire(gref->page, PQ_NONE); + vm_page_unwire_noq(gref->page); vm_page_free(gref->page); gref->page = NULL; } Index: sys/i386/i386/pmap.c =================================================================== --- sys/i386/i386/pmap.c +++ sys/i386/i386/pmap.c @@ -2465,7 +2465,7 @@ /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); } Index: sys/kern/kern_exec.c =================================================================== --- sys/kern/kern_exec.c +++ sys/kern/kern_exec.c @@ -976,9 +976,7 @@ if (ma[0]->valid != VM_PAGE_BITS_ALL) { vm_page_xbusy(ma[0]); if (!vm_pager_has_page(object, 0, NULL, &after)) { - vm_page_lock(ma[0]); vm_page_free(ma[0]); - vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); return (EIO); } @@ -1002,11 +1000,8 @@ initial_pagein = i; rv = vm_pager_get_pages(object, ma, 
initial_pagein, NULL, NULL); if (rv != VM_PAGER_OK) { - for (i = 0; i < initial_pagein; i++) { - vm_page_lock(ma[i]); + for (i = 0; i < initial_pagein; i++) vm_page_free(ma[i]); - vm_page_unlock(ma[i]); - } VM_OBJECT_WUNLOCK(object); return (EIO); } @@ -1014,9 +1009,7 @@ for (i = 1; i < initial_pagein; i++) vm_page_readahead_finish(ma[i]); } - vm_page_lock(ma[0]); vm_page_wire(ma[0]); - vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); imgp->firstpage = sf_buf_alloc(ma[0], 0); Index: sys/kern/kern_sendfile.c =================================================================== --- sys/kern/kern_sendfile.c +++ sys/kern/kern_sendfile.c @@ -119,76 +119,20 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW, NULL, 0, sfstat_sysctl, "I", "sendfile statistics"); -/* - * Detach mapped page and release resources back to the system. Called - * by mbuf(9) code when last reference to a page is freed. - */ -static void -sendfile_free_page(vm_page_t pg, bool nocache) -{ - bool freed; - - vm_page_lock(pg); - /* - * In either case check for the object going away on us. This can - * happen since we don't hold a reference to it. If so, we're - * responsible for freeing the page. In 'noncache' case try to free - * the page, but only if it is cheap to. - */ - if (vm_page_unwire_noq(pg)) { - vm_object_t obj; - - if ((obj = pg->object) == NULL) - vm_page_free(pg); - else { - freed = false; - if (nocache && !vm_page_xbusied(pg) && - VM_OBJECT_TRYWLOCK(obj)) { - /* Only free unmapped pages. */ - if (obj->ref_count == 0 || - !pmap_page_is_mapped(pg)) - /* - * The busy test before the object is - * locked cannot be relied upon. - */ - freed = vm_page_try_to_free(pg); - VM_OBJECT_WUNLOCK(obj); - } - if (!freed) { - /* - * If we were asked to not cache the page, place - * it near the head of the inactive queue so - * that it is reclaimed sooner. Otherwise, - * maintain LRU. - */ - if (nocache) - vm_page_deactivate_noreuse(pg); - else if (vm_page_active(pg)) - vm_page_reference(pg); - else - vm_page_deactivate(pg); - } - } - } - vm_page_unlock(pg); -} - static void sendfile_free_mext(struct mbuf *m) { struct sf_buf *sf; vm_page_t pg; - bool nocache; KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_SFBUF, ("%s: m %p !M_EXT or !EXT_SFBUF", __func__, m)); sf = m->m_ext.ext_arg1; pg = sf_buf_page(sf); - nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE; sf_buf_free(sf); - sendfile_free_page(pg, nocache); + vm_page_release(pg, (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0); if (m->m_ext.ext_flags & EXT_FLAG_SYNC) { struct sendfile_sync *sfs = m->m_ext.ext_arg2; Index: sys/kern/sys_process.c =================================================================== --- sys/kern/sys_process.c +++ sys/kern/sys_process.c @@ -307,8 +307,7 @@ * Release the page. 
*/ vm_page_lock(m); - if (vm_page_unwire(m, PQ_ACTIVE) && m->object == NULL) - vm_page_free(m); + vm_page_unwire(m, PQ_ACTIVE); vm_page_unlock(m); } while (error == 0 && uio->uio_resid > 0); Index: sys/kern/uipc_shm.c =================================================================== --- sys/kern/uipc_shm.c +++ sys/kern/uipc_shm.c @@ -196,9 +196,7 @@ printf( "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n", obj, idx, m->valid, rv); - vm_page_lock(m); vm_page_free(m); - vm_page_unlock(m); VM_OBJECT_WUNLOCK(obj); return (EIO); } @@ -206,9 +204,7 @@ vm_page_zero_invalid(m, TRUE); vm_page_xunbusy(m); } - vm_page_lock(m); vm_page_wire(m); - vm_page_unlock(m); VM_OBJECT_WUNLOCK(obj); error = uiomove_fromphys(&m, offset, tlen, uio); if (uio->uio_rw == UIO_WRITE && error == 0) { Index: sys/kern/vfs_bio.c =================================================================== --- sys/kern/vfs_bio.c +++ sys/kern/vfs_bio.c @@ -2894,47 +2894,6 @@ } } -/* - * Unwire a page held by a buf and either free it or update the page queues to - * reflect its recent use. - */ -static void -vfs_vmio_unwire(struct buf *bp, vm_page_t m) -{ - bool freed; - - vm_page_lock(m); - if (vm_page_unwire_noq(m)) { - if ((bp->b_flags & B_DIRECT) != 0) - freed = vm_page_try_to_free(m); - else - freed = false; - if (!freed) { - /* - * Use a racy check of the valid bits to determine - * whether we can accelerate reclamation of the page. - * The valid bits will be stable unless the page is - * being mapped or is referenced by multiple buffers, - * and in those cases we expect races to be rare. At - * worst we will either accelerate reclamation of a - * valid page and violate LRU, or unnecessarily defer - * reclamation of an invalid page. - * - * The B_NOREUSE flag marks data that is not expected to - * be reused, so accelerate reclamation in that case - * too. Otherwise, maintain LRU. - */ - if (m->valid == 0 || (bp->b_flags & B_NOREUSE) != 0) - vm_page_deactivate_noreuse(m); - else if (vm_page_active(m)) - vm_page_reference(m); - else - vm_page_deactivate(m); - } - } - vm_page_unlock(m); -} - /* * Perform page invalidation when a buffer is released. The fully invalid * pages will be reclaimed later in vfs_vmio_truncate(). 
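For illustration only (not part of the patch), here is a minimal sketch of how the caller-side pattern changes now that vm_page_unwire() returns void and itself frees a page whose last reference was the released wiring; this mirrors the sys_process.c and bpf_zerocopy.c hunks above. The helper name and the BEFORE_THIS_PATCH marker are hypothetical.

static void
example_release_held_page(vm_page_t m)
{

	vm_page_lock(m);
#ifdef BEFORE_THIS_PATCH
	/* Old contract: the caller freed a page orphaned from its object. */
	if (vm_page_unwire(m, PQ_ACTIVE) && m->object == NULL)
		vm_page_free(m);
#else
	/* New contract: vm_page_unwire() handles the final-reference case. */
	vm_page_unwire(m, PQ_ACTIVE);
#endif
	vm_page_unlock(m);
}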
@@ -2984,7 +2943,8 @@ } if (pmap_page_wired_mappings(m) == 0) vm_page_set_invalid(m, poffset, presid); - vfs_vmio_unwire(bp, m); + vm_page_release_locked(m, + (bp->b_flags & (B_NOREUSE | B_DIRECT)) != 0); resid -= presid; poffset = 0; } @@ -3022,7 +2982,10 @@ m = bp->b_pages[i]; KASSERT(m != bogus_page, ("allocbuf: bogus page found")); bp->b_pages[i] = NULL; - vfs_vmio_unwire(bp, m); + if (obj != NULL) + vm_page_release_locked(m, true); + else + vm_page_release(m, (bp->b_flags & B_NOREUSE) != 0); } if (obj != NULL) VM_OBJECT_WUNLOCK(obj); Index: sys/mips/mips/pmap.c =================================================================== --- sys/mips/mips/pmap.c +++ sys/mips/mips/pmap.c @@ -1591,7 +1591,7 @@ /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } Index: sys/net/bpf_zerocopy.c =================================================================== --- sys/net/bpf_zerocopy.c +++ sys/net/bpf_zerocopy.c @@ -116,8 +116,7 @@ { vm_page_lock(pp); - if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL) - vm_page_free(pp); + vm_page_unwire(pp, PQ_INACTIVE); vm_page_unlock(pp); } Index: sys/riscv/riscv/pmap.c =================================================================== --- sys/riscv/riscv/pmap.c +++ sys/riscv/riscv/pmap.c @@ -1648,7 +1648,7 @@ /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } Index: sys/vm/device_pager.c =================================================================== --- sys/vm/device_pager.c +++ sys/vm/device_pager.c @@ -235,9 +235,7 @@ if (object->type == OBJT_MGTDEVICE) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m)); pmap_remove_all(m); - vm_page_lock(m); - vm_page_remove(m); - vm_page_unlock(m); + (void)vm_page_remove(m); } else if (object->type == OBJT_DEVICE) dev_pager_free_page(object, m); } Index: sys/vm/uma_core.c =================================================================== --- sys/vm/uma_core.c +++ sys/vm/uma_core.c @@ -1274,9 +1274,9 @@ zkva += PAGE_SIZE; } return ((void*)addr); - fail: +fail: TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) { - vm_page_unwire(p, PQ_NONE); + vm_page_unwire_noq(p); vm_page_free(p); } return (NULL); @@ -1326,7 +1326,7 @@ * exit. */ TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) { - vm_page_unwire(p, PQ_NONE); + vm_page_unwire_noq(p); vm_page_free(p); } return (NULL); @@ -1387,7 +1387,7 @@ for (curva = sva; curva < sva + size; curva += PAGE_SIZE) { paddr = pmap_kextract(curva); m = PHYS_TO_VM_PAGE(paddr); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } pmap_qremove(sva, size >> PAGE_SHIFT); Index: sys/vm/vm_fault.c =================================================================== --- sys/vm/vm_fault.c +++ sys/vm/vm_fault.c @@ -251,18 +251,6 @@ vm_pager_page_unswapped(m); } -static void -vm_fault_fill_hold(vm_page_t *m_hold, vm_page_t m) -{ - - if (m_hold != NULL) { - *m_hold = m; - vm_page_lock(m); - vm_page_wire(m); - vm_page_unlock(m); - } -} - /* * Unlocks fs.first_object and fs.map on success. */ @@ -323,7 +311,10 @@ PMAP_ENTER_NOSLEEP | (wired ? 
PMAP_ENTER_WIRED : 0), psind); if (rv != KERN_SUCCESS) return (rv); - vm_fault_fill_hold(m_hold, m); + if (m_hold != NULL) { + *m_hold = m; + vm_page_wire(m); + } vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false); if (psind == 0 && !wired) vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true); @@ -374,9 +365,11 @@ for (pidx = first, m = vm_page_lookup(object, pidx); pidx <= last; pidx++, m = vm_page_next(m)) { vm_fault_populate_check_page(m); - vm_page_lock(m); - vm_page_deactivate(m); - vm_page_unlock(m); + if (!vm_page_wired(m)) { + vm_page_lock(m); + vm_page_deactivate(m); + vm_page_unlock(m); + } vm_page_xunbusy(m); } } @@ -499,11 +492,12 @@ VM_OBJECT_WLOCK(fs->first_object); m_mtx = NULL; for (i = 0; i < npages; i++) { - vm_page_change_lock(&m[i], &m_mtx); - if ((fault_flags & VM_FAULT_WIRE) != 0) + if ((fault_flags & VM_FAULT_WIRE) != 0) { vm_page_wire(&m[i]); - else + } else if (!vm_page_wired(&m[i])) { + vm_page_change_lock(&m[i], &m_mtx); vm_page_activate(&m[i]); + } if (m_hold != NULL && m[i].pindex == fs->first_pindex) { *m_hold = &m[i]; vm_page_wire(&m[i]); @@ -1151,10 +1145,11 @@ * daemon, while it is disassociated from an * object. */ + vm_page_wire(fs.m); + mtx = NULL; vm_page_change_lock(fs.m, &mtx); - vm_page_wire(fs.m); - vm_page_remove(fs.m); + (void)vm_page_remove(fs.m); vm_page_change_lock(fs.first_m, &mtx); vm_page_replace_checked(fs.m, fs.first_object, fs.first_pindex, fs.first_m); @@ -1187,10 +1182,8 @@ fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_WIRE) == 0) { - vm_page_lock(fs.first_m); vm_page_wire(fs.first_m); - vm_page_unlock(fs.first_m); - + vm_page_lock(fs.m); vm_page_unwire(fs.m, PQ_INACTIVE); vm_page_unlock(fs.m); @@ -1326,21 +1319,22 @@ faultcount > 0 ? behind : PFBAK, faultcount > 0 ? ahead : PFFOR, false); VM_OBJECT_WLOCK(fs.object); - vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon * can find it. 
*/ - if ((fault_flags & VM_FAULT_WIRE) != 0) + if ((fault_flags & VM_FAULT_WIRE) != 0) { vm_page_wire(fs.m); - else + } else if (!vm_page_wired(fs.m)) { + vm_page_lock(fs.m); vm_page_activate(fs.m); + vm_page_unlock(fs.m); + } if (m_hold != NULL) { *m_hold = fs.m; vm_page_wire(fs.m); } - vm_page_unlock(fs.m); vm_page_xunbusy(fs.m); /* @@ -1611,9 +1605,7 @@ for (mp = ma; mp < ma + count; mp++) if (*mp != NULL) { vm_page_lock(*mp); - if (vm_page_unwire(*mp, PQ_INACTIVE) && - (*mp)->object == NULL) - vm_page_free(*mp); + vm_page_unwire(*mp, PQ_INACTIVE); vm_page_unlock(*mp); } return (-1); @@ -1814,9 +1806,7 @@ vm_page_lock(src_m); vm_page_unwire(src_m, PQ_INACTIVE); vm_page_unlock(src_m); - vm_page_lock(dst_m); vm_page_wire(dst_m); - vm_page_unlock(dst_m); } else { KASSERT(vm_page_wired(dst_m), ("dst_m %p is not wired", dst_m)); Index: sys/vm/vm_glue.c =================================================================== --- sys/vm/vm_glue.c +++ sys/vm/vm_glue.c @@ -230,7 +230,7 @@ rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(m); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); vm_page_unlock(m); m = NULL; @@ -405,10 +405,8 @@ m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_dispose: kstack already missing?"); - vm_page_lock(m); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); - vm_page_unlock(m); } VM_OBJECT_WUNLOCK(ksobj); vm_object_deallocate(ksobj); Index: sys/vm/vm_kern.c =================================================================== --- sys/vm/vm_kern.c +++ sys/vm/vm_kern.c @@ -586,7 +586,7 @@ #endif for (; offset < end; offset += PAGE_SIZE, m = next) { next = vm_page_next(m); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } VM_OBJECT_WUNLOCK(object); Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c +++ sys/vm/vm_object.c @@ -699,12 +699,9 @@ vm_object_terminate_pages(vm_object_t object) { vm_page_t p, p_next; - struct mtx *mtx; VM_OBJECT_ASSERT_WLOCKED(object); - mtx = NULL; - /* * Free any remaining pageable pages. This also removes them from the * paging queues. However, don't free wired pages, just remove them @@ -713,20 +710,16 @@ */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { vm_page_assert_unbusied(p); - if ((object->flags & OBJ_UNMANAGED) == 0) - /* - * vm_page_free_prep() only needs the page - * lock for managed pages. - */ - vm_page_change_lock(p, &mtx); + KASSERT(p->object == object && p->ref_count > 0, + ("vm_object_terminate_pages: page %p is inconsistent", p)); + p->object = NULL; - if (vm_page_wired(p)) - continue; - VM_CNT_INC(v_pfree); - vm_page_free(p); + if (atomic_fetchadd_int(&p->ref_count, -VPRC_OBJREF) == + VPRC_OBJREF) { + VM_CNT_INC(v_pfree); + vm_page_free(p); + } } - if (mtx != NULL) - mtx_unlock(mtx); /* * If the object contained any pages, then reset it to an empty state. @@ -1588,18 +1581,10 @@ swap_pager_freespace(backing_object, p->pindex, 1); - /* - * Page is out of the parent object's range, we can - * simply destroy it. 
- */ - vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (!vm_page_wired(p)) + if (vm_page_remove(p)) vm_page_free(p); - else - vm_page_remove(p); - vm_page_unlock(p); continue; } @@ -1636,14 +1621,10 @@ if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); - vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (!vm_page_wired(p)) + if (vm_page_remove(p)) vm_page_free(p); - else - vm_page_remove(p); - vm_page_unlock(p); continue; } @@ -1944,6 +1925,7 @@ VM_OBJECT_WLOCK(object); goto again; } +wired: if (vm_page_wired(p)) { if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) @@ -1964,14 +1946,17 @@ ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { if ((options & OBJPR_NOTMAPPED) == 0 && - object->ref_count != 0) - pmap_remove_write(p); + object->ref_count != 0 && + !vm_page_try_remove_write(p)) + goto wired; if (p->dirty != 0) continue; } - if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) - pmap_remove_all(p); - vm_page_free(p); + if ((options & OBJPR_NOTMAPPED) == 0 && + object->ref_count != 0 && !vm_page_try_remove_all(p)) + goto wired; + if (vm_page_remove(p)) + vm_page_free(p); } if (mtx != NULL) mtx_unlock(mtx); Index: sys/vm/vm_page.h =================================================================== --- sys/vm/vm_page.h +++ sys/vm/vm_page.h @@ -115,24 +115,19 @@ * the implementation of read-modify-write operations on the * field is encapsulated in vm_page_clear_dirty_mask(). * - * The page structure contains two counters which prevent page reuse. - * Both counters are protected by the page lock (P). The hold - * counter counts transient references obtained via a pmap lookup, and - * is also used to prevent page reclamation in situations where it is - * undesirable to block other accesses to the page. The wire counter - * is used to implement mlock(2) and is non-zero for pages containing - * kernel memory. Pages that are wired or held will not be reclaimed - * or laundered by the page daemon, but are treated differently during - * a page queue scan: held pages remain at their position in the queue, - * while wired pages are removed from the queue and must later be - * re-enqueued appropriately by the unwiring thread. It is legal to - * call vm_page_free() on a held page; doing so causes it to be removed - * from its object and page queue, and the page is released to the - * allocator once the last hold reference is dropped. In contrast, - * wired pages may not be freed. - * - * In some pmap implementations, the wire count of a page table page is - * used to track the number of populated entries. + * The ref_count field tracks references to the page. References that + * prevent the page from being reclaimable are called wirings and are + * counted in the low bits of ref_count. Upper bits are reserved for + * special references that do not prevent reclamation of the page. + * Specifically, the containing object, if any, holds such a reference, + * and the page daemon takes a transient reference when it is scanning + * a page. Updates to ref_count are atomic unless the page is + * unallocated. To wire a page after it has been allocated, the object + * lock must be held, or the page must be busy, or the wiring thread + * must atomically take a reference and verify that the VPRC_BLOCKED + * bit is not set. 
No locks are required to unwire a page, but care
+ * must be taken to free the page if that wiring represented the last
+ * reference to the page.
  *
  * The busy lock is an embedded reader-writer lock which protects the
  * page's contents and identity (i.e., its <object,pindex> tuple) and
@@ -198,11 +193,14 @@
 		} memguard;
 	} plinks;
 	TAILQ_ENTRY(vm_page) listq;	/* pages in same object (O) */
-	vm_object_t object;		/* which object am I in (O,P) */
+	vm_object_t object;		/* which object am I in (O) */
 	vm_pindex_t pindex;		/* offset into object (O,P) */
 	vm_paddr_t phys_addr;		/* physical address of page (C) */
 	struct md_page md;		/* machine dependent stuff */
-	u_int wire_count;		/* wired down maps refs (P) */
+	union {
+		u_int wire_count;
+		u_int ref_count;	/* page references */
+	};
 	volatile u_int busy_lock;	/* busy owners lock */
 	uint16_t flags;			/* page PG_* flags (P) */
 	uint8_t order;			/* index of the buddy queue (F) */
@@ -219,6 +217,32 @@
 	vm_page_bits_t dirty;		/* map of dirty DEV_BSIZE chunks (M) */
 };
 
+/*
+ * Special bits used in the ref_count field.
+ *
+ * ref_count is normally used to count wirings that prevent the page from being
+ * reclaimed, but also supports several special types of references that do not
+ * prevent reclamation.  Accesses to the ref_count field must be atomic unless
+ * the page is unallocated.
+ *
+ * VPRC_PDREF is a transient reference acquired by the page daemon when
+ * scanning.  Pages may be dequeued without the page lock held when they are
+ * being freed, and this reference ensures that the page daemon is not
+ * simultaneously manipulating the queue state of the page.  The page lock must
+ * be held to set or clear this bit.
+ *
+ * VPRC_OBJREF is the reference held by the containing object.  It can be set
+ * or cleared only when the corresponding object's write lock is held.
+ *
+ * VPRC_BLOCKED is used to atomically block wirings via pmap lookups while
+ * attempting to tear down all mappings of a given page.  The page lock and
+ * object write lock must both be held in order to set or clear this bit.
+ */
+#define	VPRC_BLOCKED	0x20000000u	/* mappings are being removed */
+#define	VPRC_OBJREF	0x40000000u	/* object reference, cleared with (O) */
+#define	VPRC_PDREF	0x80000000u	/* page daemon reference for scanning */
+#define	VPRC_REFMASK	(VPRC_BLOCKED | VPRC_OBJREF | VPRC_PDREF)
+
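As an illustrative aside (not part of the patch), the sketch below shows how the bit layout described above could be interrogated. vm_page_wire_count() is defined later in this header; the helper name here is hypothetical, and the unsynchronized read is only a racy snapshot.

static inline bool
example_only_object_referenced(vm_page_t m)
{
	u_int ref;

	ref = m->ref_count;	/* racy snapshot; real consumers use atomics */
	/* Wirings occupy the low bits; VPRC_* flags occupy the top bits. */
	return ((ref & ~VPRC_REFMASK) == 0 && (ref & VPRC_OBJREF) != 0);
}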
 /*
  * Page flags stored in oflags:
  *
@@ -557,8 +581,10 @@
 bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
 void vm_page_reference(vm_page_t m);
-void vm_page_remove (vm_page_t);
-int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
+void vm_page_release(vm_page_t m, bool nocache);
+void vm_page_release_locked(vm_page_t m, bool nocache);
+bool vm_page_remove(vm_page_t);
+int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t);
 vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object,
     vm_pindex_t pindex);
 void vm_page_requeue(vm_page_t m);
@@ -569,14 +595,16 @@
 int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
 vm_offset_t vm_page_startup(vm_offset_t vaddr);
 void vm_page_sunbusy(vm_page_t m);
-bool vm_page_try_to_free(vm_page_t m);
+bool vm_page_try_remove_all(vm_page_t m);
+bool vm_page_try_remove_write(vm_page_t m);
 int vm_page_trysbusy(vm_page_t m);
 void vm_page_unhold_pages(vm_page_t *ma, int count);
 void vm_page_unswappable(vm_page_t m);
-bool vm_page_unwire(vm_page_t m, uint8_t queue);
+void vm_page_unwire(vm_page_t m, uint8_t queue);
 bool vm_page_unwire_noq(vm_page_t m);
 void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
-void vm_page_wire (vm_page_t);
+void vm_page_wire(vm_page_t);
+bool vm_page_wire_mapped(vm_page_t m);
 void vm_page_xunbusy_hard(vm_page_t m);
 void vm_page_xunbusy_maybelocked(vm_page_t m);
 void vm_page_set_validclean (vm_page_t, int, int);
@@ -806,6 +834,19 @@
 	return (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE);
 }
 
+/*
+ *
+ *	vm_page_wire_count:
+ *
+ *	Return the number of wiring references of the page.
+ */
+static inline u_int
+vm_page_wire_count(vm_page_t m)
+{
+
+	return (m->ref_count & ~VPRC_REFMASK);
+}
+
 /*
  *	vm_page_wired:
  *
@@ -815,7 +856,7 @@
 vm_page_wired(vm_page_t m)
 {
 
-	return (m->wire_count > 0);
+	return (vm_page_wire_count(m) > 0);
 }
 
 #endif			/* _KERNEL */
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -62,27 +62,6 @@
  * rights to redistribute these changes.
  */
 
-/*
- *	GENERAL RULES ON VM_PAGE MANIPULATION
- *
- *	- A page queue lock is required when adding or removing a page from a
- *	  page queue regardless of other locks or the busy state of a page.
- *
- *		* In general, no thread besides the page daemon can acquire or
- *		  hold more than one page queue lock at a time.
- *
- *		* The page daemon can acquire and hold any pair of page queue
- *		  locks in any order.
- *
- *	- The object lock is required when inserting or removing
- *	  pages from an object (vm_page_insert() or vm_page_remove()).
- *
- */
-
-/*
- * Resident memory management module.
- */ - #include __FBSDID("$FreeBSD$"); @@ -185,9 +164,9 @@ vm_page_t m_run, vm_paddr_t high); static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req); -static int vm_page_import(void *arg, void **store, int cnt, int domain, +static int vm_page_zone_import(void *arg, void **store, int cnt, int domain, int flags); -static void vm_page_release(void *arg, void **store, int cnt); +static void vm_page_zone_release(void *arg, void **store, int cnt); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL); @@ -221,7 +200,7 @@ continue; vmd->vmd_pgcache = uma_zcache_create("vm pgcache", sizeof(struct vm_page), NULL, NULL, NULL, NULL, - vm_page_import, vm_page_release, vmd, + vm_page_zone_import, vm_page_zone_release, vmd, UMA_ZONE_MAXBUCKET | UMA_ZONE_VM); (void )uma_zone_set_maxcache(vmd->vmd_pgcache, 0); } @@ -519,7 +498,7 @@ { m->object = NULL; - m->wire_count = 0; + m->ref_count = 0; m->busy_lock = VPB_UNBUSIED; m->flags = m->aflags = 0; m->phys_addr = pa; @@ -1115,8 +1094,7 @@ mtx = NULL; for (; count != 0; count--) { vm_page_change_lock(*ma, &mtx); - if (vm_page_unwire(*ma, PQ_ACTIVE) && (*ma)->object == NULL) - vm_page_free(*ma); + vm_page_unwire(*ma, PQ_ACTIVE); ma++; } if (mtx != NULL) @@ -1183,7 +1161,8 @@ /* Fictitious pages don't use "order" or "pool". */ m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_SINGLE_EXCLUSIVER; - m->wire_count = 1; + /* Fictitious pages are unevictable. */ + m->ref_count = 1; pmap_page_init(m); memattr: pmap_page_set_memattr(m, memattr); @@ -1382,6 +1361,7 @@ */ m->object = object; m->pindex = pindex; + atomic_set_int(&m->ref_count, VPRC_OBJREF); /* * Now link into the object's ordered list of backed pages. @@ -1389,6 +1369,7 @@ if (vm_radix_insert(&object->rtree, m)) { m->object = NULL; m->pindex = 0; + atomic_clear_int(&m->ref_count, VPRC_OBJREF); return (1); } vm_page_insert_radixdone(m, object, mpred); @@ -1413,11 +1394,13 @@ VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object != NULL && m->object == object, ("vm_page_insert_radixdone: page %p has inconsistent object", m)); + KASSERT((m->ref_count & VPRC_OBJREF) != 0, + ("vm_page_insert_radixdone: page %p is missing object ref", m)); if (mpred != NULL) { KASSERT(mpred->object == object, - ("vm_page_insert_after: object doesn't contain mpred")); + ("vm_page_insert_radixdone: object doesn't contain mpred")); KASSERT(mpred->pindex < m->pindex, - ("vm_page_insert_after: mpred doesn't precede pindex")); + ("vm_page_insert_radixdone: mpred doesn't precede pindex")); } if (mpred != NULL) @@ -1452,16 +1435,14 @@ * * The object must be locked. The page must be locked if it is managed. */ -void +bool vm_page_remove(vm_page_t m) { vm_object_t object; vm_page_t mrem; + u_int old; - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_assert_locked(m); - if ((object = m->object) == NULL) - return; + object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); if (vm_page_xbusied(m)) vm_page_xunbusy_maybelocked(m); @@ -1485,6 +1466,8 @@ vdrop(object->handle); m->object = NULL; + old = atomic_fetchadd_int(&m->ref_count, -VPRC_OBJREF); + return (old == VPRC_OBJREF); } /* @@ -1565,8 +1548,6 @@ /* * Uses the page mnew as a replacement for an existing page at index * pindex which must be already present in the object. - * - * The existing page must not be on a paging queue. 
*/ vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) @@ -1576,8 +1557,6 @@ VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(mnew->object == NULL, ("vm_page_replace: page %p already in object", mnew)); - KASSERT(mnew->queue == PQ_NONE || vm_page_wired(mnew), - ("vm_page_replace: new page %p is on a paging queue", mnew)); /* * This function mostly follows vm_page_insert() and @@ -1587,6 +1566,7 @@ mnew->object = object; mnew->pindex = pindex; + atomic_set_int(&mnew->ref_count, VPRC_OBJREF); mold = vm_radix_replace(&object->rtree, mnew); KASSERT(mold->queue == PQ_NONE, ("vm_page_replace: old page %p is on a paging queue", mold)); @@ -1596,6 +1576,7 @@ TAILQ_REMOVE(&object->memq, mold, listq); mold->object = NULL; + atomic_clear_int(&mold->ref_count, VPRC_OBJREF); vm_page_xunbusy_maybelocked(mold); /* @@ -1633,6 +1614,7 @@ VM_OBJECT_ASSERT_WLOCKED(new_object); + KASSERT(m->ref_count > 0, ("vm_page_rename: page %p has no refs", m)); mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex); KASSERT(mpred == NULL || mpred->pindex != new_pindex, ("vm_page_rename: pindex already renamed")); @@ -1655,11 +1637,13 @@ */ m->pindex = opidx; vm_page_lock(m); - vm_page_remove(m); + (void)vm_page_remove(m); /* Return back to the new pindex to complete vm_page_insert(). */ m->pindex = new_pindex; m->object = new_object; + atomic_set_int(&m->ref_count, VPRC_OBJREF); + vm_page_unlock(m); vm_page_insert_radixdone(m, new_object, mpred); vm_page_dirty(m); @@ -1878,7 +1862,7 @@ * page is inserted into the object. */ vm_wire_add(1); - m->wire_count = 1; + m->ref_count = 1; } m->act_count = 0; @@ -1886,7 +1870,7 @@ if (vm_page_insert_after(m, object, pindex, mpred)) { if (req & VM_ALLOC_WIRED) { vm_wire_sub(1); - m->wire_count = 0; + m->ref_count = 0; } KASSERT(m->object == NULL, ("page %p has object", m)); m->oflags = VPO_UNMANAGED; @@ -2080,7 +2064,7 @@ m->flags = (m->flags | PG_NODUMP) & flags; m->busy_lock = busy_lock; if ((req & VM_ALLOC_WIRED) != 0) - m->wire_count = 1; + m->ref_count = 1; m->act_count = 0; m->oflags = oflags; if (object != NULL) { @@ -2093,7 +2077,7 @@ for (m = m_ret; m < &m_ret[npages]; m++) { if (m <= mpred && (req & VM_ALLOC_WIRED) != 0) - m->wire_count = 0; + m->ref_count = 0; m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. */ @@ -2127,7 +2111,7 @@ KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0, ("page %p has unexpected queue %d, flags %#x", m, m->queue, (m->aflags & PGA_QUEUE_STATE_MASK))); - KASSERT(!vm_page_wired(m), ("page %p is wired", m)); + KASSERT(m->ref_count == 0, ("page %p has references", m)); KASSERT(!vm_page_busied(m), ("page %p is busy", m)); KASSERT(m->dirty == 0, ("page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, @@ -2211,7 +2195,7 @@ * not belong to an object. */ vm_wire_add(1); - m->wire_count = 1; + m->ref_count = 1; } /* Unmanaged pages don't use "act_count". 
*/ m->oflags = VPO_UNMANAGED; @@ -2219,7 +2203,7 @@ } static int -vm_page_import(void *arg, void **store, int cnt, int domain, int flags) +vm_page_zone_import(void *arg, void **store, int cnt, int domain, int flags) { struct vm_domain *vmd; int i; @@ -2240,7 +2224,7 @@ } static void -vm_page_release(void *arg, void **store, int cnt) +vm_page_zone_release(void *arg, void **store, int cnt) { struct vm_domain *vmd; vm_page_t m; @@ -2300,8 +2284,8 @@ for (m = m_start; m < m_end && run_len < npages; m += m_inc) { KASSERT((m->flags & PG_MARKER) == 0, ("page %p is PG_MARKER", m)); - KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->wire_count == 1, - ("fictitious page %p has invalid wire count", m)); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1, + ("fictitious page %p has invalid ref count", m)); /* * If the current page would be the start of a run, check its @@ -2358,9 +2342,6 @@ */ VM_OBJECT_RUNLOCK(object); goto retry; - } else if (vm_page_wired(m)) { - run_ext = 0; - goto unlock; } } /* Don't care: PG_NODUMP, PG_ZERO. */ @@ -2378,7 +2359,8 @@ vm_reserv_size(level)) - pa); #endif } else if (object->memattr == VM_MEMATTR_DEFAULT && - vm_page_queue(m) != PQ_NONE && !vm_page_busied(m)) { + vm_page_queue(m) != PQ_NONE && !vm_page_busied(m) && + !vm_page_wired(m)) { /* * The page is allocated but eligible for * relocation. Extend the current run by one @@ -2394,7 +2376,6 @@ run_ext = 1; } else run_ext = 0; -unlock: VM_OBJECT_RUNLOCK(object); #if VM_NRESERVLEVEL > 0 } else if (level >= 0) { @@ -2515,9 +2496,6 @@ */ VM_OBJECT_WUNLOCK(object); goto retry; - } else if (vm_page_wired(m)) { - error = EBUSY; - goto unlock; } } /* Don't care: PG_NODUMP, PG_ZERO. */ @@ -2528,7 +2506,7 @@ else if (object->memattr != VM_MEMATTR_DEFAULT) error = EINVAL; else if (vm_page_queue(m) != PQ_NONE && - !vm_page_busied(m)) { + !vm_page_busied(m) && !vm_page_wired(m)) { KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); @@ -2577,8 +2555,6 @@ error = ENOMEM; goto unlock; } - KASSERT(!vm_page_wired(m_new), - ("page %p is wired", m_new)); /* * Replace "m" with the new page. For @@ -2586,8 +2562,11 @@ * and dequeued. Finally, change "m" * as if vm_page_free() was called. 
*/ - if (object->ref_count != 0) - pmap_remove_all(m); + if (object->ref_count != 0 && + !vm_page_try_remove_all(m)) { + error = EBUSY; + goto unlock; + } m_new->aflags = m->aflags & ~PGA_QUEUE_STATE_MASK; KASSERT(m_new->oflags == VPO_UNMANAGED, @@ -2614,7 +2593,6 @@ } else { m->flags &= ~PG_ZERO; vm_page_dequeue(m); - vm_page_remove(m); if (vm_page_free_prep(m)) SLIST_INSERT_HEAD(&free, m, plinks.s.ss); @@ -3148,8 +3126,7 @@ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m)); - KASSERT(mtx_owned(vm_page_lockptr(m)) || - (m->object == NULL && (m->aflags & PGA_DEQUEUE) != 0), + KASSERT(mtx_owned(vm_page_lockptr(m)) || m->object == NULL, ("missing synchronization for page %p", m)); KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue)); @@ -3402,9 +3379,11 @@ vm_page_activate(vm_page_t m) { + KASSERT(m->object != NULL, + ("vm_page_activate: page %p has no object", m)); vm_page_assert_locked(m); - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) + if ((m->oflags & VPO_UNMANAGED) != 0) return; if (vm_page_queue(m) == PQ_ACTIVE) { if (m->act_count < ACT_INIT) @@ -3442,11 +3421,10 @@ m, i, (uintmax_t)*p)); } #endif - if ((m->oflags & VPO_UNMANAGED) == 0) { - vm_page_lock_assert(m, MA_OWNED); + if ((m->oflags & VPO_UNMANAGED) == 0) KASSERT(!pmap_page_is_mapped(m), ("vm_page_free_prep: freeing mapped page %p", m)); - } else + else KASSERT(m->queue == PQ_NONE, ("vm_page_free_prep: unmanaged page %p is queued", m)); VM_CNT_INC(v_tfree); @@ -3454,15 +3432,16 @@ if (vm_page_sbusied(m)) panic("vm_page_free_prep: freeing busy page %p", m); - vm_page_remove(m); + if (m->object != NULL) + (void)vm_page_remove(m); /* * If fictitious remove object association and * return. */ if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("fictitious page %p is not wired", m)); + KASSERT(m->ref_count == 1, + ("fictitious page %p is not referenced", m)); KASSERT(m->queue == PQ_NONE, ("fictitious page %p is queued", m)); return (false); @@ -3479,8 +3458,8 @@ m->valid = 0; vm_page_undirty(m); - if (vm_page_wired(m)) - panic("vm_page_free_prep: freeing wired page %p", m); + if (m->ref_count != 0) + panic("vm_page_free_prep: page %p has references", m); /* * Restore the default memory attribute to the page. @@ -3555,110 +3534,122 @@ } /* - * vm_page_wire: - * - * Mark this page as wired down. If the page is fictitious, then - * its wire count must remain one. - * - * The page must be locked. + * Mark this page as wired down, preventing reclamation by the page daemon + * or when the containing object is destroyed. 
*/ void vm_page_wire(vm_page_t m) { + u_int old; - vm_page_assert_locked(m); - if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("vm_page_wire: fictitious page %p's wire count isn't one", - m)); - return; - } - if (!vm_page_wired(m)) { - KASSERT((m->oflags & VPO_UNMANAGED) == 0 || - m->queue == PQ_NONE, - ("vm_page_wire: unmanaged page %p is queued", m)); + KASSERT(m->object != NULL, + ("vm_page_wire: page %p does not belong to an object", m)); + if (!vm_page_busied(m)) + VM_OBJECT_ASSERT_LOCKED(m->object); + + if ((m->flags & PG_FICTITIOUS) != 0) + KASSERT(m->ref_count >= 1, + ("vm_page_wire: fictitious page %p has zero refs", m)); + + if (!vm_page_wired(m)) vm_wire_add(1); - } - m->wire_count++; - KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m)); + + old = atomic_fetchadd_int(&m->ref_count, 1); + KASSERT(old != ~VPRC_REFMASK, + ("vm_page_wire: counter overflow for page %p", m)); +} + +/* + * Attempt to wire a mapped page following a pmap lookup of that page. + * This may fail if a thread is concurrently tearing down mappings of the page. + */ +bool +vm_page_wire_mapped(vm_page_t m) +{ + u_int old; + + KASSERT(m->object != NULL, + ("vm_page_try_wire: page %p does not belong to an object", m)); + + old = m->ref_count; + do { + KASSERT(old > 0, + ("vm_page_try_wire: wiring unreferenced page %p", m)); + if ((old & VPRC_BLOCKED) != 0) + return (false); + } while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1)); + if ((old & ~VPRC_REFMASK) == 1) + vm_wire_add(1); + return (true); } /* - * vm_page_unwire: - * * Release one wiring of the specified page, potentially allowing it to be - * paged out. Returns TRUE if the number of wirings transitions to zero and - * FALSE otherwise. + * paged out. * * Only managed pages belonging to an object can be paged out. If the number * of wirings transitions to zero and the page is eligible for page out, then - * the page is added to the specified paging queue (unless PQ_NONE is - * specified, in which case the page is dequeued if it belongs to a paging - * queue). - * - * If a page is fictitious, then its wire count must always be one. + * the page is added to the specified paging queue. * * A managed page must be locked. */ -bool +void vm_page_unwire(vm_page_t m, uint8_t queue) { - bool unwired; - KASSERT(queue < PQ_COUNT || queue == PQ_NONE, - ("vm_page_unwire: invalid queue %u request for page %p", - queue, m)); + KASSERT(queue < PQ_COUNT, + ("vm_page_unwire: invalid queue %u request for page %p", queue, m)); + KASSERT(vm_page_wire_count(m) >= 1, + ("vm_page_unwire: page %p is not wired", m)); if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_assert_locked(m); - unwired = vm_page_unwire_noq(m); - if (!unwired || (m->oflags & VPO_UNMANAGED) != 0 || m->object == NULL) - return (unwired); - - if (vm_page_queue(m) == queue) { - if (queue == PQ_ACTIVE) - vm_page_reference(m); - else if (queue != PQ_NONE) - vm_page_requeue(m); - } else { - vm_page_dequeue(m); - if (queue != PQ_NONE) { + /* + * Racily test the wire count to avoid polluting the page queues with + * unreclaimable pages. + */ + if ((m->oflags & VPO_UNMANAGED) == 0 && vm_page_wire_count(m) == 1) { + if (vm_page_queue(m) == queue) { + if (queue == PQ_ACTIVE) + vm_page_reference(m); + else + vm_page_requeue(m); + } else { + vm_page_dequeue(m); vm_page_enqueue(m, queue); if (queue == PQ_ACTIVE) /* Initialize act_count. 
*/ vm_page_activate(m); } } - return (unwired); + + if (vm_page_unwire_noq(m) && m->ref_count == 0) + vm_page_free(m); } /* - * * vm_page_unwire_noq: * * Unwire a page without (re-)inserting it into a page queue. It is up * to the caller to enqueue, requeue, or free the page as appropriate. - * In most cases, vm_page_unwire() should be used instead. + * In most cases involving managed pages, vm_page_unwire() should be used + * instead. */ bool vm_page_unwire_noq(vm_page_t m) { + u_int old; - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_assert_locked(m); - if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("vm_page_unwire: fictitious page %p's wire count isn't one", m)); - return (false); - } - if (!vm_page_wired(m)) - panic("vm_page_unwire: page %p's wire count is zero", m); - m->wire_count--; - if (m->wire_count == 0) { - vm_wire_sub(1); - return (true); - } else + old = atomic_fetchadd_int(&m->ref_count, -1); + KASSERT((old & ~VPRC_REFMASK) != 0, + ("vm_page_unref: counter underflow for page %p", m)); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || (old & ~VPRC_REFMASK) > 1, + ("vm_page_unref: missing ref on fictitious page %p", m)); + + if ((old & ~VPRC_REFMASK) != 1) return (false); + vm_wire_sub(1); + return (true); } /* @@ -3671,11 +3662,12 @@ vm_page_deactivate(vm_page_t m) { + KASSERT(m->object != NULL, + ("vm_page_deactivate: page %p has no object", m)); vm_page_assert_locked(m); - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) + if ((m->oflags & VPO_UNMANAGED) != 0) return; - if (!vm_page_inactive(m)) { vm_page_dequeue(m); vm_page_enqueue(m, PQ_INACTIVE); @@ -3695,11 +3687,12 @@ vm_page_deactivate_noreuse(vm_page_t m) { + KASSERT(m->object != NULL, + ("vm_page_deactivate_noreuse: page %p has no object", m)); vm_page_assert_locked(m); - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) + if ((m->oflags & VPO_UNMANAGED) != 0) return; - if (!vm_page_inactive(m)) { vm_page_dequeue(m); m->queue = PQ_INACTIVE; @@ -3718,10 +3711,12 @@ vm_page_launder(vm_page_t m) { + KASSERT(m->object != NULL, + ("vm_page_launder: page %p has no object", m)); vm_page_assert_locked(m); - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) - return; + if ((m->oflags & VPO_UNMANAGED) != 0) + return; if (vm_page_in_laundry(m)) vm_page_requeue(m); else { @@ -3748,30 +3743,134 @@ } /* - * Attempt to free the page. If it cannot be freed, do nothing. Returns true - * if the page is freed and false otherwise. - * - * The page must be managed. The page and its containing object must be - * locked. + * Release a wired page to the page cache, and optionally attempt to free it. + * The page's object must be locked. See the comment above vm_page_release(). */ -bool -vm_page_try_to_free(vm_page_t m) +void +vm_page_release_locked(vm_page_t m, bool nocache) +{ + vm_object_t object; + + object = m->object; + VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_release_locked: page %p is unmanaged", m)); + + if (!vm_page_unwire_noq(m)) + return; + if (m->valid == 0 || nocache) { + if ((object->ref_count == 0 || !pmap_page_is_mapped(m)) && + m->dirty == 0 && !vm_page_busied(m) && !vm_page_wired(m)) { + vm_page_free(m); + } else { + vm_page_lock(m); + vm_page_deactivate_noreuse(m); + vm_page_unlock(m); + } + } else { + vm_page_lock(m); + if (vm_page_active(m)) + vm_page_reference(m); + else + vm_page_deactivate(m); + vm_page_unlock(m); + } +} + +/* + * Release a wired page to the page cache, and optionally attempt to free it. 
+ * If the caller wishes to attempt to free the page, and the page is mapped, + * dirty, busy or wired, we do not free it but instead place it near the head of + * the inactive queue to accelerate reclamation. + */ +void +vm_page_release(vm_page_t m, bool nocache) +{ + vm_object_t object; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_release: page %p is unmanaged", m)); + + if (nocache) { + /* + * Attempt to free the page. The page may be renamed between + * objects so we must verify the page's object pointer after + * acquiring the lock and retry if they do not match. + */ + while ((object = m->object) != NULL) { + if (!VM_OBJECT_TRYWLOCK(object)) { + object = NULL; + break; + } + if (m->object == object) + break; + VM_OBJECT_WUNLOCK(object); + } + if (object != NULL) { + vm_page_release_locked(m, nocache); + VM_OBJECT_WUNLOCK(object); + return; + } + } + + if (vm_page_wire_count(m) == 1) { + vm_page_lock(m); + if (m->valid == 0 || nocache) + vm_page_deactivate_noreuse(m); + else if (vm_page_active(m)) + vm_page_reference(m); + else + vm_page_deactivate(m); + vm_page_unlock(m); + } + if (vm_page_unwire_noq(m) && m->ref_count == 0) + vm_page_free(m); +} + +/* + * Attempt to invoke the requested operation while blocking new wirings of the + * page. + */ +static bool +vm_page_try_blocked_op(vm_page_t m, void (*op)(vm_page_t)) { + u_int old; vm_page_assert_locked(m); - VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m)); - if (m->dirty != 0 || vm_page_wired(m) || vm_page_busied(m)) - return (false); - if (m->object->ref_count != 0) { - pmap_remove_all(m); - if (m->dirty != 0) + KASSERT(m->object != NULL && (m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_try_blocked_op: page %p has no object", m)); + KASSERT(!vm_page_busied(m), + ("vm_page_try_blocked_op: page %p is busy", m)); + VM_OBJECT_ASSERT_LOCKED(m->object); + + old = m->ref_count; + do { + KASSERT(old != 0, + ("vm_page_try_blocked_op: page %p has no references", m)); + if ((old & ~VPRC_REFMASK) != 0) return (false); - } - vm_page_free(m); + } while (!atomic_fcmpset_int(&m->ref_count, &old, old | VPRC_BLOCKED)); + + (op)(m); + + atomic_clear_int(&m->ref_count, VPRC_BLOCKED); return (true); } +bool +vm_page_try_remove_all(vm_page_t m) +{ + + return (vm_page_try_blocked_op(m, pmap_remove_all)); +} + +bool +vm_page_try_remove_write(vm_page_t m) +{ + + return (vm_page_try_blocked_op(m, pmap_remove_write)); +} + /* * vm_page_advise * @@ -3866,11 +3965,8 @@ VM_OBJECT_WLOCK(object); goto retrylookup; } else { - if ((allocflags & VM_ALLOC_WIRED) != 0) { - vm_page_lock(m); + if ((allocflags & VM_ALLOC_WIRED) != 0) vm_page_wire(m); - vm_page_unlock(m); - } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); @@ -3968,11 +4064,8 @@ VM_OBJECT_WLOCK(object); goto retrylookup; } - if ((allocflags & VM_ALLOC_WIRED) != 0) { - vm_page_lock(m); + if ((allocflags & VM_ALLOC_WIRED) != 0) vm_page_wire(m); - vm_page_unlock(m); - } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); @@ -4501,10 +4594,10 @@ else m = (vm_page_t)addr; db_printf( - "page %p obj %p pidx 0x%jx phys 0x%jx q %d wire %d\n" + "page %p obj %p pidx 0x%jx phys 0x%jx q %d ref %u\n" " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, - m->queue, m->wire_count, m->aflags, m->oflags, + m->queue, m->ref_count, m->aflags, m->oflags, m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); } #endif /* 
DDB */ Index: sys/vm/vm_pageout.c =================================================================== --- sys/vm/vm_pageout.c +++ sys/vm/vm_pageout.c @@ -314,6 +314,53 @@ return (vm_batchqueue_pop(&ss->bq)); } +/* + * Lock a page and set a reference bit to ensure that it does not get freed out + * from under us. + */ +static bool +vm_pageout_lock_and_hold_page(vm_page_t m, struct mtx **mtx) +{ + u_int ref_count; + + vm_page_change_lock(m, mtx); + + ref_count = m->ref_count; + do { + if (ref_count == 0) + return (false); + } while (!atomic_fcmpset_int(&m->ref_count, &ref_count, ref_count | + VPRC_PDREF)); + return (true); +} + +/* + * Drop the page daemon's transient page reference and determine whether we need + * to free the page. + */ +static bool +vm_pageout_drop_page(vm_page_t m) +{ + + KASSERT((m->ref_count & VPRC_PDREF) != 0, + ("vm_pageout_drop_page: page %p missing pagedaemon ref", m)); + return (atomic_fetchadd_int(&m->ref_count, -VPRC_PDREF) == VPRC_PDREF); +} + +/* + * Drop the page daemon's transient reference once we know that the page's + * identity is stable. + */ +static void +vm_pageout_drop_page_quick(vm_page_t m) +{ + + KASSERT((m->ref_count & (VPRC_OBJREF | VPRC_PDREF)) == + (VPRC_OBJREF | VPRC_PDREF), + ("vm_pageout_drop_page_quick: page %p missing refs", m)); + atomic_clear_int(&m->ref_count, VPRC_PDREF); +} + /* * Scan for pages at adjacent offsets within the given page's object that are * eligible for laundering, form a cluster of these pages and the given page, @@ -327,16 +374,11 @@ vm_pindex_t pindex; int ib, is, page_base, pageout_count; - vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); pindex = m->pindex; vm_page_assert_unbusied(m); - KASSERT(!vm_page_wired(m), ("page %p is wired", m)); - - pmap_remove_write(m); - vm_page_unlock(m); mc[vm_pageout_page_count] = pb = ps = m; pageout_count = 1; @@ -362,7 +404,8 @@ ib = 0; break; } - if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) { + if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p) || + vm_page_wired(p)) { ib = 0; break; } @@ -372,12 +415,11 @@ break; } vm_page_lock(p); - if (vm_page_wired(p) || !vm_page_in_laundry(p)) { + if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { vm_page_unlock(p); ib = 0; break; } - pmap_remove_write(p); vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; @@ -392,17 +434,17 @@ } while (pageout_count < vm_pageout_page_count && pindex + is < object->size) { - if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) + if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p) || + vm_page_wired(p)) break; vm_page_test_dirty(p); if (p->dirty == 0) break; vm_page_lock(p); - if (vm_page_wired(p) || !vm_page_in_laundry(p)) { + if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { vm_page_unlock(p); break; } - pmap_remove_write(p); vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; @@ -657,6 +699,13 @@ } } + if (!vm_page_try_remove_write(m)) { + vm_page_unlock(m); + error = EBUSY; + goto unlock_all; + } + vm_page_unlock(m); + /* * If a page is dirty, then it is either being washed * (but not yet cleaned) or it is still in the @@ -726,7 +775,8 @@ if (__predict_false((m->flags & PG_MARKER) != 0)) continue; - vm_page_change_lock(m, &mtx); + if (!vm_pageout_lock_and_hold_page(m, &mtx)) + continue; recheck: /* @@ -734,7 +784,7 @@ * while locks were dropped. 
*/ if (vm_page_queue(m) != queue) - continue; + goto drop; /* * A requeue was requested, so this page gets a second @@ -742,17 +792,19 @@ */ if ((m->aflags & PGA_REQUEUE) != 0) { vm_page_requeue(m); - continue; + goto drop; } /* * Wired pages may not be freed. Complete their removal * from the queue now to avoid needless revisits during - * future scans. + * future scans. This check is racy and must be reverified once + * we hold the object lock and have verified that the page + * is not busy. */ if (vm_page_wired(m)) { vm_page_dequeue_deferred(m); - continue; + goto drop; } if (object != m->object) { @@ -767,10 +819,33 @@ goto recheck; } } + if (__predict_false(object == NULL)) + /* + * The page has been removed from its object. + * Drop our reference and move on. + */ + goto drop; + + /* + * We can drop our transient reference now that we hold + * the object lock. + */ + vm_pageout_drop_page_quick(m); if (vm_page_busied(m)) continue; + /* + * Re-check for wirings now that we hold the object lock. If + * the page is mapped, it may still be wired by pmap lookups. + * The call to vm_page_try_remove_all() below atomically checks + * for such wirings and removes mappings. + */ + if (__predict_false(vm_page_wired(m))) { + vm_page_dequeue_deferred(m); + continue; + } + /* * Invalid pages can be easily freed. They cannot be * mapped; vm_page_free() asserts this. @@ -838,8 +913,10 @@ */ if (object->ref_count != 0) { vm_page_test_dirty(m); - if (m->dirty == 0) - pmap_remove_all(m); + if (m->dirty == 0 && !vm_page_try_remove_all(m)) { + vm_page_requeue(m); + continue; + } } /* @@ -889,6 +966,11 @@ mtx = NULL; object = NULL; } + + continue; +drop: + if (vm_pageout_drop_page(m)) + goto free_page; } if (mtx != NULL) { mtx_unlock(mtx); @@ -1131,6 +1213,7 @@ { struct scan_state ss; struct mtx *mtx; + vm_object_t object; vm_page_t m, marker; struct vm_pagequeue *pq; long min_scan; @@ -1187,23 +1270,31 @@ if (__predict_false((m->flags & PG_MARKER) != 0)) continue; - vm_page_change_lock(m, &mtx); + if (!vm_pageout_lock_and_hold_page(m, &mtx)) + continue; /* * The page may have been disassociated from the queue * while locks were dropped. */ if (vm_page_queue(m) != PQ_ACTIVE) - continue; + goto drop; /* * Wired pages are dequeued lazily. */ if (vm_page_wired(m)) { vm_page_dequeue_deferred(m); - continue; + goto drop; } + if (__predict_false((object = m->object) == NULL)) + /* + * The page has been removed from its object. + * Drop our reference and move on. + */ + goto drop; + /* * Check to see "how much" the page has been used. * @@ -1223,7 +1314,7 @@ * This race delays the detection of a new reference. At * worst, we will deactivate and reactivate the page. */ - if (m->object->ref_count != 0) + if (object->ref_count != 0) act_delta = pmap_ts_referenced(m); else act_delta = 0; @@ -1278,6 +1369,9 @@ } } } +drop: + if (vm_pageout_drop_page(m)) + vm_page_free(m); } if (mtx != NULL) { mtx_unlock(mtx); @@ -1392,7 +1486,8 @@ KASSERT((m->flags & PG_MARKER) == 0, ("marker page %p was dequeued", m)); - vm_page_change_lock(m, &mtx); + if (!vm_pageout_lock_and_hold_page(m, &mtx)) + continue; recheck: /* @@ -1401,7 +1496,7 @@ */ if (vm_page_queue(m) != PQ_INACTIVE) { addl_page_shortage++; - continue; + goto drop; } /* @@ -1410,24 +1505,28 @@ * chance. */ if ((m->aflags & (PGA_ENQUEUED | PGA_REQUEUE | - PGA_REQUEUE_HEAD)) != 0) - goto reinsert; + PGA_REQUEUE_HEAD)) != 0) { + vm_pageout_reinsert_inactive(&ss, &rq, m); + goto drop; + } /* * Wired pages may not be freed. 
Complete their removal * from the queue now to avoid needless revisits during - * future scans. + * future scans. This check is racy and must be reverified once + * we hold the object lock and have verified that the page + * is not busy. */ if (vm_page_wired(m)) { vm_page_dequeue_deferred(m); - continue; + goto drop; } if (object != m->object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); object = m->object; - if (!VM_OBJECT_TRYWLOCK(object)) { + if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); @@ -1435,6 +1534,18 @@ goto recheck; } } + if (__predict_false(object == NULL)) + /* + * The page has been removed from its object. + * Drop our reference and move on. + */ + goto drop; + + /* + * We can drop our transient reference now that we hold + * the object lock. + */ + vm_pageout_drop_page_quick(m); if (vm_page_busied(m)) { /* @@ -1446,7 +1557,19 @@ * inactive count. */ addl_page_shortage++; - goto reinsert; + vm_pageout_reinsert_inactive(&ss, &rq, m); + continue; + } + + /* + * Re-check for wirings now that we hold the object lock. If + * the page is mapped, it may still be wired by pmap lookups. + * The call to vm_page_try_remove_all() below atomically checks + * for such wirings and removes mappings. + */ + if (__predict_false(vm_page_wired(m))) { + vm_page_dequeue_deferred(m); + continue; } /* @@ -1492,7 +1615,8 @@ continue; } else if ((object->flags & OBJ_DEAD) == 0) { vm_page_aflag_set(m, PGA_REQUEUE); - goto reinsert; + vm_pageout_reinsert_inactive(&ss, &rq, m); + continue; } } @@ -1505,8 +1629,10 @@ */ if (object->ref_count != 0) { vm_page_test_dirty(m); - if (m->dirty == 0) - pmap_remove_all(m); + if (m->dirty == 0 && !vm_page_try_remove_all(m)) { + vm_page_dequeue_deferred(m); + continue; + } } /* @@ -1532,8 +1658,13 @@ } else if ((object->flags & OBJ_DEAD) == 0) vm_page_launder(m); continue; -reinsert: - vm_pageout_reinsert_inactive(&ss, &rq, m); + +drop: + /* + * Drop our transient reference. + */ + if (vm_pageout_drop_page(m)) + goto free_page; } if (mtx != NULL) mtx_unlock(mtx); Index: sys/vm/vm_swapout.c =================================================================== --- sys/vm/vm_swapout.c +++ sys/vm/vm_swapout.c @@ -208,12 +208,12 @@ goto unlock_return; if (should_yield()) goto unlock_return; - if (vm_page_busied(p)) + + if (vm_page_busied(p) || vm_page_wired(p)) continue; VM_CNT_INC(v_pdpages); vm_page_lock(p); - if (vm_page_wired(p) || - !pmap_page_exists_quick(pmap, p)) { + if (!pmap_page_exists_quick(pmap, p)) { vm_page_unlock(p); continue; } @@ -231,7 +231,7 @@ p->act_count -= min(p->act_count, ACT_DECLINE); if (!remove_mode && p->act_count == 0) { - pmap_remove_all(p); + (void)vm_page_try_remove_all(p); vm_page_deactivate(p); } else vm_page_requeue(p); @@ -243,7 +243,7 @@ vm_page_requeue(p); } } else if (vm_page_inactive(p)) - pmap_remove_all(p); + (void)vm_page_try_remove_all(p); vm_page_unlock(p); } if ((backing_object = object->backing_object) == NULL)
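To close, a minimal sketch (not part of the patch) of the lookup-side pattern that vm_page_wire_mapped() is meant to support, mirroring the pmap_extract_and_hold() change at the top of this diff. The function name is hypothetical, and the machine-dependent page-table walk is elided, with the resulting physical address passed in directly.

static vm_page_t
example_extract_and_hold(pmap_t pmap, vm_paddr_t pa)
{
	vm_page_t m;

	PMAP_LOCK(pmap);
	/* The pmap lock keeps the translation that produced "pa" stable. */
	m = PHYS_TO_VM_PAGE(pa);
	/*
	 * vm_page_wire_mapped() fails if another thread has set VPRC_BLOCKED,
	 * i.e., it is concurrently destroying all mappings of the page via
	 * vm_page_try_remove_all(); treat that as a failed lookup.
	 */
	if (m != NULL && !vm_page_wire_mapped(m))
		m = NULL;
	PMAP_UNLOCK(pmap);
	return (m);
}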