Index: share/man/man9/Makefile =================================================================== --- share/man/man9/Makefile +++ share/man/man9/Makefile @@ -2223,7 +2223,9 @@ MLINKS+=vm_map_max.9 vm_map_min.9 \ vm_map_max.9 vm_map_pmap.9 MLINKS+=vm_map_stack.9 vm_map_growstack.9 -MLINKS+=vm_map_wire.9 vm_map_unwire.9 +MLINKS+=vm_page_wire.9 vm_page_wire_mapped.9 \ + vm_page_wire.9 vm_page_unwire.9 \ + vm_page_wire.9 vm_page_unwire_noq.9 MLINKS+=vm_page_bits.9 vm_page_clear_dirty.9 \ vm_page_bits.9 vm_page_dirty.9 \ vm_page_bits.9 vm_page_is_valid.9 \ Index: share/man/man9/vm_page_wire.9 =================================================================== --- share/man/man9/vm_page_wire.9 +++ share/man/man9/vm_page_wire.9 @@ -26,12 +26,13 @@ .\" .\" $FreeBSD$ .\" -.Dd July 13, 2001 +.Dd June 3, 2019 .Dt VM_PAGE_WIRE 9 .Os .Sh NAME .Nm vm_page_wire , -.Nm vm_page_unwire +.Nm vm_page_unwire , +.Nm vm_page_unwire_noq .Nd "wire and unwire pages" .Sh SYNOPSIS .In sys/param.h @@ -39,29 +40,44 @@ .In vm/vm_page.h .Ft void .Fn vm_page_wire "vm_page_t m" +.Ft bool +.Fn vm_page_wire_mapped "vm_page_t m" .Ft void -.Fn vm_page_unwire "vm_page_t m" "int activate" +.Fn vm_page_unwire "vm_page_t m" "int queue" +.Ft bool +.Fn vm_page_unwire_noq "vm_page_t m" .Sh DESCRIPTION The .Fn vm_page_wire -function increments the wire count on a page, and removes it from -whatever queue it is on. +and +.Fn vm_page_wire_mapped +functions wire the page, preventing it from being reclaimed by the page +daemon or when its containing object is destroyed. +Both functions require that the page belong to an object. +The +.Fn vm_page_wire_mapped +function is for use by the +.Xr pmap 9 +layer following a lookup. +This function may fail if mappings of the page are concurrently +being destroyed, in which case it will return false. .Pp The .Fn vm_page_unwire -function releases one of the wirings on the page. -When -.Va write_count -reaches zero the page is placed back onto either the active queue -(if -.Fa activate -is non-zero) or onto the inactive queue (if -.Fa activate -is zero). -If the page is unmanaged -.Dv ( PG_UNMANAGED -is set) then the page is left on -.Dv PQ_NONE . +and +.Fn vm_page_unwire_noq +functions release a wiring of a page. +The +.Fn vm_page_unwire +function takes a queue index and will insert the page into the +corresponding page queue upon releasing its last wiring. +If the page does not belong to an object and no other references +to the page exist, +.Fn vm_page_unwire +will free the page. +.Fn vm_page_unwire_noq +releases the wiring and returns true if it was the last wiring +of the page. .Sh AUTHORS This manual page was written by .An Chad David Aq Mt davidc@acns.ab.ca .
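Illustrative sketch (not part of the diff): the following shows the lookup-and-wire pattern that the pmap_extract_and_hold() conversions below adopt, together with the matching release of the wiring. The pte_lookup(), pte_is_valid(), pte_is_writable() and pte_to_pa() helpers are hypothetical stand-ins for each architecture's page-table walk; vm_page_wire_mapped(), vm_page_unwire(), PHYS_TO_VM_PAGE() and PMAP_LOCK()/PMAP_UNLOCK() are the interfaces the patch relies on. Because vm_page_wire_mapped() fails only when the page's mappings are concurrently being torn down, a failed call can simply be treated as a failed lookup; no page lock or vm_page_pa_tryrelock() retry loop is required.

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_page.h>

/*
 * Sketch: return a wired page mapped at "va" in "pmap", or NULL.  The
 * pte_*() helpers are hypothetical placeholders for MD page-table code.
 */
vm_page_t
pmap_extract_and_hold_sketch(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t pte;
	vm_page_t m;

	m = NULL;
	PMAP_LOCK(pmap);
	pte = pte_lookup(pmap, va);
	if (pte_is_valid(pte) &&
	    (pte_is_writable(pte) || (prot & VM_PROT_WRITE) == 0)) {
		m = PHYS_TO_VM_PAGE(pte_to_pa(pte));
		/*
		 * vm_page_wire_mapped() only fails if the page's mappings
		 * are being destroyed; treat that as a failed lookup.
		 */
		if (!vm_page_wire_mapped(m))
			m = NULL;
	}
	PMAP_UNLOCK(pmap);
	return (m);
}

/*
 * Releasing the reference later requires no page lock; vm_page_unwire()
 * requeues the page, or frees it if this was the last reference.
 */
static void
pmap_extract_release_sketch(vm_page_t m)
{
	vm_page_unwire(m, PQ_ACTIVE);
}

Compare with the amd64, arm, arm64, i386, mips, powerpc, riscv and sparc64 hunks that follow, which implement this pattern against their native page-table formats.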
Index: sys/amd64/amd64/pmap.c =================================================================== --- sys/amd64/amd64/pmap.c +++ sys/amd64/amd64/pmap.c @@ -3039,31 +3039,23 @@ m = NULL; PG_RW = pmap_rw_bit(pmap); PG_V = pmap_valid_bit(pmap); + PMAP_LOCK(pmap); -retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { - if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - if (vm_page_pa_tryrelock(pmap, (pde & - PG_PS_FRAME) | (va & PDRMASK), &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + if ((pde & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0) + m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | + (va & PDRMASK)); } else { pte = *pmap_pde_to_pte(pdep, va); - if ((pte & PG_V) && - ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, - &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + if ((pte & PG_V) != 0 && + ((pte & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0)) + m = PHYS_TO_VM_PAGE(pte & PG_FRAME); } - if (m != NULL) - vm_page_wire(m); + if (m != NULL && !vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } Index: sys/amd64/vmm/vmm.c =================================================================== --- sys/amd64/vmm/vmm.c +++ sys/amd64/vmm/vmm.c @@ -1002,9 +1002,7 @@ { vm_page_t m = cookie; - vm_page_lock(m); vm_page_unwire(m, PQ_ACTIVE); - vm_page_unlock(m); } int Index: sys/arm/arm/pmap-v4.c =================================================================== --- sys/arm/arm/pmap-v4.c +++ sys/arm/arm/pmap-v4.c @@ -3415,14 +3415,14 @@ struct l2_dtable *l2; pd_entry_t l1pd; pt_entry_t *ptep, pte; - vm_paddr_t pa, paddr; - vm_page_t m = NULL; + vm_paddr_t pa; + vm_page_t m; u_int l1idx; + l1idx = L1_IDX(va); - paddr = 0; + m = NULL; PMAP_LOCK(pmap); -retry: l1pd = pmap->pm_l1->l1_kva[l1idx]; if (l1pte_section_p(l1pd)) { /* @@ -3434,11 +3434,10 @@ pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); else pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); - if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) - goto retry; if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { m = PHYS_TO_VM_PAGE(pa); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } else { /* @@ -3466,15 +3465,12 @@ pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); else pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); - if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) - goto retry; m = PHYS_TO_VM_PAGE(pa); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PMAP_UNLOCK(pmap); - PA_UNLOCK_COND(paddr); return (m); } Index: sys/arm/arm/pmap-v6.c =================================================================== --- sys/arm/arm/pmap-v6.c +++ sys/arm/arm/pmap-v6.c @@ -1986,23 +1986,20 @@ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { - vm_paddr_t pa, lockpa; + vm_paddr_t pa; pt1_entry_t pte1; pt2_entry_t pte2, *pte2p; vm_page_t m; - lockpa = 0; m = NULL; PMAP_LOCK(pmap); -retry: pte1 = pte1_load(pmap_pte1(pmap, va)); if (pte1_is_section(pte1)) { if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) { pa = pte1_pa(pte1) | (va & PTE1_OFFSET); - if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) - goto retry; m = PHYS_TO_VM_PAGE(pa); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } else if (pte1_is_link(pte1)) { pte2p = pmap_pte2(pmap, va); @@ -2011,13 +2008,11 @@ if (pte2_is_valid(pte2) && (!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) { pa = pte2_pa(pte2); - if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) - goto retry; m = PHYS_TO_VM_PAGE(pa); - 
vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(lockpa); PMAP_UNLOCK(pmap); return (m); } Index: sys/arm64/arm64/pmap.c =================================================================== --- sys/arm64/arm64/pmap.c +++ sys/arm64/arm64/pmap.c @@ -1080,14 +1080,11 @@ { pt_entry_t *pte, tpte; vm_offset_t off; - vm_paddr_t pa; vm_page_t m; int lvl; - pa = 0; m = NULL; PMAP_LOCK(pmap); -retry: pte = pmap_pte(pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); @@ -1112,14 +1109,11 @@ default: off = 0; } - if (vm_page_pa_tryrelock(pmap, - (tpte & ~ATTR_MASK) | off, &pa)) - goto retry; m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -481,9 +481,7 @@ } ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); - vm_page_lock(pp); vm_page_wire(pp); - vm_page_unlock(pp); } else pp = NULL; break; @@ -495,9 +493,7 @@ page_unwire(vm_page_t pp) { - vm_page_lock(pp); vm_page_unwire(pp, PQ_ACTIVE); - vm_page_unlock(pp); } /* Index: sys/compat/linuxkpi/common/include/linux/mm.h =================================================================== --- sys/compat/linuxkpi/common/include/linux/mm.h +++ sys/compat/linuxkpi/common/include/linux/mm.h @@ -227,9 +227,7 @@ static inline void get_page(struct vm_page *page) { - vm_page_lock(page); vm_page_wire(page); - vm_page_unlock(page); } extern long @@ -250,10 +248,7 @@ static inline void put_page(struct vm_page *page) { - vm_page_lock(page); - if (vm_page_unwire(page, PQ_ACTIVE) && page->object == NULL) - vm_page_free(page); - vm_page_unlock(page); + vm_page_unwire(page, PQ_ACTIVE); } #define copy_highpage(to, from) pmap_copy_page(from, to) Index: sys/compat/linuxkpi/common/src/linux_page.c =================================================================== --- sys/compat/linuxkpi/common/src/linux_page.c +++ sys/compat/linuxkpi/common/src/linux_page.c @@ -154,10 +154,8 @@ for (x = 0; x != npages; x++) { vm_page_t pgo = page + x; - vm_page_lock(pgo); if (vm_page_unwire_noq(pgo)) vm_page_free(pgo); - vm_page_unlock(pgo); } } else { vm_offset_t vaddr; Index: sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c =================================================================== --- sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c +++ sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c @@ -377,10 +377,7 @@ static void pagelist_page_free(vm_page_t pp) { - vm_page_lock(pp); - if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL) - vm_page_free(pp); - vm_page_unlock(pp); + vm_page_unwire(pp, PQ_INACTIVE); } /* There is a potential problem with partial cache lines (pages?) 
Index: sys/dev/agp/agp.c =================================================================== --- sys/dev/agp/agp.c +++ sys/dev/agp/agp.c @@ -616,9 +616,7 @@ m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(k)); if (k >= i) vm_page_xunbusy(m); - vm_page_lock(m); vm_page_unwire(m, PQ_INACTIVE); - vm_page_unlock(m); } VM_OBJECT_WUNLOCK(mem->am_obj); @@ -653,9 +651,7 @@ VM_OBJECT_WLOCK(mem->am_obj); for (i = 0; i < mem->am_size; i += PAGE_SIZE) { m = vm_page_lookup(mem->am_obj, atop(i)); - vm_page_lock(m); vm_page_unwire(m, PQ_INACTIVE); - vm_page_unlock(m); } VM_OBJECT_WUNLOCK(mem->am_obj); Index: sys/dev/agp/agp_i810.c =================================================================== --- sys/dev/agp/agp_i810.c +++ sys/dev/agp/agp_i810.c @@ -1795,9 +1795,7 @@ */ VM_OBJECT_WLOCK(mem->am_obj); m = vm_page_lookup(mem->am_obj, 0); - vm_page_lock(m); vm_page_unwire(m, PQ_INACTIVE); - vm_page_unlock(m); VM_OBJECT_WUNLOCK(mem->am_obj); } else { contigfree(sc->argb_cursor, mem->am_size, M_AGP); Index: sys/dev/cxgbe/tom/t4_cpl_io.c =================================================================== --- sys/dev/cxgbe/tom/t4_cpl_io.c +++ sys/dev/cxgbe/tom/t4_cpl_io.c @@ -1929,7 +1929,6 @@ { struct mbuf_ext_pgs *ext_pgs; struct kaiocb *job; - struct mtx *mtx; vm_page_t pg; MBUF_EXT_PGS_ASSERT(m); @@ -1940,14 +1939,10 @@ m->m_len, jobtotid(job)); #endif - mtx = NULL; for (int i = 0; i < ext_pgs->npgs; i++) { pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); - vm_page_change_lock(pg, &mtx); vm_page_unwire(pg, PQ_ACTIVE); } - if (mtx != NULL) - mtx_unlock(mtx); aiotx_free_job(job); } Index: sys/dev/cxgbe/tom/t4_ddp.c =================================================================== --- sys/dev/cxgbe/tom/t4_ddp.c +++ sys/dev/cxgbe/tom/t4_ddp.c @@ -114,9 +114,7 @@ for (i = 0; i < ps->npages; i++) { p = ps->pages[i]; - vm_page_lock(p); vm_page_unwire(p, PQ_INACTIVE); - vm_page_unlock(p); } mtx_lock(&ddp_orphan_pagesets_lock); TAILQ_INSERT_TAIL(&ddp_orphan_pagesets, ps, link); Index: sys/dev/md/md.c =================================================================== --- sys/dev/md/md.c +++ sys/dev/md/md.c @@ -1030,9 +1030,7 @@ { vm_page_xunbusy(m); - vm_page_lock(m); vm_page_free(m); - vm_page_unlock(m); } static int Index: sys/i386/i386/pmap.c =================================================================== --- sys/i386/i386/pmap.c +++ sys/i386/i386/pmap.c @@ -1690,35 +1690,24 @@ pd_entry_t pde; pt_entry_t pte; vm_page_t m; - vm_paddr_t pa; - pa = 0; m = NULL; PMAP_LOCK(pmap); -retry: pde = *pmap_pde(pmap, va); if (pde != 0) { if (pde & PG_PS) { - if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - if (vm_page_pa_tryrelock(pmap, (pde & - PG_PS_FRAME) | (va & PDRMASK), &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) + m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | + (va & PDRMASK)); } else { pte = pmap_pte_ufast(pmap, va, pde); if (pte != 0 && - ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, - &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) + m = PHYS_TO_VM_PAGE(pte & PG_FRAME); } - if (m != NULL) - vm_page_wire(m); + if (m != NULL && !vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } Index: sys/kern/kern_exec.c =================================================================== --- sys/kern/kern_exec.c +++ sys/kern/kern_exec.c @@ -972,13 +972,13 @@ #if VM_NRESERVLEVEL > 0 vm_object_color(object, 0); #endif - ma[0] = 
vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); + ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | + VM_ALLOC_WIRED); if (ma[0]->valid != VM_PAGE_BITS_ALL) { vm_page_xbusy(ma[0]); if (!vm_pager_has_page(object, 0, NULL, &after)) { - vm_page_lock(ma[0]); + vm_page_unwire_noq(ma[0]); vm_page_free(ma[0]); - vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); return (EIO); } @@ -1002,11 +1002,9 @@ initial_pagein = i; rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL); if (rv != VM_PAGER_OK) { - for (i = 0; i < initial_pagein; i++) { - vm_page_lock(ma[i]); + vm_page_unwire_noq(ma[0]); + for (i = 0; i < initial_pagein; i++) vm_page_free(ma[i]); - vm_page_unlock(ma[i]); - } VM_OBJECT_WUNLOCK(object); return (EIO); } @@ -1014,9 +1012,6 @@ for (i = 1; i < initial_pagein; i++) vm_page_readahead_finish(ma[i]); } - vm_page_lock(ma[0]); - vm_page_wire(ma[0]); - vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); imgp->firstpage = sf_buf_alloc(ma[0], 0); @@ -1034,9 +1029,7 @@ m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; - vm_page_lock(m); vm_page_unwire(m, PQ_ACTIVE); - vm_page_unlock(m); } } Index: sys/kern/kern_sendfile.c =================================================================== --- sys/kern/kern_sendfile.c +++ sys/kern/kern_sendfile.c @@ -383,11 +383,8 @@ &sendfile_iodone, sfio); if (rv != VM_PAGER_OK) { for (j = i; j < i + count; j++) { - if (pa[j] != bogus_page) { - vm_page_lock(pa[j]); + if (pa[j] != bogus_page) vm_page_unwire(pa[j], PQ_INACTIVE); - vm_page_unlock(pa[j]); - } } VM_OBJECT_WUNLOCK(obj); return (EIO); Index: sys/kern/sys_process.c =================================================================== --- sys/kern/sys_process.c +++ sys/kern/sys_process.c @@ -312,10 +312,7 @@ /* * Release the page. */ - vm_page_lock(m); - if (vm_page_unwire(m, PQ_ACTIVE) && m->object == NULL) - vm_page_free(m); - vm_page_unlock(m); + vm_page_unwire(m, PQ_ACTIVE); } while (error == 0 && uio->uio_resid > 0); Index: sys/kern/uipc_shm.c =================================================================== --- sys/kern/uipc_shm.c +++ sys/kern/uipc_shm.c @@ -188,7 +188,8 @@ * lock to page out tobj's pages because tobj is a OBJT_SWAP * type object. 
*/ - m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); + m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | + VM_ALLOC_WIRED); if (m->valid != VM_PAGE_BITS_ALL) { vm_page_xbusy(m); if (vm_pager_has_page(obj, idx, NULL, NULL)) { @@ -197,9 +198,8 @@ printf( "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n", obj, idx, m->valid, rv); - vm_page_lock(m); + vm_page_unwire_noq(m); vm_page_free(m); - vm_page_unlock(m); VM_OBJECT_WUNLOCK(obj); return (EIO); } @@ -207,9 +207,6 @@ vm_page_zero_invalid(m, TRUE); vm_page_xunbusy(m); } - vm_page_lock(m); - vm_page_wire(m); - vm_page_unlock(m); VM_OBJECT_WUNLOCK(obj); error = uiomove_fromphys(&m, offset, tlen, uio); if (uio->uio_rw == UIO_WRITE && error == 0) { @@ -218,9 +215,7 @@ vm_pager_page_unswapped(m); VM_OBJECT_WUNLOCK(obj); } - vm_page_lock(m); vm_page_unwire(m, PQ_ACTIVE); - vm_page_unlock(m); return (error); } Index: sys/mips/mips/pmap.c =================================================================== --- sys/mips/mips/pmap.c +++ sys/mips/mips/pmap.c @@ -796,26 +796,22 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t pte, *ptep; - vm_paddr_t pa, pte_pa; + vm_paddr_t pa; vm_page_t m; m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: ptep = pmap_pte(pmap, va); if (ptep != NULL) { pte = *ptep; if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) || (prot & VM_PROT_WRITE) == 0)) { - pte_pa = TLBLO_PTE_TO_PA(pte); - if (vm_page_pa_tryrelock(pmap, pte_pa, &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pte_pa); - vm_page_wire(m); + pa = TLBLO_PTE_TO_PA(pte); + m = PHYS_TO_VM_PAGE(pa); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } Index: sys/net/bpf_zerocopy.c =================================================================== --- sys/net/bpf_zerocopy.c +++ sys/net/bpf_zerocopy.c @@ -115,10 +115,7 @@ zbuf_page_free(vm_page_t pp) { - vm_page_lock(pp); - if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL) - vm_page_free(pp); - vm_page_unlock(pp); + vm_page_unwire(pp, PQ_INACTIVE); } /* Index: sys/powerpc/aim/mmu_oea.c =================================================================== --- sys/powerpc/aim/mmu_oea.c +++ sys/powerpc/aim/mmu_oea.c @@ -1264,22 +1264,17 @@ { struct pvo_entry *pvo; vm_page_t m; - vm_paddr_t pa; m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) && ((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa)) - goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } Index: sys/powerpc/aim/mmu_oea64.c =================================================================== --- sys/powerpc/aim/mmu_oea64.c +++ sys/powerpc/aim/mmu_oea64.c @@ -1584,21 +1584,15 @@ { struct pvo_entry *pvo; vm_page_t m; - vm_paddr_t pa; m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) { - if (vm_page_pa_tryrelock(pmap, - pvo->pvo_pte.pa & LPTE_RPGN, &pa)) - goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } Index: sys/powerpc/booke/pmap.c 
=================================================================== --- sys/powerpc/booke/pmap.c +++ sys/powerpc/booke/pmap.c @@ -2784,12 +2784,9 @@ pte_t *pte; vm_page_t m; uint32_t pte_wbit; - vm_paddr_t pa; - + m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: pte = pte_find(mmu, pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) { if (pmap == kernel_pmap) @@ -2798,14 +2795,11 @@ pte_wbit = PTE_UW; if ((*pte & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa)) - goto retry; m = PHYS_TO_VM_PAGE(PTE_PA(pte)); - m->wire_count++; + if (!vm_page_wire_mapped(m)) + m = NULL; } } - - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } Index: sys/riscv/riscv/pmap.c =================================================================== --- sys/riscv/riscv/pmap.c +++ sys/riscv/riscv/pmap.c @@ -870,24 +870,19 @@ { pt_entry_t *l3p, l3; vm_paddr_t phys; - vm_paddr_t pa; vm_page_t m; - pa = 0; m = NULL; PMAP_LOCK(pmap); -retry: l3p = pmap_l3(pmap, va); if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { if ((l3 & PTE_W) != 0 || (prot & VM_PROT_WRITE) == 0) { phys = PTE_TO_PHYS(l3); - if (vm_page_pa_tryrelock(pmap, phys, &pa)) - goto retry; m = PHYS_TO_VM_PAGE(phys); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } Index: sys/sparc64/sparc64/pmap.c =================================================================== --- sys/sparc64/sparc64/pmap.c +++ sys/sparc64/sparc64/pmap.c @@ -846,19 +846,15 @@ { struct tte *tp; vm_page_t m; - vm_paddr_t pa; m = NULL; - pa = 0; PMAP_LOCK(pm); -retry: if (pm == kernel_pmap) { if (va >= VM_MIN_DIRECT_ADDRESS) { tp = NULL; m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va)); - (void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va), - &pa); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } else { tp = tsb_kvtotte(va); if ((tp->tte_data & TD_V) == 0) @@ -868,12 +864,10 @@ tp = tsb_tte_lookup(pm, va); if (tp != NULL && ((tp->tte_data & TD_SW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa)) - goto retry; m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp)); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pm); return (m); } Index: sys/vm/device_pager.c =================================================================== --- sys/vm/device_pager.c +++ sys/vm/device_pager.c @@ -235,9 +235,7 @@ if (object->type == OBJT_MGTDEVICE) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m)); pmap_remove_all(m); - vm_page_lock(m); (void)vm_page_remove(m); - vm_page_unlock(m); } else if (object->type == OBJT_DEVICE) dev_pager_free_page(object, m); } Index: sys/vm/vm_fault.c =================================================================== --- sys/vm/vm_fault.c +++ sys/vm/vm_fault.c @@ -251,18 +251,6 @@ vm_pager_page_unswapped(m); } -static void -vm_fault_fill_hold(vm_page_t *m_hold, vm_page_t m) -{ - - if (m_hold != NULL) { - *m_hold = m; - vm_page_lock(m); - vm_page_wire(m); - vm_page_unlock(m); - } -} - /* * Unlocks fs.first_object and fs.map on success. */ @@ -323,7 +311,10 @@ PMAP_ENTER_NOSLEEP | (wired ? 
PMAP_ENTER_WIRED : 0), psind); if (rv != KERN_SUCCESS) return (rv); - vm_fault_fill_hold(m_hold, m); + if (m_hold != NULL) { + *m_hold = m; + vm_page_wire(m); + } vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false); if (psind == 0 && !wired) vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true); @@ -499,11 +490,12 @@ VM_OBJECT_WLOCK(fs->first_object); m_mtx = NULL; for (i = 0; i < npages; i++) { - vm_page_change_lock(&m[i], &m_mtx); - if ((fault_flags & VM_FAULT_WIRE) != 0) + if ((fault_flags & VM_FAULT_WIRE) != 0) { vm_page_wire(&m[i]); - else + } else { + vm_page_change_lock(&m[i], &m_mtx); vm_page_activate(&m[i]); + } if (m_hold != NULL && m[i].pindex == fs->first_pindex) { *m_hold = &m[i]; vm_page_wire(&m[i]); @@ -564,7 +556,6 @@ struct faultstate fs; struct vnode *vp; struct domainset *dset; - struct mtx *mtx; vm_object_t next_object, retry_object; vm_offset_t e_end, e_start; vm_pindex_t retry_pindex; @@ -1005,12 +996,10 @@ * an error. */ if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { - vm_page_lock(fs.m); if (!vm_page_wired(fs.m)) vm_page_free(fs.m); else - vm_page_xunbusy_maybelocked(fs.m); - vm_page_unlock(fs.m); + vm_page_xunbusy(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return (rv == VM_PAGER_ERROR ? KERN_FAILURE : @@ -1028,12 +1017,10 @@ * that we are. */ if (fs.object != fs.first_object) { - vm_page_lock(fs.m); if (!vm_page_wired(fs.m)) vm_page_free(fs.m); else - vm_page_xunbusy_maybelocked(fs.m); - vm_page_unlock(fs.m); + vm_page_xunbusy(fs.m); fs.m = NULL; } } @@ -1150,17 +1137,17 @@ * daemon, while it is disassociated from an * object. */ - mtx = NULL; - vm_page_change_lock(fs.m, &mtx); vm_page_wire(fs.m); + + vm_page_lock(fs.m); (void)vm_page_remove(fs.m); - vm_page_change_lock(fs.first_m, &mtx); + vm_page_unlock(fs.m); + vm_page_lock(fs.first_m); vm_page_replace_checked(fs.m, fs.first_object, fs.first_pindex, fs.first_m); + vm_page_unlock(fs.first_m); vm_page_free(fs.first_m); - vm_page_change_lock(fs.m, &mtx); vm_page_unwire(fs.m, PQ_ACTIVE); - mtx_unlock(mtx); vm_page_dirty(fs.m); #if VM_NRESERVLEVEL > 0 /* @@ -1186,13 +1173,8 @@ fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_WIRE) == 0) { - vm_page_lock(fs.first_m); vm_page_wire(fs.first_m); - vm_page_unlock(fs.first_m); - - vm_page_lock(fs.m); vm_page_unwire(fs.m, PQ_INACTIVE); - vm_page_unlock(fs.m); } /* * We no longer need the old page or object. @@ -1325,21 +1307,22 @@ faultcount > 0 ? behind : PFBAK, faultcount > 0 ? ahead : PFFOR, false); VM_OBJECT_WLOCK(fs.object); - vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon * can find it. 
*/ - if ((fault_flags & VM_FAULT_WIRE) != 0) + if ((fault_flags & VM_FAULT_WIRE) != 0) { vm_page_wire(fs.m); - else + } else { + vm_page_lock(fs.m); vm_page_activate(fs.m); + vm_page_unlock(fs.m); + } if (m_hold != NULL) { *m_hold = fs.m; vm_page_wire(fs.m); } - vm_page_unlock(fs.m); vm_page_xunbusy(fs.m); /* @@ -1608,13 +1591,8 @@ return (count); error: for (mp = ma; mp < ma + count; mp++) - if (*mp != NULL) { - vm_page_lock(*mp); - if (vm_page_unwire(*mp, PQ_INACTIVE) && - (*mp)->object == NULL) - vm_page_free(*mp); - vm_page_unlock(*mp); - } + if (*mp != NULL) + vm_page_unwire(*mp, PQ_INACTIVE); return (-1); } @@ -1810,12 +1788,8 @@ if (upgrade) { if (src_m != dst_m) { - vm_page_lock(src_m); vm_page_unwire(src_m, PQ_INACTIVE); - vm_page_unlock(src_m); - vm_page_lock(dst_m); vm_page_wire(dst_m); - vm_page_unlock(dst_m); } else { KASSERT(vm_page_wired(dst_m), ("dst_m %p is not wired", dst_m)); Index: sys/vm/vm_glue.c =================================================================== --- sys/vm/vm_glue.c +++ sys/vm/vm_glue.c @@ -229,10 +229,8 @@ vm_page_xbusy(m); rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { - vm_page_lock(m); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); - vm_page_unlock(m); m = NULL; goto out; } @@ -270,9 +268,7 @@ m = sf_buf_page(sf); sf_buf_free(sf); sched_unpin(); - vm_page_lock(m); vm_page_unwire(m, PQ_ACTIVE); - vm_page_unlock(m); } void @@ -380,10 +376,8 @@ m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_dispose: kstack already missing?"); - vm_page_lock(m); vm_page_unwire_noq(m); vm_page_free(m); - vm_page_unlock(m); } VM_OBJECT_WUNLOCK(ksobj); vm_object_deallocate(ksobj); Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c +++ sys/vm/vm_object.c @@ -699,12 +699,9 @@ vm_object_terminate_pages(vm_object_t object) { vm_page_t p, p_next; - struct mtx *mtx; VM_OBJECT_ASSERT_WLOCKED(object); - mtx = NULL; - /* * Free any remaining pageable pages. This also removes them from the * paging queues. However, don't free wired pages, just remove them @@ -713,20 +710,24 @@ */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { vm_page_assert_unbusied(p); - if ((object->flags & OBJ_UNMANAGED) == 0) + KASSERT(p->object == object && + (p->ref_count & VPRC_OBJREF) != 0, + ("vm_object_terminate_pages: page %p is inconsistent", p)); + + p->object = NULL; + if (VPRC_WIRE_COUNT(p->ref_count) == 0) /* - * vm_page_free_prep() only needs the page - * lock for managed pages. + * We hold the object lock and the page is unmapped and + * unbusied, so new wirings cannot be created. As an + * optimization, clear the reference count field using + * a regular store. */ - vm_page_change_lock(p, &mtx); - p->object = NULL; - if (vm_page_wired(p)) + atomic_store_rel_int(&p->ref_count, 0); + else if (vm_page_drop(p, VPRC_OBJREF) != VPRC_OBJREF) continue; VM_CNT_INC(v_pfree); vm_page_free(p); } - if (mtx != NULL) - mtx_unlock(mtx); /* * If the object contained any pages, then reset it to an empty state. @@ -1588,16 +1589,10 @@ swap_pager_freespace(backing_object, p->pindex, 1); - /* - * Page is out of the parent object's range, we can - * simply destroy it. 
- */ - vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (vm_page_remove(p)) vm_page_free(p); - vm_page_unlock(p); continue; } @@ -1634,12 +1629,10 @@ if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); - vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (vm_page_remove(p)) vm_page_free(p); - vm_page_unlock(p); continue; } @@ -1940,6 +1933,7 @@ VM_OBJECT_WLOCK(object); goto again; } +wired: if (vm_page_wired(p)) { if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) @@ -1960,14 +1954,17 @@ ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { if ((options & OBJPR_NOTMAPPED) == 0 && - object->ref_count != 0) - pmap_remove_write(p); + object->ref_count != 0 && + !vm_page_try_remove_write(p)) + goto wired; if (p->dirty != 0) continue; } - if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) - pmap_remove_all(p); - vm_page_free(p); + if ((options & OBJPR_NOTMAPPED) == 0 && + object->ref_count != 0 && !vm_page_try_remove_all(p)) + goto wired; + if (vm_page_remove(p)) + vm_page_free(p); } if (mtx != NULL) mtx_unlock(mtx); @@ -2256,8 +2253,8 @@ tm = m; m = TAILQ_NEXT(m, listq); } - vm_page_lock(tm); if (vm_page_xbusied(tm)) { + vm_page_lock(tm); for (tobject = object; locked_depth >= 1; locked_depth--) { t1object = tobject->backing_object; @@ -2268,7 +2265,6 @@ goto again; } vm_page_unwire(tm, queue); - vm_page_unlock(tm); next_page: pindex++; } Index: sys/vm/vm_page.h =================================================================== --- sys/vm/vm_page.h +++ sys/vm/vm_page.h @@ -115,24 +115,19 @@ * the implementation of read-modify-write operations on the * field is encapsulated in vm_page_clear_dirty_mask(). * - * The page structure contains two counters which prevent page reuse. - * Both counters are protected by the page lock (P). The hold - * counter counts transient references obtained via a pmap lookup, and - * is also used to prevent page reclamation in situations where it is - * undesirable to block other accesses to the page. The wire counter - * is used to implement mlock(2) and is non-zero for pages containing - * kernel memory. Pages that are wired or held will not be reclaimed - * or laundered by the page daemon, but are treated differently during - * a page queue scan: held pages remain at their position in the queue, - * while wired pages are removed from the queue and must later be - * re-enqueued appropriately by the unwiring thread. It is legal to - * call vm_page_free() on a held page; doing so causes it to be removed - * from its object and page queue, and the page is released to the - * allocator once the last hold reference is dropped. In contrast, - * wired pages may not be freed. - * - * In some pmap implementations, the wire count of a page table page is - * used to track the number of populated entries. + * The ref_count field tracks references to the page. References that + * prevent the page from being reclaimable are called wirings and are + * counted in the low bits of ref_count. Upper bits are reserved for + * special references that do not prevent reclamation of the page. + * Specifically, the containing object, if any, holds such a reference, + * and the page daemon takes a transient reference when it is scanning + * a page. Updates to ref_count are atomic unless the page is + * unallocated. 
To wire a page after it has been allocated, the object + lock must be held, or the page must be busy, or the wiring thread + must atomically take a reference and verify that the VPRC_BLOCKED + bit is not set. No locks are required to unwire a page, but care + must be taken to free the page if that wiring represented the last + reference to the page. * * The busy lock is an embedded reader-writer lock which protects the * page's contents and identity (i.e., its <object, pindex> tuple) and @@ -204,11 +199,14 @@ } memguard; } plinks; TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ - vm_object_t object; /* which object am I in (O,P) */ + vm_object_t object; /* which object am I in (O) */ vm_pindex_t pindex; /* offset into object (O,P) */ vm_paddr_t phys_addr; /* physical address of page (C) */ struct md_page md; /* machine dependent stuff */ - u_int wire_count; /* wired down maps refs (P) */ + union { + u_int wire_count; + u_int ref_count; /* page references */ + }; volatile u_int busy_lock; /* busy owners lock */ uint16_t flags; /* page PG_* flags (P) */ uint8_t order; /* index of the buddy queue (F) */ @@ -225,6 +223,26 @@ vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ }; +/* + * Special bits used in the ref_count field. + * + * ref_count is normally used to count wirings that prevent the page from being + * reclaimed, but also supports several special types of references that do not + * prevent reclamation. Accesses to the ref_count field must be atomic unless + * the page is unallocated. + * + * VPRC_OBJREF is the reference held by the containing object. It can be set or + * cleared only when the corresponding object's write lock is held. + * + * VPRC_BLOCKED is used to atomically block wirings via pmap lookups while + * attempting to tear down all mappings of a given page. The page lock and + * object write lock must both be held in order to set or clear this bit. + */ +#define VPRC_BLOCKED 0x40000000u /* mappings are being removed */ +#define VPRC_OBJREF 0x80000000u /* object reference, cleared with (O) */ +#define VPRC_WIRE_COUNT(c) ((c) & ~(VPRC_BLOCKED | VPRC_OBJREF)) +#define VPRC_WIRE_COUNT_MAX (~(VPRC_BLOCKED | VPRC_OBJREF)) + /* * Page flags stored in oflags: * @@ -585,13 +603,16 @@ vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_sunbusy(vm_page_t m); void vm_page_swapqueue(vm_page_t m, int oldq, int newq); +bool vm_page_try_remove_all(vm_page_t m); +bool vm_page_try_remove_write(vm_page_t m); int vm_page_trysbusy(vm_page_t m); void vm_page_unhold_pages(vm_page_t *ma, int count); void vm_page_unswappable(vm_page_t m); -bool vm_page_unwire(vm_page_t m, uint8_t queue); +void vm_page_unwire(vm_page_t m, uint8_t queue); bool vm_page_unwire_noq(vm_page_t m); void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); -void vm_page_wire (vm_page_t); +void vm_page_wire(vm_page_t); +bool vm_page_wire_mapped(vm_page_t m); void vm_page_xunbusy_hard(vm_page_t m); void vm_page_xunbusy_maybelocked(vm_page_t m); void vm_page_set_validclean (vm_page_t, int, int); @@ -868,16 +889,34 @@ return (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE); } +/* + * vm_page_drop: + * + * Release a reference to a page and return the old reference count. + */ +static inline u_int +vm_page_drop(vm_page_t m, u_int val) +{ + + /* + * Synchronize with vm_page_free_prep(): ensure that all updates to the + * page structure are visible before it is freed.
+ */ + atomic_thread_fence_rel(); + return (atomic_fetchadd_int(&m->ref_count, -val)); +} + /* * vm_page_wired: * - * Return true if a reference prevents the page from being reclaimable. + * Perform a racy check to determine whether a reference prevents the page + * from being reclaimable. */ static inline bool vm_page_wired(vm_page_t m) { - return (m->wire_count > 0); + return (VPRC_WIRE_COUNT(m->ref_count) > 0); } #endif /* _KERNEL */ Index: sys/vm/vm_page.c =================================================================== --- sys/vm/vm_page.c +++ sys/vm/vm_page.c @@ -168,6 +168,7 @@ vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); +static void vm_page_mvqueue(vm_page_t m, int queue); static int vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, vm_paddr_t high); static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, @@ -513,7 +514,7 @@ { m->object = NULL; - m->wire_count = 0; + m->ref_count = 0; m->busy_lock = VPB_UNBUSIED; m->flags = m->aflags = 0; m->phys_addr = pa; @@ -1107,17 +1108,11 @@ void vm_page_unhold_pages(vm_page_t *ma, int count) { - struct mtx *mtx; - mtx = NULL; for (; count != 0; count--) { - vm_page_change_lock(*ma, &mtx); - if (vm_page_unwire(*ma, PQ_ACTIVE) && (*ma)->object == NULL) - vm_page_free(*ma); + vm_page_unwire(*ma, PQ_ACTIVE); ma++; } - if (mtx != NULL) - mtx_unlock(mtx); } vm_page_t @@ -1180,7 +1175,8 @@ /* Fictitious pages don't use "order" or "pool". */ m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_SINGLE_EXCLUSIVER; - m->wire_count = 1; + /* Fictitious pages are unevictable. */ + m->ref_count = 1; pmap_page_init(m); memattr: pmap_page_set_memattr(m, memattr); @@ -1375,10 +1371,11 @@ ("vm_page_insert_after: msucc doesn't succeed pindex")); /* - * Record the object/offset pair in this page + * Record the object/offset pair in this page. */ m->object = object; m->pindex = pindex; + m->ref_count |= VPRC_OBJREF; /* * Now link into the object's ordered list of backed pages. @@ -1386,6 +1383,7 @@ if (vm_radix_insert(&object->rtree, m)) { m->object = NULL; m->pindex = 0; + m->ref_count &= ~VPRC_OBJREF; return (1); } vm_page_insert_radixdone(m, object, mpred); @@ -1410,11 +1408,13 @@ VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object != NULL && m->object == object, ("vm_page_insert_radixdone: page %p has inconsistent object", m)); + KASSERT((m->ref_count & VPRC_OBJREF) != 0, + ("vm_page_insert_radixdone: page %p is missing object ref", m)); if (mpred != NULL) { KASSERT(mpred->object == object, - ("vm_page_insert_after: object doesn't contain mpred")); + ("vm_page_insert_radixdone: object doesn't contain mpred")); KASSERT(mpred->pindex < m->pindex, - ("vm_page_insert_after: mpred doesn't precede pindex")); + ("vm_page_insert_radixdone: mpred doesn't precede pindex")); } if (mpred != NULL) @@ -1442,25 +1442,19 @@ } /* - * vm_page_remove: - * - * Removes the specified page from its containing object, but does not - * invalidate any backing storage. Return true if the page may be safely - * freed and false otherwise. - * - * The object must be locked. The page must be locked if it is managed. + * Do the work to remove a page from its object. The caller is responsible for + * updating the page's fields to reflect this removal. 
*/ -bool -vm_page_remove(vm_page_t m) +static void +vm_page_object_remove(vm_page_t m) { vm_object_t object; vm_page_t mrem; object = m->object; - - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_assert_locked(m); VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT((m->ref_count & VPRC_OBJREF) != 0, + ("page %p is missing its object ref", m)); if (vm_page_xbusied(m)) vm_page_xunbusy_maybelocked(m); mrem = vm_radix_remove(&object->rtree, m->pindex); @@ -1481,9 +1475,24 @@ */ if (object->resident_page_count == 0 && object->type == OBJT_VNODE) vdrop(object->handle); +} + +/* + * vm_page_remove: + * + * Removes the specified page from its containing object, but does not + * invalidate any backing storage. Returns true if the object's reference + * was the last reference to the page, and false otherwise. + * + * The object must be locked. + */ +bool +vm_page_remove(vm_page_t m) +{ + vm_page_object_remove(m); m->object = NULL; - return (!vm_page_wired(m)); + return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF); } /* @@ -1564,8 +1573,6 @@ /* * Uses the page mnew as a replacement for an existing page at index * pindex which must be already present in the object. - * - * The existing page must not be on a paging queue. */ vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) @@ -1575,8 +1582,6 @@ VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(mnew->object == NULL, ("vm_page_replace: page %p already in object", mnew)); - KASSERT(mnew->queue == PQ_NONE || vm_page_wired(mnew), - ("vm_page_replace: new page %p is on a paging queue", mnew)); /* * This function mostly follows vm_page_insert() and @@ -1586,6 +1591,7 @@ mnew->object = object; mnew->pindex = pindex; + atomic_set_int(&mnew->ref_count, VPRC_OBJREF); mold = vm_radix_replace(&object->rtree, mnew); KASSERT(mold->queue == PQ_NONE, ("vm_page_replace: old page %p is on a paging queue", mold)); @@ -1595,6 +1601,7 @@ TAILQ_REMOVE(&object->memq, mold, listq); mold->object = NULL; + atomic_clear_int(&mold->ref_count, VPRC_OBJREF); vm_page_xunbusy_maybelocked(mold); /* @@ -1632,6 +1639,7 @@ VM_OBJECT_ASSERT_WLOCKED(new_object); + KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m)); mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex); KASSERT(mpred == NULL || mpred->pindex != new_pindex, ("vm_page_rename: pindex already renamed")); @@ -1654,11 +1662,12 @@ */ m->pindex = opidx; vm_page_lock(m); - (void)vm_page_remove(m); + vm_page_object_remove(m); /* Return back to the new pindex to complete vm_page_insert(). */ m->pindex = new_pindex; m->object = new_object; + vm_page_unlock(m); vm_page_insert_radixdone(m, new_object, mpred); vm_page_dirty(m); @@ -1876,7 +1885,7 @@ * page is inserted into the object. */ vm_wire_add(1); - m->wire_count = 1; + m->ref_count = 1; } m->act_count = 0; @@ -1884,7 +1893,7 @@ if (vm_page_insert_after(m, object, pindex, mpred)) { if (req & VM_ALLOC_WIRED) { vm_wire_sub(1); - m->wire_count = 0; + m->ref_count = 0; } KASSERT(m->object == NULL, ("page %p has object", m)); m->oflags = VPO_UNMANAGED; @@ -2076,7 +2085,7 @@ m->flags = (m->flags | PG_NODUMP) & flags; m->busy_lock = busy_lock; if ((req & VM_ALLOC_WIRED) != 0) - m->wire_count = 1; + m->ref_count = 1; m->act_count = 0; m->oflags = oflags; if (object != NULL) { @@ -2089,7 +2098,7 @@ for (m = m_ret; m < &m_ret[npages]; m++) { if (m <= mpred && (req & VM_ALLOC_WIRED) != 0) - m->wire_count = 0; + m->ref_count = 0; m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. 
*/ @@ -2123,7 +2132,7 @@ KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0, ("page %p has unexpected queue %d, flags %#x", m, m->queue, (m->aflags & PGA_QUEUE_STATE_MASK))); - KASSERT(!vm_page_wired(m), ("page %p is wired", m)); + KASSERT(m->ref_count == 0, ("page %p has references", m)); KASSERT(!vm_page_busied(m), ("page %p is busy", m)); KASSERT(m->dirty == 0, ("page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, @@ -2207,7 +2216,7 @@ * not belong to an object. */ vm_wire_add(1); - m->wire_count = 1; + m->ref_count = 1; } /* Unmanaged pages don't use "act_count". */ m->oflags = VPO_UNMANAGED; @@ -2300,8 +2309,8 @@ for (m = m_start; m < m_end && run_len < npages; m += m_inc) { KASSERT((m->flags & PG_MARKER) == 0, ("page %p is PG_MARKER", m)); - KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->wire_count == 1, - ("fictitious page %p has invalid wire count", m)); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1, + ("fictitious page %p has invalid ref count", m)); /* * If the current page would be the start of a run, check its @@ -2358,9 +2367,6 @@ */ VM_OBJECT_RUNLOCK(object); goto retry; - } else if (vm_page_wired(m)) { - run_ext = 0; - goto unlock; } } /* Don't care: PG_NODUMP, PG_ZERO. */ @@ -2378,7 +2384,8 @@ vm_reserv_size(level)) - pa); #endif } else if (object->memattr == VM_MEMATTR_DEFAULT && - vm_page_queue(m) != PQ_NONE && !vm_page_busied(m)) { + vm_page_queue(m) != PQ_NONE && !vm_page_busied(m) && + !vm_page_wired(m)) { /* * The page is allocated but eligible for * relocation. Extend the current run by one @@ -2394,7 +2401,6 @@ run_ext = 1; } else run_ext = 0; -unlock: VM_OBJECT_RUNLOCK(object); #if VM_NRESERVLEVEL > 0 } else if (level >= 0) { @@ -2515,9 +2521,6 @@ */ VM_OBJECT_WUNLOCK(object); goto retry; - } else if (vm_page_wired(m)) { - error = EBUSY; - goto unlock; } } /* Don't care: PG_NODUMP, PG_ZERO. */ @@ -2528,7 +2531,7 @@ else if (object->memattr != VM_MEMATTR_DEFAULT) error = EINVAL; else if (vm_page_queue(m) != PQ_NONE && - !vm_page_busied(m)) { + !vm_page_busied(m) && !vm_page_wired(m)) { KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); @@ -2577,8 +2580,6 @@ error = ENOMEM; goto unlock; } - KASSERT(!vm_page_wired(m_new), - ("page %p is wired", m_new)); /* * Replace "m" with the new page. For @@ -2586,8 +2587,11 @@ * and dequeued. Finally, change "m" * as if vm_page_free() was called. */ - if (object->ref_count != 0) - pmap_remove_all(m); + if (object->ref_count != 0 && + !vm_page_try_remove_all(m)) { + error = EBUSY; + goto unlock; + } m_new->aflags = m->aflags & ~PGA_QUEUE_STATE_MASK; KASSERT(m_new->oflags == VPO_UNMANAGED, @@ -3147,8 +3151,7 @@ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m)); - KASSERT(mtx_owned(vm_page_lockptr(m)) || - (m->object == NULL && (m->aflags & PGA_DEQUEUE) != 0), + KASSERT(mtx_owned(vm_page_lockptr(m)) || m->object == NULL, ("missing synchronization for page %p", m)); KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue)); @@ -3287,7 +3290,7 @@ { uint8_t queue; - KASSERT(m->object == NULL, ("page %p has an object reference", m)); + KASSERT(m->ref_count == 0, ("page %p has references", m)); if ((m->aflags & PGA_DEQUEUE) != 0) return; @@ -3461,6 +3464,12 @@ vm_page_free_prep(vm_page_t m) { + /* + * Synchronize with threads that have dropped a reference to this + * page. 
+ */ + atomic_thread_fence_acq(); + #if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP) if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) { uint64_t *p; @@ -3471,11 +3480,10 @@ m, i, (uintmax_t)*p)); } #endif - if ((m->oflags & VPO_UNMANAGED) == 0) { - vm_page_lock_assert(m, MA_OWNED); + if ((m->oflags & VPO_UNMANAGED) == 0) KASSERT(!pmap_page_is_mapped(m), ("vm_page_free_prep: freeing mapped page %p", m)); - } else + else KASSERT(m->queue == PQ_NONE, ("vm_page_free_prep: unmanaged page %p is queued", m)); VM_CNT_INC(v_tfree); @@ -3483,16 +3491,23 @@ if (vm_page_sbusied(m)) panic("vm_page_free_prep: freeing busy page %p", m); - if (m->object != NULL) - (void)vm_page_remove(m); + if (m->object != NULL) { + vm_page_object_remove(m); + + KASSERT(m->ref_count == VPRC_OBJREF, + ("vm_page_free_prep: page %p has unexpected ref_count %u", + m, m->ref_count)); + m->object = NULL; + m->ref_count = 0; + } /* * If fictitious remove object association and * return. */ if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("fictitious page %p is not wired", m)); + KASSERT(m->ref_count == 1, + ("fictitious page %p is referenced", m)); KASSERT(m->queue == PQ_NONE, ("fictitious page %p is queued", m)); return (false); @@ -3509,8 +3524,8 @@ m->valid = 0; vm_page_undirty(m); - if (vm_page_wired(m) != 0) - panic("vm_page_free_prep: freeing wired page %p", m); + if (m->ref_count != 0) + panic("vm_page_free_prep: page %p has references", m); /* * Restore the default memory attribute to the page. @@ -3592,161 +3607,195 @@ } /* - * vm_page_wire: - * - * Mark this page as wired down. If the page is fictitious, then - * its wire count must remain one. - * - * The page must be locked. + * Mark this page as wired down, preventing reclamation by the page daemon + * or when the containing object is destroyed. */ void vm_page_wire(vm_page_t m) { + u_int old; - vm_page_assert_locked(m); - if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("vm_page_wire: fictitious page %p's wire count isn't one", - m)); - return; - } - if (!vm_page_wired(m)) { - KASSERT((m->oflags & VPO_UNMANAGED) == 0 || - m->queue == PQ_NONE, - ("vm_page_wire: unmanaged page %p is queued", m)); + KASSERT(m->object != NULL, + ("vm_page_wire: page %p does not belong to an object", m)); + if (!vm_page_busied(m)) + VM_OBJECT_ASSERT_LOCKED(m->object); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || + VPRC_WIRE_COUNT(m->ref_count) >= 1, + ("vm_page_wire: fictitious page %p has zero wirings", m)); + + old = atomic_fetchadd_int(&m->ref_count, 1); + KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX, + ("vm_page_wire: counter overflow for page %p", m)); + if (VPRC_WIRE_COUNT(old) == 0) vm_wire_add(1); - } - m->wire_count++; - KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m)); } /* - * vm_page_unwire: - * + * Attempt to wire a mapped page following a pmap lookup of that page. + * This may fail if a thread is concurrently tearing down mappings of the page. + */ +bool +vm_page_wire_mapped(vm_page_t m) +{ + u_int old; + + old = m->ref_count; + do { + KASSERT(old > 0, + ("vm_page_wire_mapped: wiring unreferenced page %p", m)); + if ((old & VPRC_BLOCKED) != 0) + return (false); + } while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1)); + + if (VPRC_WIRE_COUNT(old) == 0) + vm_wire_add(1); + return (true); +} + +/* * Release one wiring of the specified page, potentially allowing it to be - * paged out. Returns TRUE if the number of wirings transitions to zero and - * FALSE otherwise. + * paged out. 
* * Only managed pages belonging to an object can be paged out. If the number * of wirings transitions to zero and the page is eligible for page out, then - * the page is added to the specified paging queue (unless PQ_NONE is - * specified, in which case the page is dequeued if it belongs to a paging - * queue). - * - * If a page is fictitious, then its wire count must always be one. + * the page is added to the specified paging queue. If the released wiring + * represented the last reference to the page, the page is freed. * * A managed page must be locked. */ -bool +void vm_page_unwire(vm_page_t m, uint8_t queue) { - bool unwired; + u_int old; + bool locked; - KASSERT(queue < PQ_COUNT || queue == PQ_NONE, - ("vm_page_unwire: invalid queue %u request for page %p", - queue, m)); - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_assert_locked(m); + KASSERT(queue < PQ_COUNT, + ("vm_page_unwire: invalid queue %u request for page %p", queue, m)); - unwired = vm_page_unwire_noq(m); - if (!unwired || (m->oflags & VPO_UNMANAGED) != 0 || m->object == NULL) - return (unwired); + if ((m->oflags & VPO_UNMANAGED) != 0) { + if (vm_page_unwire_noq(m) && m->ref_count == 0) + vm_page_free(m); + return; + } - if (vm_page_queue(m) == queue) { - if (queue == PQ_ACTIVE) - vm_page_reference(m); - else if (queue != PQ_NONE) - vm_page_requeue(m); - } else { - vm_page_dequeue(m); - if (queue != PQ_NONE) { - vm_page_enqueue(m, queue); - if (queue == PQ_ACTIVE) - /* Initialize act_count. */ - vm_page_activate(m); + /* + * Update LRU state before releasing the wiring reference. + * We only need to do this once since we hold the page lock. + * Use a release store when updating the reference count to + * synchronize with vm_page_free_prep(). + */ + old = m->ref_count; + locked = false; + do { + KASSERT(VPRC_WIRE_COUNT(old) > 0, + ("vm_page_unwire: wire count underflow for page %p", m)); + if (!locked && VPRC_WIRE_COUNT(old) == 1) { + vm_page_lock(m); + locked = true; + if (queue == PQ_ACTIVE && vm_page_queue(m) == PQ_ACTIVE) + vm_page_reference(m); + else + vm_page_mvqueue(m, queue); } + } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1)); + + /* + * Release the lock only after the wiring is released, to ensure that + * the page daemon does not encounter and dequeue the page while it is + * still wired. + */ + if (locked) + vm_page_unlock(m); + + if (VPRC_WIRE_COUNT(old) == 1) { + vm_wire_sub(1); + if (old == 1) + vm_page_free(m); } - return (unwired); } /* - * - * vm_page_unwire_noq: - * * Unwire a page without (re-)inserting it into a page queue. It is up * to the caller to enqueue, requeue, or free the page as appropriate. - * In most cases, vm_page_unwire() should be used instead. + * In most cases involving managed pages, vm_page_unwire() should be used + * instead. 
*/ bool vm_page_unwire_noq(vm_page_t m) { + u_int old; - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_assert_locked(m); - if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("vm_page_unwire: fictitious page %p's wire count isn't one", m)); - return (false); - } - if (!vm_page_wired(m)) - panic("vm_page_unwire: page %p's wire count is zero", m); - m->wire_count--; - if (m->wire_count == 0) { - vm_wire_sub(1); - return (true); - } else + if ((m->oflags & VPO_UNMANAGED) != 0) + old = m->ref_count--; + else + old = vm_page_drop(m, 1); + KASSERT(VPRC_WIRE_COUNT(old) != 0, + ("vm_page_unref: counter underflow for page %p", m)); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || VPRC_WIRE_COUNT(old) > 1, + ("vm_page_unref: missing ref on fictitious page %p", m)); + + if (VPRC_WIRE_COUNT(old) > 1) return (false); + vm_wire_sub(1); + return (true); } /* - * vm_page_activate: - * - * Put the specified page on the active list (if appropriate). - * Ensure that act_count is at least ACT_INIT but do not otherwise - * mess with it. - * - * The page must be locked. + * Ensure that the page is in the specified page queue. If the page is + * active or being moved to the active queue, ensure that its act_count is + * at least ACT_INIT but do not otherwise mess with it. Otherwise, ensure that + * the page is at the tail of its page queue. + * + * The page may be wired. The caller should release any wiring references + * before releasing the page lock, otherwise the page daemon may immediately + * dequeue the page. */ -void -vm_page_activate(vm_page_t m) +static __always_inline void +vm_page_mvqueue(vm_page_t m, const int nqueue) { vm_page_assert_locked(m); + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_mvqueue: page %p is unmanaged", m)); - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) - return; - if (vm_page_queue(m) == PQ_ACTIVE) { - if (m->act_count < ACT_INIT) - m->act_count = ACT_INIT; - return; + if (vm_page_queue(m) != nqueue) { + vm_page_dequeue(m); + vm_page_enqueue(m, nqueue); + } else if (nqueue != PQ_ACTIVE) { + vm_page_requeue(m); } - vm_page_dequeue(m); - if (m->act_count < ACT_INIT) + if (nqueue == PQ_ACTIVE && m->act_count < ACT_INIT) m->act_count = ACT_INIT; - vm_page_enqueue(m, PQ_ACTIVE); +} + +/* + * Put the specified page on the active list (if appropriate). + * + * A managed page must be locked. + */ +void +vm_page_activate(vm_page_t m) +{ + + if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) + return; + vm_page_mvqueue(m, PQ_ACTIVE); } /* * Move the specified page to the tail of the inactive queue, or requeue * the page if it is already in the inactive queue. * - * The page must be locked. + * A managed page must be locked. */ void vm_page_deactivate(vm_page_t m) { - vm_page_assert_locked(m); - - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) + if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) return; - - if (!vm_page_inactive(m)) { - vm_page_dequeue(m); - vm_page_enqueue(m, PQ_INACTIVE); - } else - vm_page_requeue(m); + vm_page_mvqueue(m, PQ_INACTIVE); } /* @@ -3754,18 +3803,13 @@ * bypassing LRU. A marker page is used to maintain FIFO ordering. * As with regular enqueues, we use a per-CPU batch queue to reduce * contention on the page queue lock. - * - * The page must be locked. 
*/ -void -vm_page_deactivate_noreuse(vm_page_t m) +static void +_vm_page_deactivate_noreuse(vm_page_t m) { vm_page_assert_locked(m); - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) - return; - if (!vm_page_inactive(m)) { vm_page_dequeue(m); m->queue = PQ_INACTIVE; @@ -3775,31 +3819,33 @@ vm_pqbatch_submit_page(m, PQ_INACTIVE); } +void +vm_page_deactivate_noreuse(vm_page_t m) +{ + + KASSERT(m->object != NULL, + ("vm_page_deactivate_noreuse: page %p has no object", m)); + + if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_wired(m)) + _vm_page_deactivate_noreuse(m); +} + /* - * vm_page_launder + * Put a page in the laundry, or requeue it if it is already there. * - * Put a page in the laundry, or requeue it if it is already there. + * The page must be locked. */ void vm_page_launder(vm_page_t m) { - vm_page_assert_locked(m); - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) + if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) return; - - if (vm_page_in_laundry(m)) - vm_page_requeue(m); - else { - vm_page_dequeue(m); - vm_page_enqueue(m, PQ_LAUNDRY); - } + vm_page_mvqueue(m, PQ_LAUNDRY); } /* - * vm_page_unswappable - * - * Put a page in the PQ_UNSWAPPABLE holding queue. + * Put a page in the PQ_UNSWAPPABLE holding queue. */ void vm_page_unswappable(vm_page_t m) @@ -3842,41 +3888,53 @@ vm_page_release(vm_page_t m, int flags) { vm_object_t object; - bool freed; + u_int old; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("vm_page_release: page %p is unmanaged", m)); - vm_page_lock(m); - if (m->object != NULL) - VM_OBJECT_ASSERT_UNLOCKED(m->object); - if (vm_page_unwire_noq(m)) { - if ((object = m->object) == NULL) { - vm_page_free(m); - } else { - freed = false; - if ((flags & VPR_TRYFREE) != 0 && !vm_page_busied(m) && - /* Depends on type stability. */ - VM_OBJECT_TRYWLOCK(object)) { - /* - * Only free unmapped pages. The busy test from - * before the object was locked cannot be relied - * upon. - */ - if ((object->ref_count == 0 || - !pmap_page_is_mapped(m)) && m->dirty == 0 && - !vm_page_busied(m)) { - vm_page_free(m); - freed = true; - } - VM_OBJECT_WUNLOCK(object); + if ((flags & VPR_TRYFREE) != 0) { + while ((object = m->object) != NULL) { + /* Depends on type-stability. */ + if (vm_page_busied(m) || !VM_OBJECT_TRYWLOCK(object)) { + object = NULL; + break; } + if (object == m->object) + break; + VM_OBJECT_WUNLOCK(object); + } + if (__predict_true(object != NULL)) { + vm_page_release_locked(m, flags); + VM_OBJECT_WUNLOCK(object); + return; + } + } - if (!freed) - vm_page_release_toq(m, flags); + /* + * Update LRU state before releasing the wiring reference. + * Use a release store when updating the reference count to + * synchronize with vm_page_free_prep(). + */ + old = m->ref_count; + do { + if (VPRC_WIRE_COUNT(old) == 1) { + vm_page_lock(m); + if (m->valid == 0 || (flags & VPR_NOREUSE) != 0) + _vm_page_deactivate_noreuse(m); + else if (vm_page_active(m)) + vm_page_reference(m); + else + vm_page_mvqueue(m, PQ_INACTIVE); + vm_page_unlock(m); } + } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1)); + + if (VPRC_WIRE_COUNT(old) == 1) { + vm_wire_sub(1); + if (old == 1) + vm_page_free(m); } - vm_page_unlock(m); } /* See vm_page_release(). 
*/ @@ -3888,17 +3946,66 @@ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("vm_page_release_locked: page %p is unmanaged", m)); - vm_page_lock(m); if (vm_page_unwire_noq(m)) { if ((flags & VPR_TRYFREE) != 0 && (m->object->ref_count == 0 || !pmap_page_is_mapped(m)) && m->dirty == 0 && !vm_page_busied(m)) { vm_page_free(m); - } else { - vm_page_release_toq(m, flags); } + } else { + vm_page_lock(m); + vm_page_release_toq(m, flags); + vm_page_unlock(m); } - vm_page_unlock(m); +} + +static bool +vm_page_try_blocked_op(vm_page_t m, void (*op)(vm_page_t)) +{ + u_int old; + + vm_page_assert_locked(m); + KASSERT(m->object != NULL && (m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_try_blocked_op: page %p has no object", m)); + KASSERT(!vm_page_busied(m), + ("vm_page_try_blocked_op: page %p is busy", m)); + VM_OBJECT_ASSERT_LOCKED(m->object); + + old = m->ref_count; + do { + KASSERT(old != 0, + ("vm_page_try_blocked_op: page %p has no references", m)); + if (VPRC_WIRE_COUNT(old) != 0) + return (false); + } while (!atomic_fcmpset_int(&m->ref_count, &old, old | VPRC_BLOCKED)); + + (op)(m); + + old = vm_page_drop(m, VPRC_BLOCKED); + KASSERT(old == (VPRC_BLOCKED | VPRC_OBJREF), + ("vm_page_try_blocked_op: unexpected refcount value %u for %p", + old, m)); + return (true); +} + +/* + * Atomically check for wirings and remove all mappings of the page. + */ +bool +vm_page_try_remove_all(vm_page_t m) +{ + + return (vm_page_try_blocked_op(m, pmap_remove_all)); +} + +/* + * Atomically check for wirings and remove all writeable mappings of the page. + */ +bool +vm_page_try_remove_write(vm_page_t m) +{ + + return (vm_page_try_blocked_op(m, pmap_remove_write)); } /* @@ -3995,11 +4102,8 @@ VM_OBJECT_WLOCK(object); goto retrylookup; } else { - if ((allocflags & VM_ALLOC_WIRED) != 0) { - vm_page_lock(m); + if ((allocflags & VM_ALLOC_WIRED) != 0) vm_page_wire(m); - vm_page_unlock(m); - } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); @@ -4097,11 +4201,8 @@ VM_OBJECT_WLOCK(object); goto retrylookup; } - if ((allocflags & VM_ALLOC_WIRED) != 0) { - vm_page_lock(m); + if ((allocflags & VM_ALLOC_WIRED) != 0) vm_page_wire(m); - vm_page_unlock(m); - } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); @@ -4630,10 +4731,10 @@ else m = (vm_page_t)addr; db_printf( - "page %p obj %p pidx 0x%jx phys 0x%jx q %d wire %d\n" + "page %p obj %p pidx 0x%jx phys 0x%jx q %d ref %u\n" " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, - m->queue, m->wire_count, m->aflags, m->oflags, + m->queue, m->ref_count, m->aflags, m->oflags, m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); } #endif /* DDB */ Index: sys/vm/vm_pageout.c =================================================================== --- sys/vm/vm_pageout.c +++ sys/vm/vm_pageout.c @@ -305,7 +305,9 @@ vm_pagequeue_unlock(pq); } -/* Return the next page to be scanned, or NULL if the scan is complete. */ +/* + * Return the next page to be scanned, or NULL if the scan is complete. 
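vm_page_try_blocked_op() above refuses to proceed if the page has any wirings and otherwise publishes VPRC_BLOCKED, so concurrent pmap-side lookups cannot create new wirings while the pmap operation runs. A userspace model of that handshake, with a local bit layout standing in for the VPRC_* constants:

/*
 * Model of the blocked-op pattern: fail if wired, otherwise set a
 * BLOCKED marker, run the operation, then drop the marker.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define	WIRE_MASK	0x0000ffffu	/* low bits: wirings */
#define	REF_BLOCKED	0x40000000u	/* new wirings must fail while set */
#define	REF_OBJ		0x80000000u	/* reference from the owning object */

static _Atomic unsigned int ref_count;

static bool
try_blocked_op(void (*op)(void))
{
	unsigned int old;

	old = atomic_load(&ref_count);
	do {
		assert(old != 0);
		if ((old & WIRE_MASK) != 0)
			return (false);		/* wired; caller must defer */
	} while (!atomic_compare_exchange_weak(&ref_count, &old,
	    old | REF_BLOCKED));

	(*op)();				/* e.g. remove all mappings */

	/* Drop the marker; only the object reference should remain. */
	old = atomic_fetch_sub(&ref_count, REF_BLOCKED);
	assert(old == (REF_BLOCKED | REF_OBJ));
	return (true);
}

static void
remove_mappings(void)
{

	printf("mappings removed\n");
}

int
main(void)
{

	atomic_init(&ref_count, REF_OBJ);	/* unwired page */
	printf("succeeded: %d\n", try_blocked_op(remove_mappings));
	return (0);
}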
+ */ static __always_inline vm_page_t vm_pageout_next(struct scan_state *ss, const bool dequeue) { @@ -328,16 +330,11 @@ vm_pindex_t pindex; int ib, is, page_base, pageout_count; - vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); pindex = m->pindex; vm_page_assert_unbusied(m); - KASSERT(!vm_page_wired(m), ("page %p is wired", m)); - - pmap_remove_write(m); - vm_page_unlock(m); mc[vm_pageout_page_count] = pb = ps = m; pageout_count = 1; @@ -363,7 +360,8 @@ ib = 0; break; } - if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) { + if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p) || + vm_page_wired(p)) { ib = 0; break; } @@ -373,12 +371,11 @@ break; } vm_page_lock(p); - if (vm_page_wired(p) || !vm_page_in_laundry(p)) { + if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { vm_page_unlock(p); ib = 0; break; } - pmap_remove_write(p); vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; @@ -393,17 +390,17 @@ } while (pageout_count < vm_pageout_page_count && pindex + is < object->size) { - if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) + if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p) || + vm_page_wired(p)) break; vm_page_test_dirty(p); if (p->dirty == 0) break; vm_page_lock(p); - if (vm_page_wired(p) || !vm_page_in_laundry(p)) { + if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { vm_page_unlock(p); break; } - pmap_remove_write(p); vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; @@ -648,16 +645,26 @@ } /* - * The page may have been busied or referenced while the object - * and page locks were released. + * The page may have been busied while the object and page + * locks were released. */ - if (vm_page_busied(m) || vm_page_wired(m)) { + if (vm_page_busied(m)) { vm_page_unlock(m); error = EBUSY; goto unlock_all; } } + /* + * Remove all writeable mappings, failing if the page is wired. + */ + if (!vm_page_try_remove_write(m)) { + vm_page_unlock(m); + error = EBUSY; + goto unlock_all; + } + vm_page_unlock(m); + /* * If a page is dirty, then it is either being washed * (but not yet cleaned) or it is still in the @@ -732,7 +739,9 @@ recheck: /* * The page may have been disassociated from the queue - * while locks were dropped. + * or even freed while locks were dropped. We thus must be + * careful whenever modifying page state. Once the object lock + * has been acquired, we have a stable reference to the page. */ if (vm_page_queue(m) != queue) continue; @@ -749,7 +758,9 @@ /* * Wired pages may not be freed. Complete their removal * from the queue now to avoid needless revisits during - * future scans. + * future scans. This check is racy and must be reverified once + * we hold the object lock and have verified that the page + * is not busy. */ if (vm_page_wired(m)) { vm_page_dequeue_deferred(m); @@ -759,8 +770,8 @@ if (object != m->object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); - object = m->object; - if (!VM_OBJECT_TRYWLOCK(object)) { + object = (vm_object_t)atomic_load_ptr(&m->object); + if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); @@ -768,10 +779,29 @@ goto recheck; } } + if (__predict_false(m->object == NULL)) + /* + * The page has been removed from its object. + */ + continue; + + KASSERT(m->object == object, ("page %p does not belong to %p", + m, object)); if (vm_page_busied(m)) continue; + /* + * Re-check for wirings now that we hold the object lock. 
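The scan comments added above describe a double-checked test: an unlocked wiring check filters pages cheaply, and the check is repeated once the object lock is held and the page is known not to be busy, before anything irreversible is done; wirings reachable through existing mappings are then handled by the try_remove helpers. A small sketch of that pattern, with placeholder types and names:

/*
 * Double-checked racy test: a relaxed unlocked check skips obviously
 * wired pages, and the check is repeated under the object lock.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct page {
	_Atomic unsigned int	wirings;
	pthread_mutex_t		*object_lock;
};

static bool
scan_one(struct page *p)
{

	/* Racy filter: avoid taking locks for pages that look wired. */
	if (atomic_load_explicit(&p->wirings, memory_order_relaxed) != 0)
		return (false);			/* defer, e.g. lazy dequeue */

	pthread_mutex_lock(p->object_lock);

	/* Repeat the check under the lock; it may have changed meanwhile. */
	if (atomic_load(&p->wirings) != 0) {
		pthread_mutex_unlock(p->object_lock);
		return (false);
	}

	/* ... reclaim the page; mapping-based wirings still need the
	 * blocked-op helper, which is modeled separately above ... */
	pthread_mutex_unlock(p->object_lock);
	return (true);
}

int
main(void)
{
	pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	struct page p = { .object_lock = &lock };

	atomic_init(&p.wirings, 0);
	return (scan_one(&p) ? 0 : 1);
}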
If + * the page is mapped, it may still be wired by pmap lookups. + * The call to vm_page_try_remove_all() below atomically checks + * for such wirings and removes mappings. + */ + if (__predict_false(vm_page_wired(m))) { + vm_page_dequeue_deferred(m); + continue; + } + /* * Invalid pages can be easily freed. They cannot be * mapped; vm_page_free() asserts this. @@ -839,8 +869,10 @@ */ if (object->ref_count != 0) { vm_page_test_dirty(m); - if (m->dirty == 0) - pmap_remove_all(m); + if (m->dirty == 0 && !vm_page_try_remove_all(m)) { + vm_page_dequeue_deferred(m); + continue; + } } /* @@ -1132,6 +1164,7 @@ { struct scan_state ss; struct mtx *mtx; + vm_object_t object; vm_page_t m, marker; struct vm_pagequeue *pq; long min_scan; @@ -1192,7 +1225,9 @@ /* * The page may have been disassociated from the queue - * while locks were dropped. + * or even freed while locks were dropped. We thus must be + * careful whenever modifying page state. Once the object lock + * has been acquired, we have a stable reference to the page. */ if (vm_page_queue(m) != PQ_ACTIVE) continue; @@ -1205,6 +1240,16 @@ continue; } + /* + * Acquire a pointer to the page's type-stable object. + */ + object = (vm_object_t)atomic_load_ptr(&m->object); + if (__predict_false(object == NULL)) + /* + * The page has been removed from its object. + */ + continue; + /* * Check to see "how much" the page has been used. * @@ -1224,7 +1269,7 @@ * This race delays the detection of a new reference. At * worst, we will deactivate and reactivate the page. */ - if (m->object->ref_count != 0) + if (object->ref_count != 0) act_delta = pmap_ts_referenced(m); else act_delta = 0; @@ -1400,7 +1445,9 @@ recheck: /* * The page may have been disassociated from the queue - * while locks were dropped. + * or even freed while locks were dropped. We thus must be + * careful whenever modifying page state. Once the object lock + * has been acquired, we have a stable reference to the page. */ if (vm_page_queue(m) != PQ_INACTIVE) { addl_page_shortage++; @@ -1419,7 +1466,9 @@ /* * Wired pages may not be freed. Complete their removal * from the queue now to avoid needless revisits during - * future scans. + * future scans. This check is racy and must be reverified once + * we hold the object lock and have verified that the page + * is not busy. */ if (vm_page_wired(m)) { vm_page_dequeue_deferred(m); @@ -1429,8 +1478,8 @@ if (object != m->object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); - object = m->object; - if (!VM_OBJECT_TRYWLOCK(object)) { + object = (vm_object_t)atomic_load_ptr(&m->object); + if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); @@ -1438,6 +1487,14 @@ goto recheck; } } + if (__predict_false(m->object == NULL)) + /* + * The page has been removed from its object. + */ + continue; + + KASSERT(m->object == object, ("page %p does not belong to %p", + m, object)); if (vm_page_busied(m)) { /* @@ -1449,7 +1506,21 @@ * inactive count. */ addl_page_shortage++; - goto reinsert; + vm_pageout_reinsert_inactive(&ss, &rq, m); + continue; + } + + /* + * Re-check for wirings now that we hold the object lock. If + * the page is mapped, it may still be wired by pmap lookups. + * The call to vm_page_try_remove_all() below atomically checks + * for such wirings and removes mappings. New mappings cannot + * be created while the object is locked and the page is + * unbusied. 
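The active-queue scan above now reads object->ref_count through a pointer obtained with atomic_load_ptr() and without the page lock; this is safe only because object memory is type-stable and the value is used purely as a heuristic for whether to consult the pmap for references. A minimal model of such an advisory read; the pool and field names are specific to the sketch:

/*
 * Advisory read through a pointer loaded without a lock, relying on a
 * type-stable (never-freed) backing pool so the pointer cannot dangle.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct object {
	_Atomic unsigned int	ref_count;
};

struct page {
	struct object *_Atomic	object;
};

/* The pool lives for the whole program: stale pointers stay mapped. */
static struct object	object_pool[4];

static bool
page_seems_referenced(struct page *p)
{
	struct object *obj;

	obj = atomic_load(&p->object);
	if (obj == NULL)
		return (false);		/* page was freed; skip it */
	/*
	 * The value may be stale; it is only a hint about whether it is
	 * worth doing the more expensive reference check.
	 */
	return (atomic_load(&obj->ref_count) != 0);
}

int
main(void)
{
	struct page p;

	atomic_init(&object_pool[0].ref_count, 1);
	atomic_init(&p.object, &object_pool[0]);
	printf("%d\n", page_seems_referenced(&p));
	return (0);
}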
+	 */
+	if (__predict_false(vm_page_wired(m))) {
+		vm_page_dequeue_deferred(m);
+		continue;
 	}
 
 	/*
@@ -1495,7 +1566,8 @@
 			continue;
 		} else if ((object->flags & OBJ_DEAD) == 0) {
 			vm_page_aflag_set(m, PGA_REQUEUE);
-			goto reinsert;
+			vm_pageout_reinsert_inactive(&ss, &rq, m);
+			continue;
 		}
 	}
 
 	/*
@@ -1508,8 +1580,10 @@
 	 */
 	if (object->ref_count != 0) {
 		vm_page_test_dirty(m);
-		if (m->dirty == 0)
-			pmap_remove_all(m);
+		if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
+			vm_page_dequeue_deferred(m);
+			continue;
+		}
 	}
 
 	/*
Index: sys/vm/vm_swapout.c
===================================================================
--- sys/vm/vm_swapout.c
+++ sys/vm/vm_swapout.c
@@ -207,12 +207,12 @@
 			goto unlock_return;
 		if (should_yield())
 			goto unlock_return;
-		if (vm_page_busied(p))
+
+		if (vm_page_busied(p) || vm_page_wired(p))
 			continue;
 		VM_CNT_INC(v_pdpages);
 		vm_page_lock(p);
-		if (vm_page_wired(p) ||
-		    !pmap_page_exists_quick(pmap, p)) {
+		if (!pmap_page_exists_quick(pmap, p)) {
 			vm_page_unlock(p);
 			continue;
 		}
@@ -230,8 +230,8 @@
 				p->act_count -= min(p->act_count,
 				    ACT_DECLINE);
 				if (!remove_mode && p->act_count == 0) {
-					pmap_remove_all(p);
-					vm_page_deactivate(p);
+					if (vm_page_try_remove_all(p))
+						vm_page_deactivate(p);
 				} else
 					vm_page_requeue(p);
 			} else {
@@ -242,7 +242,7 @@
 					vm_page_requeue(p);
 			}
 		} else if (vm_page_inactive(p))
-			pmap_remove_all(p);
+			(void)vm_page_try_remove_all(p);
 		vm_page_unlock(p);
 	}
 	if ((backing_object = object->backing_object) == NULL)
@@ -554,9 +554,7 @@
 		if (m == NULL)
 			panic("vm_thread_swapout: kstack already missing?");
 		vm_page_dirty(m);
-		vm_page_lock(m);
 		vm_page_unwire(m, PQ_LAUNDRY);
-		vm_page_unlock(m);
 	}
 	VM_OBJECT_WUNLOCK(ksobj);
 }
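With this conversion, callers such as the kstack swapout path above unwire pages without taking the page lock; correctness rests on the counter's release/acquire protocol, so the thread that drops the last reference observes every prior store to the page before freeing it. A compact model of that last-reference contract, with illustrative names:

/*
 * Last-reference free: the release decrement pairs with an acquire
 * fence in the freeing path.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdlib.h>

struct page {
	_Atomic unsigned int	ref_count;
	int			dirty;		/* example payload */
};

static void
page_free(struct page *p)
{

	/* Pair with the release in page_unwire() before reading payload. */
	atomic_thread_fence(memory_order_acquire);
	assert(p->dirty == 0);
	free(p);
}

static void
page_unwire(struct page *p)
{

	p->dirty = 0;			/* e.g. laundered before unwiring */
	if (atomic_fetch_sub_explicit(&p->ref_count, 1,
	    memory_order_release) == 1)
		page_free(p);		/* dropped the last reference */
}

int
main(void)
{
	struct page *p;

	if ((p = calloc(1, sizeof(*p))) == NULL)
		return (1);
	atomic_init(&p->ref_count, 1);
	p->dirty = 1;
	page_unwire(p);
	return (0);
}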