D20486.id58419.diff

Index: share/man/man9/Makefile
===================================================================
--- share/man/man9/Makefile
+++ share/man/man9/Makefile
@@ -2200,7 +2200,9 @@
MLINKS+=vm_map_max.9 vm_map_min.9 \
vm_map_max.9 vm_map_pmap.9
MLINKS+=vm_map_stack.9 vm_map_growstack.9
MLINKS+=vm_map_wire.9 vm_map_unwire.9
+MLINKS+=vm_page_wire.9 vm_page_unwire.9 \
+ vm_page_wire.9 vm_page_unwire_noq.9
MLINKS+=vm_page_bits.9 vm_page_clear_dirty.9 \
vm_page_bits.9 vm_page_dirty.9 \
vm_page_bits.9 vm_page_is_valid.9 \
Index: share/man/man9/vm_page_wire.9
===================================================================
--- share/man/man9/vm_page_wire.9
+++ share/man/man9/vm_page_wire.9
@@ -26,12 +26,13 @@
.\"
.\" $FreeBSD$
.\"
-.Dd July 13, 2001
+.Dd June 3, 2019
.Dt VM_PAGE_WIRE 9
.Os
.Sh NAME
.Nm vm_page_wire ,
-.Nm vm_page_unwire
+.Nm vm_page_unwire ,
+.Nm vm_page_unwire_noq
.Nd "wire and unwire pages"
.Sh SYNOPSIS
.In sys/param.h
@@ -39,29 +40,44 @@
.In vm/vm_page.h
.Ft void
.Fn vm_page_wire "vm_page_t m"
+.Ft bool
+.Fn vm_page_wire_mapped "vm_page_t m"
.Ft void
-.Fn vm_page_unwire "vm_page_t m" "int activate"
+.Fn vm_page_unwire "vm_page_t m" "int queue"
+.Ft bool
+.Fn vm_page_unwire_noq "vm_page_t m"
.Sh DESCRIPTION
The
.Fn vm_page_wire
-function increments the wire count on a page, and removes it from
-whatever queue it is on.
+and
+.Fn vm_page_wire_mapped
+functions wire the page, preventing it from being reclaimed by the page
+daemon or when its containing object is destroyed.
+Both functions require that the page belong to an object.
+The
+.Fn vm_page_wire_mapped
+function is for use by the
+.Xr pmap 9
+layer following a lookup.
+This function may fail if mappings of the page are concurrently
+being destroyed, in which case it will return false.
.Pp
The
.Fn vm_page_unwire
-function releases one of the wirings on the page.
-When
-.Va write_count
-reaches zero the page is placed back onto either the active queue
-(if
-.Fa activate
-is non-zero) or onto the inactive queue (if
-.Fa activate
-is zero).
-If the page is unmanaged
-.Dv ( PG_UNMANAGED
-is set) then the page is left on
-.Dv PQ_NONE .
+and
+.Fn vm_page_unwire_noq
+functions release a wiring of a page.
+The
+.Fn vm_page_unwire
+function takes a queue index and will insert the page into the
+corresponding page queue upon releasing its last wiring.
+If the page does not belong to an object and no other references
+to the page exist,
+.Fn vm_page_unwire
+will free the page.
+.Fn vm_page_unwire_noq
+releases the wiring and returns true if it was the last wiring
+of the page.
.Sh AUTHORS
This manual page was written by
.An Chad David Aq Mt davidc@acns.ab.ca .
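
For readers skimming the mdoc source above, the documented interface reduces to the following usage sketch (illustrative only, not part of the patch; example_lookup() is a hypothetical pmap helper): a lookup wires the page with vm_page_wire_mapped(), and the holder later releases the wiring with vm_page_unwire(), which requeues or frees the page as appropriate.

/* Illustrative sketch, not part of the patch. */
vm_page_t
example_extract_and_hold(pmap_t pmap, vm_offset_t va)
{
        vm_page_t m;

        PMAP_LOCK(pmap);
        m = example_lookup(pmap, va);   /* hypothetical lookup helper */
        if (m != NULL && !vm_page_wire_mapped(m))
                m = NULL;               /* mappings are concurrently being destroyed */
        PMAP_UNLOCK(pmap);
        return (m);
}

void
example_release(vm_page_t m)
{

        vm_page_lock(m);
        vm_page_unwire(m, PQ_ACTIVE);   /* frees the page if this was its last reference */
        vm_page_unlock(m);
}
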
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -2931,31 +2931,23 @@
m = NULL;
PG_RW = pmap_rw_bit(pmap);
PG_V = pmap_valid_bit(pmap);
+
PMAP_LOCK(pmap);
-retry:
pdep = pmap_pde(pmap, va);
if (pdep != NULL && (pde = *pdep)) {
if (pde & PG_PS) {
- if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- if (vm_page_pa_tryrelock(pmap, (pde &
- PG_PS_FRAME) | (va & PDRMASK), &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pa);
- }
+ if ((pde & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0)
+ m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
+ (va & PDRMASK));
} else {
pte = *pmap_pde_to_pte(pdep, va);
- if ((pte & PG_V) &&
- ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
- &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pa);
- }
+ if ((pte & PG_V) != 0 &&
+ ((pte & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0))
+ m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
}
- if (m != NULL)
- vm_page_wire(m);
+ if (m != NULL && !vm_page_wire_mapped(m))
+ m = NULL;
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/amd64/sgx/sgx.c
===================================================================
--- sys/amd64/sgx/sgx.c
+++ sys/amd64/sgx/sgx.c
@@ -358,7 +358,7 @@
uint64_t offs;
vm_page_lock(p);
- vm_page_remove(p);
+ (void)vm_page_remove(p);
vm_page_unlock(p);
dprintf("%s: p->pidx %ld\n", __func__, p->pindex);
Index: sys/arm/arm/pmap-v4.c
===================================================================
--- sys/arm/arm/pmap-v4.c
+++ sys/arm/arm/pmap-v4.c
@@ -3415,14 +3415,14 @@
struct l2_dtable *l2;
pd_entry_t l1pd;
pt_entry_t *ptep, pte;
- vm_paddr_t pa, paddr;
- vm_page_t m = NULL;
+ vm_paddr_t pa;
+ vm_page_t m;
u_int l1idx;
+
l1idx = L1_IDX(va);
- paddr = 0;
+ m = NULL;
PMAP_LOCK(pmap);
-retry:
l1pd = pmap->pm_l1->l1_kva[l1idx];
if (l1pte_section_p(l1pd)) {
/*
@@ -3434,11 +3434,10 @@
pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET);
else
pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
- if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr))
- goto retry;
if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) {
m = PHYS_TO_VM_PAGE(pa);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
} else {
/*
@@ -3466,15 +3465,12 @@
pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET);
else
pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
- if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr))
- goto retry;
m = PHYS_TO_VM_PAGE(pa);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
-
PMAP_UNLOCK(pmap);
- PA_UNLOCK_COND(paddr);
return (m);
}
Index: sys/arm/arm/pmap-v6.c
===================================================================
--- sys/arm/arm/pmap-v6.c
+++ sys/arm/arm/pmap-v6.c
@@ -1986,23 +1986,20 @@
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
- vm_paddr_t pa, lockpa;
+ vm_paddr_t pa;
pt1_entry_t pte1;
pt2_entry_t pte2, *pte2p;
vm_page_t m;
- lockpa = 0;
m = NULL;
PMAP_LOCK(pmap);
-retry:
pte1 = pte1_load(pmap_pte1(pmap, va));
if (pte1_is_section(pte1)) {
if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) {
pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
- if (vm_page_pa_tryrelock(pmap, pa, &lockpa))
- goto retry;
m = PHYS_TO_VM_PAGE(pa);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
} else if (pte1_is_link(pte1)) {
pte2p = pmap_pte2(pmap, va);
@@ -2011,13 +2008,11 @@
if (pte2_is_valid(pte2) &&
(!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) {
pa = pte2_pa(pte2);
- if (vm_page_pa_tryrelock(pmap, pa, &lockpa))
- goto retry;
m = PHYS_TO_VM_PAGE(pa);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
- PA_UNLOCK_COND(lockpa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -1064,14 +1064,11 @@
{
pt_entry_t *pte, tpte;
vm_offset_t off;
- vm_paddr_t pa;
vm_page_t m;
int lvl;
- pa = 0;
m = NULL;
PMAP_LOCK(pmap);
-retry:
pte = pmap_pte(pmap, va, &lvl);
if (pte != NULL) {
tpte = pmap_load(pte);
@@ -1096,14 +1093,11 @@
default:
off = 0;
}
- if (vm_page_pa_tryrelock(pmap,
- (tpte & ~ATTR_MASK) | off, &pa))
- goto retry;
m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -481,9 +481,7 @@
}
ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
- vm_page_lock(pp);
vm_page_wire(pp);
- vm_page_unlock(pp);
} else
pp = NULL;
break;
Index: sys/compat/linuxkpi/common/include/linux/mm.h
===================================================================
--- sys/compat/linuxkpi/common/include/linux/mm.h
+++ sys/compat/linuxkpi/common/include/linux/mm.h
@@ -227,9 +227,7 @@
static inline void
get_page(struct vm_page *page)
{
- vm_page_lock(page);
vm_page_wire(page);
- vm_page_unlock(page);
}
extern long
@@ -251,8 +249,7 @@
put_page(struct vm_page *page)
{
vm_page_lock(page);
- if (vm_page_unwire(page, PQ_ACTIVE) && page->object == NULL)
- vm_page_free(page);
+ vm_page_unwire(page, PQ_ACTIVE);
vm_page_unlock(page);
}
Index: sys/compat/linuxkpi/common/src/linux_page.c
===================================================================
--- sys/compat/linuxkpi/common/src/linux_page.c
+++ sys/compat/linuxkpi/common/src/linux_page.c
@@ -158,10 +158,8 @@
for (x = 0; x != npages; x++) {
vm_page_t pgo = page + x;
- vm_page_lock(pgo);
if (vm_page_unwire_noq(pgo))
vm_page_free(pgo);
- vm_page_unlock(pgo);
}
} else {
vm_offset_t vaddr;
Index: sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
===================================================================
--- sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
+++ sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
@@ -378,8 +378,7 @@
pagelist_page_free(vm_page_t pp)
{
vm_page_lock(pp);
- if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL)
- vm_page_free(pp);
+ vm_page_unwire(pp, PQ_INACTIVE);
vm_page_unlock(pp);
}
Index: sys/i386/i386/pmap.c
===================================================================
--- sys/i386/i386/pmap.c
+++ sys/i386/i386/pmap.c
@@ -1685,35 +1685,24 @@
pd_entry_t pde;
pt_entry_t pte;
vm_page_t m;
- vm_paddr_t pa;
- pa = 0;
m = NULL;
PMAP_LOCK(pmap);
-retry:
pde = *pmap_pde(pmap, va);
if (pde != 0) {
if (pde & PG_PS) {
- if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- if (vm_page_pa_tryrelock(pmap, (pde &
- PG_PS_FRAME) | (va & PDRMASK), &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pa);
- }
+ if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0)
+ m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
+ (va & PDRMASK));
} else {
pte = pmap_pte_ufast(pmap, va, pde);
if (pte != 0 &&
- ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
- &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pa);
- }
+ ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0))
+ m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
}
- if (m != NULL)
- vm_page_wire(m);
+ if (m != NULL && !vm_page_wire_mapped(m))
+ m = NULL;
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/kern/kern_exec.c
===================================================================
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -976,9 +976,7 @@
if (ma[0]->valid != VM_PAGE_BITS_ALL) {
vm_page_xbusy(ma[0]);
if (!vm_pager_has_page(object, 0, NULL, &after)) {
- vm_page_lock(ma[0]);
vm_page_free(ma[0]);
- vm_page_unlock(ma[0]);
VM_OBJECT_WUNLOCK(object);
return (EIO);
}
@@ -1002,11 +1000,8 @@
initial_pagein = i;
rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL);
if (rv != VM_PAGER_OK) {
- for (i = 0; i < initial_pagein; i++) {
- vm_page_lock(ma[i]);
+ for (i = 0; i < initial_pagein; i++)
vm_page_free(ma[i]);
- vm_page_unlock(ma[i]);
- }
VM_OBJECT_WUNLOCK(object);
return (EIO);
}
@@ -1014,9 +1009,7 @@
for (i = 1; i < initial_pagein; i++)
vm_page_readahead_finish(ma[i]);
}
- vm_page_lock(ma[0]);
vm_page_wire(ma[0]);
- vm_page_unlock(ma[0]);
VM_OBJECT_WUNLOCK(object);
imgp->firstpage = sf_buf_alloc(ma[0], 0);
Index: sys/kern/kern_sendfile.c
===================================================================
--- sys/kern/kern_sendfile.c
+++ sys/kern/kern_sendfile.c
@@ -119,76 +119,20 @@
SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
-/*
- * Detach mapped page and release resources back to the system. Called
- * by mbuf(9) code when last reference to a page is freed.
- */
-static void
-sendfile_free_page(vm_page_t pg, bool nocache)
-{
- bool freed;
-
- vm_page_lock(pg);
- /*
- * In either case check for the object going away on us. This can
- * happen since we don't hold a reference to it. If so, we're
- * responsible for freeing the page. In 'noncache' case try to free
- * the page, but only if it is cheap to.
- */
- if (vm_page_unwire_noq(pg)) {
- vm_object_t obj;
-
- if ((obj = pg->object) == NULL)
- vm_page_free(pg);
- else {
- freed = false;
- if (nocache && !vm_page_xbusied(pg) &&
- VM_OBJECT_TRYWLOCK(obj)) {
- /* Only free unmapped pages. */
- if (obj->ref_count == 0 ||
- !pmap_page_is_mapped(pg))
- /*
- * The busy test before the object is
- * locked cannot be relied upon.
- */
- freed = vm_page_try_to_free(pg);
- VM_OBJECT_WUNLOCK(obj);
- }
- if (!freed) {
- /*
- * If we were asked to not cache the page, place
- * it near the head of the inactive queue so
- * that it is reclaimed sooner. Otherwise,
- * maintain LRU.
- */
- if (nocache)
- vm_page_deactivate_noreuse(pg);
- else if (vm_page_active(pg))
- vm_page_reference(pg);
- else
- vm_page_deactivate(pg);
- }
- }
- }
- vm_page_unlock(pg);
-}
-
static void
sendfile_free_mext(struct mbuf *m)
{
struct sf_buf *sf;
vm_page_t pg;
- bool nocache;
KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_SFBUF,
("%s: m %p !M_EXT or !EXT_SFBUF", __func__, m));
sf = m->m_ext.ext_arg1;
pg = sf_buf_page(sf);
- nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE;
sf_buf_free(sf);
- sendfile_free_page(pg, nocache);
+ vm_page_release(pg, (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0);
if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
struct sendfile_sync *sfs = m->m_ext.ext_arg2;
Index: sys/kern/sys_process.c
===================================================================
--- sys/kern/sys_process.c
+++ sys/kern/sys_process.c
@@ -307,8 +307,7 @@
* Release the page.
*/
vm_page_lock(m);
- if (vm_page_unwire(m, PQ_ACTIVE) && m->object == NULL)
- vm_page_free(m);
+ vm_page_unwire(m, PQ_ACTIVE);
vm_page_unlock(m);
} while (error == 0 && uio->uio_resid > 0);
Index: sys/kern/uipc_shm.c
===================================================================
--- sys/kern/uipc_shm.c
+++ sys/kern/uipc_shm.c
@@ -196,9 +196,7 @@
printf(
"uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n",
obj, idx, m->valid, rv);
- vm_page_lock(m);
vm_page_free(m);
- vm_page_unlock(m);
VM_OBJECT_WUNLOCK(obj);
return (EIO);
}
@@ -206,9 +204,7 @@
vm_page_zero_invalid(m, TRUE);
vm_page_xunbusy(m);
}
- vm_page_lock(m);
vm_page_wire(m);
- vm_page_unlock(m);
VM_OBJECT_WUNLOCK(obj);
error = uiomove_fromphys(&m, offset, tlen, uio);
if (uio->uio_rw == UIO_WRITE && error == 0) {
Index: sys/kern/vfs_bio.c
===================================================================
--- sys/kern/vfs_bio.c
+++ sys/kern/vfs_bio.c
@@ -2894,47 +2894,6 @@
}
}
-/*
- * Unwire a page held by a buf and either free it or update the page queues to
- * reflect its recent use.
- */
-static void
-vfs_vmio_unwire(struct buf *bp, vm_page_t m)
-{
- bool freed;
-
- vm_page_lock(m);
- if (vm_page_unwire_noq(m)) {
- if ((bp->b_flags & B_DIRECT) != 0)
- freed = vm_page_try_to_free(m);
- else
- freed = false;
- if (!freed) {
- /*
- * Use a racy check of the valid bits to determine
- * whether we can accelerate reclamation of the page.
- * The valid bits will be stable unless the page is
- * being mapped or is referenced by multiple buffers,
- * and in those cases we expect races to be rare. At
- * worst we will either accelerate reclamation of a
- * valid page and violate LRU, or unnecessarily defer
- * reclamation of an invalid page.
- *
- * The B_NOREUSE flag marks data that is not expected to
- * be reused, so accelerate reclamation in that case
- * too. Otherwise, maintain LRU.
- */
- if (m->valid == 0 || (bp->b_flags & B_NOREUSE) != 0)
- vm_page_deactivate_noreuse(m);
- else if (vm_page_active(m))
- vm_page_reference(m);
- else
- vm_page_deactivate(m);
- }
- }
- vm_page_unlock(m);
-}
-
/*
* Perform page invalidation when a buffer is released. The fully invalid
* pages will be reclaimed later in vfs_vmio_truncate().
@@ -2984,7 +2943,8 @@
}
if (pmap_page_wired_mappings(m) == 0)
vm_page_set_invalid(m, poffset, presid);
- vfs_vmio_unwire(bp, m);
+ vm_page_release_locked(m,
+ (bp->b_flags & (B_NOREUSE | B_DIRECT)) != 0);
resid -= presid;
poffset = 0;
}
@@ -3022,7 +2982,10 @@
m = bp->b_pages[i];
KASSERT(m != bogus_page, ("allocbuf: bogus page found"));
bp->b_pages[i] = NULL;
- vfs_vmio_unwire(bp, m);
+ if (obj != NULL)
+ vm_page_release_locked(m, true);
+ else
+ vm_page_release(m, (bp->b_flags & B_NOREUSE) != 0);
}
if (obj != NULL)
VM_OBJECT_WUNLOCK(obj);
Index: sys/mips/mips/pmap.c
===================================================================
--- sys/mips/mips/pmap.c
+++ sys/mips/mips/pmap.c
@@ -795,26 +795,22 @@
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
pt_entry_t pte, *ptep;
- vm_paddr_t pa, pte_pa;
+ vm_paddr_t pa;
vm_page_t m;
m = NULL;
- pa = 0;
PMAP_LOCK(pmap);
-retry:
ptep = pmap_pte(pmap, va);
if (ptep != NULL) {
pte = *ptep;
if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) ||
(prot & VM_PROT_WRITE) == 0)) {
- pte_pa = TLBLO_PTE_TO_PA(pte);
- if (vm_page_pa_tryrelock(pmap, pte_pa, &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pte_pa);
- vm_page_wire(m);
+ pa = TLBLO_PTE_TO_PA(pte);
+ m = PHYS_TO_VM_PAGE(pa);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/net/bpf_zerocopy.c
===================================================================
--- sys/net/bpf_zerocopy.c
+++ sys/net/bpf_zerocopy.c
@@ -116,8 +116,7 @@
{
vm_page_lock(pp);
- if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL)
- vm_page_free(pp);
+ vm_page_unwire(pp, PQ_INACTIVE);
vm_page_unlock(pp);
}
Index: sys/powerpc/aim/mmu_oea.c
===================================================================
--- sys/powerpc/aim/mmu_oea.c
+++ sys/powerpc/aim/mmu_oea.c
@@ -1262,22 +1262,17 @@
{
struct pvo_entry *pvo;
vm_page_t m;
- vm_paddr_t pa;
m = NULL;
- pa = 0;
PMAP_LOCK(pmap);
-retry:
pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL);
if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) &&
((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW ||
(prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa))
- goto retry;
m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/powerpc/aim/mmu_oea64.c
===================================================================
--- sys/powerpc/aim/mmu_oea64.c
+++ sys/powerpc/aim/mmu_oea64.c
@@ -1575,21 +1575,15 @@
{
struct pvo_entry *pvo;
vm_page_t m;
- vm_paddr_t pa;
m = NULL;
- pa = 0;
PMAP_LOCK(pmap);
-retry:
pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) {
- if (vm_page_pa_tryrelock(pmap,
- pvo->pvo_pte.pa & LPTE_RPGN, &pa))
- goto retry;
m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/powerpc/booke/pmap.c
===================================================================
--- sys/powerpc/booke/pmap.c
+++ sys/powerpc/booke/pmap.c
@@ -2934,12 +2934,9 @@
pte_t *pte;
vm_page_t m;
uint32_t pte_wbit;
- vm_paddr_t pa;
-
+
m = NULL;
- pa = 0;
PMAP_LOCK(pmap);
-retry:
pte = pte_find(mmu, pmap, va);
if ((pte != NULL) && PTE_ISVALID(pte)) {
if (pmap == kernel_pmap)
@@ -2948,14 +2945,11 @@
pte_wbit = PTE_UW;
if ((*pte & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa))
- goto retry;
m = PHYS_TO_VM_PAGE(PTE_PA(pte));
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
-
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/riscv/riscv/pmap.c
===================================================================
--- sys/riscv/riscv/pmap.c
+++ sys/riscv/riscv/pmap.c
@@ -869,24 +869,19 @@
{
pt_entry_t *l3p, l3;
vm_paddr_t phys;
- vm_paddr_t pa;
vm_page_t m;
- pa = 0;
m = NULL;
PMAP_LOCK(pmap);
-retry:
l3p = pmap_l3(pmap, va);
if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
if ((l3 & PTE_W) != 0 || (prot & VM_PROT_WRITE) == 0) {
phys = PTE_TO_PHYS(l3);
- if (vm_page_pa_tryrelock(pmap, phys, &pa))
- goto retry;
m = PHYS_TO_VM_PAGE(phys);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/sparc64/sparc64/pmap.c
===================================================================
--- sys/sparc64/sparc64/pmap.c
+++ sys/sparc64/sparc64/pmap.c
@@ -847,19 +847,15 @@
{
struct tte *tp;
vm_page_t m;
- vm_paddr_t pa;
m = NULL;
- pa = 0;
PMAP_LOCK(pm);
-retry:
if (pm == kernel_pmap) {
if (va >= VM_MIN_DIRECT_ADDRESS) {
tp = NULL;
m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
- (void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va),
- &pa);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
} else {
tp = tsb_kvtotte(va);
if ((tp->tte_data & TD_V) == 0)
@@ -869,12 +865,10 @@
tp = tsb_tte_lookup(pm, va);
if (tp != NULL && ((tp->tte_data & TD_SW) ||
(prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa))
- goto retry;
m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pm);
return (m);
}
Index: sys/vm/device_pager.c
===================================================================
--- sys/vm/device_pager.c
+++ sys/vm/device_pager.c
@@ -235,9 +235,7 @@
if (object->type == OBJT_MGTDEVICE) {
KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m));
pmap_remove_all(m);
- vm_page_lock(m);
- vm_page_remove(m);
- vm_page_unlock(m);
+ (void)vm_page_remove(m);
} else if (object->type == OBJT_DEVICE)
dev_pager_free_page(object, m);
}
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -251,18 +251,6 @@
vm_pager_page_unswapped(m);
}
-static void
-vm_fault_fill_hold(vm_page_t *m_hold, vm_page_t m)
-{
-
- if (m_hold != NULL) {
- *m_hold = m;
- vm_page_lock(m);
- vm_page_wire(m);
- vm_page_unlock(m);
- }
-}
-
/*
* Unlocks fs.first_object and fs.map on success.
*/
@@ -323,7 +311,10 @@
PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), psind);
if (rv != KERN_SUCCESS)
return (rv);
- vm_fault_fill_hold(m_hold, m);
+ if (m_hold != NULL) {
+ *m_hold = m;
+ vm_page_wire(m);
+ }
vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false);
if (psind == 0 && !wired)
vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true);
@@ -499,11 +490,12 @@
VM_OBJECT_WLOCK(fs->first_object);
m_mtx = NULL;
for (i = 0; i < npages; i++) {
- vm_page_change_lock(&m[i], &m_mtx);
- if ((fault_flags & VM_FAULT_WIRE) != 0)
+ if ((fault_flags & VM_FAULT_WIRE) != 0) {
vm_page_wire(&m[i]);
- else
+ } else {
+ vm_page_change_lock(&m[i], &m_mtx);
vm_page_activate(&m[i]);
+ }
if (m_hold != NULL && m[i].pindex == fs->first_pindex) {
*m_hold = &m[i];
vm_page_wire(&m[i]);
@@ -1151,10 +1143,11 @@
* daemon, while it is disassociated from an
* object.
*/
+ vm_page_wire(fs.m);
+
mtx = NULL;
vm_page_change_lock(fs.m, &mtx);
- vm_page_wire(fs.m);
- vm_page_remove(fs.m);
+ (void)vm_page_remove(fs.m);
vm_page_change_lock(fs.first_m, &mtx);
vm_page_replace_checked(fs.m, fs.first_object,
fs.first_pindex, fs.first_m);
@@ -1187,10 +1180,8 @@
fs.first_m->valid = VM_PAGE_BITS_ALL;
if (wired && (fault_flags &
VM_FAULT_WIRE) == 0) {
- vm_page_lock(fs.first_m);
vm_page_wire(fs.first_m);
- vm_page_unlock(fs.first_m);
-
+
vm_page_lock(fs.m);
vm_page_unwire(fs.m, PQ_INACTIVE);
vm_page_unlock(fs.m);
@@ -1326,21 +1317,22 @@
faultcount > 0 ? behind : PFBAK,
faultcount > 0 ? ahead : PFFOR, false);
VM_OBJECT_WLOCK(fs.object);
- vm_page_lock(fs.m);
/*
* If the page is not wired down, then put it where the pageout daemon
* can find it.
*/
- if ((fault_flags & VM_FAULT_WIRE) != 0)
+ if ((fault_flags & VM_FAULT_WIRE) != 0) {
vm_page_wire(fs.m);
- else
+ } else {
+ vm_page_lock(fs.m);
vm_page_activate(fs.m);
+ vm_page_unlock(fs.m);
+ }
if (m_hold != NULL) {
*m_hold = fs.m;
vm_page_wire(fs.m);
}
- vm_page_unlock(fs.m);
vm_page_xunbusy(fs.m);
/*
@@ -1611,9 +1603,7 @@
for (mp = ma; mp < ma + count; mp++)
if (*mp != NULL) {
vm_page_lock(*mp);
- if (vm_page_unwire(*mp, PQ_INACTIVE) &&
- (*mp)->object == NULL)
- vm_page_free(*mp);
+ vm_page_unwire(*mp, PQ_INACTIVE);
vm_page_unlock(*mp);
}
return (-1);
@@ -1814,9 +1804,7 @@
vm_page_lock(src_m);
vm_page_unwire(src_m, PQ_INACTIVE);
vm_page_unlock(src_m);
- vm_page_lock(dst_m);
vm_page_wire(dst_m);
- vm_page_unlock(dst_m);
} else {
KASSERT(vm_page_wired(dst_m),
("dst_m %p is not wired", dst_m));
Index: sys/vm/vm_glue.c
===================================================================
--- sys/vm/vm_glue.c
+++ sys/vm/vm_glue.c
@@ -405,10 +405,8 @@
m = vm_page_lookup(ksobj, i);
if (m == NULL)
panic("vm_thread_dispose: kstack already missing?");
- vm_page_lock(m);
vm_page_unwire_noq(m);
vm_page_free(m);
- vm_page_unlock(m);
}
VM_OBJECT_WUNLOCK(ksobj);
vm_object_deallocate(ksobj);
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -699,12 +699,9 @@
vm_object_terminate_pages(vm_object_t object)
{
vm_page_t p, p_next;
- struct mtx *mtx;
VM_OBJECT_ASSERT_WLOCKED(object);
- mtx = NULL;
-
/*
* Free any remaining pageable pages. This also removes them from the
* paging queues. However, don't free wired pages, just remove them
@@ -713,20 +710,15 @@
*/
TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
vm_page_assert_unbusied(p);
- if ((object->flags & OBJ_UNMANAGED) == 0)
- /*
- * vm_page_free_prep() only needs the page
- * lock for managed pages.
- */
- vm_page_change_lock(p, &mtx);
+ KASSERT(p->object == object && p->ref_count > 0,
+ ("vm_object_terminate_pages: page %p is inconsistent", p));
+
p->object = NULL;
- if (vm_page_wired(p))
- continue;
- VM_CNT_INC(v_pfree);
- vm_page_free(p);
+ if (vm_page_drop(p, -VPRC_OBJREF) == VPRC_OBJREF) {
+ VM_CNT_INC(v_pfree);
+ vm_page_free(p);
+ }
}
- if (mtx != NULL)
- mtx_unlock(mtx);
/*
* If the object contained any pages, then reset it to an empty state.
@@ -1588,18 +1580,10 @@
swap_pager_freespace(backing_object, p->pindex,
1);
- /*
- * Page is out of the parent object's range, we can
- * simply destroy it.
- */
- vm_page_lock(p);
KASSERT(!pmap_page_is_mapped(p),
("freeing mapped page %p", p));
- if (!vm_page_wired(p))
+ if (vm_page_remove(p))
vm_page_free(p);
- else
- vm_page_remove(p);
- vm_page_unlock(p);
continue;
}
@@ -1636,14 +1620,10 @@
if (backing_object->type == OBJT_SWAP)
swap_pager_freespace(backing_object, p->pindex,
1);
- vm_page_lock(p);
KASSERT(!pmap_page_is_mapped(p),
("freeing mapped page %p", p));
- if (!vm_page_wired(p))
+ if (vm_page_remove(p))
vm_page_free(p);
- else
- vm_page_remove(p);
- vm_page_unlock(p);
continue;
}
@@ -1944,6 +1924,7 @@
VM_OBJECT_WLOCK(object);
goto again;
}
+wired:
if (vm_page_wired(p)) {
if ((options & OBJPR_NOTMAPPED) == 0 &&
object->ref_count != 0)
@@ -1964,14 +1945,17 @@
("vm_object_page_remove: page %p is fictitious", p));
if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) {
if ((options & OBJPR_NOTMAPPED) == 0 &&
- object->ref_count != 0)
- pmap_remove_write(p);
+ object->ref_count != 0 &&
+ !vm_page_try_remove_write(p))
+ goto wired;
if (p->dirty != 0)
continue;
}
- if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0)
- pmap_remove_all(p);
- vm_page_free(p);
+ if ((options & OBJPR_NOTMAPPED) == 0 &&
+ object->ref_count != 0 && !vm_page_try_remove_all(p))
+ goto wired;
+ if (vm_page_remove(p))
+ vm_page_free(p);
}
if (mtx != NULL)
mtx_unlock(mtx);
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -115,24 +115,19 @@
* the implementation of read-modify-write operations on the
* field is encapsulated in vm_page_clear_dirty_mask().
*
- * The page structure contains two counters which prevent page reuse.
- * Both counters are protected by the page lock (P). The hold
- * counter counts transient references obtained via a pmap lookup, and
- * is also used to prevent page reclamation in situations where it is
- * undesirable to block other accesses to the page. The wire counter
- * is used to implement mlock(2) and is non-zero for pages containing
- * kernel memory. Pages that are wired or held will not be reclaimed
- * or laundered by the page daemon, but are treated differently during
- * a page queue scan: held pages remain at their position in the queue,
- * while wired pages are removed from the queue and must later be
- * re-enqueued appropriately by the unwiring thread. It is legal to
- * call vm_page_free() on a held page; doing so causes it to be removed
- * from its object and page queue, and the page is released to the
- * allocator once the last hold reference is dropped. In contrast,
- * wired pages may not be freed.
- *
- * In some pmap implementations, the wire count of a page table page is
- * used to track the number of populated entries.
+ * The ref_count field tracks references to the page. References that
+ * prevent the page from being reclaimable are called wirings and are
+ * counted in the low bits of ref_count. Upper bits are reserved for
+ * special references that do not prevent reclamation of the page.
+ * Specifically, the containing object, if any, holds such a reference,
+ * and the page daemon takes a transient reference when it is scanning
+ * a page. Updates to ref_count are atomic unless the page is
+ * unallocated. To wire a page after it has been allocated, the object
+ * lock must be held, or the page must be busy, or the wiring thread
+ * must atomically take a reference and verify that the VPRC_BLOCKED
+ * bit is not set. No locks are required to unwire a page, but care
+ * must be taken to free the page if that wiring represented the last
+ * reference to the page.
*
* The busy lock is an embedded reader-writer lock which protects the
* page's contents and identity (i.e., its <object, pindex> tuple) and
@@ -155,7 +150,11 @@
* be held. It is invalid for a page's queue field to transition
* between two distinct page queue indices. That is, when updating
* the queue field, either the new value or the old value must be
- * PQ_NONE.
+ * PQ_NONE. There is one exception to this rule: the page daemon may
+ * transition the queue field from PQ_INACTIVE to PQ_NONE immediately
+ * prior to freeing a page during an inactive queue scan. At that
+ * point the page will have already been physically dequeued, and it
+ * is known that no other references to that vm_page structure exist.
*
* To avoid contention on page queue locks, page queue operations
* (enqueue, dequeue, requeue) are batched using per-CPU queues.
@@ -168,7 +167,9 @@
* may be freed before its pending batch queue entries have been
* processed. The page lock (P) must be held to schedule a batched
* queue operation, and the page queue lock must be held in order to
- * process batch queue entries for the page queue.
+ * process batch queue entries for the page queue. When the page is
+ * being freed, the thread freeing the page is permitted to schedule
+ * a dequeue of the page without the page lock held.
*/
#if PAGE_SIZE == 4096
@@ -198,11 +199,14 @@
} memguard;
} plinks;
TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */
- vm_object_t object; /* which object am I in (O,P) */
+ vm_object_t object; /* which object am I in (O) */
vm_pindex_t pindex; /* offset into object (O,P) */
vm_paddr_t phys_addr; /* physical address of page (C) */
struct md_page md; /* machine dependent stuff */
- u_int wire_count; /* wired down maps refs (P) */
+ union {
+ u_int wire_count;
+ u_int ref_count; /* page references */
+ };
volatile u_int busy_lock; /* busy owners lock */
uint16_t flags; /* page PG_* flags (P) */
uint8_t order; /* index of the buddy queue (F) */
@@ -219,6 +223,34 @@
vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */
};
+/*
+ * Special bits used in the ref_count field.
+ *
+ * ref_count is normally used to count wirings that prevent the page from being
+ * reclaimed, but also supports several special types of references that do not
+ * prevent reclamation. Accesses to the ref_count field must be atomic unless
+ * the page is unallocated.
+ *
+ * VPRC_PDREF is a transient reference acquired by the page daemon when
+ * scanning. Pages may be dequeued without the page lock held when they are
+ * being freed, and this reference ensures that the page daemon is not
+ * simultaneously manipulating the queue state of the page. The page lock must
+ * be held to set or clear this bit.
+ *
+ * VPRC_OBJREF is the reference held by the containing object. It can be set or
+ * cleared only when the corresponding object's write lock is held.
+ *
+ * VPRC_BLOCKED is used to atomically block wirings via pmap lookups while
+ * attempting to tear down all mappings of a given page. The page lock and
+ * object write lock must both be held in order to set or clear this bit.
+ */
+#define VPRC_BLOCKED 0x20000000u /* mappings are being removed */
+#define VPRC_OBJREF 0x40000000u /* object reference, cleared with (O) */
+#define VPRC_PDREF 0x80000000u /* page daemon reference for scanning */
+#define _VPRC_REFMASK (VPRC_BLOCKED | VPRC_OBJREF | VPRC_PDREF)
+#define VPRC_WIRE_COUNT(c) ((c) & ~_VPRC_REFMASK)
+#define VPRC_WIRE_COUNT_MAX (~_VPRC_REFMASK)
+
/*
* Page flags stored in oflags:
*
@@ -557,8 +589,10 @@
bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
void vm_page_reference(vm_page_t m);
-void vm_page_remove (vm_page_t);
-int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
+void vm_page_release(vm_page_t m, bool nocache);
+void vm_page_release_locked(vm_page_t m, bool nocache);
+bool vm_page_remove(vm_page_t);
+int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t);
vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object,
vm_pindex_t pindex);
void vm_page_requeue(vm_page_t m);
@@ -569,14 +603,16 @@
int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
void vm_page_sunbusy(vm_page_t m);
-bool vm_page_try_to_free(vm_page_t m);
+bool vm_page_try_remove_all(vm_page_t m);
+bool vm_page_try_remove_write(vm_page_t m);
int vm_page_trysbusy(vm_page_t m);
void vm_page_unhold_pages(vm_page_t *ma, int count);
void vm_page_unswappable(vm_page_t m);
-bool vm_page_unwire(vm_page_t m, uint8_t queue);
+void vm_page_unwire(vm_page_t m, uint8_t queue);
bool vm_page_unwire_noq(vm_page_t m);
void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
-void vm_page_wire (vm_page_t);
+void vm_page_wire(vm_page_t);
+bool vm_page_wire_mapped(vm_page_t m);
void vm_page_xunbusy_hard(vm_page_t m);
void vm_page_xunbusy_maybelocked(vm_page_t m);
void vm_page_set_validclean (vm_page_t, int, int);
@@ -806,6 +842,23 @@
return (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE);
}
+/*
+ * vm_page_drop:
+ *
+ * Release a reference to a page and return the old reference count.
+ */
+static inline u_int
+vm_page_drop(vm_page_t m, u_int val)
+{
+
+ /*
+ * Synchronize with vm_page_free_prep(): ensure that all updates to the
+ * page structure are visible before it is freed.
+ */
+ atomic_thread_fence_rel();
+ return (atomic_fetchadd_int(&m->ref_count, val));
+}
+
/*
* vm_page_wired:
*
@@ -815,7 +868,7 @@
vm_page_wired(vm_page_t m)
{
- return (m->wire_count > 0);
+ return (VPRC_WIRE_COUNT(m->ref_count) > 0);
}
#endif /* _KERNEL */
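
As a rough illustration of the encoding described in the comment above (a sketch, not code from the patch): a managed page resident in an object with two wirings has ref_count == (VPRC_OBJREF | 2), VPRC_WIRE_COUNT() extracts the wiring count of 2, and vm_page_drop() releases a reference while returning the old value so the caller can detect the final-reference transition.

/* Illustrative only; mirrors the tail of the new vm_page_unwire(). */
static void
example_unwire_and_free(vm_page_t m)
{
        u_int old;

        old = vm_page_drop(m, -1);      /* atomically drop one wiring */
        if (VPRC_WIRE_COUNT(old) == 1) {
                vm_wire_sub(1);         /* the last wiring was just released */
                if (old == 1)           /* no object or page daemon references remain */
                        vm_page_free(m);
        }
}
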
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -161,16 +161,17 @@
static void vm_page_enqueue(vm_page_t m, uint8_t queue);
static void vm_page_init(void *dummy);
static int vm_page_insert_after(vm_page_t m, vm_object_t object,
- vm_pindex_t pindex, vm_page_t mpred);
+ vm_pindex_t pindex, vm_page_t mpred, const bool alloc);
static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
vm_page_t mpred);
+static void vm_page_mvqueue(vm_page_t m, int queue);
static int vm_page_reclaim_run(int req_class, int domain, u_long npages,
vm_page_t m_run, vm_paddr_t high);
static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object,
int req);
-static int vm_page_import(void *arg, void **store, int cnt, int domain,
+static int vm_page_zone_import(void *arg, void **store, int cnt, int domain,
int flags);
-static void vm_page_release(void *arg, void **store, int cnt);
+static void vm_page_zone_release(void *arg, void **store, int cnt);
SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL);
@@ -204,7 +205,7 @@
continue;
vmd->vmd_pgcache = uma_zcache_create("vm pgcache",
sizeof(struct vm_page), NULL, NULL, NULL, NULL,
- vm_page_import, vm_page_release, vmd,
+ vm_page_zone_import, vm_page_zone_release, vmd,
UMA_ZONE_MAXBUCKET | UMA_ZONE_VM);
(void )uma_zone_set_maxcache(vmd->vmd_pgcache, 0);
}
@@ -502,7 +503,7 @@
{
m->object = NULL;
- m->wire_count = 0;
+ m->ref_count = 0;
m->busy_lock = VPB_UNBUSIED;
m->flags = m->aflags = 0;
m->phys_addr = pa;
@@ -1098,8 +1099,7 @@
mtx = NULL;
for (; count != 0; count--) {
vm_page_change_lock(*ma, &mtx);
- if (vm_page_unwire(*ma, PQ_ACTIVE) && (*ma)->object == NULL)
- vm_page_free(*ma);
+ vm_page_unwire(*ma, PQ_ACTIVE);
ma++;
}
if (mtx != NULL)
@@ -1166,7 +1166,8 @@
/* Fictitious pages don't use "order" or "pool". */
m->oflags = VPO_UNMANAGED;
m->busy_lock = VPB_SINGLE_EXCLUSIVER;
- m->wire_count = 1;
+ /* Fictitious pages are unevictable. */
+ m->ref_count = 1;
pmap_page_init(m);
memattr:
pmap_page_set_memattr(m, memattr);
@@ -1326,7 +1327,7 @@
VM_OBJECT_ASSERT_WLOCKED(object);
mpred = vm_radix_lookup_le(&object->rtree, pindex);
- return (vm_page_insert_after(m, object, pindex, mpred));
+ return (vm_page_insert_after(m, object, pindex, mpred, false));
}
/*
@@ -1337,11 +1338,14 @@
* The page "mpred" must immediately precede the offset "pindex" within
* the specified object.
*
+ * "alloc" should be true if the page is being allocated and false
+ * otherwise.
+ *
* The object must be locked.
*/
static int
vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
- vm_page_t mpred)
+ vm_page_t mpred, const bool alloc)
{
vm_page_t msucc;
@@ -1361,10 +1365,14 @@
("vm_page_insert_after: msucc doesn't succeed pindex"));
/*
- * Record the object/offset pair in this page
+ * Record the object/offset pair in this page.
*/
m->object = object;
m->pindex = pindex;
+ if (alloc)
+ m->ref_count |= VPRC_OBJREF;
+ else
+ atomic_set_int(&m->ref_count, VPRC_OBJREF);
/*
* Now link into the object's ordered list of backed pages.
@@ -1372,6 +1380,10 @@
if (vm_radix_insert(&object->rtree, m)) {
m->object = NULL;
m->pindex = 0;
+ if (alloc)
+ m->ref_count &= ~VPRC_OBJREF;
+ else
+ atomic_clear_int(&m->ref_count, VPRC_OBJREF);
return (1);
}
vm_page_insert_radixdone(m, object, mpred);
@@ -1396,11 +1408,13 @@
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(object != NULL && m->object == object,
("vm_page_insert_radixdone: page %p has inconsistent object", m));
+ KASSERT((m->ref_count & VPRC_OBJREF) != 0,
+ ("vm_page_insert_radixdone: page %p is missing object ref", m));
if (mpred != NULL) {
KASSERT(mpred->object == object,
- ("vm_page_insert_after: object doesn't contain mpred"));
+ ("vm_page_insert_radixdone: object doesn't contain mpred"));
KASSERT(mpred->pindex < m->pindex,
- ("vm_page_insert_after: mpred doesn't precede pindex"));
+ ("vm_page_insert_radixdone: mpred doesn't precede pindex"));
}
if (mpred != NULL)
@@ -1431,21 +1445,21 @@
* vm_page_remove:
*
* Removes the specified page from its containing object, but does not
- * invalidate any backing storage.
+ * invalidate any backing storage. Returns true if the object's reference
+ * was the last reference to the page, and false otherwise.
*
- * The object must be locked. The page must be locked if it is managed.
+ * The object must be locked.
*/
-void
+bool
vm_page_remove(vm_page_t m)
{
vm_object_t object;
vm_page_t mrem;
- if ((m->oflags & VPO_UNMANAGED) == 0)
- vm_page_assert_locked(m);
- if ((object = m->object) == NULL)
- return;
+ object = m->object;
VM_OBJECT_ASSERT_WLOCKED(object);
+ KASSERT((m->ref_count & VPRC_OBJREF) != 0,
+ ("page %p is missing its object ref", m));
if (vm_page_xbusied(m))
vm_page_xunbusy_maybelocked(m);
mrem = vm_radix_remove(&object->rtree, m->pindex);
@@ -1467,7 +1481,12 @@
if (object->resident_page_count == 0 && object->type == OBJT_VNODE)
vdrop(object->handle);
+ /*
+ * Release the object reference. The caller may free the page
+ * after this point.
+ */
m->object = NULL;
+ return (vm_page_drop(m, -VPRC_OBJREF) == VPRC_OBJREF);
}
/*
@@ -1548,8 +1567,6 @@
/*
* Uses the page mnew as a replacement for an existing page at index
* pindex which must be already present in the object.
- *
- * The existing page must not be on a paging queue.
*/
vm_page_t
vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex)
@@ -1559,8 +1576,6 @@
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(mnew->object == NULL,
("vm_page_replace: page %p already in object", mnew));
- KASSERT(mnew->queue == PQ_NONE || vm_page_wired(mnew),
- ("vm_page_replace: new page %p is on a paging queue", mnew));
/*
* This function mostly follows vm_page_insert() and
@@ -1570,6 +1585,7 @@
mnew->object = object;
mnew->pindex = pindex;
+ atomic_set_int(&mnew->ref_count, VPRC_OBJREF);
mold = vm_radix_replace(&object->rtree, mnew);
KASSERT(mold->queue == PQ_NONE,
("vm_page_replace: old page %p is on a paging queue", mold));
@@ -1579,6 +1595,7 @@
TAILQ_REMOVE(&object->memq, mold, listq);
mold->object = NULL;
+ atomic_clear_int(&mold->ref_count, VPRC_OBJREF);
vm_page_xunbusy_maybelocked(mold);
/*
@@ -1616,6 +1633,7 @@
VM_OBJECT_ASSERT_WLOCKED(new_object);
+ KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m));
mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex);
KASSERT(mpred == NULL || mpred->pindex != new_pindex,
("vm_page_rename: pindex already renamed"));
@@ -1638,11 +1656,13 @@
*/
m->pindex = opidx;
vm_page_lock(m);
- vm_page_remove(m);
+ (void)vm_page_remove(m);
/* Return back to the new pindex to complete vm_page_insert(). */
m->pindex = new_pindex;
m->object = new_object;
+ atomic_set_int(&m->ref_count, VPRC_OBJREF);
+
vm_page_unlock(m);
vm_page_insert_radixdone(m, new_object, mpred);
vm_page_dirty(m);
@@ -1861,15 +1881,15 @@
* page is inserted into the object.
*/
vm_wire_add(1);
- m->wire_count = 1;
+ m->ref_count = 1;
}
m->act_count = 0;
if (object != NULL) {
- if (vm_page_insert_after(m, object, pindex, mpred)) {
+ if (vm_page_insert_after(m, object, pindex, mpred, true)) {
if (req & VM_ALLOC_WIRED) {
vm_wire_sub(1);
- m->wire_count = 0;
+ m->ref_count = 0;
}
KASSERT(m->object == NULL, ("page %p has object", m));
m->oflags = VPO_UNMANAGED;
@@ -2061,11 +2081,12 @@
m->flags = (m->flags | PG_NODUMP) & flags;
m->busy_lock = busy_lock;
if ((req & VM_ALLOC_WIRED) != 0)
- m->wire_count = 1;
+ m->ref_count = 1;
m->act_count = 0;
m->oflags = oflags;
if (object != NULL) {
- if (vm_page_insert_after(m, object, pindex, mpred)) {
+ if (vm_page_insert_after(m, object, pindex, mpred,
+ true)) {
if ((req & VM_ALLOC_WIRED) != 0)
vm_wire_sub(npages);
KASSERT(m->object == NULL,
@@ -2074,7 +2095,7 @@
for (m = m_ret; m < &m_ret[npages]; m++) {
if (m <= mpred &&
(req & VM_ALLOC_WIRED) != 0)
- m->wire_count = 0;
+ m->ref_count = 0;
m->oflags = VPO_UNMANAGED;
m->busy_lock = VPB_UNBUSIED;
/* Don't change PG_ZERO. */
@@ -2108,7 +2129,7 @@
KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0,
("page %p has unexpected queue %d, flags %#x",
m, m->queue, (m->aflags & PGA_QUEUE_STATE_MASK)));
- KASSERT(!vm_page_wired(m), ("page %p is wired", m));
+ KASSERT(m->ref_count == 0, ("page %p has references", m));
KASSERT(!vm_page_busied(m), ("page %p is busy", m));
KASSERT(m->dirty == 0, ("page %p is dirty", m));
KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
@@ -2192,7 +2213,7 @@
* not belong to an object.
*/
vm_wire_add(1);
- m->wire_count = 1;
+ m->ref_count = 1;
}
/* Unmanaged pages don't use "act_count". */
m->oflags = VPO_UNMANAGED;
@@ -2200,7 +2221,7 @@
}
static int
-vm_page_import(void *arg, void **store, int cnt, int domain, int flags)
+vm_page_zone_import(void *arg, void **store, int cnt, int domain, int flags)
{
struct vm_domain *vmd;
int i;
@@ -2221,7 +2242,7 @@
}
static void
-vm_page_release(void *arg, void **store, int cnt)
+vm_page_zone_release(void *arg, void **store, int cnt)
{
struct vm_domain *vmd;
vm_page_t m;
@@ -2281,8 +2302,8 @@
for (m = m_start; m < m_end && run_len < npages; m += m_inc) {
KASSERT((m->flags & PG_MARKER) == 0,
("page %p is PG_MARKER", m));
- KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->wire_count == 1,
- ("fictitious page %p has invalid wire count", m));
+ KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1,
+ ("fictitious page %p has invalid ref count", m));
/*
* If the current page would be the start of a run, check its
@@ -2339,9 +2360,6 @@
*/
VM_OBJECT_RUNLOCK(object);
goto retry;
- } else if (vm_page_wired(m)) {
- run_ext = 0;
- goto unlock;
}
}
/* Don't care: PG_NODUMP, PG_ZERO. */
@@ -2359,7 +2377,8 @@
vm_reserv_size(level)) - pa);
#endif
} else if (object->memattr == VM_MEMATTR_DEFAULT &&
- vm_page_queue(m) != PQ_NONE && !vm_page_busied(m)) {
+ vm_page_queue(m) != PQ_NONE && !vm_page_busied(m) &&
+ !vm_page_wired(m)) {
/*
* The page is allocated but eligible for
* relocation. Extend the current run by one
@@ -2375,7 +2394,6 @@
run_ext = 1;
} else
run_ext = 0;
-unlock:
VM_OBJECT_RUNLOCK(object);
#if VM_NRESERVLEVEL > 0
} else if (level >= 0) {
@@ -2496,9 +2514,6 @@
*/
VM_OBJECT_WUNLOCK(object);
goto retry;
- } else if (vm_page_wired(m)) {
- error = EBUSY;
- goto unlock;
}
}
/* Don't care: PG_NODUMP, PG_ZERO. */
@@ -2509,7 +2524,7 @@
else if (object->memattr != VM_MEMATTR_DEFAULT)
error = EINVAL;
else if (vm_page_queue(m) != PQ_NONE &&
- !vm_page_busied(m)) {
+ !vm_page_busied(m) && !vm_page_wired(m)) {
KASSERT(pmap_page_get_memattr(m) ==
VM_MEMATTR_DEFAULT,
("page %p has an unexpected memattr", m));
@@ -2558,8 +2573,6 @@
error = ENOMEM;
goto unlock;
}
- KASSERT(!vm_page_wired(m_new),
- ("page %p is wired", m_new));
/*
* Replace "m" with the new page. For
@@ -2567,8 +2580,11 @@
* and dequeued. Finally, change "m"
* as if vm_page_free() was called.
*/
- if (object->ref_count != 0)
- pmap_remove_all(m);
+ if (object->ref_count != 0 &&
+ !vm_page_try_remove_all(m)) {
+ error = EBUSY;
+ goto unlock;
+ }
m_new->aflags = m->aflags &
~PGA_QUEUE_STATE_MASK;
KASSERT(m_new->oflags == VPO_UNMANAGED,
@@ -3128,8 +3144,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("page %p is unmanaged", m));
- KASSERT(mtx_owned(vm_page_lockptr(m)) ||
- (m->object == NULL && (m->aflags & PGA_DEQUEUE) != 0),
+ KASSERT(mtx_owned(vm_page_lockptr(m)) || m->object == NULL,
("missing synchronization for page %p", m));
KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue));
@@ -3258,7 +3273,7 @@
{
uint8_t queue;
- KASSERT(m->object == NULL, ("page %p has an object reference", m));
+ KASSERT(m->ref_count == 0, ("page %p has references", m));
if ((m->aflags & PGA_DEQUEUE) != 0)
return;
@@ -3369,35 +3384,6 @@
vm_pqbatch_submit_page(m, atomic_load_8(&m->queue));
}
-/*
- * vm_page_activate:
- *
- * Put the specified page on the active list (if appropriate).
- * Ensure that act_count is at least ACT_INIT but do not otherwise
- * mess with it.
- *
- * The page must be locked.
- */
-void
-vm_page_activate(vm_page_t m)
-{
-
- vm_page_assert_locked(m);
-
- if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0)
- return;
- if (vm_page_queue(m) == PQ_ACTIVE) {
- if (m->act_count < ACT_INIT)
- m->act_count = ACT_INIT;
- return;
- }
-
- vm_page_dequeue(m);
- if (m->act_count < ACT_INIT)
- m->act_count = ACT_INIT;
- vm_page_enqueue(m, PQ_ACTIVE);
-}
-
/*
* vm_page_free_prep:
*
@@ -3412,6 +3398,12 @@
vm_page_free_prep(vm_page_t m)
{
+ /*
+ * Synchronize with vm_page_drop(): ensure that all page modifications
+ * are visible before proceeding.
+ */
+ atomic_thread_fence_acq();
+
#if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP)
if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) {
uint64_t *p;
@@ -3422,11 +3414,10 @@
m, i, (uintmax_t)*p));
}
#endif
- if ((m->oflags & VPO_UNMANAGED) == 0) {
- vm_page_lock_assert(m, MA_OWNED);
+ if ((m->oflags & VPO_UNMANAGED) == 0)
KASSERT(!pmap_page_is_mapped(m),
("vm_page_free_prep: freeing mapped page %p", m));
- } else
+ else
KASSERT(m->queue == PQ_NONE,
("vm_page_free_prep: unmanaged page %p is queued", m));
VM_CNT_INC(v_tfree);
@@ -3434,15 +3425,16 @@
if (vm_page_sbusied(m))
panic("vm_page_free_prep: freeing busy page %p", m);
- vm_page_remove(m);
+ if (m->object != NULL)
+ (void)vm_page_remove(m);
/*
* If fictitious remove object association and
* return.
*/
if ((m->flags & PG_FICTITIOUS) != 0) {
- KASSERT(m->wire_count == 1,
- ("fictitious page %p is not wired", m));
+ KASSERT(m->ref_count == 1,
+ ("fictitious page %p is referenced", m));
KASSERT(m->queue == PQ_NONE,
("fictitious page %p is queued", m));
return (false);
@@ -3459,8 +3451,8 @@
m->valid = 0;
vm_page_undirty(m);
- if (vm_page_wired(m) != 0)
- panic("vm_page_free_prep: freeing wired page %p", m);
+ if (m->ref_count != 0)
+ panic("vm_page_free_prep: page %p has references", m);
/*
* Restore the default memory attribute to the page.
@@ -3535,132 +3527,187 @@
}
/*
- * vm_page_wire:
- *
- * Mark this page as wired down. If the page is fictitious, then
- * its wire count must remain one.
- *
- * The page must be locked.
+ * Mark this page as wired down, preventing reclamation by the page daemon
+ * or when the containing object is destroyed.
*/
void
vm_page_wire(vm_page_t m)
{
+ u_int old;
- vm_page_assert_locked(m);
- if ((m->flags & PG_FICTITIOUS) != 0) {
- KASSERT(m->wire_count == 1,
- ("vm_page_wire: fictitious page %p's wire count isn't one",
- m));
- return;
- }
- if (!vm_page_wired(m)) {
- KASSERT((m->oflags & VPO_UNMANAGED) == 0 ||
- m->queue == PQ_NONE,
- ("vm_page_wire: unmanaged page %p is queued", m));
+ KASSERT(m->object != NULL,
+ ("vm_page_wire: page %p does not belong to an object", m));
+ if (!vm_page_busied(m))
+ VM_OBJECT_ASSERT_LOCKED(m->object);
+ KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1,
+ ("vm_page_wire: fictitious page %p has zero refs", m));
+
+ old = atomic_fetchadd_int(&m->ref_count, 1);
+ KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX,
+ ("vm_page_wire: counter overflow for page %p", m));
+ if (VPRC_WIRE_COUNT(old) == 0)
vm_wire_add(1);
- }
- m->wire_count++;
- KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m));
}
/*
- * vm_page_unwire:
- *
+ * Attempt to wire a mapped page following a pmap lookup of that page.
+ * This may fail if a thread is concurrently tearing down mappings of the page.
+ */
+bool
+vm_page_wire_mapped(vm_page_t m)
+{
+ u_int old;
+
+ KASSERT(m->object != NULL,
+ ("vm_page_try_wire: page %p does not belong to an object", m));
+
+ old = m->ref_count;
+ do {
+ KASSERT(old > 0,
+ ("vm_page_try_wire: wiring unreferenced page %p", m));
+ if ((old & VPRC_BLOCKED) != 0)
+ return (false);
+ } while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1));
+
+ if (VPRC_WIRE_COUNT(old) == 0)
+ vm_wire_add(1);
+ return (true);
+}
+
+/*
* Release one wiring of the specified page, potentially allowing it to be
- * paged out. Returns TRUE if the number of wirings transitions to zero and
- * FALSE otherwise.
+ * paged out.
*
* Only managed pages belonging to an object can be paged out. If the number
* of wirings transitions to zero and the page is eligible for page out, then
- * the page is added to the specified paging queue (unless PQ_NONE is
- * specified, in which case the page is dequeued if it belongs to a paging
- * queue).
- *
- * If a page is fictitious, then its wire count must always be one.
+ * the page is added to the specified paging queue. If the released wiring
+ * represented the last reference to the page, the page is freed.
*
* A managed page must be locked.
*/
-bool
+void
vm_page_unwire(vm_page_t m, uint8_t queue)
{
- bool unwired;
+ u_int old;
+ bool queued;
- KASSERT(queue < PQ_COUNT || queue == PQ_NONE,
- ("vm_page_unwire: invalid queue %u request for page %p",
- queue, m));
- if ((m->oflags & VPO_UNMANAGED) == 0)
- vm_page_assert_locked(m);
+ KASSERT(queue < PQ_COUNT,
+ ("vm_page_unwire: invalid queue %u request for page %p", queue, m));
- unwired = vm_page_unwire_noq(m);
- if (!unwired || (m->oflags & VPO_UNMANAGED) != 0 || m->object == NULL)
- return (unwired);
+ if ((m->oflags & VPO_UNMANAGED) != 0) {
+ if (vm_page_unwire_noq(m) && m->ref_count == 0)
+ vm_page_free(m);
+ return;
+ }
- if (vm_page_queue(m) == queue) {
- if (queue == PQ_ACTIVE)
- vm_page_reference(m);
- else if (queue != PQ_NONE)
- vm_page_requeue(m);
- } else {
- vm_page_dequeue(m);
- if (queue != PQ_NONE) {
- vm_page_enqueue(m, queue);
- if (queue == PQ_ACTIVE)
- /* Initialize act_count. */
- vm_page_activate(m);
+ vm_page_assert_locked(m);
+
+ /*
+ * Update LRU state before releasing the wiring reference.
+ * We only need to do this once since we hold the page lock.
+ * Use a release store when updating the reference count to
+ * synchronize with vm_page_free_prep().
+ */
+ old = m->ref_count;
+ queued = false;
+ do {
+ KASSERT(VPRC_WIRE_COUNT(old) > 0,
+ ("vm_page_unwire: wire count underflow for page %p", m));
+ if (!queued && VPRC_WIRE_COUNT(old) == 1) {
+ if (queue == PQ_ACTIVE && vm_page_queue(m) == PQ_ACTIVE)
+ vm_page_reference(m);
+ else
+ vm_page_mvqueue(m, queue);
+ queued = true;
}
+ } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1));
+
+ if (VPRC_WIRE_COUNT(old) == 1) {
+ vm_wire_sub(1);
+ if (old == 1)
+ vm_page_free(m);
}
- return (unwired);
}
/*
- *
- * vm_page_unwire_noq:
- *
* Unwire a page without (re-)inserting it into a page queue. It is up
* to the caller to enqueue, requeue, or free the page as appropriate.
- * In most cases, vm_page_unwire() should be used instead.
+ * In most cases involving managed pages, vm_page_unwire() should be used
+ * instead.
*/
bool
vm_page_unwire_noq(vm_page_t m)
{
+ u_int old;
- if ((m->oflags & VPO_UNMANAGED) == 0)
- vm_page_assert_locked(m);
- if ((m->flags & PG_FICTITIOUS) != 0) {
- KASSERT(m->wire_count == 1,
- ("vm_page_unwire: fictitious page %p's wire count isn't one", m));
+ old = vm_page_drop(m, -1);
+ KASSERT(VPRC_WIRE_COUNT(old) != 0,
+ ("vm_page_unref: counter underflow for page %p", m));
+ KASSERT((m->flags & PG_FICTITIOUS) == 0 || VPRC_WIRE_COUNT(old) > 1,
+ ("vm_page_unref: missing ref on fictitious page %p", m));
+
+ if (VPRC_WIRE_COUNT(old) > 1)
return (false);
+ vm_wire_sub(1);
+ return (true);
+}
+
+/*
+ * Ensure that the page is in the specified page queue. If the page is
+ * active or being moved to the active queue, ensure that its act_count is
+ * at least ACT_INIT but do not otherwise mess with it. Otherwise, ensure that
+ * the page is at the tail of its page queue.
+ *
+ * The page may be wired. The caller should release any wiring references
+ * before releasing the page lock, otherwise the page daemon may immediately
+ * dequeue the page.
+ */
+static __always_inline void
+vm_page_mvqueue(vm_page_t m, const int nqueue)
+{
+
+ vm_page_assert_locked(m);
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_mvqueue: page %p is unmanaged", m));
+
+ if (vm_page_queue(m) != nqueue) {
+ vm_page_dequeue(m);
+ vm_page_enqueue(m, nqueue);
+ } else if (nqueue != PQ_ACTIVE) {
+ vm_page_requeue(m);
}
- if (!vm_page_wired(m))
- panic("vm_page_unwire: page %p's wire count is zero", m);
- m->wire_count--;
- if (m->wire_count == 0) {
- vm_wire_sub(1);
- return (true);
- } else
- return (false);
+
+ if (nqueue == PQ_ACTIVE && m->act_count < ACT_INIT)
+ m->act_count = ACT_INIT;
+}
+
+/*
+ * Put the specified page on the active list (if appropriate).
+ *
+ * A managed page must be locked.
+ */
+void
+vm_page_activate(vm_page_t m)
+{
+
+ if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
+ return;
+ vm_page_mvqueue(m, PQ_ACTIVE);
}
/*
* Move the specified page to the tail of the inactive queue, or requeue
* the page if it is already in the inactive queue.
*
- * The page must be locked.
+ * A managed page must be locked.
*/
void
vm_page_deactivate(vm_page_t m)
{
- vm_page_assert_locked(m);
-
- if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0)
+ if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
return;
-
- if (!vm_page_inactive(m)) {
- vm_page_dequeue(m);
- vm_page_enqueue(m, PQ_INACTIVE);
- } else
- vm_page_requeue(m);
+ vm_page_mvqueue(m, PQ_INACTIVE);
}
/*
@@ -3668,18 +3715,13 @@
* bypassing LRU. A marker page is used to maintain FIFO ordering.
* As with regular enqueues, we use a per-CPU batch queue to reduce
* contention on the page queue lock.
- *
- * The page must be locked.
*/
-void
-vm_page_deactivate_noreuse(vm_page_t m)
+static void
+_vm_page_deactivate_noreuse(vm_page_t m)
{
vm_page_assert_locked(m);
- if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0)
- return;
-
if (!vm_page_inactive(m)) {
vm_page_dequeue(m);
m->queue = PQ_INACTIVE;
@@ -3689,31 +3731,33 @@
vm_pqbatch_submit_page(m, PQ_INACTIVE);
}
+void
+vm_page_deactivate_noreuse(vm_page_t m)
+{
+
+ KASSERT(m->object != NULL,
+ ("vm_page_deactivate_noreuse: page %p has no object", m));
+
+ if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_wired(m))
+ _vm_page_deactivate_noreuse(m);
+}
+
/*
- * vm_page_launder
+ * Put a page in the laundry, or requeue it if it is already there.
*
- * Put a page in the laundry, or requeue it if it is already there.
+ * The page must be locked.
*/
void
vm_page_launder(vm_page_t m)
{
- vm_page_assert_locked(m);
- if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0)
+ if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
return;
-
- if (vm_page_in_laundry(m))
- vm_page_requeue(m);
- else {
- vm_page_dequeue(m);
- vm_page_enqueue(m, PQ_LAUNDRY);
- }
+ vm_page_mvqueue(m, PQ_LAUNDRY);
}
/*
- * vm_page_unswappable
- *
- * Put a page in the PQ_UNSWAPPABLE holding queue.
+ * Put a page in the PQ_UNSWAPPABLE holding queue.
*/
void
vm_page_unswappable(vm_page_t m)
@@ -3728,30 +3772,158 @@
}
/*
- * Attempt to free the page. If it cannot be freed, do nothing. Returns true
- * if the page is freed and false otherwise.
- *
- * The page must be managed. The page and its containing object must be
- * locked.
+ * Release a wired page to the page cache, and optionally attempt to free it.
+ * The page's object must be locked. See the comment above vm_page_release().
*/
-bool
-vm_page_try_to_free(vm_page_t m)
+void
+vm_page_release_locked(vm_page_t m, bool nocache)
+{
+ vm_object_t object;
+
+ object = m->object;
+ VM_OBJECT_ASSERT_WLOCKED(object);
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_release_locked: page %p is unmanaged", m));
+
+ if (!vm_page_unwire_noq(m))
+ return;
+ if (m->valid == 0 || nocache) {
+ if ((object->ref_count == 0 || !pmap_page_is_mapped(m)) &&
+ m->dirty == 0 && !vm_page_busied(m) && !vm_page_wired(m)) {
+ vm_page_free(m);
+ } else {
+ vm_page_lock(m);
+ vm_page_deactivate_noreuse(m);
+ vm_page_unlock(m);
+ }
+ } else {
+ vm_page_lock(m);
+ if (vm_page_active(m))
+ vm_page_reference(m);
+ else
+ vm_page_deactivate(m);
+ vm_page_unlock(m);
+ }
+}
+
+/*
+ * Release a wired page to the page cache, and optionally attempt to free it.
+ * If the caller wishes to attempt to free the page, and the page is mapped,
+ * dirty, busy or wired, we do not free it but instead place it near the head of
+ * the inactive queue to accelerate reclamation.
+ */
+void
+vm_page_release(vm_page_t m, bool nocache)
{
+ vm_object_t object;
+ u_int old;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_release: page %p is unmanaged", m));
+
+ if (nocache) {
+ /*
+ * Attempt to free the page. The page may be renamed between
+ * objects so we must verify the page's object pointer after
+ * acquiring the lock and retry if they do not match.
+ */
+ while ((object = m->object) != NULL) {
+ if (!VM_OBJECT_TRYWLOCK(object)) {
+ object = NULL;
+ break;
+ }
+ if (m->object == object)
+ break;
+ VM_OBJECT_WUNLOCK(object);
+ }
+ if (object != NULL) {
+ vm_page_release_locked(m, nocache);
+ VM_OBJECT_WUNLOCK(object);
+ return;
+ }
+ }
+
+ /*
+ * Update LRU state before releasing the wiring reference.
+ * Use a release store when updating the reference count to
+ * synchronize with vm_page_free_prep().
+ */
+ old = m->ref_count;
+ do {
+ if (VPRC_WIRE_COUNT(old) == 1) {
+ vm_page_lock(m);
+
+ /*
+ * Use a racy check of the valid bits to determine
+ * whether we can accelerate reclamation of the page.
+ * The valid bits will be stable unless the page is
+ * being mapped or is referenced by multiple buffers,
+ * and in those cases we expect races to be rare. At
+ * worst we will either accelerate reclamation of a
+ * valid page and violate LRU, or unnecessarily defer
+ * reclamation of an invalid page.
+ */
+ if (m->valid == 0 || nocache)
+ _vm_page_deactivate_noreuse(m);
+ else if (vm_page_active(m))
+ vm_page_reference(m);
+ else
+ vm_page_mvqueue(m, PQ_INACTIVE);
+ vm_page_unlock(m);
+ }
+ } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1));
+
+ if (VPRC_WIRE_COUNT(old) == 1) {
+ vm_wire_sub(1);
+ if (old == 1)
+ vm_page_free(m);
+ }
+}
+
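The lockless path of vm_page_release() above has a specific shape: the LRU update happens on any loop iteration that observes the final wiring, and the wiring is then retired with a release-ordered compare-and-swap so that the queue update is visible to whoever frees the page. Below is a minimal userspace sketch of that shape using C11 atomics; release_last_ref(), WIRE_COUNT(), and the requeue_cb callback are invented names, and the callback stands in for the page lock and requeue work done in the kernel loop.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define WIRE_MASK      0x00ffffffu
#define WIRE_COUNT(c)  ((c) & WIRE_MASK)

struct page {
        _Atomic unsigned int ref_count;
};

/*
 * Run "on_last" when this drop would release the final wiring, then retire
 * the wiring with a release CAS.  A CAS failure means the count changed, so
 * the check (and possibly the callback) is repeated, just as the kernel loop
 * may re-lock and re-queue the page on retry.  Returns true when no
 * references of any kind remain afterwards.
 */
static bool
release_last_ref(struct page *p, void (*on_last)(struct page *))
{
        unsigned int old;

        old = atomic_load_explicit(&p->ref_count, memory_order_relaxed);
        do {
                if (WIRE_COUNT(old) == 1)
                        on_last(p);
        } while (!atomic_compare_exchange_weak_explicit(&p->ref_count,
            &old, old - 1, memory_order_release, memory_order_relaxed));

        return (old == 1);
}

static void
requeue_cb(struct page *p)
{
        (void)p;
        printf("last wiring: would requeue the page here\n");
}

int
main(void)
{
        struct page p = { .ref_count = 1 };     /* one wiring, nothing else */

        printf("free it: %d\n", release_last_ref(&p, requeue_cb));
        return (0);
}

The release ordering on the successful CAS mirrors the comment above about synchronizing with vm_page_free_prep().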
+/*
+ * Attempt to invoke the requested operation while blocking new wirings of the
+ * page.
+ */
+static bool
+vm_page_try_blocked_op(vm_page_t m, void (*op)(vm_page_t))
+{
+ u_int old;
vm_page_assert_locked(m);
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m));
- if (m->dirty != 0 || vm_page_wired(m) || vm_page_busied(m))
- return (false);
- if (m->object->ref_count != 0) {
- pmap_remove_all(m);
- if (m->dirty != 0)
+ KASSERT(m->object != NULL && (m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_try_blocked_op: page %p has no object", m));
+ KASSERT(!vm_page_busied(m),
+ ("vm_page_try_blocked_op: page %p is busy", m));
+ VM_OBJECT_ASSERT_LOCKED(m->object);
+
+ old = m->ref_count;
+ do {
+ KASSERT(old != 0,
+ ("vm_page_try_blocked_op: page %p has no references", m));
+ if (VPRC_WIRE_COUNT(old) != 0)
return (false);
- }
- vm_page_free(m);
+ } while (!atomic_fcmpset_int(&m->ref_count, &old, old | VPRC_BLOCKED));
+
+ (op)(m);
+
+ atomic_clear_int(&m->ref_count, VPRC_BLOCKED);
return (true);
}
+bool
+vm_page_try_remove_all(vm_page_t m)
+{
+
+ return (vm_page_try_blocked_op(m, pmap_remove_all));
+}
+
+bool
+vm_page_try_remove_write(vm_page_t m)
+{
+
+ return (vm_page_try_blocked_op(m, pmap_remove_write));
+}
+
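vm_page_try_blocked_op() packages "do this only while nothing holds the page wired" as a single atomic protocol: a CAS installs a blocking marker only while the wire count is zero, the pmap operation runs, and the marker is cleared. Below is a rough userspace sketch of that protocol; the BLOCKED bit value and the helper names are invented for the example, and the cooperation with the wiring side (which must notice the marker and fail) is omitted.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define WIRE_MASK      0x00ffffffu
#define BLOCKED        0x40000000u      /* hypothetical "wirings blocked" bit */
#define WIRE_COUNT(c)  ((c) & WIRE_MASK)

struct page {
        _Atomic unsigned int ref_count;
};

/*
 * Set BLOCKED only if the page has no wirings; with the bit published, a
 * lookup path that wants to add a wiring can observe it and back off.  The
 * operation then runs knowing no new wirings can appear, and the bit is
 * cleared afterwards.
 */
static bool
try_blocked_op(struct page *p, void (*op)(struct page *))
{
        unsigned int old;

        old = atomic_load_explicit(&p->ref_count, memory_order_relaxed);
        do {
                if (WIRE_COUNT(old) != 0)
                        return (false);         /* already wired; give up */
        } while (!atomic_compare_exchange_weak_explicit(&p->ref_count,
            &old, old | BLOCKED, memory_order_acquire, memory_order_relaxed));

        op(p);

        atomic_fetch_and_explicit(&p->ref_count, ~BLOCKED,
            memory_order_release);
        return (true);
}

static void
remove_mappings(struct page *p)
{
        (void)p;
        printf("mappings removed while wirings were blocked\n");
}

int
main(void)
{
        struct page p = { .ref_count = 0x80000000u };   /* referenced, unwired */

        printf("ok: %d\n", try_blocked_op(&p, remove_mappings));
        return (0);
}

This is what lets the pageout and swapout changes further down replace bare pmap_remove_all() and pmap_remove_write() calls with the try-variants: if a pmap-level wiring appears concurrently, the scan skips the page instead of freeing a wired page.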
/*
* vm_page_advise
*
@@ -3846,11 +4018,8 @@
VM_OBJECT_WLOCK(object);
goto retrylookup;
} else {
- if ((allocflags & VM_ALLOC_WIRED) != 0) {
- vm_page_lock(m);
+ if ((allocflags & VM_ALLOC_WIRED) != 0)
vm_page_wire(m);
- vm_page_unlock(m);
- }
if ((allocflags &
(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
vm_page_xbusy(m);
@@ -3948,11 +4117,8 @@
VM_OBJECT_WLOCK(object);
goto retrylookup;
}
- if ((allocflags & VM_ALLOC_WIRED) != 0) {
- vm_page_lock(m);
+ if ((allocflags & VM_ALLOC_WIRED) != 0)
vm_page_wire(m);
- vm_page_unlock(m);
- }
if ((allocflags & (VM_ALLOC_NOBUSY |
VM_ALLOC_SBUSY)) == 0)
vm_page_xbusy(m);
@@ -4481,10 +4647,10 @@
else
m = (vm_page_t)addr;
db_printf(
- "page %p obj %p pidx 0x%jx phys 0x%jx q %d wire %d\n"
+ "page %p obj %p pidx 0x%jx phys 0x%jx q %d ref %u\n"
" af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n",
m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr,
- m->queue, m->wire_count, m->aflags, m->oflags,
+ m->queue, m->ref_count, m->aflags, m->oflags,
m->flags, m->act_count, m->busy_lock, m->valid, m->dirty);
}
#endif /* DDB */
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -314,6 +314,54 @@
return (vm_batchqueue_pop(&ss->bq));
}
+/*
+ * Lock a page and set a reference bit to ensure that it does not get freed out
+ * from under us.
+ */
+static bool
+vm_pageout_lock_and_hold_page(vm_page_t m, struct mtx **mtx)
+{
+ u_int ref_count;
+
+ vm_page_change_lock(m, mtx);
+
+ ref_count = m->ref_count;
+ do {
+ if (ref_count == 0)
+ return (false);
+ } while (!atomic_fcmpset_int(&m->ref_count, &ref_count, ref_count |
+ VPRC_PDREF));
+ return (true);
+}
+
+/*
+ * Drop the page daemon's transient page reference and determine whether we need
+ * to free the page.
+ */
+static bool
+vm_pageout_drop_page(vm_page_t m)
+{
+
+ KASSERT((m->ref_count & VPRC_PDREF) != 0,
+ ("vm_pageout_drop_page: page %p missing pagedaemon ref", m));
+ return (vm_page_drop(m, -VPRC_PDREF) == VPRC_PDREF);
+}
+
+/*
+ * Drop the page daemon's transient reference once we know that the page's
+ * identity is stable.
+ */
+static void
+vm_pageout_drop_page_quick(vm_page_t m)
+{
+
+ VM_OBJECT_ASSERT_LOCKED(m->object);
+ KASSERT((m->ref_count & (VPRC_OBJREF | VPRC_PDREF)) ==
+ (VPRC_OBJREF | VPRC_PDREF),
+ ("vm_pageout_drop_page_quick: page %p missing refs", m));
+ atomic_clear_int(&m->ref_count, VPRC_PDREF);
+}
+
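The two helpers above follow the usual conditional-reference idiom: the page daemon takes a transient reference only if at least one reference still exists, so a page whose count has already reached zero cannot be revived, and the later drop reports whether the daemon is now responsible for freeing the page. A small sketch follows, with the PDREF value and the helper names invented for illustration.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define PDREF  0x20000000u      /* hypothetical page-daemon reference bit */

struct page {
        _Atomic unsigned int ref_count;
};

/* Take the transient reference only if some reference still exists. */
static bool
page_tryhold(struct page *p)
{
        unsigned int old;

        old = atomic_load_explicit(&p->ref_count, memory_order_relaxed);
        do {
                if (old == 0)
                        return (false);         /* page is being freed */
        } while (!atomic_compare_exchange_weak_explicit(&p->ref_count,
            &old, old | PDREF, memory_order_acquire, memory_order_relaxed));
        return (true);
}

/* Drop the transient reference; true means the caller must free the page. */
static bool
page_drophold(struct page *p)
{
        return (atomic_fetch_sub_explicit(&p->ref_count, PDREF,
            memory_order_release) == PDREF);
}

int
main(void)
{
        struct page p = { .ref_count = 0 };

        printf("held: %d\n", page_tryhold(&p)); /* 0: count already zero */
        p.ref_count = 1;                        /* reset toy: one wiring */
        printf("held: %d\n", page_tryhold(&p)); /* 1 */
        printf("free: %d\n", page_drophold(&p)); /* 0: the wiring remains */
        return (0);
}

This is also why the scan loops below funnel early exits through a drop: label; the transient reference always has to be released, and the page is freed there if it turns out to have been the last reference.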
/*
* Scan for pages at adjacent offsets within the given page's object that are
* eligible for laundering, form a cluster of these pages and the given page,
@@ -327,16 +375,11 @@
vm_pindex_t pindex;
int ib, is, page_base, pageout_count;
- vm_page_assert_locked(m);
object = m->object;
VM_OBJECT_ASSERT_WLOCKED(object);
pindex = m->pindex;
vm_page_assert_unbusied(m);
- KASSERT(!vm_page_wired(m), ("page %p is wired", m));
-
- pmap_remove_write(m);
- vm_page_unlock(m);
mc[vm_pageout_page_count] = pb = ps = m;
pageout_count = 1;
@@ -362,7 +405,8 @@
ib = 0;
break;
}
- if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) {
+ if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p) ||
+ vm_page_wired(p)) {
ib = 0;
break;
}
@@ -372,12 +416,11 @@
break;
}
vm_page_lock(p);
- if (vm_page_wired(p) || !vm_page_in_laundry(p)) {
+ if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
vm_page_unlock(p);
ib = 0;
break;
}
- pmap_remove_write(p);
vm_page_unlock(p);
mc[--page_base] = pb = p;
++pageout_count;
@@ -392,17 +435,17 @@
}
while (pageout_count < vm_pageout_page_count &&
pindex + is < object->size) {
- if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
+ if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p) ||
+ vm_page_wired(p))
break;
vm_page_test_dirty(p);
if (p->dirty == 0)
break;
vm_page_lock(p);
- if (vm_page_wired(p) || !vm_page_in_laundry(p)) {
+ if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
vm_page_unlock(p);
break;
}
- pmap_remove_write(p);
vm_page_unlock(p);
mc[page_base + pageout_count] = ps = p;
++pageout_count;
@@ -657,6 +700,13 @@
}
}
+ if (!vm_page_try_remove_write(m)) {
+ vm_page_unlock(m);
+ error = EBUSY;
+ goto unlock_all;
+ }
+ vm_page_unlock(m);
+
/*
* If a page is dirty, then it is either being washed
* (but not yet cleaned) or it is still in the
@@ -726,7 +776,8 @@
if (__predict_false((m->flags & PG_MARKER) != 0))
continue;
- vm_page_change_lock(m, &mtx);
+ if (!vm_pageout_lock_and_hold_page(m, &mtx))
+ continue;
recheck:
/*
@@ -734,7 +785,7 @@
* while locks were dropped.
*/
if (vm_page_queue(m) != queue)
- continue;
+ goto drop;
/*
* A requeue was requested, so this page gets a second
@@ -742,17 +793,19 @@
*/
if ((m->aflags & PGA_REQUEUE) != 0) {
vm_page_requeue(m);
- continue;
+ goto drop;
}
/*
* Wired pages may not be freed. Complete their removal
* from the queue now to avoid needless revisits during
- * future scans.
+ * future scans. This check is racy and must be reverified once
+ * we hold the object lock and have verified that the page
+ * is not busy.
*/
if (vm_page_wired(m)) {
vm_page_dequeue_deferred(m);
- continue;
+ goto drop;
}
if (object != m->object) {
@@ -767,10 +820,33 @@
goto recheck;
}
}
+ if (__predict_false(object == NULL))
+ /*
+ * The page has been removed from its object.
+ * Drop our reference and move on.
+ */
+ goto drop;
+
+ /*
+ * We can drop our transient reference now that we hold
+ * the object lock.
+ */
+ vm_pageout_drop_page_quick(m);
if (vm_page_busied(m))
continue;
+ /*
+ * Re-check for wirings now that we hold the object lock. If
+ * the page is mapped, it may still be wired by pmap lookups.
+ * The call to vm_page_try_remove_all() below atomically checks
+ * for such wirings and removes mappings.
+ */
+ if (__predict_false(vm_page_wired(m))) {
+ vm_page_dequeue_deferred(m);
+ continue;
+ }
+
/*
* Invalid pages can be easily freed. They cannot be
* mapped; vm_page_free() asserts this.
@@ -838,8 +914,10 @@
*/
if (object->ref_count != 0) {
vm_page_test_dirty(m);
- if (m->dirty == 0)
- pmap_remove_all(m);
+ if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
+ vm_page_dequeue_deferred(m);
+ continue;
+ }
}
/*
@@ -889,6 +967,11 @@
mtx = NULL;
object = NULL;
}
+
+ continue;
+drop:
+ if (vm_pageout_drop_page(m))
+ goto free_page;
}
if (mtx != NULL) {
mtx_unlock(mtx);
@@ -1131,6 +1214,7 @@
{
struct scan_state ss;
struct mtx *mtx;
+ vm_object_t object;
vm_page_t m, marker;
struct vm_pagequeue *pq;
long min_scan;
@@ -1187,23 +1271,31 @@
if (__predict_false((m->flags & PG_MARKER) != 0))
continue;
- vm_page_change_lock(m, &mtx);
+ if (!vm_pageout_lock_and_hold_page(m, &mtx))
+ continue;
/*
* The page may have been disassociated from the queue
* while locks were dropped.
*/
if (vm_page_queue(m) != PQ_ACTIVE)
- continue;
+ goto drop;
/*
* Wired pages are dequeued lazily.
*/
if (vm_page_wired(m)) {
vm_page_dequeue_deferred(m);
- continue;
+ goto drop;
}
+ if (__predict_false((object = m->object) == NULL))
+ /*
+ * The page has been removed from its object.
+ * Drop our reference and move on.
+ */
+ goto drop;
+
/*
* Check to see "how much" the page has been used.
*
@@ -1223,7 +1315,7 @@
* This race delays the detection of a new reference. At
* worst, we will deactivate and reactivate the page.
*/
- if (m->object->ref_count != 0)
+ if (object->ref_count != 0)
act_delta = pmap_ts_referenced(m);
else
act_delta = 0;
@@ -1278,6 +1370,9 @@
}
}
}
+drop:
+ if (vm_pageout_drop_page(m))
+ vm_page_free(m);
}
if (mtx != NULL) {
mtx_unlock(mtx);
@@ -1392,7 +1487,8 @@
KASSERT((m->flags & PG_MARKER) == 0,
("marker page %p was dequeued", m));
- vm_page_change_lock(m, &mtx);
+ if (!vm_pageout_lock_and_hold_page(m, &mtx))
+ continue;
recheck:
/*
@@ -1401,7 +1497,7 @@
*/
if (vm_page_queue(m) != PQ_INACTIVE) {
addl_page_shortage++;
- continue;
+ goto drop;
}
/*
@@ -1410,24 +1506,28 @@
* chance.
*/
if ((m->aflags & (PGA_ENQUEUED | PGA_REQUEUE |
- PGA_REQUEUE_HEAD)) != 0)
- goto reinsert;
+ PGA_REQUEUE_HEAD)) != 0) {
+ vm_pageout_reinsert_inactive(&ss, &rq, m);
+ goto drop;
+ }
/*
* Wired pages may not be freed. Complete their removal
* from the queue now to avoid needless revisits during
- * future scans.
+ * future scans. This check is racy and must be reverified once
+ * we hold the object lock and have verified that the page
+ * is not busy.
*/
if (vm_page_wired(m)) {
vm_page_dequeue_deferred(m);
- continue;
+ goto drop;
}
if (object != m->object) {
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
object = m->object;
- if (!VM_OBJECT_TRYWLOCK(object)) {
+ if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) {
mtx_unlock(mtx);
/* Depends on type-stability. */
VM_OBJECT_WLOCK(object);
@@ -1435,6 +1535,18 @@
goto recheck;
}
}
+ if (__predict_false(object == NULL))
+ /*
+ * The page has been removed from its object.
+ * Drop our reference and move on.
+ */
+ goto drop;
+
+ /*
+ * We can drop our transient reference now that we hold
+ * the object lock.
+ */
+ vm_pageout_drop_page_quick(m);
if (vm_page_busied(m)) {
/*
@@ -1446,7 +1558,19 @@
* inactive count.
*/
addl_page_shortage++;
- goto reinsert;
+ vm_pageout_reinsert_inactive(&ss, &rq, m);
+ continue;
+ }
+
+ /*
+ * Re-check for wirings now that we hold the object lock. If
+ * the page is mapped, it may still be wired by pmap lookups.
+ * The call to vm_page_try_remove_all() below atomically checks
+ * for such wirings and removes mappings.
+ */
+ if (__predict_false(vm_page_wired(m))) {
+ vm_page_dequeue_deferred(m);
+ continue;
}
/*
@@ -1492,7 +1616,8 @@
continue;
} else if ((object->flags & OBJ_DEAD) == 0) {
vm_page_aflag_set(m, PGA_REQUEUE);
- goto reinsert;
+ vm_pageout_reinsert_inactive(&ss, &rq, m);
+ continue;
}
}
@@ -1505,8 +1630,10 @@
*/
if (object->ref_count != 0) {
vm_page_test_dirty(m);
- if (m->dirty == 0)
- pmap_remove_all(m);
+ if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
+ vm_page_dequeue_deferred(m);
+ continue;
+ }
}
/*
@@ -1532,8 +1659,13 @@
} else if ((object->flags & OBJ_DEAD) == 0)
vm_page_launder(m);
continue;
-reinsert:
- vm_pageout_reinsert_inactive(&ss, &rq, m);
+
+drop:
+ /*
+ * Drop our transient reference.
+ */
+ if (vm_pageout_drop_page(m))
+ goto free_page;
}
if (mtx != NULL)
mtx_unlock(mtx);
Index: sys/vm/vm_swapout.c
===================================================================
--- sys/vm/vm_swapout.c
+++ sys/vm/vm_swapout.c
@@ -208,12 +208,12 @@
goto unlock_return;
if (should_yield())
goto unlock_return;
- if (vm_page_busied(p))
+
+ if (vm_page_busied(p) || vm_page_wired(p))
continue;
VM_CNT_INC(v_pdpages);
vm_page_lock(p);
- if (vm_page_wired(p) ||
- !pmap_page_exists_quick(pmap, p)) {
+ if (!pmap_page_exists_quick(pmap, p)) {
vm_page_unlock(p);
continue;
}
@@ -231,8 +231,8 @@
p->act_count -= min(p->act_count,
ACT_DECLINE);
if (!remove_mode && p->act_count == 0) {
- pmap_remove_all(p);
- vm_page_deactivate(p);
+ if (vm_page_try_remove_all(p))
+ vm_page_deactivate(p);
} else
vm_page_requeue(p);
} else {
@@ -243,7 +243,7 @@
vm_page_requeue(p);
}
} else if (vm_page_inactive(p))
- pmap_remove_all(p);
+ (void)vm_page_try_remove_all(p);
vm_page_unlock(p);
}
if ((backing_object = object->backing_object) == NULL)
