D20486.id58419.diff
Index: share/man/man9/Makefile
===================================================================
--- share/man/man9/Makefile
+++ share/man/man9/Makefile
@@ -2200,7 +2200,9 @@
MLINKS+=vm_map_max.9 vm_map_min.9 \
vm_map_max.9 vm_map_pmap.9
MLINKS+=vm_map_stack.9 vm_map_growstack.9
-MLINKS+=vm_map_wire.9 vm_map_unwire.9
+MLINKS+=vm_map_wire.9 vm_map_wire_mapped.9 \
+ vm_page_wire.9 vm_page_unwire.9 \
+ vm_page_wire.9 vm_page_unwire_noq.9
MLINKS+=vm_page_bits.9 vm_page_clear_dirty.9 \
vm_page_bits.9 vm_page_dirty.9 \
vm_page_bits.9 vm_page_is_valid.9 \
Index: share/man/man9/vm_page_wire.9
===================================================================
--- share/man/man9/vm_page_wire.9
+++ share/man/man9/vm_page_wire.9
@@ -26,12 +26,13 @@
.\"
.\" $FreeBSD$
.\"
-.Dd July 13, 2001
+.Dd June 3, 2019
.Dt VM_PAGE_WIRE 9
.Os
.Sh NAME
.Nm vm_page_wire ,
-.Nm vm_page_unwire
+.Nm vm_page_unwire ,
+.Nm vm_page_unwire_noq
.Nd "wire and unwire pages"
.Sh SYNOPSIS
.In sys/param.h
@@ -39,29 +40,44 @@
.In vm/vm_page.h
.Ft void
.Fn vm_page_wire "vm_page_t m"
+.Ft bool
+.Fn vm_page_wire_mapped "vm_page_t m"
.Ft void
-.Fn vm_page_unwire "vm_page_t m" "int activate"
+.Fn vm_page_unwire "vm_page_t m" "int queue"
+.Ft bool
+.Fn vm_page_unwire_noq "vm_page_t m"
.Sh DESCRIPTION
The
.Fn vm_page_wire
-function increments the wire count on a page, and removes it from
-whatever queue it is on.
+and
+.Fn vm_page_wire_mapped
+functions wire the page, which prevents it from being reclaimed by the
+page daemon or when its containing object is destroyed.
+Both functions require that the page belong to an object.
+The
+.Fn vm_page_wire_mapped
+function is for use by the
+.Xr pmap 9
+layer following a lookup.
+This function may fail if mappings of the page are concurrently
+being destroyed, in which case it will return false.
.Pp
The
.Fn vm_page_unwire
-function releases one of the wirings on the page.
-When
-.Va write_count
-reaches zero the page is placed back onto either the active queue
-(if
-.Fa activate
-is non-zero) or onto the inactive queue (if
-.Fa activate
-is zero).
-If the page is unmanaged
-.Dv ( PG_UNMANAGED
-is set) then the page is left on
-.Dv PQ_NONE .
+and
+.Fn vm_page_unwire_noq
+functions release a wiring of a page.
+The
+.Fn vm_page_unwire
+function takes a queue index and will insert the page into the
+corresponding page queue upon releasing its last wiring.
+If the page does not belong to an object and no other references
+to the page exist,
+.Fn vm_page_unwire
+will free the page.
+.Fn vm_page_unwire_noq
+releases the wiring and returns true if it was the last wiring
+of the page.
.Sh AUTHORS
This manual page was written by
.An Chad David Aq Mt davidc@acns.ab.ca .
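
As a reading aid, and not part of the patch itself, the following minimal sketch
illustrates the lifecycle the updated manual page describes: a caller obtains a
transiently wired page through pmap_extract_and_hold(), which under this
revision wires it with vm_page_wire_mapped(), and later drops the wiring with
vm_page_unwire(). The helper name is a hypothetical example, not code from this
change.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>

/*
 * Hypothetical consumer of the documented interface: wire the page backing
 * a user virtual address, use it, then release the transient wiring.
 */
static void
example_with_held_page(pmap_t pmap, vm_offset_t va)
{
	vm_page_t m;

	/*
	 * pmap_extract_and_hold() wires the page via vm_page_wire_mapped();
	 * it may fail if the page's mappings are concurrently destroyed.
	 */
	m = pmap_extract_and_hold(pmap, va, VM_PROT_READ);
	if (m == NULL)
		return;

	/* ... access the page contents ... */

	/*
	 * Release the wiring; the page is requeued, or freed by
	 * vm_page_unwire() if this was its last reference.
	 */
	vm_page_lock(m);
	vm_page_unwire(m, PQ_ACTIVE);
	vm_page_unlock(m);
}
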
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -2931,31 +2931,23 @@
m = NULL;
PG_RW = pmap_rw_bit(pmap);
PG_V = pmap_valid_bit(pmap);
+
PMAP_LOCK(pmap);
-retry:
pdep = pmap_pde(pmap, va);
if (pdep != NULL && (pde = *pdep)) {
if (pde & PG_PS) {
- if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- if (vm_page_pa_tryrelock(pmap, (pde &
- PG_PS_FRAME) | (va & PDRMASK), &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pa);
- }
+ if ((pde & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0)
+ m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
+ (va & PDRMASK));
} else {
pte = *pmap_pde_to_pte(pdep, va);
- if ((pte & PG_V) &&
- ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
- &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pa);
- }
+ if ((pte & PG_V) != 0 &&
+ ((pte & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0))
+ m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
}
- if (m != NULL)
- vm_page_wire(m);
+ if (m != NULL && !vm_page_wire_mapped(m))
+ m = NULL;
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/amd64/sgx/sgx.c
===================================================================
--- sys/amd64/sgx/sgx.c
+++ sys/amd64/sgx/sgx.c
@@ -358,7 +358,7 @@
uint64_t offs;
vm_page_lock(p);
- vm_page_remove(p);
+ (void)vm_page_remove(p);
vm_page_unlock(p);
dprintf("%s: p->pidx %ld\n", __func__, p->pindex);
Index: sys/arm/arm/pmap-v4.c
===================================================================
--- sys/arm/arm/pmap-v4.c
+++ sys/arm/arm/pmap-v4.c
@@ -3415,14 +3415,14 @@
struct l2_dtable *l2;
pd_entry_t l1pd;
pt_entry_t *ptep, pte;
- vm_paddr_t pa, paddr;
- vm_page_t m = NULL;
+ vm_paddr_t pa;
+ vm_page_t m;
u_int l1idx;
+
l1idx = L1_IDX(va);
- paddr = 0;
+ m = NULL;
PMAP_LOCK(pmap);
-retry:
l1pd = pmap->pm_l1->l1_kva[l1idx];
if (l1pte_section_p(l1pd)) {
/*
@@ -3434,11 +3434,10 @@
pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET);
else
pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
- if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr))
- goto retry;
if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) {
m = PHYS_TO_VM_PAGE(pa);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
} else {
/*
@@ -3466,15 +3465,12 @@
pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET);
else
pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
- if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr))
- goto retry;
m = PHYS_TO_VM_PAGE(pa);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
-
PMAP_UNLOCK(pmap);
- PA_UNLOCK_COND(paddr);
return (m);
}
Index: sys/arm/arm/pmap-v6.c
===================================================================
--- sys/arm/arm/pmap-v6.c
+++ sys/arm/arm/pmap-v6.c
@@ -1986,23 +1986,20 @@
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
- vm_paddr_t pa, lockpa;
+ vm_paddr_t pa;
pt1_entry_t pte1;
pt2_entry_t pte2, *pte2p;
vm_page_t m;
- lockpa = 0;
m = NULL;
PMAP_LOCK(pmap);
-retry:
pte1 = pte1_load(pmap_pte1(pmap, va));
if (pte1_is_section(pte1)) {
if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) {
pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
- if (vm_page_pa_tryrelock(pmap, pa, &lockpa))
- goto retry;
m = PHYS_TO_VM_PAGE(pa);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
} else if (pte1_is_link(pte1)) {
pte2p = pmap_pte2(pmap, va);
@@ -2011,13 +2008,11 @@
if (pte2_is_valid(pte2) &&
(!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) {
pa = pte2_pa(pte2);
- if (vm_page_pa_tryrelock(pmap, pa, &lockpa))
- goto retry;
m = PHYS_TO_VM_PAGE(pa);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
- PA_UNLOCK_COND(lockpa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -1064,14 +1064,11 @@
{
pt_entry_t *pte, tpte;
vm_offset_t off;
- vm_paddr_t pa;
vm_page_t m;
int lvl;
- pa = 0;
m = NULL;
PMAP_LOCK(pmap);
-retry:
pte = pmap_pte(pmap, va, &lvl);
if (pte != NULL) {
tpte = pmap_load(pte);
@@ -1096,14 +1093,11 @@
default:
off = 0;
}
- if (vm_page_pa_tryrelock(pmap,
- (tpte & ~ATTR_MASK) | off, &pa))
- goto retry;
m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -481,9 +481,7 @@
}
ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
- vm_page_lock(pp);
vm_page_wire(pp);
- vm_page_unlock(pp);
} else
pp = NULL;
break;
Index: sys/compat/linuxkpi/common/include/linux/mm.h
===================================================================
--- sys/compat/linuxkpi/common/include/linux/mm.h
+++ sys/compat/linuxkpi/common/include/linux/mm.h
@@ -227,9 +227,7 @@
static inline void
get_page(struct vm_page *page)
{
- vm_page_lock(page);
vm_page_wire(page);
- vm_page_unlock(page);
}
extern long
@@ -251,8 +249,7 @@
put_page(struct vm_page *page)
{
vm_page_lock(page);
- if (vm_page_unwire(page, PQ_ACTIVE) && page->object == NULL)
- vm_page_free(page);
+ vm_page_unwire(page, PQ_ACTIVE);
vm_page_unlock(page);
}
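
A brief aside, not part of the patch: this put_page() change, like the later
bpf_zerocopy, vchiq, ptrace and vm_fault hunks, relies on the new
vm_page_unwire() contract, under which the function itself frees a page whose
released wiring was the last remaining reference. A hedged sketch of that
calling convention follows (same headers as the earlier sketch; the helper
name is hypothetical).

/*
 * Drop transient wirings on a batch of pages.  Callers no longer test for
 * an orphaned page (object == NULL) and free it by hand; vm_page_unwire()
 * frees the page when the wiring was its last reference.
 */
static void
example_unhold_pages(vm_page_t *ma, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		vm_page_lock(ma[i]);
		vm_page_unwire(ma[i], PQ_ACTIVE);
		vm_page_unlock(ma[i]);
	}
}
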
Index: sys/compat/linuxkpi/common/src/linux_page.c
===================================================================
--- sys/compat/linuxkpi/common/src/linux_page.c
+++ sys/compat/linuxkpi/common/src/linux_page.c
@@ -158,10 +158,8 @@
for (x = 0; x != npages; x++) {
vm_page_t pgo = page + x;
- vm_page_lock(pgo);
if (vm_page_unwire_noq(pgo))
vm_page_free(pgo);
- vm_page_unlock(pgo);
}
} else {
vm_offset_t vaddr;
Index: sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
===================================================================
--- sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
+++ sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
@@ -378,8 +378,7 @@
pagelist_page_free(vm_page_t pp)
{
vm_page_lock(pp);
- if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL)
- vm_page_free(pp);
+ vm_page_unwire(pp, PQ_INACTIVE);
vm_page_unlock(pp);
}
Index: sys/i386/i386/pmap.c
===================================================================
--- sys/i386/i386/pmap.c
+++ sys/i386/i386/pmap.c
@@ -1685,35 +1685,24 @@
pd_entry_t pde;
pt_entry_t pte;
vm_page_t m;
- vm_paddr_t pa;
- pa = 0;
m = NULL;
PMAP_LOCK(pmap);
-retry:
pde = *pmap_pde(pmap, va);
if (pde != 0) {
if (pde & PG_PS) {
- if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- if (vm_page_pa_tryrelock(pmap, (pde &
- PG_PS_FRAME) | (va & PDRMASK), &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pa);
- }
+ if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0)
+ m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
+ (va & PDRMASK));
} else {
pte = pmap_pte_ufast(pmap, va, pde);
if (pte != 0 &&
- ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
- &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pa);
- }
+ ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0))
+ m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
}
- if (m != NULL)
- vm_page_wire(m);
+ if (m != NULL && !vm_page_wire_mapped(m))
+ m = NULL;
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/kern/kern_exec.c
===================================================================
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -976,9 +976,7 @@
if (ma[0]->valid != VM_PAGE_BITS_ALL) {
vm_page_xbusy(ma[0]);
if (!vm_pager_has_page(object, 0, NULL, &after)) {
- vm_page_lock(ma[0]);
vm_page_free(ma[0]);
- vm_page_unlock(ma[0]);
VM_OBJECT_WUNLOCK(object);
return (EIO);
}
@@ -1002,11 +1000,8 @@
initial_pagein = i;
rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL);
if (rv != VM_PAGER_OK) {
- for (i = 0; i < initial_pagein; i++) {
- vm_page_lock(ma[i]);
+ for (i = 0; i < initial_pagein; i++)
vm_page_free(ma[i]);
- vm_page_unlock(ma[i]);
- }
VM_OBJECT_WUNLOCK(object);
return (EIO);
}
@@ -1014,9 +1009,7 @@
for (i = 1; i < initial_pagein; i++)
vm_page_readahead_finish(ma[i]);
}
- vm_page_lock(ma[0]);
vm_page_wire(ma[0]);
- vm_page_unlock(ma[0]);
VM_OBJECT_WUNLOCK(object);
imgp->firstpage = sf_buf_alloc(ma[0], 0);
Index: sys/kern/kern_sendfile.c
===================================================================
--- sys/kern/kern_sendfile.c
+++ sys/kern/kern_sendfile.c
@@ -119,76 +119,20 @@
SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
-/*
- * Detach mapped page and release resources back to the system. Called
- * by mbuf(9) code when last reference to a page is freed.
- */
-static void
-sendfile_free_page(vm_page_t pg, bool nocache)
-{
- bool freed;
-
- vm_page_lock(pg);
- /*
- * In either case check for the object going away on us. This can
- * happen since we don't hold a reference to it. If so, we're
- * responsible for freeing the page. In 'noncache' case try to free
- * the page, but only if it is cheap to.
- */
- if (vm_page_unwire_noq(pg)) {
- vm_object_t obj;
-
- if ((obj = pg->object) == NULL)
- vm_page_free(pg);
- else {
- freed = false;
- if (nocache && !vm_page_xbusied(pg) &&
- VM_OBJECT_TRYWLOCK(obj)) {
- /* Only free unmapped pages. */
- if (obj->ref_count == 0 ||
- !pmap_page_is_mapped(pg))
- /*
- * The busy test before the object is
- * locked cannot be relied upon.
- */
- freed = vm_page_try_to_free(pg);
- VM_OBJECT_WUNLOCK(obj);
- }
- if (!freed) {
- /*
- * If we were asked to not cache the page, place
- * it near the head of the inactive queue so
- * that it is reclaimed sooner. Otherwise,
- * maintain LRU.
- */
- if (nocache)
- vm_page_deactivate_noreuse(pg);
- else if (vm_page_active(pg))
- vm_page_reference(pg);
- else
- vm_page_deactivate(pg);
- }
- }
- }
- vm_page_unlock(pg);
-}
-
static void
sendfile_free_mext(struct mbuf *m)
{
struct sf_buf *sf;
vm_page_t pg;
- bool nocache;
KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_SFBUF,
("%s: m %p !M_EXT or !EXT_SFBUF", __func__, m));
sf = m->m_ext.ext_arg1;
pg = sf_buf_page(sf);
- nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE;
sf_buf_free(sf);
- sendfile_free_page(pg, nocache);
+ vm_page_release(pg, (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0);
if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
struct sendfile_sync *sfs = m->m_ext.ext_arg2;
Index: sys/kern/sys_process.c
===================================================================
--- sys/kern/sys_process.c
+++ sys/kern/sys_process.c
@@ -307,8 +307,7 @@
* Release the page.
*/
vm_page_lock(m);
- if (vm_page_unwire(m, PQ_ACTIVE) && m->object == NULL)
- vm_page_free(m);
+ vm_page_unwire(m, PQ_ACTIVE);
vm_page_unlock(m);
} while (error == 0 && uio->uio_resid > 0);
Index: sys/kern/uipc_shm.c
===================================================================
--- sys/kern/uipc_shm.c
+++ sys/kern/uipc_shm.c
@@ -196,9 +196,7 @@
printf(
"uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n",
obj, idx, m->valid, rv);
- vm_page_lock(m);
vm_page_free(m);
- vm_page_unlock(m);
VM_OBJECT_WUNLOCK(obj);
return (EIO);
}
@@ -206,9 +204,7 @@
vm_page_zero_invalid(m, TRUE);
vm_page_xunbusy(m);
}
- vm_page_lock(m);
vm_page_wire(m);
- vm_page_unlock(m);
VM_OBJECT_WUNLOCK(obj);
error = uiomove_fromphys(&m, offset, tlen, uio);
if (uio->uio_rw == UIO_WRITE && error == 0) {
Index: sys/kern/vfs_bio.c
===================================================================
--- sys/kern/vfs_bio.c
+++ sys/kern/vfs_bio.c
@@ -2894,47 +2894,6 @@
}
}
-/*
- * Unwire a page held by a buf and either free it or update the page queues to
- * reflect its recent use.
- */
-static void
-vfs_vmio_unwire(struct buf *bp, vm_page_t m)
-{
- bool freed;
-
- vm_page_lock(m);
- if (vm_page_unwire_noq(m)) {
- if ((bp->b_flags & B_DIRECT) != 0)
- freed = vm_page_try_to_free(m);
- else
- freed = false;
- if (!freed) {
- /*
- * Use a racy check of the valid bits to determine
- * whether we can accelerate reclamation of the page.
- * The valid bits will be stable unless the page is
- * being mapped or is referenced by multiple buffers,
- * and in those cases we expect races to be rare. At
- * worst we will either accelerate reclamation of a
- * valid page and violate LRU, or unnecessarily defer
- * reclamation of an invalid page.
- *
- * The B_NOREUSE flag marks data that is not expected to
- * be reused, so accelerate reclamation in that case
- * too. Otherwise, maintain LRU.
- */
- if (m->valid == 0 || (bp->b_flags & B_NOREUSE) != 0)
- vm_page_deactivate_noreuse(m);
- else if (vm_page_active(m))
- vm_page_reference(m);
- else
- vm_page_deactivate(m);
- }
- }
- vm_page_unlock(m);
-}
-
/*
* Perform page invalidation when a buffer is released. The fully invalid
* pages will be reclaimed later in vfs_vmio_truncate().
@@ -2984,7 +2943,8 @@
}
if (pmap_page_wired_mappings(m) == 0)
vm_page_set_invalid(m, poffset, presid);
- vfs_vmio_unwire(bp, m);
+ vm_page_release_locked(m,
+ (bp->b_flags & (B_NOREUSE | B_DIRECT)) != 0);
resid -= presid;
poffset = 0;
}
@@ -3022,7 +2982,10 @@
m = bp->b_pages[i];
KASSERT(m != bogus_page, ("allocbuf: bogus page found"));
bp->b_pages[i] = NULL;
- vfs_vmio_unwire(bp, m);
+ if (obj != NULL)
+ vm_page_release_locked(m, true);
+ else
+ vm_page_release(m, (bp->b_flags & B_NOREUSE) != 0);
}
if (obj != NULL)
VM_OBJECT_WUNLOCK(obj);
Index: sys/mips/mips/pmap.c
===================================================================
--- sys/mips/mips/pmap.c
+++ sys/mips/mips/pmap.c
@@ -795,26 +795,22 @@
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
pt_entry_t pte, *ptep;
- vm_paddr_t pa, pte_pa;
+ vm_paddr_t pa;
vm_page_t m;
m = NULL;
- pa = 0;
PMAP_LOCK(pmap);
-retry:
ptep = pmap_pte(pmap, va);
if (ptep != NULL) {
pte = *ptep;
if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) ||
(prot & VM_PROT_WRITE) == 0)) {
- pte_pa = TLBLO_PTE_TO_PA(pte);
- if (vm_page_pa_tryrelock(pmap, pte_pa, &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pte_pa);
- vm_page_wire(m);
+ pa = TLBLO_PTE_TO_PA(pte);
+ m = PHYS_TO_VM_PAGE(pa);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/net/bpf_zerocopy.c
===================================================================
--- sys/net/bpf_zerocopy.c
+++ sys/net/bpf_zerocopy.c
@@ -116,8 +116,7 @@
{
vm_page_lock(pp);
- if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL)
- vm_page_free(pp);
+ vm_page_unwire(pp, PQ_INACTIVE);
vm_page_unlock(pp);
}
Index: sys/powerpc/aim/mmu_oea.c
===================================================================
--- sys/powerpc/aim/mmu_oea.c
+++ sys/powerpc/aim/mmu_oea.c
@@ -1262,22 +1262,17 @@
{
struct pvo_entry *pvo;
vm_page_t m;
- vm_paddr_t pa;
m = NULL;
- pa = 0;
PMAP_LOCK(pmap);
-retry:
pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL);
if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) &&
((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW ||
(prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa))
- goto retry;
m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/powerpc/aim/mmu_oea64.c
===================================================================
--- sys/powerpc/aim/mmu_oea64.c
+++ sys/powerpc/aim/mmu_oea64.c
@@ -1575,21 +1575,15 @@
{
struct pvo_entry *pvo;
vm_page_t m;
- vm_paddr_t pa;
m = NULL;
- pa = 0;
PMAP_LOCK(pmap);
-retry:
pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) {
- if (vm_page_pa_tryrelock(pmap,
- pvo->pvo_pte.pa & LPTE_RPGN, &pa))
- goto retry;
m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/powerpc/booke/pmap.c
===================================================================
--- sys/powerpc/booke/pmap.c
+++ sys/powerpc/booke/pmap.c
@@ -2934,12 +2934,9 @@
pte_t *pte;
vm_page_t m;
uint32_t pte_wbit;
- vm_paddr_t pa;
-
+
m = NULL;
- pa = 0;
PMAP_LOCK(pmap);
-retry:
pte = pte_find(mmu, pmap, va);
if ((pte != NULL) && PTE_ISVALID(pte)) {
if (pmap == kernel_pmap)
@@ -2948,14 +2945,11 @@
pte_wbit = PTE_UW;
if ((*pte & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa))
- goto retry;
m = PHYS_TO_VM_PAGE(PTE_PA(pte));
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
-
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/riscv/riscv/pmap.c
===================================================================
--- sys/riscv/riscv/pmap.c
+++ sys/riscv/riscv/pmap.c
@@ -869,24 +869,19 @@
{
pt_entry_t *l3p, l3;
vm_paddr_t phys;
- vm_paddr_t pa;
vm_page_t m;
- pa = 0;
m = NULL;
PMAP_LOCK(pmap);
-retry:
l3p = pmap_l3(pmap, va);
if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
if ((l3 & PTE_W) != 0 || (prot & VM_PROT_WRITE) == 0) {
phys = PTE_TO_PHYS(l3);
- if (vm_page_pa_tryrelock(pmap, phys, &pa))
- goto retry;
m = PHYS_TO_VM_PAGE(phys);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
Index: sys/sparc64/sparc64/pmap.c
===================================================================
--- sys/sparc64/sparc64/pmap.c
+++ sys/sparc64/sparc64/pmap.c
@@ -847,19 +847,15 @@
{
struct tte *tp;
vm_page_t m;
- vm_paddr_t pa;
m = NULL;
- pa = 0;
PMAP_LOCK(pm);
-retry:
if (pm == kernel_pmap) {
if (va >= VM_MIN_DIRECT_ADDRESS) {
tp = NULL;
m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
- (void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va),
- &pa);
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
} else {
tp = tsb_kvtotte(va);
if ((tp->tte_data & TD_V) == 0)
@@ -869,12 +865,10 @@
tp = tsb_tte_lookup(pm, va);
if (tp != NULL && ((tp->tte_data & TD_SW) ||
(prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa))
- goto retry;
m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
- vm_page_wire(m);
+ if (!vm_page_wire_mapped(m))
+ m = NULL;
}
- PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pm);
return (m);
}
Index: sys/vm/device_pager.c
===================================================================
--- sys/vm/device_pager.c
+++ sys/vm/device_pager.c
@@ -235,9 +235,7 @@
if (object->type == OBJT_MGTDEVICE) {
KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m));
pmap_remove_all(m);
- vm_page_lock(m);
- vm_page_remove(m);
- vm_page_unlock(m);
+ (void)vm_page_remove(m);
} else if (object->type == OBJT_DEVICE)
dev_pager_free_page(object, m);
}
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -251,18 +251,6 @@
vm_pager_page_unswapped(m);
}
-static void
-vm_fault_fill_hold(vm_page_t *m_hold, vm_page_t m)
-{
-
- if (m_hold != NULL) {
- *m_hold = m;
- vm_page_lock(m);
- vm_page_wire(m);
- vm_page_unlock(m);
- }
-}
-
/*
* Unlocks fs.first_object and fs.map on success.
*/
@@ -323,7 +311,10 @@
PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), psind);
if (rv != KERN_SUCCESS)
return (rv);
- vm_fault_fill_hold(m_hold, m);
+ if (m_hold != NULL) {
+ *m_hold = m;
+ vm_page_wire(m);
+ }
vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false);
if (psind == 0 && !wired)
vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true);
@@ -499,11 +490,12 @@
VM_OBJECT_WLOCK(fs->first_object);
m_mtx = NULL;
for (i = 0; i < npages; i++) {
- vm_page_change_lock(&m[i], &m_mtx);
- if ((fault_flags & VM_FAULT_WIRE) != 0)
+ if ((fault_flags & VM_FAULT_WIRE) != 0) {
vm_page_wire(&m[i]);
- else
+ } else {
+ vm_page_change_lock(&m[i], &m_mtx);
vm_page_activate(&m[i]);
+ }
if (m_hold != NULL && m[i].pindex == fs->first_pindex) {
*m_hold = &m[i];
vm_page_wire(&m[i]);
@@ -1151,10 +1143,11 @@
* daemon, while it is disassociated from an
* object.
*/
+ vm_page_wire(fs.m);
+
mtx = NULL;
vm_page_change_lock(fs.m, &mtx);
- vm_page_wire(fs.m);
- vm_page_remove(fs.m);
+ (void)vm_page_remove(fs.m);
vm_page_change_lock(fs.first_m, &mtx);
vm_page_replace_checked(fs.m, fs.first_object,
fs.first_pindex, fs.first_m);
@@ -1187,10 +1180,8 @@
fs.first_m->valid = VM_PAGE_BITS_ALL;
if (wired && (fault_flags &
VM_FAULT_WIRE) == 0) {
- vm_page_lock(fs.first_m);
vm_page_wire(fs.first_m);
- vm_page_unlock(fs.first_m);
-
+
vm_page_lock(fs.m);
vm_page_unwire(fs.m, PQ_INACTIVE);
vm_page_unlock(fs.m);
@@ -1326,21 +1317,22 @@
faultcount > 0 ? behind : PFBAK,
faultcount > 0 ? ahead : PFFOR, false);
VM_OBJECT_WLOCK(fs.object);
- vm_page_lock(fs.m);
/*
* If the page is not wired down, then put it where the pageout daemon
* can find it.
*/
- if ((fault_flags & VM_FAULT_WIRE) != 0)
+ if ((fault_flags & VM_FAULT_WIRE) != 0) {
vm_page_wire(fs.m);
- else
+ } else {
+ vm_page_lock(fs.m);
vm_page_activate(fs.m);
+ vm_page_unlock(fs.m);
+ }
if (m_hold != NULL) {
*m_hold = fs.m;
vm_page_wire(fs.m);
}
- vm_page_unlock(fs.m);
vm_page_xunbusy(fs.m);
/*
@@ -1611,9 +1603,7 @@
for (mp = ma; mp < ma + count; mp++)
if (*mp != NULL) {
vm_page_lock(*mp);
- if (vm_page_unwire(*mp, PQ_INACTIVE) &&
- (*mp)->object == NULL)
- vm_page_free(*mp);
+ vm_page_unwire(*mp, PQ_INACTIVE);
vm_page_unlock(*mp);
}
return (-1);
@@ -1814,9 +1804,7 @@
vm_page_lock(src_m);
vm_page_unwire(src_m, PQ_INACTIVE);
vm_page_unlock(src_m);
- vm_page_lock(dst_m);
vm_page_wire(dst_m);
- vm_page_unlock(dst_m);
} else {
KASSERT(vm_page_wired(dst_m),
("dst_m %p is not wired", dst_m));
Index: sys/vm/vm_glue.c
===================================================================
--- sys/vm/vm_glue.c
+++ sys/vm/vm_glue.c
@@ -405,10 +405,8 @@
m = vm_page_lookup(ksobj, i);
if (m == NULL)
panic("vm_thread_dispose: kstack already missing?");
- vm_page_lock(m);
vm_page_unwire_noq(m);
vm_page_free(m);
- vm_page_unlock(m);
}
VM_OBJECT_WUNLOCK(ksobj);
vm_object_deallocate(ksobj);
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -699,12 +699,9 @@
vm_object_terminate_pages(vm_object_t object)
{
vm_page_t p, p_next;
- struct mtx *mtx;
VM_OBJECT_ASSERT_WLOCKED(object);
- mtx = NULL;
-
/*
* Free any remaining pageable pages. This also removes them from the
* paging queues. However, don't free wired pages, just remove them
@@ -713,20 +710,15 @@
*/
TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
vm_page_assert_unbusied(p);
- if ((object->flags & OBJ_UNMANAGED) == 0)
- /*
- * vm_page_free_prep() only needs the page
- * lock for managed pages.
- */
- vm_page_change_lock(p, &mtx);
+ KASSERT(p->object == object && p->ref_count > 0,
+ ("vm_object_terminate_pages: page %p is inconsistent", p));
+
p->object = NULL;
- if (vm_page_wired(p))
- continue;
- VM_CNT_INC(v_pfree);
- vm_page_free(p);
+ if (vm_page_drop(p, -VPRC_OBJREF) == VPRC_OBJREF) {
+ VM_CNT_INC(v_pfree);
+ vm_page_free(p);
+ }
}
- if (mtx != NULL)
- mtx_unlock(mtx);
/*
* If the object contained any pages, then reset it to an empty state.
@@ -1588,18 +1580,10 @@
swap_pager_freespace(backing_object, p->pindex,
1);
- /*
- * Page is out of the parent object's range, we can
- * simply destroy it.
- */
- vm_page_lock(p);
KASSERT(!pmap_page_is_mapped(p),
("freeing mapped page %p", p));
- if (!vm_page_wired(p))
+ if (vm_page_remove(p))
vm_page_free(p);
- else
- vm_page_remove(p);
- vm_page_unlock(p);
continue;
}
@@ -1636,14 +1620,10 @@
if (backing_object->type == OBJT_SWAP)
swap_pager_freespace(backing_object, p->pindex,
1);
- vm_page_lock(p);
KASSERT(!pmap_page_is_mapped(p),
("freeing mapped page %p", p));
- if (!vm_page_wired(p))
+ if (vm_page_remove(p))
vm_page_free(p);
- else
- vm_page_remove(p);
- vm_page_unlock(p);
continue;
}
@@ -1944,6 +1924,7 @@
VM_OBJECT_WLOCK(object);
goto again;
}
+wired:
if (vm_page_wired(p)) {
if ((options & OBJPR_NOTMAPPED) == 0 &&
object->ref_count != 0)
@@ -1964,14 +1945,17 @@
("vm_object_page_remove: page %p is fictitious", p));
if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) {
if ((options & OBJPR_NOTMAPPED) == 0 &&
- object->ref_count != 0)
- pmap_remove_write(p);
+ object->ref_count != 0 &&
+ !vm_page_try_remove_write(p))
+ goto wired;
if (p->dirty != 0)
continue;
}
- if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0)
- pmap_remove_all(p);
- vm_page_free(p);
+ if ((options & OBJPR_NOTMAPPED) == 0 &&
+ object->ref_count != 0 && !vm_page_try_remove_all(p))
+ goto wired;
+ if (vm_page_remove(p))
+ vm_page_free(p);
}
if (mtx != NULL)
mtx_unlock(mtx);
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -115,24 +115,19 @@
* the implementation of read-modify-write operations on the
* field is encapsulated in vm_page_clear_dirty_mask().
*
- * The page structure contains two counters which prevent page reuse.
- * Both counters are protected by the page lock (P). The hold
- * counter counts transient references obtained via a pmap lookup, and
- * is also used to prevent page reclamation in situations where it is
- * undesirable to block other accesses to the page. The wire counter
- * is used to implement mlock(2) and is non-zero for pages containing
- * kernel memory. Pages that are wired or held will not be reclaimed
- * or laundered by the page daemon, but are treated differently during
- * a page queue scan: held pages remain at their position in the queue,
- * while wired pages are removed from the queue and must later be
- * re-enqueued appropriately by the unwiring thread. It is legal to
- * call vm_page_free() on a held page; doing so causes it to be removed
- * from its object and page queue, and the page is released to the
- * allocator once the last hold reference is dropped. In contrast,
- * wired pages may not be freed.
- *
- * In some pmap implementations, the wire count of a page table page is
- * used to track the number of populated entries.
+ * The ref_count field tracks references to the page. References that
+ * prevent the page from being reclaimable are called wirings and are
+ * counted in the low bits of ref_count. Upper bits are reserved for
+ * special references that do not prevent reclamation of the page.
+ * Specifically, the containing object, if any, holds such a reference,
+ * and the page daemon takes a transient reference when it is scanning
+ * a page. Updates to ref_count are atomic unless the page is
+ * unallocated. To wire a page after it has been allocated, the object
+ * lock must be held, or the page must be busy, or the wiring thread
+ * must atomically take a reference and verify that the VPRC_BLOCKED
+ * bit is not set. No locks are required to unwire a page, but care
+ * must be taken to free the page if that wiring represented the last
+ * reference to the page.
*
* The busy lock is an embedded reader-writer lock which protects the
* page's contents and identity (i.e., its <object, pindex> tuple) and
@@ -155,7 +150,11 @@
* be held. It is invalid for a page's queue field to transition
* between two distinct page queue indices. That is, when updating
* the queue field, either the new value or the old value must be
- * PQ_NONE.
+ * PQ_NONE. There is one exception to this rule: the page daemon may
+ * transition the queue field from PQ_INACTIVE to PQ_NONE immediately
+ * prior to freeing a page during an inactive queue scan. At that
+ * point the page will have already been physically dequeued, and it
+ * is known that no other references to that vm_page structure exist.
*
* To avoid contention on page queue locks, page queue operations
* (enqueue, dequeue, requeue) are batched using per-CPU queues.
@@ -168,7 +167,9 @@
* may be freed before its pending batch queue entries have been
* processed. The page lock (P) must be held to schedule a batched
* queue operation, and the page queue lock must be held in order to
- * process batch queue entries for the page queue.
+ * process batch queue entries for the page queue. When the page is
+ * being freed, the thread freeing the page is permitted to schedule
+ * a dequeue of the page without the page lock held.
*/
#if PAGE_SIZE == 4096
@@ -198,11 +199,14 @@
} memguard;
} plinks;
TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */
- vm_object_t object; /* which object am I in (O,P) */
+ vm_object_t object; /* which object am I in (O) */
vm_pindex_t pindex; /* offset into object (O,P) */
vm_paddr_t phys_addr; /* physical address of page (C) */
struct md_page md; /* machine dependent stuff */
- u_int wire_count; /* wired down maps refs (P) */
+ union {
+ u_int wire_count;
+ u_int ref_count; /* page references */
+ };
volatile u_int busy_lock; /* busy owners lock */
uint16_t flags; /* page PG_* flags (P) */
uint8_t order; /* index of the buddy queue (F) */
@@ -219,6 +223,34 @@
vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */
};
+/*
+ * Special bits used in the ref_count field.
+ *
+ * ref_count is normally used to count wirings that prevent the page from being
+ * reclaimed, but also supports several special types of references that do not
+ * prevent reclamation. Accesses to the ref_count field must be atomic unless
+ * the page is unallocated.
+ *
+ * VPRC_PDREF is a transient reference acquired by the page daemon when
+ * scanning. Pages may be dequeued without the page lock held when they are
+ * being freed, and this reference ensures that the page daemon is not
+ * simultaneously manipulating the queue state of the page. The page lock must
+ * be held to set or clear this bit.
+ *
+ * VPRC_OBJREF is the reference held by the containing object. It can be set
+ * or cleared only when the corresponding object's write lock is held.
+ *
+ * VPRC_BLOCKED is used to atomically block wirings via pmap lookups while
+ * attempting to tear down all mappings of a given page. The page lock and
+ * object write lock must both be held in order to set or clear this bit.
+ */
+#define VPRC_BLOCKED 0x20000000u /* mappings are being removed */
+#define VPRC_OBJREF 0x40000000u /* object reference, cleared with (O) */
+#define VPRC_PDREF 0x80000000u /* page daemon reference for scanning */
+#define _VPRC_REFMASK (VPRC_BLOCKED | VPRC_OBJREF | VPRC_PDREF)
+#define VPRC_WIRE_COUNT(c) ((c) & ~_VPRC_REFMASK)
+#define VPRC_WIRE_COUNT_MAX (~_VPRC_REFMASK)
+
/*
* Page flags stored in oflags:
*
@@ -557,8 +589,10 @@
bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
void vm_page_reference(vm_page_t m);
-void vm_page_remove (vm_page_t);
-int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
+void vm_page_release(vm_page_t m, bool nocache);
+void vm_page_release_locked(vm_page_t m, bool nocache);
+bool vm_page_remove(vm_page_t);
+int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t);
vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object,
vm_pindex_t pindex);
void vm_page_requeue(vm_page_t m);
@@ -569,14 +603,16 @@
int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
void vm_page_sunbusy(vm_page_t m);
-bool vm_page_try_to_free(vm_page_t m);
+bool vm_page_try_remove_all(vm_page_t m);
+bool vm_page_try_remove_write(vm_page_t m);
int vm_page_trysbusy(vm_page_t m);
void vm_page_unhold_pages(vm_page_t *ma, int count);
void vm_page_unswappable(vm_page_t m);
-bool vm_page_unwire(vm_page_t m, uint8_t queue);
+void vm_page_unwire(vm_page_t m, uint8_t queue);
bool vm_page_unwire_noq(vm_page_t m);
void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
-void vm_page_wire (vm_page_t);
+void vm_page_wire(vm_page_t);
+bool vm_page_wire_mapped(vm_page_t m);
void vm_page_xunbusy_hard(vm_page_t m);
void vm_page_xunbusy_maybelocked(vm_page_t m);
void vm_page_set_validclean (vm_page_t, int, int);
@@ -806,6 +842,23 @@
return (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE);
}
+/*
+ * vm_page_drop:
+ *
+ * Release a reference to a page and return the old reference count.
+ */
+static inline u_int
+vm_page_drop(vm_page_t m, u_int val)
+{
+
+ /*
+ * Synchronize with vm_page_free_prep(): ensure that all updates to the
+ * page structure are visible before it is freed.
+ */
+ atomic_thread_fence_rel();
+ return (atomic_fetchadd_int(&m->ref_count, val));
+}
+
/*
* vm_page_wired:
*
@@ -815,7 +868,7 @@
vm_page_wired(vm_page_t m)
{
- return (m->wire_count > 0);
+ return (VPRC_WIRE_COUNT(m->ref_count) > 0);
}
#endif /* _KERNEL */
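
To make the new reference layout concrete, here is a short illustrative sketch,
not part of the patch: a page resident in an object and wired once has
ref_count == (VPRC_OBJREF | 1), VPRC_WIRE_COUNT() extracts the wiring count,
and vm_page_remove(), which drops VPRC_OBJREF via vm_page_drop(), reports
whether the object held the final reference. The helper name is hypothetical;
the pattern matches the vm_object.c hunks above (KASSERT requires sys/systm.h
in addition to the headers listed earlier).

/*
 * Detach a page from its object and free it if the object's reference was
 * the last one.  The object write lock is held; no page lock is needed to
 * drop VPRC_OBJREF.
 */
static void
example_object_detach_page(vm_page_t m)
{
	KASSERT((m->ref_count & VPRC_OBJREF) != 0,
	    ("example: page %p has no object reference", m));

	/*
	 * A wired page survives with object == NULL until its last wiring
	 * is released, at which point vm_page_unwire() frees it.
	 */
	if (vm_page_remove(m))
		vm_page_free(m);
}
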
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -161,16 +161,17 @@
static void vm_page_enqueue(vm_page_t m, uint8_t queue);
static void vm_page_init(void *dummy);
static int vm_page_insert_after(vm_page_t m, vm_object_t object,
- vm_pindex_t pindex, vm_page_t mpred);
+ vm_pindex_t pindex, vm_page_t mpred, const bool alloc);
static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
vm_page_t mpred);
+static void vm_page_mvqueue(vm_page_t m, int queue);
static int vm_page_reclaim_run(int req_class, int domain, u_long npages,
vm_page_t m_run, vm_paddr_t high);
static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object,
int req);
-static int vm_page_import(void *arg, void **store, int cnt, int domain,
+static int vm_page_zone_import(void *arg, void **store, int cnt, int domain,
int flags);
-static void vm_page_release(void *arg, void **store, int cnt);
+static void vm_page_zone_release(void *arg, void **store, int cnt);
SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL);
@@ -204,7 +205,7 @@
continue;
vmd->vmd_pgcache = uma_zcache_create("vm pgcache",
sizeof(struct vm_page), NULL, NULL, NULL, NULL,
- vm_page_import, vm_page_release, vmd,
+ vm_page_zone_import, vm_page_zone_release, vmd,
UMA_ZONE_MAXBUCKET | UMA_ZONE_VM);
(void )uma_zone_set_maxcache(vmd->vmd_pgcache, 0);
}
@@ -502,7 +503,7 @@
{
m->object = NULL;
- m->wire_count = 0;
+ m->ref_count = 0;
m->busy_lock = VPB_UNBUSIED;
m->flags = m->aflags = 0;
m->phys_addr = pa;
@@ -1098,8 +1099,7 @@
mtx = NULL;
for (; count != 0; count--) {
vm_page_change_lock(*ma, &mtx);
- if (vm_page_unwire(*ma, PQ_ACTIVE) && (*ma)->object == NULL)
- vm_page_free(*ma);
+ vm_page_unwire(*ma, PQ_ACTIVE);
ma++;
}
if (mtx != NULL)
@@ -1166,7 +1166,8 @@
/* Fictitious pages don't use "order" or "pool". */
m->oflags = VPO_UNMANAGED;
m->busy_lock = VPB_SINGLE_EXCLUSIVER;
- m->wire_count = 1;
+ /* Fictitious pages are unevictable. */
+ m->ref_count = 1;
pmap_page_init(m);
memattr:
pmap_page_set_memattr(m, memattr);
@@ -1326,7 +1327,7 @@
VM_OBJECT_ASSERT_WLOCKED(object);
mpred = vm_radix_lookup_le(&object->rtree, pindex);
- return (vm_page_insert_after(m, object, pindex, mpred));
+ return (vm_page_insert_after(m, object, pindex, mpred, false));
}
/*
@@ -1337,11 +1338,14 @@
* The page "mpred" must immediately precede the offset "pindex" within
* the specified object.
*
+ * "alloc" should be true if the page is being allocated and false
+ * otherwise.
+ *
* The object must be locked.
*/
static int
vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
- vm_page_t mpred)
+ vm_page_t mpred, const bool alloc)
{
vm_page_t msucc;
@@ -1361,10 +1365,14 @@
("vm_page_insert_after: msucc doesn't succeed pindex"));
/*
- * Record the object/offset pair in this page
+ * Record the object/offset pair in this page.
*/
m->object = object;
m->pindex = pindex;
+ if (alloc)
+ m->ref_count |= VPRC_OBJREF;
+ else
+ atomic_set_int(&m->ref_count, VPRC_OBJREF);
/*
* Now link into the object's ordered list of backed pages.
@@ -1372,6 +1380,10 @@
if (vm_radix_insert(&object->rtree, m)) {
m->object = NULL;
m->pindex = 0;
+ if (alloc)
+ m->ref_count &= ~VPRC_OBJREF;
+ else
+ atomic_clear_int(&m->ref_count, VPRC_OBJREF);
return (1);
}
vm_page_insert_radixdone(m, object, mpred);
@@ -1396,11 +1408,13 @@
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(object != NULL && m->object == object,
("vm_page_insert_radixdone: page %p has inconsistent object", m));
+ KASSERT((m->ref_count & VPRC_OBJREF) != 0,
+ ("vm_page_insert_radixdone: page %p is missing object ref", m));
if (mpred != NULL) {
KASSERT(mpred->object == object,
- ("vm_page_insert_after: object doesn't contain mpred"));
+ ("vm_page_insert_radixdone: object doesn't contain mpred"));
KASSERT(mpred->pindex < m->pindex,
- ("vm_page_insert_after: mpred doesn't precede pindex"));
+ ("vm_page_insert_radixdone: mpred doesn't precede pindex"));
}
if (mpred != NULL)
@@ -1431,21 +1445,21 @@
* vm_page_remove:
*
* Removes the specified page from its containing object, but does not
- * invalidate any backing storage.
+ * invalidate any backing storage. Returns true if the object's reference
+ * was the last reference to the page, and false otherwise.
*
- * The object must be locked. The page must be locked if it is managed.
+ * The object must be locked.
*/
-void
+bool
vm_page_remove(vm_page_t m)
{
vm_object_t object;
vm_page_t mrem;
- if ((m->oflags & VPO_UNMANAGED) == 0)
- vm_page_assert_locked(m);
- if ((object = m->object) == NULL)
- return;
+ object = m->object;
VM_OBJECT_ASSERT_WLOCKED(object);
+ KASSERT((m->ref_count & VPRC_OBJREF) != 0,
+ ("page %p is missing its object ref", m));
if (vm_page_xbusied(m))
vm_page_xunbusy_maybelocked(m);
mrem = vm_radix_remove(&object->rtree, m->pindex);
@@ -1467,7 +1481,12 @@
if (object->resident_page_count == 0 && object->type == OBJT_VNODE)
vdrop(object->handle);
+ /*
+ * Release the object reference. The caller may free the page
+ * after this point.
+ */
m->object = NULL;
+ return (vm_page_drop(m, -VPRC_OBJREF) == VPRC_OBJREF);
}
/*
@@ -1548,8 +1567,6 @@
/*
* Uses the page mnew as a replacement for an existing page at index
* pindex which must be already present in the object.
- *
- * The existing page must not be on a paging queue.
*/
vm_page_t
vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex)
@@ -1559,8 +1576,6 @@
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(mnew->object == NULL,
("vm_page_replace: page %p already in object", mnew));
- KASSERT(mnew->queue == PQ_NONE || vm_page_wired(mnew),
- ("vm_page_replace: new page %p is on a paging queue", mnew));
/*
* This function mostly follows vm_page_insert() and
@@ -1570,6 +1585,7 @@
mnew->object = object;
mnew->pindex = pindex;
+ atomic_set_int(&mnew->ref_count, VPRC_OBJREF);
mold = vm_radix_replace(&object->rtree, mnew);
KASSERT(mold->queue == PQ_NONE,
("vm_page_replace: old page %p is on a paging queue", mold));
@@ -1579,6 +1595,7 @@
TAILQ_REMOVE(&object->memq, mold, listq);
mold->object = NULL;
+ atomic_clear_int(&mold->ref_count, VPRC_OBJREF);
vm_page_xunbusy_maybelocked(mold);
/*
@@ -1616,6 +1633,7 @@
VM_OBJECT_ASSERT_WLOCKED(new_object);
+ KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m));
mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex);
KASSERT(mpred == NULL || mpred->pindex != new_pindex,
("vm_page_rename: pindex already renamed"));
@@ -1638,11 +1656,13 @@
*/
m->pindex = opidx;
vm_page_lock(m);
- vm_page_remove(m);
+ (void)vm_page_remove(m);
/* Return back to the new pindex to complete vm_page_insert(). */
m->pindex = new_pindex;
m->object = new_object;
+ atomic_set_int(&m->ref_count, VPRC_OBJREF);
+
vm_page_unlock(m);
vm_page_insert_radixdone(m, new_object, mpred);
vm_page_dirty(m);
@@ -1861,15 +1881,15 @@
* page is inserted into the object.
*/
vm_wire_add(1);
- m->wire_count = 1;
+ m->ref_count = 1;
}
m->act_count = 0;
if (object != NULL) {
- if (vm_page_insert_after(m, object, pindex, mpred)) {
+ if (vm_page_insert_after(m, object, pindex, mpred, true)) {
if (req & VM_ALLOC_WIRED) {
vm_wire_sub(1);
- m->wire_count = 0;
+ m->ref_count = 0;
}
KASSERT(m->object == NULL, ("page %p has object", m));
m->oflags = VPO_UNMANAGED;
@@ -2061,11 +2081,12 @@
m->flags = (m->flags | PG_NODUMP) & flags;
m->busy_lock = busy_lock;
if ((req & VM_ALLOC_WIRED) != 0)
- m->wire_count = 1;
+ m->ref_count = 1;
m->act_count = 0;
m->oflags = oflags;
if (object != NULL) {
- if (vm_page_insert_after(m, object, pindex, mpred)) {
+ if (vm_page_insert_after(m, object, pindex, mpred,
+ true)) {
if ((req & VM_ALLOC_WIRED) != 0)
vm_wire_sub(npages);
KASSERT(m->object == NULL,
@@ -2074,7 +2095,7 @@
for (m = m_ret; m < &m_ret[npages]; m++) {
if (m <= mpred &&
(req & VM_ALLOC_WIRED) != 0)
- m->wire_count = 0;
+ m->ref_count = 0;
m->oflags = VPO_UNMANAGED;
m->busy_lock = VPB_UNBUSIED;
/* Don't change PG_ZERO. */
@@ -2108,7 +2129,7 @@
KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0,
("page %p has unexpected queue %d, flags %#x",
m, m->queue, (m->aflags & PGA_QUEUE_STATE_MASK)));
- KASSERT(!vm_page_wired(m), ("page %p is wired", m));
+ KASSERT(m->ref_count == 0, ("page %p has references", m));
KASSERT(!vm_page_busied(m), ("page %p is busy", m));
KASSERT(m->dirty == 0, ("page %p is dirty", m));
KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
@@ -2192,7 +2213,7 @@
* not belong to an object.
*/
vm_wire_add(1);
- m->wire_count = 1;
+ m->ref_count = 1;
}
/* Unmanaged pages don't use "act_count". */
m->oflags = VPO_UNMANAGED;
@@ -2200,7 +2221,7 @@
}
static int
-vm_page_import(void *arg, void **store, int cnt, int domain, int flags)
+vm_page_zone_import(void *arg, void **store, int cnt, int domain, int flags)
{
struct vm_domain *vmd;
int i;
@@ -2221,7 +2242,7 @@
}
static void
-vm_page_release(void *arg, void **store, int cnt)
+vm_page_zone_release(void *arg, void **store, int cnt)
{
struct vm_domain *vmd;
vm_page_t m;
@@ -2281,8 +2302,8 @@
for (m = m_start; m < m_end && run_len < npages; m += m_inc) {
KASSERT((m->flags & PG_MARKER) == 0,
("page %p is PG_MARKER", m));
- KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->wire_count == 1,
- ("fictitious page %p has invalid wire count", m));
+ KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1,
+ ("fictitious page %p has invalid ref count", m));
/*
* If the current page would be the start of a run, check its
@@ -2339,9 +2360,6 @@
*/
VM_OBJECT_RUNLOCK(object);
goto retry;
- } else if (vm_page_wired(m)) {
- run_ext = 0;
- goto unlock;
}
}
/* Don't care: PG_NODUMP, PG_ZERO. */
@@ -2359,7 +2377,8 @@
vm_reserv_size(level)) - pa);
#endif
} else if (object->memattr == VM_MEMATTR_DEFAULT &&
- vm_page_queue(m) != PQ_NONE && !vm_page_busied(m)) {
+ vm_page_queue(m) != PQ_NONE && !vm_page_busied(m) &&
+ !vm_page_wired(m)) {
/*
* The page is allocated but eligible for
* relocation. Extend the current run by one
@@ -2375,7 +2394,6 @@
run_ext = 1;
} else
run_ext = 0;
-unlock:
VM_OBJECT_RUNLOCK(object);
#if VM_NRESERVLEVEL > 0
} else if (level >= 0) {
@@ -2496,9 +2514,6 @@
*/
VM_OBJECT_WUNLOCK(object);
goto retry;
- } else if (vm_page_wired(m)) {
- error = EBUSY;
- goto unlock;
}
}
/* Don't care: PG_NODUMP, PG_ZERO. */
@@ -2509,7 +2524,7 @@
else if (object->memattr != VM_MEMATTR_DEFAULT)
error = EINVAL;
else if (vm_page_queue(m) != PQ_NONE &&
- !vm_page_busied(m)) {
+ !vm_page_busied(m) && !vm_page_wired(m)) {
KASSERT(pmap_page_get_memattr(m) ==
VM_MEMATTR_DEFAULT,
("page %p has an unexpected memattr", m));
@@ -2558,8 +2573,6 @@
error = ENOMEM;
goto unlock;
}
- KASSERT(!vm_page_wired(m_new),
- ("page %p is wired", m_new));
/*
* Replace "m" with the new page. For
@@ -2567,8 +2580,11 @@
* and dequeued. Finally, change "m"
* as if vm_page_free() was called.
*/
- if (object->ref_count != 0)
- pmap_remove_all(m);
+ if (object->ref_count != 0 &&
+ !vm_page_try_remove_all(m)) {
+ error = EBUSY;
+ goto unlock;
+ }
m_new->aflags = m->aflags &
~PGA_QUEUE_STATE_MASK;
KASSERT(m_new->oflags == VPO_UNMANAGED,
@@ -3128,8 +3144,7 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("page %p is unmanaged", m));
- KASSERT(mtx_owned(vm_page_lockptr(m)) ||
- (m->object == NULL && (m->aflags & PGA_DEQUEUE) != 0),
+ KASSERT(mtx_owned(vm_page_lockptr(m)) || m->object == NULL,
("missing synchronization for page %p", m));
KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue));
@@ -3258,7 +3273,7 @@
{
uint8_t queue;
- KASSERT(m->object == NULL, ("page %p has an object reference", m));
+ KASSERT(m->ref_count == 0, ("page %p has references", m));
if ((m->aflags & PGA_DEQUEUE) != 0)
return;
@@ -3369,35 +3384,6 @@
vm_pqbatch_submit_page(m, atomic_load_8(&m->queue));
}
-/*
- * vm_page_activate:
- *
- * Put the specified page on the active list (if appropriate).
- * Ensure that act_count is at least ACT_INIT but do not otherwise
- * mess with it.
- *
- * The page must be locked.
- */
-void
-vm_page_activate(vm_page_t m)
-{
-
- vm_page_assert_locked(m);
-
- if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0)
- return;
- if (vm_page_queue(m) == PQ_ACTIVE) {
- if (m->act_count < ACT_INIT)
- m->act_count = ACT_INIT;
- return;
- }
-
- vm_page_dequeue(m);
- if (m->act_count < ACT_INIT)
- m->act_count = ACT_INIT;
- vm_page_enqueue(m, PQ_ACTIVE);
-}
-
/*
* vm_page_free_prep:
*
@@ -3412,6 +3398,12 @@
vm_page_free_prep(vm_page_t m)
{
+ /*
+ * Synchronize with vm_page_drop(): ensure that all page modifications
+ * are visible before proceeding.
+ */
+ atomic_thread_fence_acq();
+
#if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP)
if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) {
uint64_t *p;
@@ -3422,11 +3414,10 @@
m, i, (uintmax_t)*p));
}
#endif
- if ((m->oflags & VPO_UNMANAGED) == 0) {
- vm_page_lock_assert(m, MA_OWNED);
+ if ((m->oflags & VPO_UNMANAGED) == 0)
KASSERT(!pmap_page_is_mapped(m),
("vm_page_free_prep: freeing mapped page %p", m));
- } else
+ else
KASSERT(m->queue == PQ_NONE,
("vm_page_free_prep: unmanaged page %p is queued", m));
VM_CNT_INC(v_tfree);
@@ -3434,15 +3425,16 @@
if (vm_page_sbusied(m))
panic("vm_page_free_prep: freeing busy page %p", m);
- vm_page_remove(m);
+ if (m->object != NULL)
+ (void)vm_page_remove(m);
/*
* If fictitious remove object association and
* return.
*/
if ((m->flags & PG_FICTITIOUS) != 0) {
- KASSERT(m->wire_count == 1,
- ("fictitious page %p is not wired", m));
+ KASSERT(m->ref_count == 1,
+ ("fictitious page %p is referenced", m));
KASSERT(m->queue == PQ_NONE,
("fictitious page %p is queued", m));
return (false);
@@ -3459,8 +3451,8 @@
m->valid = 0;
vm_page_undirty(m);
- if (vm_page_wired(m) != 0)
- panic("vm_page_free_prep: freeing wired page %p", m);
+ if (m->ref_count != 0)
+ panic("vm_page_free_prep: page %p has references", m);
/*
* Restore the default memory attribute to the page.
@@ -3535,132 +3527,187 @@
}
/*
- * vm_page_wire:
- *
- * Mark this page as wired down. If the page is fictitious, then
- * its wire count must remain one.
- *
- * The page must be locked.
+ * Mark this page as wired down, preventing reclamation by the page daemon
+ * or when the containing object is destroyed.
*/
void
vm_page_wire(vm_page_t m)
{
+ u_int old;
- vm_page_assert_locked(m);
- if ((m->flags & PG_FICTITIOUS) != 0) {
- KASSERT(m->wire_count == 1,
- ("vm_page_wire: fictitious page %p's wire count isn't one",
- m));
- return;
- }
- if (!vm_page_wired(m)) {
- KASSERT((m->oflags & VPO_UNMANAGED) == 0 ||
- m->queue == PQ_NONE,
- ("vm_page_wire: unmanaged page %p is queued", m));
+ KASSERT(m->object != NULL,
+ ("vm_page_wire: page %p does not belong to an object", m));
+ if (!vm_page_busied(m))
+ VM_OBJECT_ASSERT_LOCKED(m->object);
+ KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1,
+ ("vm_page_wire: fictitious page %p has zero refs", m));
+
+ old = atomic_fetchadd_int(&m->ref_count, 1);
+ KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX,
+ ("vm_page_wire: counter overflow for page %p", m));
+ if (VPRC_WIRE_COUNT(old) == 0)
vm_wire_add(1);
- }
- m->wire_count++;
- KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m));
}
/*
- * vm_page_unwire:
- *
+ * Attempt to wire a mapped page following a pmap lookup of that page.
+ * This may fail if a thread is concurrently tearing down mappings of the page.
+ */
+bool
+vm_page_wire_mapped(vm_page_t m)
+{
+ u_int old;
+
+ KASSERT(m->object != NULL,
+	    ("vm_page_wire_mapped: page %p does not belong to an object", m));
+
+ old = m->ref_count;
+ do {
+ KASSERT(old > 0,
+		    ("vm_page_wire_mapped: wiring unreferenced page %p", m));
+ if ((old & VPRC_BLOCKED) != 0)
+ return (false);
+ } while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1));
+
+ if (VPRC_WIRE_COUNT(old) == 0)
+ vm_wire_add(1);
+ return (true);
+}
+
+/*
* Release one wiring of the specified page, potentially allowing it to be
- * paged out. Returns TRUE if the number of wirings transitions to zero and
- * FALSE otherwise.
+ * paged out.
*
* Only managed pages belonging to an object can be paged out. If the number
* of wirings transitions to zero and the page is eligible for page out, then
- * the page is added to the specified paging queue (unless PQ_NONE is
- * specified, in which case the page is dequeued if it belongs to a paging
- * queue).
- *
- * If a page is fictitious, then its wire count must always be one.
+ * the page is added to the specified paging queue. If the released wiring
+ * represented the last reference to the page, the page is freed.
*
* A managed page must be locked.
*/
-bool
+void
vm_page_unwire(vm_page_t m, uint8_t queue)
{
- bool unwired;
+ u_int old;
+ bool queued;
- KASSERT(queue < PQ_COUNT || queue == PQ_NONE,
- ("vm_page_unwire: invalid queue %u request for page %p",
- queue, m));
- if ((m->oflags & VPO_UNMANAGED) == 0)
- vm_page_assert_locked(m);
+ KASSERT(queue < PQ_COUNT,
+ ("vm_page_unwire: invalid queue %u request for page %p", queue, m));
- unwired = vm_page_unwire_noq(m);
- if (!unwired || (m->oflags & VPO_UNMANAGED) != 0 || m->object == NULL)
- return (unwired);
+ if ((m->oflags & VPO_UNMANAGED) != 0) {
+ if (vm_page_unwire_noq(m) && m->ref_count == 0)
+ vm_page_free(m);
+ return;
+ }
- if (vm_page_queue(m) == queue) {
- if (queue == PQ_ACTIVE)
- vm_page_reference(m);
- else if (queue != PQ_NONE)
- vm_page_requeue(m);
- } else {
- vm_page_dequeue(m);
- if (queue != PQ_NONE) {
- vm_page_enqueue(m, queue);
- if (queue == PQ_ACTIVE)
- /* Initialize act_count. */
- vm_page_activate(m);
+ vm_page_assert_locked(m);
+
+ /*
+ * Update LRU state before releasing the wiring reference.
+ * We only need to do this once since we hold the page lock.
+ * Use a release store when updating the reference count to
+ * synchronize with vm_page_free_prep().
+ */
+ old = m->ref_count;
+ queued = false;
+ do {
+ KASSERT(VPRC_WIRE_COUNT(old) > 0,
+ ("vm_page_unwire: wire count underflow for page %p", m));
+ if (!queued && VPRC_WIRE_COUNT(old) == 1) {
+ if (queue == PQ_ACTIVE && vm_page_queue(m) == PQ_ACTIVE)
+ vm_page_reference(m);
+ else
+ vm_page_mvqueue(m, queue);
+ queued = true;
}
+ } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1));
+
+ if (VPRC_WIRE_COUNT(old) == 1) {
+ vm_wire_sub(1);
+ if (old == 1)
+ vm_page_free(m);
}
- return (unwired);
}
/*
- *
- * vm_page_unwire_noq:
- *
* Unwire a page without (re-)inserting it into a page queue. It is up
* to the caller to enqueue, requeue, or free the page as appropriate.
- * In most cases, vm_page_unwire() should be used instead.
+ * In most cases involving managed pages, vm_page_unwire() should be used
+ * instead.
*/
bool
vm_page_unwire_noq(vm_page_t m)
{
+ u_int old;
- if ((m->oflags & VPO_UNMANAGED) == 0)
- vm_page_assert_locked(m);
- if ((m->flags & PG_FICTITIOUS) != 0) {
- KASSERT(m->wire_count == 1,
- ("vm_page_unwire: fictitious page %p's wire count isn't one", m));
+ old = vm_page_drop(m, -1);
+ KASSERT(VPRC_WIRE_COUNT(old) != 0,
+ ("vm_page_unref: counter underflow for page %p", m));
+ KASSERT((m->flags & PG_FICTITIOUS) == 0 || VPRC_WIRE_COUNT(old) > 1,
+ ("vm_page_unref: missing ref on fictitious page %p", m));
+
+ if (VPRC_WIRE_COUNT(old) > 1)
return (false);
+ vm_wire_sub(1);
+ return (true);
+}
+
+/*
+ * Ensure that the page is in the specified page queue. If the page is
+ * active or being moved to the active queue, ensure that its act_count is
+ * at least ACT_INIT but do not otherwise mess with it. Otherwise, ensure that
+ * the page is at the tail of its page queue.
+ *
+ * The page may be wired. The caller should release any wiring references
+ * before releasing the page lock, otherwise the page daemon may immediately
+ * dequeue the page.
+ */
+static __always_inline void
+vm_page_mvqueue(vm_page_t m, const int nqueue)
+{
+
+ vm_page_assert_locked(m);
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_mvqueue: page %p is unmanaged", m));
+
+ if (vm_page_queue(m) != nqueue) {
+ vm_page_dequeue(m);
+ vm_page_enqueue(m, nqueue);
+ } else if (nqueue != PQ_ACTIVE) {
+ vm_page_requeue(m);
}
- if (!vm_page_wired(m))
- panic("vm_page_unwire: page %p's wire count is zero", m);
- m->wire_count--;
- if (m->wire_count == 0) {
- vm_wire_sub(1);
- return (true);
- } else
- return (false);
+
+ if (nqueue == PQ_ACTIVE && m->act_count < ACT_INIT)
+ m->act_count = ACT_INIT;
+}
+
+/*
+ * Put the specified page on the active list (if appropriate).
+ *
+ * A managed page must be locked.
+ */
+void
+vm_page_activate(vm_page_t m)
+{
+
+ if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
+ return;
+ vm_page_mvqueue(m, PQ_ACTIVE);
}
/*
* Move the specified page to the tail of the inactive queue, or requeue
* the page if it is already in the inactive queue.
*
- * The page must be locked.
+ * A managed page must be locked.
*/
void
vm_page_deactivate(vm_page_t m)
{
- vm_page_assert_locked(m);
-
- if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0)
+ if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
return;
-
- if (!vm_page_inactive(m)) {
- vm_page_dequeue(m);
- vm_page_enqueue(m, PQ_INACTIVE);
- } else
- vm_page_requeue(m);
+ vm_page_mvqueue(m, PQ_INACTIVE);
}
/*
@@ -3668,18 +3715,13 @@
* bypassing LRU. A marker page is used to maintain FIFO ordering.
* As with regular enqueues, we use a per-CPU batch queue to reduce
* contention on the page queue lock.
- *
- * The page must be locked.
*/
-void
-vm_page_deactivate_noreuse(vm_page_t m)
+static void
+_vm_page_deactivate_noreuse(vm_page_t m)
{
vm_page_assert_locked(m);
- if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0)
- return;
-
if (!vm_page_inactive(m)) {
vm_page_dequeue(m);
m->queue = PQ_INACTIVE;
@@ -3689,31 +3731,33 @@
vm_pqbatch_submit_page(m, PQ_INACTIVE);
}
+void
+vm_page_deactivate_noreuse(vm_page_t m)
+{
+
+ KASSERT(m->object != NULL,
+ ("vm_page_deactivate_noreuse: page %p has no object", m));
+
+ if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_wired(m))
+ _vm_page_deactivate_noreuse(m);
+}
+
/*
- * vm_page_launder
+ * Put a page in the laundry, or requeue it if it is already there.
*
- * Put a page in the laundry, or requeue it if it is already there.
+ * The page must be locked.
*/
void
vm_page_launder(vm_page_t m)
{
- vm_page_assert_locked(m);
- if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0)
+ if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
return;
-
- if (vm_page_in_laundry(m))
- vm_page_requeue(m);
- else {
- vm_page_dequeue(m);
- vm_page_enqueue(m, PQ_LAUNDRY);
- }
+ vm_page_mvqueue(m, PQ_LAUNDRY);
}
/*
- * vm_page_unswappable
- *
- * Put a page in the PQ_UNSWAPPABLE holding queue.
+ * Put a page in the PQ_UNSWAPPABLE holding queue.
*/
void
vm_page_unswappable(vm_page_t m)
@@ -3728,30 +3772,158 @@
}
/*
- * Attempt to free the page. If it cannot be freed, do nothing. Returns true
- * if the page is freed and false otherwise.
- *
- * The page must be managed. The page and its containing object must be
- * locked.
+ * Release a wired page to the page cache, and optionally attempt to free it.
+ * The page's object must be locked. See the comment above vm_page_release().
*/
-bool
-vm_page_try_to_free(vm_page_t m)
+void
+vm_page_release_locked(vm_page_t m, bool nocache)
+{
+ vm_object_t object;
+
+ object = m->object;
+ VM_OBJECT_ASSERT_WLOCKED(object);
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_release_locked: page %p is unmanaged", m));
+
+ if (!vm_page_unwire_noq(m))
+ return;
+ if (m->valid == 0 || nocache) {
+ if ((object->ref_count == 0 || !pmap_page_is_mapped(m)) &&
+ m->dirty == 0 && !vm_page_busied(m) && !vm_page_wired(m)) {
+ vm_page_free(m);
+ } else {
+ vm_page_lock(m);
+ vm_page_deactivate_noreuse(m);
+ vm_page_unlock(m);
+ }
+ } else {
+ vm_page_lock(m);
+ if (vm_page_active(m))
+ vm_page_reference(m);
+ else
+ vm_page_deactivate(m);
+ vm_page_unlock(m);
+ }
+}
+
+/*
+ * Release a wired page to the page cache, and optionally attempt to free it.
+ * If the caller wishes to attempt to free the page, and the page is mapped,
+ * dirty, busy or wired, we do not free it but instead place it near the head of
+ * the inactive queue to accelerate reclamation.
+ */
+void
+vm_page_release(vm_page_t m, bool nocache)
{
+ vm_object_t object;
+ u_int old;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_release: page %p is unmanaged", m));
+
+ if (nocache) {
+ /*
+ * Attempt to free the page. The page may be renamed between
+ * objects so we must verify the page's object pointer after
+ * acquiring the lock and retry if they do not match.
+ */
+ while ((object = m->object) != NULL) {
+ if (!VM_OBJECT_TRYWLOCK(object)) {
+ object = NULL;
+ break;
+ }
+ if (m->object == object)
+ break;
+ VM_OBJECT_WUNLOCK(object);
+ }
+ if (object != NULL) {
+ vm_page_release_locked(m, nocache);
+ VM_OBJECT_WUNLOCK(object);
+ return;
+ }
+ }
+
+ /*
+ * Update LRU state before releasing the wiring reference.
+ * Use a release store when updating the reference count to
+ * synchronize with vm_page_free_prep().
+ */
+ old = m->ref_count;
+ do {
+ if (VPRC_WIRE_COUNT(old) == 1) {
+ vm_page_lock(m);
+
+ /*
+ * Use a racy check of the valid bits to determine
+ * whether we can accelerate reclamation of the page.
+ * The valid bits will be stable unless the page is
+ * being mapped or is referenced by multiple buffers,
+ * and in those cases we expect races to be rare. At
+ * worst we will either accelerate reclamation of a
+ * valid page and violate LRU, or unnecessarily defer
+ * reclamation of an invalid page.
+ */
+ if (m->valid == 0 || nocache)
+ _vm_page_deactivate_noreuse(m);
+ else if (vm_page_active(m))
+ vm_page_reference(m);
+ else
+ vm_page_mvqueue(m, PQ_INACTIVE);
+ vm_page_unlock(m);
+ }
+ } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1));
+
+ if (VPRC_WIRE_COUNT(old) == 1) {
+ vm_wire_sub(1);
+ if (old == 1)
+ vm_page_free(m);
+ }
+}
+
+/*
+ * Attempt to invoke the requested operation while blocking new wirings of the
+ * page.
+ */
+static bool
+vm_page_try_blocked_op(vm_page_t m, void (*op)(vm_page_t))
+{
+ u_int old;
vm_page_assert_locked(m);
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m));
- if (m->dirty != 0 || vm_page_wired(m) || vm_page_busied(m))
- return (false);
- if (m->object->ref_count != 0) {
- pmap_remove_all(m);
- if (m->dirty != 0)
+ KASSERT(m->object != NULL && (m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_try_blocked_op: page %p has no object", m));
+ KASSERT(!vm_page_busied(m),
+ ("vm_page_try_blocked_op: page %p is busy", m));
+ VM_OBJECT_ASSERT_LOCKED(m->object);
+
+ old = m->ref_count;
+ do {
+ KASSERT(old != 0,
+ ("vm_page_try_blocked_op: page %p has no references", m));
+ if (VPRC_WIRE_COUNT(old) != 0)
return (false);
- }
- vm_page_free(m);
+ } while (!atomic_fcmpset_int(&m->ref_count, &old, old | VPRC_BLOCKED));
+
+ (op)(m);
+
+ atomic_clear_int(&m->ref_count, VPRC_BLOCKED);
return (true);
}
+bool
+vm_page_try_remove_all(vm_page_t m)
+{
+
+ return (vm_page_try_blocked_op(m, pmap_remove_all));
+}
+
+bool
+vm_page_try_remove_write(vm_page_t m)
+{
+
+ return (vm_page_try_blocked_op(m, pmap_remove_write));
+}
+
/*
* vm_page_advise
*
@@ -3846,11 +4018,8 @@
VM_OBJECT_WLOCK(object);
goto retrylookup;
} else {
- if ((allocflags & VM_ALLOC_WIRED) != 0) {
- vm_page_lock(m);
+ if ((allocflags & VM_ALLOC_WIRED) != 0)
vm_page_wire(m);
- vm_page_unlock(m);
- }
if ((allocflags &
(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
vm_page_xbusy(m);
@@ -3948,11 +4117,8 @@
VM_OBJECT_WLOCK(object);
goto retrylookup;
}
- if ((allocflags & VM_ALLOC_WIRED) != 0) {
- vm_page_lock(m);
+ if ((allocflags & VM_ALLOC_WIRED) != 0)
vm_page_wire(m);
- vm_page_unlock(m);
- }
if ((allocflags & (VM_ALLOC_NOBUSY |
VM_ALLOC_SBUSY)) == 0)
vm_page_xbusy(m);
@@ -4481,10 +4647,10 @@
else
m = (vm_page_t)addr;
db_printf(
- "page %p obj %p pidx 0x%jx phys 0x%jx q %d wire %d\n"
+ "page %p obj %p pidx 0x%jx phys 0x%jx q %d ref %u\n"
" af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n",
m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr,
- m->queue, m->wire_count, m->aflags, m->oflags,
+ m->queue, m->ref_count, m->aflags, m->oflags,
m->flags, m->act_count, m->busy_lock, m->valid, m->dirty);
}
#endif /* DDB */
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -314,6 +314,54 @@
return (vm_batchqueue_pop(&ss->bq));
}
+/*
+ * Lock a page and set a reference bit to ensure that it does not get freed out
+ * from under us.
+ */
+static bool
+vm_pageout_lock_and_hold_page(vm_page_t m, struct mtx **mtx)
+{
+ u_int ref_count;
+
+ vm_page_change_lock(m, mtx);
+
+ ref_count = m->ref_count;
+ do {
+ if (ref_count == 0)
+ return (false);
+ } while (!atomic_fcmpset_int(&m->ref_count, &ref_count, ref_count |
+ VPRC_PDREF));
+ return (true);
+}
+
+/*
+ * Drop the page daemon's transient page reference and determine whether we need
+ * to free the page.
+ */
+static bool
+vm_pageout_drop_page(vm_page_t m)
+{
+
+ KASSERT((m->ref_count & VPRC_PDREF) != 0,
+ ("vm_pageout_drop_page: page %p missing pagedaemon ref", m));
+ return (vm_page_drop(m, -VPRC_PDREF) == VPRC_PDREF);
+}
+
+/*
+ * Drop the page daemon's transient reference once we know that the page's
+ * identity is stable.
+ */
+static void
+vm_pageout_drop_page_quick(vm_page_t m)
+{
+
+ VM_OBJECT_ASSERT_LOCKED(m->object);
+ KASSERT((m->ref_count & (VPRC_OBJREF | VPRC_PDREF)) ==
+ (VPRC_OBJREF | VPRC_PDREF),
+ ("vm_pageout_drop_page_quick: page %p missing refs", m));
+ atomic_clear_int(&m->ref_count, VPRC_PDREF);
+}
+
/*
* Scan for pages at adjacent offsets within the given page's object that are
* eligible for laundering, form a cluster of these pages and the given page,
@@ -327,16 +375,11 @@
vm_pindex_t pindex;
int ib, is, page_base, pageout_count;
- vm_page_assert_locked(m);
object = m->object;
VM_OBJECT_ASSERT_WLOCKED(object);
pindex = m->pindex;
vm_page_assert_unbusied(m);
- KASSERT(!vm_page_wired(m), ("page %p is wired", m));
-
- pmap_remove_write(m);
- vm_page_unlock(m);
mc[vm_pageout_page_count] = pb = ps = m;
pageout_count = 1;
@@ -362,7 +405,8 @@
ib = 0;
break;
}
- if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) {
+ if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p) ||
+ vm_page_wired(p)) {
ib = 0;
break;
}
@@ -372,12 +416,11 @@
break;
}
vm_page_lock(p);
- if (vm_page_wired(p) || !vm_page_in_laundry(p)) {
+ if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
vm_page_unlock(p);
ib = 0;
break;
}
- pmap_remove_write(p);
vm_page_unlock(p);
mc[--page_base] = pb = p;
++pageout_count;
@@ -392,17 +435,17 @@
}
while (pageout_count < vm_pageout_page_count &&
pindex + is < object->size) {
- if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
+ if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p) ||
+ vm_page_wired(p))
break;
vm_page_test_dirty(p);
if (p->dirty == 0)
break;
vm_page_lock(p);
- if (vm_page_wired(p) || !vm_page_in_laundry(p)) {
+ if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
vm_page_unlock(p);
break;
}
- pmap_remove_write(p);
vm_page_unlock(p);
mc[page_base + pageout_count] = ps = p;
++pageout_count;
@@ -657,6 +700,13 @@
}
}
+ if (!vm_page_try_remove_write(m)) {
+ vm_page_unlock(m);
+ error = EBUSY;
+ goto unlock_all;
+ }
+ vm_page_unlock(m);
+
/*
* If a page is dirty, then it is either being washed
* (but not yet cleaned) or it is still in the
@@ -726,7 +776,8 @@
if (__predict_false((m->flags & PG_MARKER) != 0))
continue;
- vm_page_change_lock(m, &mtx);
+ if (!vm_pageout_lock_and_hold_page(m, &mtx))
+ continue;
recheck:
/*
@@ -734,7 +785,7 @@
* while locks were dropped.
*/
if (vm_page_queue(m) != queue)
- continue;
+ goto drop;
/*
* A requeue was requested, so this page gets a second
@@ -742,17 +793,19 @@
*/
if ((m->aflags & PGA_REQUEUE) != 0) {
vm_page_requeue(m);
- continue;
+ goto drop;
}
/*
* Wired pages may not be freed. Complete their removal
* from the queue now to avoid needless revisits during
- * future scans.
+ * future scans. This check is racy and must be reverified once
+ * we hold the object lock and have verified that the page
+ * is not busy.
*/
if (vm_page_wired(m)) {
vm_page_dequeue_deferred(m);
- continue;
+ goto drop;
}
if (object != m->object) {
@@ -767,10 +820,33 @@
goto recheck;
}
}
+ if (__predict_false(object == NULL))
+ /*
+ * The page has been removed from its object.
+ * Drop our reference and move on.
+ */
+ goto drop;
+
+ /*
+ * We can drop our transient reference now that we hold
+ * the object lock.
+ */
+ vm_pageout_drop_page_quick(m);
if (vm_page_busied(m))
continue;
+ /*
+ * Re-check for wirings now that we hold the object lock. If
+ * the page is mapped, it may still be wired by pmap lookups.
+ * The call to vm_page_try_remove_all() below atomically checks
+ * for such wirings and removes mappings.
+ */
+ if (__predict_false(vm_page_wired(m))) {
+ vm_page_dequeue_deferred(m);
+ continue;
+ }
+
/*
* Invalid pages can be easily freed. They cannot be
* mapped; vm_page_free() asserts this.
@@ -838,8 +914,10 @@
*/
if (object->ref_count != 0) {
vm_page_test_dirty(m);
- if (m->dirty == 0)
- pmap_remove_all(m);
+ if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
+ vm_page_dequeue_deferred(m);
+ continue;
+ }
}
/*
@@ -889,6 +967,11 @@
mtx = NULL;
object = NULL;
}
+
+ continue;
+drop:
+ if (vm_pageout_drop_page(m))
+ goto free_page;
}
if (mtx != NULL) {
mtx_unlock(mtx);
@@ -1131,6 +1214,7 @@
{
struct scan_state ss;
struct mtx *mtx;
+ vm_object_t object;
vm_page_t m, marker;
struct vm_pagequeue *pq;
long min_scan;
@@ -1187,23 +1271,31 @@
if (__predict_false((m->flags & PG_MARKER) != 0))
continue;
- vm_page_change_lock(m, &mtx);
+ if (!vm_pageout_lock_and_hold_page(m, &mtx))
+ continue;
/*
* The page may have been disassociated from the queue
* while locks were dropped.
*/
if (vm_page_queue(m) != PQ_ACTIVE)
- continue;
+ goto drop;
/*
* Wired pages are dequeued lazily.
*/
if (vm_page_wired(m)) {
vm_page_dequeue_deferred(m);
- continue;
+ goto drop;
}
+ if (__predict_false((object = m->object) == NULL))
+ /*
+ * The page has been removed from its object.
+ * Drop our reference and move on.
+ */
+ goto drop;
+
/*
* Check to see "how much" the page has been used.
*
@@ -1223,7 +1315,7 @@
* This race delays the detection of a new reference. At
* worst, we will deactivate and reactivate the page.
*/
- if (m->object->ref_count != 0)
+ if (object->ref_count != 0)
act_delta = pmap_ts_referenced(m);
else
act_delta = 0;
@@ -1278,6 +1370,9 @@
}
}
}
+drop:
+ if (vm_pageout_drop_page(m))
+ vm_page_free(m);
}
if (mtx != NULL) {
mtx_unlock(mtx);
@@ -1392,7 +1487,8 @@
KASSERT((m->flags & PG_MARKER) == 0,
("marker page %p was dequeued", m));
- vm_page_change_lock(m, &mtx);
+ if (!vm_pageout_lock_and_hold_page(m, &mtx))
+ continue;
recheck:
/*
@@ -1401,7 +1497,7 @@
*/
if (vm_page_queue(m) != PQ_INACTIVE) {
addl_page_shortage++;
- continue;
+ goto drop;
}
/*
@@ -1410,24 +1506,28 @@
* chance.
*/
if ((m->aflags & (PGA_ENQUEUED | PGA_REQUEUE |
- PGA_REQUEUE_HEAD)) != 0)
- goto reinsert;
+ PGA_REQUEUE_HEAD)) != 0) {
+ vm_pageout_reinsert_inactive(&ss, &rq, m);
+ goto drop;
+ }
/*
* Wired pages may not be freed. Complete their removal
* from the queue now to avoid needless revisits during
- * future scans.
+ * future scans. This check is racy and must be reverified once
+ * we hold the object lock and have verified that the page
+ * is not busy.
*/
if (vm_page_wired(m)) {
vm_page_dequeue_deferred(m);
- continue;
+ goto drop;
}
if (object != m->object) {
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
object = m->object;
- if (!VM_OBJECT_TRYWLOCK(object)) {
+ if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) {
mtx_unlock(mtx);
/* Depends on type-stability. */
VM_OBJECT_WLOCK(object);
@@ -1435,6 +1535,18 @@
goto recheck;
}
}
+ if (__predict_false(object == NULL))
+ /*
+ * The page has been removed from its object.
+ * Drop our reference and move on.
+ */
+ goto drop;
+
+ /*
+ * We can drop our transient reference now that we hold
+ * the object lock.
+ */
+ vm_pageout_drop_page_quick(m);
if (vm_page_busied(m)) {
/*
@@ -1446,7 +1558,19 @@
* inactive count.
*/
addl_page_shortage++;
- goto reinsert;
+ vm_pageout_reinsert_inactive(&ss, &rq, m);
+ continue;
+ }
+
+ /*
+ * Re-check for wirings now that we hold the object lock. If
+ * the page is mapped, it may still be wired by pmap lookups.
+ * The call to vm_page_try_remove_all() below atomically checks
+ * for such wirings and removes mappings.
+ */
+ if (__predict_false(vm_page_wired(m))) {
+ vm_page_dequeue_deferred(m);
+ continue;
}
/*
@@ -1492,7 +1616,8 @@
continue;
} else if ((object->flags & OBJ_DEAD) == 0) {
vm_page_aflag_set(m, PGA_REQUEUE);
- goto reinsert;
+ vm_pageout_reinsert_inactive(&ss, &rq, m);
+ continue;
}
}
@@ -1505,8 +1630,10 @@
*/
if (object->ref_count != 0) {
vm_page_test_dirty(m);
- if (m->dirty == 0)
- pmap_remove_all(m);
+ if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
+ vm_page_dequeue_deferred(m);
+ continue;
+ }
}
/*
@@ -1532,8 +1659,13 @@
} else if ((object->flags & OBJ_DEAD) == 0)
vm_page_launder(m);
continue;
-reinsert:
- vm_pageout_reinsert_inactive(&ss, &rq, m);
+
+drop:
+ /*
+ * Drop our transient reference.
+ */
+ if (vm_pageout_drop_page(m))
+ goto free_page;
}
if (mtx != NULL)
mtx_unlock(mtx);
Index: sys/vm/vm_swapout.c
===================================================================
--- sys/vm/vm_swapout.c
+++ sys/vm/vm_swapout.c
@@ -208,12 +208,12 @@
goto unlock_return;
if (should_yield())
goto unlock_return;
- if (vm_page_busied(p))
+
+ if (vm_page_busied(p) || vm_page_wired(p))
continue;
VM_CNT_INC(v_pdpages);
vm_page_lock(p);
- if (vm_page_wired(p) ||
- !pmap_page_exists_quick(pmap, p)) {
+ if (!pmap_page_exists_quick(pmap, p)) {
vm_page_unlock(p);
continue;
}
@@ -231,8 +231,8 @@
p->act_count -= min(p->act_count,
ACT_DECLINE);
if (!remove_mode && p->act_count == 0) {
- pmap_remove_all(p);
- vm_page_deactivate(p);
+ if (vm_page_try_remove_all(p))
+ vm_page_deactivate(p);
} else
vm_page_requeue(p);
} else {
@@ -243,7 +243,7 @@
vm_page_requeue(p);
}
} else if (vm_page_inactive(p))
- pmap_remove_all(p);
+ (void)vm_page_try_remove_all(p);
vm_page_unlock(p);
}
if ((backing_object = object->backing_object) == NULL)