Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -2723,8 +2723,7 @@
 		/* Have to allocate a new pdp, recurse */
 		if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index,
 		    lockp) == NULL) {
-			--m->wire_count;
-			atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+			vm_page_unwire_noq(m);
 			vm_page_free_zero(m);
 			return (NULL);
 		}
@@ -2756,8 +2755,7 @@
 			/* Have to allocate a new pd, recurse */
 			if (_pmap_allocpte(pmap, NUPDE + pdpindex,
 			    lockp) == NULL) {
-				--m->wire_count;
-				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+				vm_page_unwire_noq(m);
 				vm_page_free_zero(m);
 				return (NULL);
 			}
@@ -2770,9 +2768,7 @@
 				/* Have to allocate a new pd, recurse */
 				if (_pmap_allocpte(pmap, NUPDE + pdpindex,
 				    lockp) == NULL) {
-					--m->wire_count;
-					atomic_subtract_int(&vm_cnt.v_wire_count,
-					    1);
+					vm_page_unwire_noq(m);
 					vm_page_free_zero(m);
 					return (NULL);
 				}
@@ -2904,8 +2900,7 @@
 		pmap->pm_pml4[DMPML4I + i] = 0;
 	pmap->pm_pml4[PML4PML4I] = 0;	/* Recursive Mapping */
 
-	m->wire_count--;
-	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+	vm_page_unwire_noq(m);
 	vm_page_free_zero(m);
 
 	if (pmap->pm_pml4u != NULL) {
@@ -2915,7 +2910,7 @@
 		vm_page_free(m);
 	}
 }
-
+
 static int
 kvm_size(SYSCTL_HANDLER_ARGS)
 {
Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -1362,12 +1362,7 @@
 	}
 	pmap_invalidate_page(pmap, va);
 
-	/*
-	 * This is a release store so that the ordinary store unmapping
-	 * the page table page is globally performed before TLB shoot-
-	 * down is begun.
-	 */
-	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
+	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
 
 	/*
 	 * Put page on a list so that it is released after
@@ -1493,9 +1488,7 @@
 		/* recurse for allocating page dir */
 		if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
 		    lockp) == NULL) {
-			--m->wire_count;
-			/* XXX: release mem barrier? */
-			atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+			vm_page_unwire_noq(m);
 			vm_page_free_zero(m);
 			return (NULL);
 		}
@@ -1521,8 +1514,7 @@
 			/* recurse for allocating page dir */
 			if (_pmap_alloc_l3(pmap, NUL2E + l1index,
 			    lockp) == NULL) {
-				--m->wire_count;
-				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+				vm_page_unwire_noq(m);
 				vm_page_free_zero(m);
 				return (NULL);
 			}
@@ -1537,10 +1529,7 @@
 				/* recurse for allocating page dir */
 				if (_pmap_alloc_l3(pmap, NUL2E + l1index,
 				    lockp) == NULL) {
-					--m->wire_count;
-					/* XXX: release mem barrier? */
-					atomic_subtract_int(
-					    &vm_cnt.v_wire_count, 1);
+					vm_page_unwire_noq(m);
 					vm_page_free_zero(m);
 					return (NULL);
 				}
@@ -1648,8 +1637,7 @@
 
 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));
 
-	m->wire_count--;
-	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+	vm_page_unwire_noq(m);
 	vm_page_free_zero(m);
 }
 
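Note: every pmap hunk above (and the mips and riscv hunks below) retires the
same open-coded pair, a wire_count decrement plus a manual v_wire_count
adjustment, in favor of the new helper.  A minimal sketch of the resulting
call-site shape follows; pmap_free_pt_page() is a hypothetical name, while
vm_page_unwire_noq() and vm_page_free_zero() are the KPIs the patch uses.

	#include <sys/param.h>
	#include <vm/vm.h>
	#include <vm/vm_page.h>

	/* Hypothetical helper: release a page table page on allocation failure. */
	static void
	pmap_free_pt_page(vm_page_t m)
	{
		/*
		 * Drop the last wiring without touching the paging queues.
		 * Page table pages are unmanaged and never enqueued, so no
		 * requeue step (and no page lock) is needed here.
		 */
		vm_page_unwire_noq(m);
		vm_page_free_zero(m);	/* return the page to the allocator zeroed */
	}

Centralizing the decrement also keeps the v_wire_count bookkeeping in one
place, which is the point of the change.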
Index: sys/kern/kern_sendfile.c
===================================================================
--- sys/kern/kern_sendfile.c
+++ sys/kern/kern_sendfile.c
@@ -128,6 +128,7 @@
 static void
 sendfile_free_page(vm_page_t pg, bool nocache)
 {
+	bool freed;
 
 	vm_page_lock(pg);
 	/*
@@ -136,15 +137,15 @@
 	 * responsible for freeing the page.  In 'noncache' case try to free
 	 * the page, but only if it is cheap to.
 	 */
-	if (vm_page_unwire(pg, nocache ? PQ_NONE : PQ_INACTIVE)) {
+	if (vm_page_unwire_noq(pg)) {
 		vm_object_t obj;
 
 		if ((obj = pg->object) == NULL)
 			vm_page_free(pg);
-		else if (nocache) {
-			if (!vm_page_xbusied(pg) && VM_OBJECT_TRYWLOCK(obj)) {
-				bool freed;
-
+		else {
+			freed = false;
+			if (nocache && !vm_page_xbusied(pg) &&
+			    VM_OBJECT_TRYWLOCK(obj)) {
 				/* Only free unmapped pages. */
 				if (obj->ref_count == 0 ||
 				    !pmap_page_is_mapped(pg))
@@ -153,13 +154,24 @@
 					 * locked cannot be relied upon.
 					 */
 					freed = vm_page_try_to_free(pg);
-				else
-					freed = false;
 				VM_OBJECT_WUNLOCK(obj);
-				if (!freed)
+			}
+			if (!freed) {
+				/*
+				 * If we were asked to not cache the page, place
+				 * it near the head of the inactive queue so
+				 * that it is reclaimed sooner.  Otherwise,
+				 * maintain LRU.
+				 */
+				if (nocache)
 					vm_page_deactivate_noreuse(pg);
-			} else
-				vm_page_deactivate_noreuse(pg);
+				else if (pg->queue == PQ_ACTIVE)
+					vm_page_reference(pg);
+				else if (pg->queue != PQ_INACTIVE)
+					vm_page_deactivate(pg);
+				else
+					vm_page_requeue(pg);
+			}
 		}
 	}
 	vm_page_unlock(pg);
Index: sys/kern/vfs_bio.c
===================================================================
--- sys/kern/vfs_bio.c
+++ sys/kern/vfs_bio.c
@@ -2621,7 +2621,7 @@
 	bool freed;
 
 	vm_page_lock(m);
-	if (vm_page_unwire(m, PQ_NONE)) {
+	if (vm_page_unwire_noq(m)) {
 		/*
 		 * Determine if the page should be freed before adding
 		 * it to the inactive queue.
@@ -2637,14 +2637,16 @@
 		if (!freed) {
 			/*
 			 * If the page is unlikely to be reused, let the
-			 * VM know.  Otherwise, maintain LRU page
-			 * ordering and put the page at the tail of the
-			 * inactive queue.
+			 * VM know.  Otherwise, maintain LRU.
 			 */
 			if ((bp->b_flags & B_NOREUSE) != 0)
 				vm_page_deactivate_noreuse(m);
-			else
+			else if (m->queue == PQ_ACTIVE)
+				vm_page_reference(m);
+			else if (m->queue != PQ_INACTIVE)
 				vm_page_deactivate(m);
+			else
+				vm_page_requeue(m);
 		}
 	}
 	vm_page_unlock(m);
Index: sys/mips/mips/pmap.c
===================================================================
--- sys/mips/mips/pmap.c
+++ sys/mips/mips/pmap.c
@@ -1159,8 +1159,7 @@
 		if (_pmap_allocpte(pmap, NUPDE + segindex,
 		    flags) == NULL) {
 			/* alloc failed, release current */
-			--m->wire_count;
-			atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+			vm_page_unwire_noq(m);
 			vm_page_free_zero(m);
 			return (NULL);
 		}
@@ -1238,8 +1237,7 @@
 	ptdva = (vm_offset_t)pmap->pm_segtab;
 	ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva));
 
-	ptdpg->wire_count--;
-	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+	vm_page_unwire_noq(ptdpg);
 	vm_page_free_zero(ptdpg);
 }
 
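Note: sendfile_free_page() and the vfs_bio hunk above now share the same
four-way requeue policy.  Below is a sketch of that policy pulled out into a
standalone helper, assuming a locked, just-unwired page; the helper name is
invented, and the vm_page_*() calls are the ones the patch itself uses.

	#include <sys/param.h>
	#include <vm/vm.h>
	#include <vm/vm_page.h>

	/* Hypothetical helper mirroring the requeue policy added above. */
	static void
	page_release_lru(vm_page_t m, bool noreuse)
	{
		if (noreuse)
			/* Near the head of the inactive queue: reclaimed sooner. */
			vm_page_deactivate_noreuse(m);
		else if (m->queue == PQ_ACTIVE)
			/* Leave active pages in place; just record a reference. */
			vm_page_reference(m);
		else if (m->queue != PQ_INACTIVE)
			/* Not on the inactive queue yet: move it there. */
			vm_page_deactivate(m);
		else
			/* Already inactive: requeue at the tail to maintain LRU. */
			vm_page_requeue(m);
	}

Because vm_page_wire() no longer dequeues the page (see the vm_page.c hunks
below), an unwired page may still sit on a queue; this dispatch keeps active
pages active instead of unconditionally demoting them to the inactive queue.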
Index: sys/riscv/riscv/pmap.c
===================================================================
--- sys/riscv/riscv/pmap.c
+++ sys/riscv/riscv/pmap.c
@@ -1153,12 +1153,7 @@
 	}
 	pmap_invalidate_page(pmap, va);
 
-	/*
-	 * This is a release store so that the ordinary store unmapping
-	 * the page table page is globally performed before TLB shoot-
-	 * down is begun.
-	 */
-	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
+	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
 
 	/*
 	 * Put page on a list so that it is released after
@@ -1302,8 +1297,7 @@
 		/* recurse for allocating page dir */
 		if (_pmap_alloc_l3(pmap, NUPDE + l1index,
 		    lockp) == NULL) {
-			--m->wire_count;
-			atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+			vm_page_unwire_noq(m);
 			vm_page_free_zero(m);
 			return (NULL);
 		}
@@ -1388,8 +1382,7 @@
 	    pmap->pm_stats.resident_count));
 
 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
-	m->wire_count--;
-	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+	vm_page_unwire_noq(m);
 	vm_page_free_zero(m);
 
 	/* Remove pmap from the allpmaps list */
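Note: the vm_page.h comment added below mentions that some pmaps use a page
table page's wire count to track the number of populated entries.  A sketch
of that convention expressed through the new KPI; pmap_pte_release() is a
hypothetical name, and the surrounding PTE/TLB bookkeeping is elided.

	#include <sys/param.h>
	#include <vm/vm.h>
	#include <vm/vm_page.h>

	/*
	 * Hypothetical sketch: each valid PTE holds one wiring on the page
	 * backing its page table, so removing a PTE unwires the page, and
	 * the table page can be freed once its last entry goes away.
	 */
	static void
	pmap_pte_release(vm_page_t mpte)
	{
		if (vm_page_unwire_noq(mpte)) {
			/* Last populated entry removed; free the table page. */
			vm_page_free_zero(mpte);
		}
	}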
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -97,7 +97,7 @@
  * or the lock for either the free or paging queue (Q).  If a field is
  * annotated below with two of these locks, then holding either lock is
  * sufficient for read access, but both locks are required for write
- * access.
+ * access.  An annotation of (C) indicates that the field is immutable.
  *
  * In contrast, the synchronization of accesses to the page's
  * dirty field is machine dependent (M).  In the
@@ -111,6 +111,39 @@
  * contains the dirty field.  In the machine-independent layer,
  * the implementation of read-modify-write operations on the
  * field is encapsulated in vm_page_clear_dirty_mask().
+ *
+ * The page structure contains two fields which count references to the
+ * structure (as opposed to the page's contents), and the busy lock, an
+ * embedded sleepable reader-writer lock used to synchronize access to
+ * the page's contents while they are being written to or read from a
+ * pager.  Both reference counters are protected by the page lock (P).
+ * The hold counter tracks transient references obtained via a pmap
+ * lookup, and is also used in cases where the busy lock cannot be used
+ * due to ordering constraints with the vnode lock.  The wire counter
+ * is used to implement mlock(2) and is non-zero for pages containing
+ * kernel memory.  Pages that are wired or held will not be reclaimed
+ * or laundered by the page daemon, but are treated differently during
+ * a page queue scan: held pages remain at their position in the queue,
+ * while wired pages are removed from the queue and must later be
+ * re-enqueued appropriately by the unwiring thread.  It is legal to
+ * call vm_page_free() on a held page; doing so causes it to be removed
+ * from its object and page queue, and the page is released to the
+ * allocator once the last hold reference is dropped.  In contrast,
+ * wired pages may not be freed.
+ *
+ * In some pmap implementations, the wire count of a page table page is
+ * used to track the number of populated entries.
+ *
+ * The busy lock protects a page's contents and interlocks with the
+ * object lock (O).  In particular, a page may be busied or unbusied
+ * only with the object write lock held.  To avoid bloating the page
+ * structure, the busy lock lacks some of the features available to the
+ * kernel's general-purpose synchronization primitives.  As a result,
+ * busy lock ordering rules are not verified, lock recursion is not
+ * detected, and an attempt to xbusy a busy page or sbusy an xbusy
+ * page will trigger a panic rather than causing the thread to block.
+ * vm_page_sleep_if_busy() can be used to sleep until the page's busy
+ * state changes.
  */
 
 #if PAGE_SIZE == 4096
@@ -152,9 +185,9 @@
 	uint8_t		oflags;		/* page VPO_* flags (O) */
 	uint8_t		queue;		/* page queue index (P,Q) */
 	int8_t		psind;		/* pagesizes[] index (O) */
-	int8_t		segind;
+	int8_t		segind;		/* vm_phys segment index (C) */
 	uint8_t		order;		/* index of the buddy queue */
-	uint8_t		pool;
+	uint8_t		pool;		/* vm_phys freepool index (C) */
 	u_char		act_count;	/* page usage count (P) */
 	/* NOTE that these must support one bit per DEV_BSIZE in a page */
 	/* so, on normal X86 kernels, they must be at least 8 bits wide */
@@ -535,7 +568,8 @@
 int vm_page_trysbusy(vm_page_t m);
 void vm_page_unhold_pages(vm_page_t *ma, int count);
 void vm_page_unswappable(vm_page_t m);
-boolean_t vm_page_unwire(vm_page_t m, uint8_t queue);
+bool vm_page_unwire(vm_page_t m, uint8_t queue);
+bool vm_page_unwire_noq(vm_page_t m);
 void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
 void vm_page_wire (vm_page_t);
 void vm_page_xunbusy_hard(vm_page_t m);
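Note: the new header comment points at vm_page_sleep_if_busy() for waiting on
the busy lock.  A sketch of that idiom follows, assuming a write-locked
object; the function name and wait message are illustrative.  The key detail
is that vm_page_sleep_if_busy() returns true after sleeping, with the object
lock dropped, so the lookup must be retried.

	#include <sys/param.h>
	#include <vm/vm.h>
	#include <vm/vm_object.h>
	#include <vm/vm_page.h>

	/* Hypothetical lookup that waits out a busy page. */
	static vm_page_t
	object_lookup_unbusied(vm_object_t object, vm_pindex_t pindex)
	{
		vm_page_t m;

		VM_OBJECT_WLOCK(object);
	retry:
		m = vm_page_lookup(object, pindex);
		if (m != NULL && vm_page_sleep_if_busy(m, "pgwait")) {
			/* We slept and the object lock was dropped; retry. */
			VM_OBJECT_WLOCK(object);
			goto retry;
		}
		VM_OBJECT_WUNLOCK(object);
		return (m);
	}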
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -2873,9 +2873,7 @@
 			if (queue != PQ_NONE)
 				vm_page_dequeue(m);
 			vm_page_enqueue(PQ_ACTIVE, m);
-		} else
-			KASSERT(queue == PQ_NONE,
-			    ("vm_page_activate: wired page %p is queued", m));
+		}
 	} else {
 		if (m->act_count < ACT_INIT)
 			m->act_count = ACT_INIT;
@@ -3047,26 +3045,18 @@
 }
 
 /*
- *	vm_page_wire:
+ * vm_page_wire:
  *
- *	Mark this page as wired down by yet
- *	another map, removing it from paging queues
- *	as necessary.
- *
- *	If the page is fictitious, then its wire count must remain one.
+ * Mark this page as wired down.  If the page is fictitious, then
+ * its wire count must remain one.
  *
- *	The page must be locked.
+ * The page must be locked.
  */
 void
 vm_page_wire(vm_page_t m)
 {
-	/*
-	 * Only bump the wire statistics if the page is not already wired,
-	 * and only unqueue the page if it is on some queue (if it is unmanaged
-	 * it is already off the queues).
-	 */
-	vm_page_lock_assert(m, MA_OWNED);
+	vm_page_assert_locked(m);
 	if ((m->flags & PG_FICTITIOUS) != 0) {
 		KASSERT(m->wire_count == 1,
 		    ("vm_page_wire: fictitious page %p's wire count isn't one",
@@ -3077,7 +3067,6 @@
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0 ||
 		    m->queue == PQ_NONE,
 		    ("vm_page_wire: unmanaged page %p is queued", m));
-		vm_page_remque(m);
 		atomic_add_int(&vm_cnt.v_wire_count, 1);
 	}
 	m->wire_count++;
@@ -3094,38 +3083,69 @@
  * Only managed pages belonging to an object can be paged out.  If the number
 * of wirings transitions to zero and the page is eligible for page out, then
 * the page is added to the specified paging queue (unless PQ_NONE is
- * specified).
+ * specified, in which case the page is dequeued if it belongs to a paging
+ * queue).
 *
 * If a page is fictitious, then its wire count must always be one.
 *
 * A managed page must be locked.
 */
-boolean_t
+bool
 vm_page_unwire(vm_page_t m, uint8_t queue)
 {
+	bool unwired;
 
 	KASSERT(queue < PQ_COUNT || queue == PQ_NONE,
 	    ("vm_page_unwire: invalid queue %u request for page %p",
 	    queue, m));
+
+	unwired = vm_page_unwire_noq(m);
+	if (unwired && (m->oflags & VPO_UNMANAGED) == 0 && m->object != NULL) {
+		if (m->queue == queue) {
+			if (queue == PQ_ACTIVE)
+				vm_page_reference(m);
+			else if (queue != PQ_NONE)
+				vm_page_requeue(m);
+		} else {
+			vm_page_remque(m);
+			if (queue != PQ_NONE) {
+				vm_page_enqueue(queue, m);
+				if (queue == PQ_ACTIVE)
+					/* Initialize act_count. */
+					vm_page_activate(m);
+			}
+		}
+	}
+	return (unwired);
+}
+
+/*
+ * vm_page_unwire_noq:
+ *
+ * Unwire a page without (re-)inserting it into a page queue.  It is up
+ * to the caller to enqueue, requeue, or free the page as appropriate.
+ * In most cases, vm_page_unwire() should be used instead.
+ */
+bool
+vm_page_unwire_noq(vm_page_t m)
+{
+
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		vm_page_assert_locked(m);
 	if ((m->flags & PG_FICTITIOUS) != 0) {
 		KASSERT(m->wire_count == 1,
 		    ("vm_page_unwire: fictitious page %p's wire count isn't one",
 		    m));
-		return (FALSE);
-	}
-	if (m->wire_count > 0) {
-		m->wire_count--;
-		if (m->wire_count == 0) {
-			atomic_subtract_int(&vm_cnt.v_wire_count, 1);
-			if ((m->oflags & VPO_UNMANAGED) == 0 &&
-			    m->object != NULL && queue != PQ_NONE)
-				vm_page_enqueue(queue, m);
-			return (TRUE);
-		} else
-			return (FALSE);
-	} else
+		return (false);
+	}
+	if (m->wire_count == 0)
 		panic("vm_page_unwire: page %p's wire count is zero", m);
+	m->wire_count--;
+	if (m->wire_count == 0) {
+		atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+		return (true);
+	} else
+		return (false);
 }
 
 /*
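Note: vm_page_unwire_noq() deliberately leaves queue management to the
caller.  A sketch of the resulting division of labor, modeled on
sendfile_free_page() above; unwire_and_release() is a hypothetical name.

	#include <sys/param.h>
	#include <vm/vm.h>
	#include <vm/vm_page.h>

	/* Hypothetical caller that decides the page's fate itself. */
	static void
	unwire_and_release(vm_page_t m)
	{
		vm_page_lock(m);
		if (vm_page_unwire_noq(m)) {
			/* Last wiring went away; choose free vs. requeue. */
			if (m->object == NULL)
				vm_page_free(m);
			else
				vm_page_deactivate(m);
		}
		vm_page_unlock(m);
	}

Callers that want the old, combined behavior keep calling vm_page_unwire()
with an explicit target queue.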
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -355,9 +355,6 @@
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	pindex = m->pindex;
 
-	/*
-	 * We can't clean the page if it is busy or held.
-	 */
 	vm_page_assert_unbusied(m);
 	KASSERT(m->hold_count == 0, ("page %p is held", m));
 
@@ -399,7 +396,7 @@
 		}
 		vm_page_lock(p);
 		if (!vm_page_in_laundry(p) ||
-		    p->hold_count != 0) {	/* may be undergoing I/O */
+		    p->hold_count != 0 || p->wire_count != 0) {
 			vm_page_unlock(p);
 			ib = 0;
 			break;
@@ -426,7 +423,7 @@
 			break;
 		vm_page_lock(p);
 		if (!vm_page_in_laundry(p) ||
-		    p->hold_count != 0) {	/* may be undergoing I/O */
+		    p->hold_count != 0 || p->wire_count != 0) {
 			vm_page_unlock(p);
 			break;
 		}
@@ -675,10 +672,11 @@
 		}
 
 		/*
-		 * The page may have been busied or held while the object
+		 * The page may have been busied or referenced while the object
 		 * and page locks were released.
 		 */
-		if (vm_page_busied(m) || m->hold_count != 0) {
+		if (vm_page_busied(m) || m->hold_count != 0 ||
+		    m->wire_count != 0) {
 			vm_page_unlock(m);
 			error = EBUSY;
 			goto unlock_all;
@@ -767,11 +765,19 @@
 			vm_page_unlock(m);
 			continue;
 		}
+		if (m->wire_count != 0) {
+			vm_page_dequeue_locked(m);
+			vm_page_unlock(m);
+			continue;
+		}
 		object = m->object;
 		if ((!VM_OBJECT_TRYWLOCK(object) &&
 		    (!vm_pageout_fallback_object_lock(m, &next) ||
-		    m->hold_count != 0)) || vm_page_busied(m)) {
+		    m->hold_count != 0 || m->wire_count != 0)) ||
+		    vm_page_busied(m)) {
 			VM_OBJECT_WUNLOCK(object);
+			if (m->wire_count != 0 && vm_page_pagequeue(m) == pq)
+				vm_page_dequeue_locked(m);
 			vm_page_unlock(m);
 			continue;
 		}
@@ -1208,7 +1214,16 @@
 		 */
 		if (!vm_pageout_page_lock(m, &next))
 			goto unlock_page;
-		else if (m->hold_count != 0) {
+		else if (m->wire_count != 0) {
+			/*
+			 * Wired pages may not be freed, and unwiring a queued
+			 * page will cause it to be requeued.  Thus, remove them
+			 * from the queue now to avoid unnecessary revisits.
+			 */
+			vm_page_dequeue_locked(m);
+			addl_page_shortage++;
+			goto unlock_page;
+		} else if (m->hold_count != 0) {
 			/*
 			 * Held pages are essentially stuck in the
 			 * queue.  So, they ought to be discounted
@@ -1223,7 +1238,11 @@
 		if (!VM_OBJECT_TRYWLOCK(object)) {
 			if (!vm_pageout_fallback_object_lock(m, &next))
 				goto unlock_object;
-			else if (m->hold_count != 0) {
+			else if (m->wire_count != 0) {
+				vm_page_dequeue_locked(m);
+				addl_page_shortage++;
+				goto unlock_object;
+			} else if (m->hold_count != 0) {
 				addl_page_shortage++;
 				goto unlock_object;
 			}
@@ -1245,6 +1264,7 @@
 			continue;
 		}
 		KASSERT(m->hold_count == 0, ("Held page %p", m));
+		KASSERT(m->wire_count == 0, ("Wired page %p", m));
 
 		/*
 		 * Dequeue the inactive page and unlock the inactive page
@@ -1448,6 +1468,15 @@
 		 */
 		VM_CNT_INC(v_pdpages);
 
+		/*
+		 * Wired pages are dequeued lazily.
+		 */
+		if (m->wire_count != 0) {
+			vm_page_dequeue_locked(m);
+			vm_page_unlock(m);
+			continue;
+		}
+
 		/*
 		 * Check to see "how much" the page has been used.
 		 */
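Note: with vm_page_wire() no longer calling vm_page_remque(), wired pages can
linger on the paging queues, and the scans above strip them lazily.  A
condensed sketch of the rule they apply; the helper name is invented, and the
real loops carry more state (shortage accounting, 'next' pointers, object
locking).

	#include <sys/param.h>
	#include <vm/vm.h>
	#include <vm/vm_page.h>

	/*
	 * Hypothetical fragment of a page queue scan: both the page lock
	 * and the page queue lock are held when a candidate is examined.
	 */
	static bool
	scan_skip_wired(vm_page_t m)
	{
		if (m->wire_count != 0) {
			/* Dequeue now so later scans do not revisit the page. */
			vm_page_dequeue_locked(m);
			vm_page_unlock(m);
			return (true);	/* caller moves on to the next page */
		}
		return (false);
	}

The apparent trade-off is a little extra work in the scans in exchange for a
cheaper vm_page_wire(), which no longer needs to touch the page queues.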