Index: sys/amd64/amd64/efirt_machdep.c =================================================================== --- sys/amd64/amd64/efirt_machdep.c +++ sys/amd64/amd64/efirt_machdep.c @@ -54,6 +54,7 @@ #include #include #include +#include static pml5_entry_t *efi_pml5; static pml4_entry_t *efi_pml4; @@ -64,11 +65,13 @@ void efi_destroy_1t1_map(void) { + struct pctrie_iter pages; vm_page_t m; if (obj_1t1_pt != NULL) { + vm_page_iter_init(&pages, obj_1t1_pt); VM_OBJECT_RLOCK(obj_1t1_pt); - TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq) + VM_RADIX_FOREACH(m, pages) m->ref_count = VPRC_OBJREF; vm_wire_sub(obj_1t1_pt->resident_page_count); VM_OBJECT_RUNLOCK(obj_1t1_pt); Index: sys/amd64/amd64/pmap.c =================================================================== --- sys/amd64/amd64/pmap.c +++ sys/amd64/amd64/pmap.c @@ -7697,30 +7697,32 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { + struct pctrie_iter pages; struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; - vm_pindex_t diff, psize; int rv; VM_OBJECT_ASSERT_LOCKED(m_start->object); - psize = atop(end - start); mpte = NULL; - m = m_start; + vm_page_iter_limit_init(&pages, m_start->object, + m_start->pindex + atop(end - start)); + m = vm_radix_iter_lookup(&pages, m_start->pindex); lock = NULL; PMAP_LOCK(pmap); - while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - va = start + ptoa(diff); + while (m != NULL) { + va = start + ptoa(m->pindex - m_start->pindex); if ((va & PDRMASK) == 0 && va + NBPDR <= end && m->psind == 1 && pmap_ps_enabled(pmap) && ((rv = pmap_enter_2mpage(pmap, va, m, prot, &lock)) == KERN_SUCCESS || rv == KERN_NO_SPACE)) - m = &m[NBPDR / PAGE_SIZE - 1]; - else + m = vm_radix_iter_jump(&pages, NBPDR / PAGE_SIZE); + else { mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); - m = TAILQ_NEXT(m, listq); + m = vm_radix_iter_step(&pages); + } } if (lock != NULL) rw_wunlock(lock); @@ -7894,6 +7896,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { + struct pctrie_iter pages; pd_entry_t *pde; pt_entry_t PG_A, PG_M, PG_RW, PG_V; vm_paddr_t pa, ptepa; @@ -7913,7 +7916,8 @@ return; if (!vm_object_populate(object, pindex, pindex + atop(size))) return; - p = vm_page_lookup(object, pindex); + vm_page_iter_init(&pages, object); + p = vm_radix_iter_lookup(&pages, pindex); KASSERT(vm_page_all_valid(p), ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; @@ -7931,15 +7935,15 @@ * the pages are not physically contiguous or have differing * memory attributes. 
*/ - p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { + p = vm_radix_iter_next(&pages); + if (p == NULL) + return; KASSERT(vm_page_all_valid(p), ("pmap_object_init_pt: invalid page %p", p)); - if (pa != VM_PAGE_TO_PHYS(p) || - pat_mode != p->md.pat_mode) + if (pat_mode != p->md.pat_mode) return; - p = TAILQ_NEXT(p, listq); } /* Index: sys/amd64/sgx/sgx.c =================================================================== --- sys/amd64/sgx/sgx.c +++ sys/amd64/sgx/sgx.c @@ -346,13 +346,17 @@ } static void -sgx_page_remove(struct sgx_softc *sc, vm_page_t p) +sgx_page_remove(struct sgx_softc *sc, vm_page_t p, + struct pctrie_iter *pages) { struct epc_page *epc; vm_paddr_t pa; uint64_t offs; - (void)vm_page_remove(p); + if (pages != NULL) + (void)vm_page_iter_remove(pages, p); + else + (void) vm_page_remove(p); dprintf("%s: p->pidx %ld\n", __func__, p->pindex); @@ -369,8 +373,9 @@ sgx_enclave_remove(struct sgx_softc *sc, struct sgx_enclave *enclave) { + struct pctrie_iter pages; vm_object_t object; - vm_page_t p, p_secs, p_next; + vm_page_t p, p_secs; mtx_lock(&sc->mtx); TAILQ_REMOVE(&sc->enclaves, enclave, next); @@ -378,6 +383,7 @@ object = enclave->object; + vm_page_iter_init(&pages, object); VM_OBJECT_WLOCK(object); /* @@ -385,20 +391,21 @@ * then remove SECS page. */ restart: - TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { + VM_RADIX_FOREACH(p, pages) { if (p->pindex == SGX_SECS_VM_OBJECT_INDEX) continue; - if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0) + if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0) { + pctrie_iter_reset(&pages); goto restart; - sgx_page_remove(sc, p); + } + sgx_page_remove(sc, p, &pages); } p_secs = vm_page_grab(object, SGX_SECS_VM_OBJECT_INDEX, VM_ALLOC_NOCREAT); /* Now remove SECS page */ if (p_secs != NULL) - sgx_page_remove(sc, p_secs); + sgx_page_remove(sc, p_secs, NULL); - KASSERT(TAILQ_EMPTY(&object->memq) == 1, ("not empty")); KASSERT(object->resident_page_count == 0, ("count")); VM_OBJECT_WUNLOCK(object); @@ -722,7 +729,7 @@ p = vm_page_grab(enclave->object, - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX, VM_ALLOC_NOCREAT); - sgx_page_remove(sc, p); + sgx_page_remove(sc, p, NULL); VM_OBJECT_WUNLOCK(object); goto error; } @@ -736,7 +743,7 @@ p = vm_page_grab(enclave->object, - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX, VM_ALLOC_NOCREAT); - sgx_page_remove(sc, p); + sgx_page_remove(sc, p, NULL); VM_OBJECT_WUNLOCK(object); goto error; } Index: sys/arm/arm/pmap-v6.c =================================================================== --- sys/arm/arm/pmap-v6.c +++ sys/arm/arm/pmap-v6.c @@ -123,6 +123,7 @@ #include #include #include +#include #include #include #include @@ -4784,29 +4785,32 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { + struct pctrie_iter pages; vm_offset_t va; vm_page_t m, mpt2pg; - vm_pindex_t diff, psize; PDEBUG(6, printf("%s: pmap %p start %#x end %#x m %p prot %#x\n", __func__, pmap, start, end, m_start, prot)); VM_OBJECT_ASSERT_LOCKED(m_start->object); - psize = atop(end - start); + mpt2pg = NULL; - m = m_start; + vm_page_iter_limit_init(&pages, m_start->object, + m_start->pindex + atop(end - start)); + m = vm_radix_iter_lookup(&pages, m_start->pindex); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); - while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - va = start + ptoa(diff); + while (m != NULL) { + va = start + ptoa(m->pindex - m_start->pindex); if ((va & PTE1_OFFSET) == 0 && va + PTE1_SIZE <= end && 
m->psind == 1 && sp_enabled && pmap_enter_1mpage(pmap, va, m, prot)) - m = &m[PTE1_SIZE / PAGE_SIZE - 1]; - else + m = vm_radix_iter_jump(&pages, NBPDR / PAGE_SIZE); + else { mpt2pg = pmap_enter_quick_locked(pmap, va, m, prot, mpt2pg); - m = TAILQ_NEXT(m, listq); + m = vm_radix_iter_step(&pages); + } } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); @@ -4821,6 +4825,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { + struct pctrie_iter pages; pt1_entry_t *pte1p; vm_paddr_t pa, pte2_pa; vm_page_t p; @@ -4833,7 +4838,8 @@ if ((addr & PTE1_OFFSET) == 0 && (size & PTE1_OFFSET) == 0) { if (!vm_object_populate(object, pindex, pindex + atop(size))) return; - p = vm_page_lookup(object, pindex); + vm_page_iter_init(&pages, object); + p = vm_radix_iter_lookup(&pages, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("%s: invalid page %p", __func__, p)); pat_mode = p->md.pat_mode; @@ -4851,15 +4857,15 @@ * the pages are not physically contiguous or have differing * memory attributes. */ - p = TAILQ_NEXT(p, listq); for (pa = pte2_pa + PAGE_SIZE; pa < pte2_pa + size; pa += PAGE_SIZE) { + p = vm_radix_iter_next(&pages); + if (p == NULL) + return; KASSERT(p->valid == VM_PAGE_BITS_ALL, ("%s: invalid page %p", __func__, p)); - if (pa != VM_PAGE_TO_PHYS(p) || - pat_mode != p->md.pat_mode) + if (pat_mode != p->md.pat_mode) return; - p = TAILQ_NEXT(p, listq); } /* Index: sys/arm64/arm64/efirt_machdep.c =================================================================== --- sys/arm64/arm64/efirt_machdep.c +++ sys/arm64/arm64/efirt_machdep.c @@ -55,6 +55,7 @@ #include #include #include +#include static vm_object_t obj_1t1_pt; static vm_pindex_t efi_1t1_idx; @@ -64,11 +65,13 @@ void efi_destroy_1t1_map(void) { + struct pctrie_iter pages; vm_page_t m; if (obj_1t1_pt != NULL) { + vm_page_iter_init(&pages, obj_1t1_pt); VM_OBJECT_RLOCK(obj_1t1_pt); - TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq) + VM_RADIX_FOREACH(m, pages) m->ref_count = VPRC_OBJREF; vm_wire_sub(obj_1t1_pt->resident_page_count); VM_OBJECT_RUNLOCK(obj_1t1_pt); Index: sys/arm64/arm64/pmap.c =================================================================== --- sys/arm64/arm64/pmap.c +++ sys/arm64/arm64/pmap.c @@ -5973,31 +5973,32 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { + struct pctrie_iter pages; struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; - vm_pindex_t diff, psize; int rv; VM_OBJECT_ASSERT_LOCKED(m_start->object); - psize = atop(end - start); mpte = NULL; - m = m_start; + vm_page_iter_limit_init(&pages, m_start->object, + m_start->pindex + atop(end - start)); + m = vm_radix_iter_lookup(&pages, m_start->pindex); lock = NULL; PMAP_LOCK(pmap); - while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - va = start + ptoa(diff); + while (m != NULL) { + va = start + ptoa(m->pindex - m_start->pindex); if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end && m->psind == 2 && pmap_ps_enabled(pmap) && ((rv = pmap_enter_l2_rx(pmap, va, m, prot, &lock)) == KERN_SUCCESS || rv == KERN_NO_SPACE)) - m = &m[L2_SIZE / PAGE_SIZE - 1]; + m = vm_radix_iter_jump(&pages, L2_SIZE / PAGE_SIZE); else if ((va & L3C_OFFSET) == 0 && va + L3C_SIZE <= end && m->psind >= 1 && pmap_ps_enabled(pmap) && ((rv = pmap_enter_l3c_rx(pmap, va, m, &mpte, prot, &lock)) == KERN_SUCCESS || rv == KERN_NO_SPACE)) - m = &m[L3C_ENTRIES - 1]; + m = vm_radix_iter_jump(&pages, L3C_ENTRIES); else { /* * In general, if a superpage mapping were possible, @@ 
-6010,8 +6011,8 @@ */ mpte = pmap_enter_quick_locked(pmap, va, m, prot | VM_PROT_NO_PROMOTE, mpte, &lock); + m = vm_radix_iter_step(&pages); } - m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); Index: sys/compat/linuxkpi/common/src/linux_compat.c =================================================================== --- sys/compat/linuxkpi/common/src/linux_compat.c +++ sys/compat/linuxkpi/common/src/linux_compat.c @@ -59,6 +59,7 @@ #include #include #include +#include #include @@ -644,6 +645,7 @@ zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size) { + struct pctrie_iter pages; vm_object_t obj; vm_page_t m; @@ -651,9 +653,8 @@ if (obj == NULL || (obj->flags & OBJ_UNMANAGED) != 0) return (-ENOTSUP); VM_OBJECT_RLOCK(obj); - for (m = vm_page_find_least(obj, OFF_TO_IDX(address)); - m != NULL && m->pindex < OFF_TO_IDX(address + size); - m = TAILQ_NEXT(m, listq)) + vm_page_iter_limit_init(&pages, obj, OFF_TO_IDX(address + size)); + VM_RADIX_FOREACH_FROM(m, pages, OFF_TO_IDX(address)) pmap_remove_all(m); VM_OBJECT_RUNLOCK(obj); return (0); Index: sys/dev/agp/agp.c =================================================================== --- sys/dev/agp/agp.c +++ sys/dev/agp/agp.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -539,6 +540,7 @@ agp_generic_bind_memory(device_t dev, struct agp_memory *mem, vm_offset_t offset) { + struct pctrie_iter pages; struct agp_softc *sc = device_get_softc(dev); vm_offset_t i, j, k; vm_page_t m; @@ -571,7 +573,7 @@ AGP_DPF("found page pa=%#jx\n", (uintmax_t)VM_PAGE_TO_PHYS(m)); } VM_OBJECT_WUNLOCK(mem->am_obj); - + vm_page_iter_init(&pages, mem->am_obj); mtx_lock(&sc->as_lock); if (mem->am_is_bound) { @@ -588,7 +590,7 @@ */ VM_OBJECT_WLOCK(mem->am_obj); for (i = 0; i < mem->am_size; i += PAGE_SIZE) { - m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(i)); + m = vm_radix_iter_lookup(&pages, OFF_TO_IDX(i)); /* * Install entries in the GATT, making sure that if @@ -609,6 +611,7 @@ */ for (k = 0; k < i + j; k += AGP_PAGE_SIZE) AGP_UNBIND_PAGE(dev, offset + k); + pctrie_iter_reset(&pages); goto bad; } } @@ -631,7 +634,7 @@ mtx_unlock(&sc->as_lock); VM_OBJECT_ASSERT_WLOCKED(mem->am_obj); for (k = 0; k < mem->am_size; k += PAGE_SIZE) { - m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(k)); + m = vm_radix_iter_lookup(&pages, OFF_TO_IDX(k)); if (k >= i) vm_page_xunbusy(m); vm_page_unwire(m, PQ_INACTIVE); @@ -644,6 +647,7 @@ int agp_generic_unbind_memory(device_t dev, struct agp_memory *mem) { + struct pctrie_iter pages; struct agp_softc *sc = device_get_softc(dev); vm_page_t m; int i; @@ -665,9 +669,10 @@ AGP_FLUSH_TLB(dev); + vm_page_iter_init(&pages, mem->am_obj); VM_OBJECT_WLOCK(mem->am_obj); for (i = 0; i < mem->am_size; i += PAGE_SIZE) { - m = vm_page_lookup(mem->am_obj, atop(i)); + m = vm_radix_iter_lookup(&pages, atop(i)); vm_page_unwire(m, PQ_INACTIVE); } VM_OBJECT_WUNLOCK(mem->am_obj); Index: sys/i386/i386/pmap.c =================================================================== --- sys/i386/i386/pmap.c +++ sys/i386/i386/pmap.c @@ -4104,29 +4104,30 @@ __CONCAT(PMTYPE, enter_object)(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { + struct pctrie_iter pages; vm_offset_t va; vm_page_t m, mpte; - vm_pindex_t diff, psize; int rv; VM_OBJECT_ASSERT_LOCKED(m_start->object); - psize = atop(end - start); mpte = NULL; - m = m_start; + vm_page_iter_limit_init(&pages, m_start->object, + m_start->pindex + atop(end - start)); + m = vm_radix_iter_lookup(&pages, m_start->pindex); 
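[Editorial note] The pmap_enter_object() hunks above (amd64, arm, arm64, i386, and the powerpc/riscv ones later in this patch) all converge on the same traversal shape: a bounded pctrie iterator replaces the memq TAILQ walk, with a "jump" over pages covered by a superpage mapping and a "step" otherwise. A minimal sketch of that shape follows, assuming the iterator API used by this series; the per-arch superpage test is a hypothetical placeholder, pmap/object locking is elided, and NBPDR / PAGE_SIZE is the amd64/i386 stride (each architecture substitutes its own).

/*
 * Sketch only: the loop shape shared by the pmap_enter_object()
 * conversions in this patch.  can_map_superpage() is a placeholder
 * for the per-arch psind/alignment test.
 */
static void
enter_object_walk(vm_object_t obj, vm_pindex_t first, vm_pindex_t npages,
    vm_offset_t start)
{
	struct pctrie_iter pages;
	vm_page_t m;
	vm_offset_t va;

	/* Bound the iterator so the walk stops at first + npages. */
	vm_page_iter_limit_init(&pages, obj, first + npages);
	m = vm_radix_iter_lookup(&pages, first);
	while (m != NULL) {
		/* Recompute va from the pindex; resident gaps are skipped. */
		va = start + ptoa(m->pindex - first);
		if (can_map_superpage(va, m))
			/* Skip the pages covered by the large mapping. */
			m = vm_radix_iter_jump(&pages, NBPDR / PAGE_SIZE);
		else
			m = vm_radix_iter_step(&pages);
	}
}

The point of the change is that the virtual address is now derived from m->pindex on every iteration, so the walk no longer depends on the (removed) sorted memq list.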
rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); - while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - va = start + ptoa(diff); + while (m != NULL) { + va = start + ptoa(m->pindex - m_start->pindex); if ((va & PDRMASK) == 0 && va + NBPDR <= end && m->psind == 1 && pg_ps_enabled && ((rv = pmap_enter_4mpage(pmap, va, m, prot)) == KERN_SUCCESS || rv == KERN_NO_SPACE)) - m = &m[NBPDR / PAGE_SIZE - 1]; - else - mpte = pmap_enter_quick_locked(pmap, va, m, prot, - mpte); - m = TAILQ_NEXT(m, listq); + m = vm_radix_iter_jump(&pages, NBPDR / PAGE_SIZE); + else { + mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte); + m = vm_radix_iter_step(&pages); + } } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); @@ -4294,6 +4295,7 @@ __CONCAT(PMTYPE, object_init_pt)(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { + struct pctrie_iter pages; pd_entry_t *pde; vm_paddr_t pa, ptepa; vm_page_t p; @@ -4306,7 +4308,8 @@ (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { if (!vm_object_populate(object, pindex, pindex + atop(size))) return; - p = vm_page_lookup(object, pindex); + vm_page_iter_init(&pages, object); + p = vm_radix_iter_lookup(&pages, pindex); KASSERT(vm_page_all_valid(p), ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; @@ -4324,15 +4327,15 @@ * the pages are not physically contiguous or have differing * memory attributes. */ - p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { + p = vm_radix_iter_next(&pages); + if (p == NULL) + return; KASSERT(vm_page_all_valid(p), ("pmap_object_init_pt: invalid page %p", p)); - if (pa != VM_PAGE_TO_PHYS(p) || - pat_mode != p->md.pat_mode) + if (pat_mode != p->md.pat_mode) return; - p = TAILQ_NEXT(p, listq); } /* Index: sys/kern/kern_kcov.c =================================================================== --- sys/kern/kern_kcov.c +++ sys/kern/kern_kcov.c @@ -396,20 +396,23 @@ static void kcov_free(struct kcov_info *info) { + struct pctrie_iter pages; vm_page_t m; - size_t i; if (info->kvaddr != 0) { pmap_qremove(info->kvaddr, info->bufsize / PAGE_SIZE); kva_free(info->kvaddr, info->bufsize); } if (info->bufobj != NULL) { + vm_page_iter_init(&pages, info->bufobj); VM_OBJECT_WLOCK(info->bufobj); - m = vm_page_lookup(info->bufobj, 0); - for (i = 0; i < info->bufsize / PAGE_SIZE; i++) { + VM_RADIX_FORALL(m, npages) { + if (m->pindex >= info->bufsize / PAGE_SIZE) + break; vm_page_unwire_noq(m); - m = vm_page_next(m); } + KASSERT(m == NULL || m->pindex == info->bufsize / PAGE_SIZE, + ("%s: null page", __func__)); VM_OBJECT_WUNLOCK(info->bufobj); vm_object_deallocate(info->bufobj); } Index: sys/kern/kern_proc.c =================================================================== --- sys/kern/kern_proc.c +++ sys/kern/kern_proc.c @@ -91,6 +91,7 @@ #include #include #include +#include #include #include @@ -2559,7 +2560,7 @@ pi_adv = atop(entry->end - addr); pindex = pi; for (tobj = obj;; tobj = tobj->backing_object) { - m = vm_page_find_least(tobj, pindex); + m = vm_radix_lookup_ge(&tobj->rtree, pindex); if (m != NULL) { if (m->pindex == pindex) break; Index: sys/kern/subr_pctrie.c =================================================================== --- sys/kern/subr_pctrie.c +++ sys/kern/subr_pctrie.c @@ -547,7 +547,7 @@ */ while (it->top != 0) { node = it->path[it->top - 1]; - KASSERT(!powerof2(node->pn_popmap), + KASSERT(access == PCTRIE_SMR || !powerof2(node->pn_popmap), ("%s: freed node in iter path", __func__)); if (!pctrie_keybarr(node, 
index, &slot)) { node = pctrie_node_load( @@ -584,7 +584,8 @@ } /* - * Returns the value stored at a given index value, possibly NULL. + * Returns the value stored at a given index value, possibly NULL, assuming + * access is externally synchronized by a lock. */ uint64_t * pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index) @@ -632,9 +633,8 @@ * Returns the value stored at a fixed offset from the current index value, * possibly NULL. */ -static __always_inline uint64_t * -_pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr, - enum pctrie_access access) +uint64_t * +pctrie_iter_stride(struct pctrie_iter *it, int stride) { uint64_t index = it->index + stride; @@ -645,17 +645,7 @@ if ((index < it->limit) != (it->index < it->limit)) return (NULL); - return (_pctrie_iter_lookup(it, index, smr, access)); -} - -/* - * Returns the value stored at a fixed offset from the current index value, - * possibly NULL. - */ -uint64_t * -pctrie_iter_stride(struct pctrie_iter *it, int stride) -{ - return (_pctrie_iter_stride(it, stride, NULL, PCTRIE_LOCKED)); + return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED)); } /* @@ -665,7 +655,7 @@ uint64_t * pctrie_iter_next(struct pctrie_iter *it) { - return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_LOCKED)); + return (pctrie_iter_stride(it, 1)); } /* @@ -675,7 +665,27 @@ uint64_t * pctrie_iter_prev(struct pctrie_iter *it) { - return (_pctrie_iter_stride(it, -1, NULL, PCTRIE_LOCKED)); + return (pctrie_iter_stride(it, -1)); +} + +/* + * Returns the number of contiguous, non-NULL entries read into the value[] + * array, without requiring an external lock. + */ +int +pctrie_lookup_range_unlocked(struct pctrie *ptree, uint64_t index, + uint64_t *value[], int count, smr_t smr) +{ + struct pctrie_iter it; + int i = 0; + + pctrie_iter_init(&it, ptree); + smr_enter(smr); + while (i < count && NULL != + (value[i] = _pctrie_iter_lookup(&it, index + i, smr, PCTRIE_SMR))) + i++; + smr_exit(smr); + return (i); } /* Index: sys/powerpc/aim/mmu_oea.c =================================================================== --- sys/powerpc/aim/mmu_oea.c +++ sys/powerpc/aim/mmu_oea.c @@ -131,6 +131,7 @@ #include #include #include +#include #include #include #include @@ -1235,20 +1236,23 @@ moea_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { + struct pctrie_iter pages; + vm_offset_t va; vm_page_t m; - vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); - psize = atop(end - start); - m = m_start; + vm_page_iter_limit_init(&pages, m_start->object, + m_start->pindex + atop(end - start)); + m = vm_radix_iter_lookup(&pages, m_start->pindex); rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); - while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - moea_enter_locked(pm, start + ptoa(diff), m, prot & + while (m != NULL) { + va = start + ptoa(m->pindex - m_start->pindex); + moea_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_QUICK_LOCKED, 0); - m = TAILQ_NEXT(m, listq); + m = vm_radix_iter_step(&pages); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pm); Index: sys/powerpc/aim/mmu_oea64.c =================================================================== --- sys/powerpc/aim/mmu_oea64.c +++ sys/powerpc/aim/mmu_oea64.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include #include @@ -364,7 +365,9 @@ static struct pvo_entry *moea64_sp_remove(struct pvo_entry *sp, struct pvo_dlist *tofree); +#if VM_NRESERVLEVEL > 0 static void moea64_sp_promote(pmap_t pmap, 
vm_offset_t va, vm_page_t m); +#endif static void moea64_sp_demote_aligned(struct pvo_entry *sp); static void moea64_sp_demote(struct pvo_entry *pvo); @@ -1824,17 +1827,18 @@ moea64_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { + struct pctrie_iter pages; vm_page_t m; - vm_pindex_t diff, psize; vm_offset_t va; int8_t psind; VM_OBJECT_ASSERT_LOCKED(m_start->object); - psize = atop(end - start); - m = m_start; - while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - va = start + ptoa(diff); + vm_page_iter_limit_init(&pages, m_start->object, + m_start->pindex + atop(end - start)); + m = vm_radix_iter_lookup(&pages, m_start->pindex); + while (m != NULL) { + va = start + ptoa(m->pindex - m_start->pindex); if ((va & HPT_SP_MASK) == 0 && va + HPT_SP_SIZE <= end && m->psind == 1 && moea64_ps_enabled(pm)) psind = 1; @@ -1844,8 +1848,9 @@ (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, psind); if (psind == 1) - m = &m[HPT_SP_SIZE / PAGE_SIZE - 1]; - m = TAILQ_NEXT(m, listq); + m = vm_radix_iter_jump(&pages, HPT_SP_SIZE / PAGE_SIZE); + else + m = vm_radix_iter_step(&pages); } } @@ -3794,6 +3799,7 @@ return (KERN_SUCCESS); } +#if VM_NRESERVLEVEL > 0 static void moea64_sp_promote(pmap_t pmap, vm_offset_t va, vm_page_t m) { @@ -3920,6 +3926,7 @@ atomic_add_long(&sp_p_failures, 1); PMAP_UNLOCK(pmap); } +#endif static void moea64_sp_demote_aligned(struct pvo_entry *sp) Index: sys/powerpc/aim/mmu_radix.c =================================================================== --- sys/powerpc/aim/mmu_radix.c +++ sys/powerpc/aim/mmu_radix.c @@ -3334,33 +3334,34 @@ mmu_radix_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { - + struct pctrie_iter pages; struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; - vm_pindex_t diff, psize; bool invalidate; + VM_OBJECT_ASSERT_LOCKED(m_start->object); CTR6(KTR_PMAP, "%s(%p, %#x, %#x, %p, %#x)", __func__, pmap, start, end, m_start, prot); - invalidate = false; - psize = atop(end - start); mpte = NULL; - m = m_start; + vm_page_iter_limit_init(&pages, m_start->object, + m_start->pindex + atop(end - start)); + m = vm_radix_iter_lookup(&pages, m_start->pindex); lock = NULL; PMAP_LOCK(pmap); - while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - va = start + ptoa(diff); + while (m != NULL) { + va = start + ptoa(m->pindex - m_start->pindex); if ((va & L3_PAGE_MASK) == 0 && va + L3_PAGE_SIZE <= end && m->psind == 1 && mmu_radix_ps_enabled(pmap) && pmap_enter_2mpage(pmap, va, m, prot, &lock)) - m = &m[L3_PAGE_SIZE / PAGE_SIZE - 1]; - else + m = vm_radix_iter_jump(&pages, L3_PAGE_SIZE / PAGE_SIZE); + else { mpte = mmu_radix_enter_quick_locked(pmap, va, m, prot, mpte, &lock, &invalidate); - m = TAILQ_NEXT(m, listq); + m = vm_radix_iter_step(&pages); + } } ptesync(); if (lock != NULL) @@ -4043,6 +4044,7 @@ mmu_radix_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { + struct pctrie_iter pages; pml3_entry_t *l3e; vm_paddr_t pa, ptepa; vm_page_t p, pdpg; @@ -4059,7 +4061,9 @@ return; if (!vm_object_populate(object, pindex, pindex + atop(size))) return; - p = vm_page_lookup(object, pindex); + vm_page_iter_init(&pages, object); + p = vm_radix_iter_lookup(&pages, pindex); + KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); ma = p->md.mdpg_cache_attrs; @@ -4077,15 +4081,15 @@ * the pages are not physically contiguous or have differing * 
memory attributes. */ - p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { + p = vm_radix_iter_next(&pages); + if (p == NULL) + return; KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); - if (pa != VM_PAGE_TO_PHYS(p) || - ma != p->md.mdpg_cache_attrs) + if (ma != p->md.mdpg_cache_attrs) return; - p = TAILQ_NEXT(p, listq); } PMAP_LOCK(pmap); Index: sys/powerpc/booke/pmap.c =================================================================== --- sys/powerpc/booke/pmap.c +++ sys/powerpc/booke/pmap.c @@ -1457,20 +1457,23 @@ mmu_booke_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { + struct pctrie_iter pages; + vm_offset_t va; vm_page_t m; - vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); - psize = atop(end - start); - m = m_start; + vm_page_iter_limit_init(&pages, m_start->object, + m_start->pindex + atop(end - start)); + m = vm_radix_iter_lookup(&pages, m_start->pindex); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); - while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - mmu_booke_enter_locked(pmap, start + ptoa(diff), m, + while (m != NULL) { + va = start + ptoa(m->pindex - m_start->pindex); + mmu_booke_enter_locked(pmap, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); - m = TAILQ_NEXT(m, listq); + m = vm_radix_iter_step(&pages); } PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); Index: sys/riscv/riscv/pmap.c =================================================================== --- sys/riscv/riscv/pmap.c +++ sys/riscv/riscv/pmap.c @@ -3614,31 +3614,33 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { + struct pctrie_iter pages; struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; - vm_pindex_t diff, psize; int rv; VM_OBJECT_ASSERT_LOCKED(m_start->object); - psize = atop(end - start); mpte = NULL; - m = m_start; + vm_page_iter_limit_init(&pages, m_start->object, + m_start->pindex + atop(end - start)); + m = vm_radix_iter_lookup(&pages, m_start->pindex); lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); - while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - va = start + ptoa(diff); + while (m != NULL) { + va = start + ptoa(m->pindex - m_start->pindex); if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end && m->psind == 1 && pmap_ps_enabled(pmap) && ((rv = pmap_enter_2mpage(pmap, va, m, prot, &lock)) == KERN_SUCCESS || rv == KERN_NO_SPACE)) - m = &m[L2_SIZE / PAGE_SIZE - 1]; - else + m = vm_radix_iter_jump(&pages, L2_SIZE / PAGE_SIZE); + else { mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); - m = TAILQ_NEXT(m, listq); + m = vm_radix_iter_step(&pages); + } } if (lock != NULL) rw_wunlock(lock); Index: sys/sys/pctrie.h =================================================================== --- sys/sys/pctrie.h +++ sys/sys/pctrie.h @@ -48,6 +48,19 @@ return name##_PCTRIE_VAL2PTR(pctrie_lookup_unlocked(ptree, \ key, (smr))); \ } \ + \ +static __inline __unused int \ +name##_PCTRIE_LOOKUP_RANGE_UNLOCKED(struct pctrie *ptree, uint64_t key, \ + struct type *value[], int count) \ +{ \ + uint64_t *data[count]; \ + \ + count = pctrie_lookup_range_unlocked(ptree, key, data, count, \ + (smr)); \ + for (int i = 0; i < count; i++) \ + value[i] = name##_PCTRIE_VAL2PTR(data[i]); \ + return (count); \ +} \ #ifdef INVARIANTS void pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, @@ -375,6 +388,8 @@ uint64_t 
*pctrie_lookup(struct pctrie *ptree, uint64_t key); uint64_t *pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t key, smr_t smr); +int pctrie_lookup_range_unlocked(struct pctrie *ptree, + uint64_t index, uint64_t *value[], int count, smr_t smr); uint64_t *pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index); uint64_t *pctrie_iter_stride(struct pctrie_iter *it, int stride); uint64_t *pctrie_iter_next(struct pctrie_iter *it); Index: sys/vm/device_pager.c =================================================================== --- sys/vm/device_pager.c +++ sys/vm/device_pager.c @@ -267,7 +267,7 @@ struct pctrie_iter pages; vm_page_iter_init(&pages, object); - vm_page_iter_lookup(&pages, m->pindex); + vm_radix_iter_lookup(&pages, m->pindex); cdev_mgtdev_pager_free_page(&pages, m); } else if (object->type == OBJT_DEVICE) dev_pager_free_page(object, m); @@ -292,8 +292,7 @@ vm_page_iter_init(&pages, object); VM_OBJECT_WLOCK(object); retry: - for (m = vm_page_iter_lookup_ge(&pages, 0); m != NULL; - m = vm_radix_iter_step(&pages)) { + VM_RADIX_FOREACH(m, pages) { if (!vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL)) { pctrie_iter_reset(&pages); goto retry; Index: sys/vm/swap_pager.c =================================================================== --- sys/vm/swap_pager.c +++ sys/vm/swap_pager.c @@ -1359,7 +1359,7 @@ vm_page_t *ma, int count, int *rbehind, int *rahead) { struct buf *bp; - vm_page_t bm, mpred, msucc, p; + vm_page_t mpred, msucc, p; vm_pindex_t pindex; daddr_t blk; int i, maxahead, maxbehind, reqcount; @@ -1393,56 +1393,61 @@ * Clip the readahead and readbehind ranges to exclude resident pages. */ if (rahead != NULL) { - *rahead = imin(*rahead, maxahead - (reqcount - 1)); + maxahead = imin(*rahead, maxahead - (reqcount - 1)); pindex = ma[reqcount - 1]->pindex; - msucc = TAILQ_NEXT(ma[reqcount - 1], listq); - if (msucc != NULL && msucc->pindex - pindex - 1 < *rahead) - *rahead = msucc->pindex - pindex - 1; + msucc = vm_radix_lookup_ge(&object->rtree, pindex + 1); + if (msucc != NULL && msucc->pindex - pindex - 1 < maxahead) + maxahead = msucc->pindex - pindex - 1; + count += maxahead; } if (rbehind != NULL) { - *rbehind = imin(*rbehind, maxbehind); + maxbehind = imin(*rbehind, maxbehind); pindex = ma[0]->pindex; - mpred = TAILQ_PREV(ma[0], pglist, listq); - if (mpred != NULL && pindex - mpred->pindex - 1 < *rbehind) - *rbehind = pindex - mpred->pindex - 1; - } + mpred = vm_radix_lookup_le(&object->rtree, pindex - 1); + if (mpred != NULL && pindex - mpred->pindex - 1 < maxbehind) + maxbehind = pindex - mpred->pindex - 1; + count += maxbehind; + } else + maxbehind = 0; - bm = ma[0]; - for (i = 0; i < count; i++) + vm_page_t b_pages[count]; + for (i = 0; i < reqcount; i++) { + b_pages[i + maxbehind] = ma[i]; ma[i]->oflags |= VPO_SWAPINPROG; + } /* * Allocate readahead and readbehind pages. 
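[Editorial note] The pctrie.h hunk above generates a per-type wrapper, name##_PCTRIE_LOOKUP_RANGE_UNLOCKED(), around the new pctrie_lookup_range_unlocked(), which reads a run of consecutive entries under SMR without the trie lock. A hypothetical usage sketch follows; "FOO", "struct foo" and foo_tree stand in for a trie declared elsewhere with PCTRIE_DEFINE_SMR(), and the SMR enter/exit is handled inside the call.

/*
 * Sketch only: a hypothetical consumer of the generated wrapper.
 * Returns how many consecutive entries starting at 'key' were found
 * before the first gap; run[0..n-1] hold the entries at key .. key+n-1.
 */
static int
foo_read_run(struct pctrie *foo_tree, uint64_t key, struct foo **run,
    int count)
{
	int n;

	n = FOO_PCTRIE_LOOKUP_RANGE_UNLOCKED(foo_tree, key, run, count);
	return (n);
}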
*/ - if (rbehind != NULL) { - for (i = 1; i <= *rbehind; i++) { - p = vm_page_alloc(object, ma[0]->pindex - i, - VM_ALLOC_NORMAL); + if (rahead != NULL) { + pindex = ma[reqcount - 1]->pindex; + for (i = 1; i <= maxahead; i++) { + p = vm_page_alloc(object, pindex + i, VM_ALLOC_NORMAL); if (p == NULL) break; + b_pages[count - maxahead + i - 1] = p; p->oflags |= VPO_SWAPINPROG; - bm = p; } - *rbehind = i - 1; + *rahead = i - 1; + count -= maxahead - *rahead; } - if (rahead != NULL) { - for (i = 0; i < *rahead; i++) { - p = vm_page_alloc(object, - ma[reqcount - 1]->pindex + i + 1, VM_ALLOC_NORMAL); + pindex = ma[0]->pindex; + if (rbehind != NULL) { + for (i = 1; i <= maxbehind; i++) { + p = vm_page_alloc(object, pindex - i, VM_ALLOC_NORMAL); if (p == NULL) break; + b_pages[maxbehind - i] = p; p->oflags |= VPO_SWAPINPROG; } - *rahead = i; + *rbehind = i - 1; + count -= maxbehind - *rbehind; + maxbehind -= *rbehind; + pindex -= *rbehind; } - if (rbehind != NULL) - count += *rbehind; - if (rahead != NULL) - count += *rahead; vm_object_pip_add(object, count); - pindex = bm->pindex; blk = swp_pager_meta_lookup(blks, pindex); KASSERT(blk != SWAPBLK_NONE, ("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex)); @@ -1450,12 +1455,8 @@ VM_OBJECT_WUNLOCK(object); bp = uma_zalloc(swrbuf_zone, M_WAITOK); MPASS((bp->b_flags & B_MAXPHYS) != 0); - /* Pages cannot leave the object while busy. */ - for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) { - MPASS(p->pindex == bm->pindex + i); - bp->b_pages[i] = p; - } - + for (i = 0; i < count; i++) + bp->b_pages[i] = b_pages[i + maxbehind]; bp->b_flags |= B_PAGING; bp->b_iocmd = BIO_READ; bp->b_iodone = swp_pager_async_iodone; @@ -1962,7 +1963,7 @@ * found page has pending operations, sleep and restart * the scan. */ - m = vm_page_iter_lookup(&pages, blks.index + i); + m = vm_radix_iter_lookup(&pages, blks.index + i); if (m != NULL && (m->oflags & VPO_SWAPINPROG) != 0) { m->oflags |= VPO_SWAPSLEEP; VM_OBJECT_SLEEP(object, &object->handle, PSWP, @@ -2384,7 +2385,7 @@ continue; swp_pager_update_freerange(&range, sb->d[i]); if (freed != NULL) { - m = vm_page_iter_lookup(&pages, blks.index + i); + m = vm_radix_iter_lookup(&pages, blks.index + i); if (m == NULL || vm_page_none_valid(m)) fc++; } @@ -2502,7 +2503,7 @@ VM_OBJECT_ASSERT_RLOCKED(object); vm_page_iter_init(&pages, object); - m = vm_page_iter_lookup_ge(&pages, pindex); + m = vm_radix_iter_lookup_ge(&pages, pindex); if (m != NULL && pages.index == pindex && vm_page_any_valid(m)) return (pages.index); swblk_iter_init_only(&blks, object); @@ -2537,7 +2538,7 @@ VM_OBJECT_ASSERT_RLOCKED(object); vm_page_iter_init(&pages, object); swblk_iter_init_only(&blks, object); - while (((m = vm_page_iter_lookup(&pages, pindex)) != NULL && + while (((m = vm_radix_iter_lookup(&pages, pindex)) != NULL && vm_page_any_valid(m)) || ((sb = swblk_iter_lookup(&blks, pindex)) != NULL && sb->d[pindex % SWAP_META_PAGES] != SWAPBLK_NONE)) @@ -2582,7 +2583,7 @@ pv = ps = pi = backing_offset_index - 1; for (;;) { if (pi == pv) { - p = vm_page_iter_lookup_ge(&backing_pages, pv + 1); + p = vm_radix_iter_lookup_ge(&backing_pages, pv + 1); pv = p != NULL ? p->pindex : backing_object->size; } if (pi == ps) @@ -2624,7 +2625,7 @@ * object and we might as well give up now. 
*/ new_pindex = pi - backing_offset_index; - pp = vm_page_iter_lookup(&pages, new_pindex); + pp = vm_radix_iter_lookup(&pages, new_pindex); /* * The valid check here is stable due to object lock being Index: sys/vm/vm_fault.c =================================================================== --- sys/vm/vm_fault.c +++ sys/vm/vm_fault.c @@ -107,6 +107,7 @@ #include #include #include +#include #define PFBAK 4 #define PFFOR 4 @@ -479,24 +480,27 @@ vm_fault_populate_cleanup(vm_object_t object, vm_pindex_t first, vm_pindex_t last) { + struct pctrie_iter pages; vm_page_t m; - vm_pindex_t pidx; VM_OBJECT_ASSERT_WLOCKED(object); MPASS(first <= last); - for (pidx = first, m = vm_page_lookup(object, pidx); - pidx <= last; pidx++, m = TAILQ_NEXT(m, listq)) { - KASSERT(m != NULL && m->pindex == pidx, - ("%s: pindex mismatch", __func__)); + vm_page_iter_init(&pages, object); + VM_RADIX_FORALL_FROM(m, pages, first) { + if (m->pindex > last) + break; vm_fault_populate_check_page(m); vm_page_deactivate(m); vm_page_xunbusy(m); } + KASSERT(m == NULL || m->pindex - 1 == last, + ("%s: pindex mismatch", __func__)); } static enum fault_status vm_fault_populate(struct faultstate *fs) { + struct pctrie_iter pages; vm_offset_t vaddr; vm_page_t m; vm_pindex_t map_first, map_last, pager_first, pager_last, pidx; @@ -623,9 +627,10 @@ pager_last); pager_last = map_last; } - for (pidx = pager_first, m = vm_page_lookup(fs->first_object, pidx); + vm_page_iter_init(&pages, fs->first_object); + for (pidx = pager_first, m = vm_radix_iter_lookup(&pages, pidx); pidx <= pager_last; - pidx += npages, m = TAILQ_NEXT(&m[npages - 1], listq)) { + pidx += npages, m = vm_radix_iter_stride(&pages, npages)) { vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset; KASSERT(m != NULL && m->pindex == pidx, ("%s: pindex mismatch", __func__)); @@ -1835,59 +1840,58 @@ static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead) { + struct pctrie_iter pages; vm_map_entry_t entry; vm_object_t first_object; vm_offset_t end, start; - vm_page_t m, m_next; - vm_pindex_t pend, pstart; + vm_page_t m; vm_size_t size; VM_OBJECT_ASSERT_UNLOCKED(fs->object); first_object = fs->first_object; /* Neither fictitious nor unmanaged pages can be reclaimed. */ - if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0) { - VM_OBJECT_RLOCK(first_object); - size = VM_FAULT_DONTNEED_MIN; - if (MAXPAGESIZES > 1 && size < pagesizes[1]) - size = pagesizes[1]; - end = rounddown2(vaddr, size); - if (vaddr - end >= size - PAGE_SIZE - ptoa(ahead) && - (entry = fs->entry)->start < end) { - if (end - entry->start < size) - start = entry->start; - else - start = end - size; - pmap_advise(fs->map->pmap, start, end, MADV_DONTNEED); - pstart = OFF_TO_IDX(entry->offset) + atop(start - - entry->start); - m_next = vm_page_find_least(first_object, pstart); - pend = OFF_TO_IDX(entry->offset) + atop(end - - entry->start); - while ((m = m_next) != NULL && m->pindex < pend) { - m_next = TAILQ_NEXT(m, listq); - if (!vm_page_all_valid(m) || - vm_page_busied(m)) - continue; + if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) != 0) + return; - /* - * Don't clear PGA_REFERENCED, since it would - * likely represent a reference by a different - * process. - * - * Typically, at this point, prefetched pages - * are still in the inactive queue. Only - * pages that triggered page faults are in the - * active queue. 
The test for whether the page - * is in the inactive queue is racy; in the - * worst case we will requeue the page - * unnecessarily. - */ - if (!vm_page_inactive(m)) - vm_page_deactivate(m); - } - } - VM_OBJECT_RUNLOCK(first_object); + size = VM_FAULT_DONTNEED_MIN; + if (MAXPAGESIZES > 1 && size < pagesizes[1]) + size = pagesizes[1]; + end = rounddown2(vaddr, size); + if (vaddr - end < size - PAGE_SIZE - ptoa(ahead) || + (entry = fs->entry)->start >= end) + return; + + VM_OBJECT_RLOCK(first_object); + if (end - entry->start < size) + start = entry->start; + else + start = end - size; + pmap_advise(fs->map->pmap, start, end, MADV_DONTNEED); + vm_page_iter_limit_init(&pages, first_object, + OFF_TO_IDX(entry->offset) + atop(end - entry->start)); + VM_RADIX_FOREACH_FROM(m, pages, + OFF_TO_IDX(entry->offset) + atop(start - entry->start)) { + if (!vm_page_all_valid(m) || + vm_page_busied(m)) + continue; + + /* + * Don't clear PGA_REFERENCED, since it would + * likely represent a reference by a different + * process. + * + * Typically, at this point, prefetched pages + * are still in the inactive queue. Only + * pages that triggered page faults are in the + * active queue. The test for whether the page + * is in the inactive queue is racy; in the + * worst case we will requeue the page + * unnecessarily. + */ + if (!vm_page_inactive(m)) + vm_page_deactivate(m); } + VM_OBJECT_RUNLOCK(first_object); } /* Index: sys/vm/vm_glue.c =================================================================== --- sys/vm/vm_glue.c +++ sys/vm/vm_glue.c @@ -621,9 +621,8 @@ VM_OBJECT_WLOCK(obj); for (n = 0; n < npages;) { m = vm_page_grab(obj, pindex + n, - VM_ALLOC_NOCREAT | VM_ALLOC_WIRED); - if (m == NULL) { - m = n > 0 ? ma[n - 1] : vm_page_mpred(obj, pindex); + VM_ALLOC_NOCREAT_LT | VM_ALLOC_WIRED); + if (m == NULL || m->pindex != pindex + n) { m = vm_page_alloc_domain_after(obj, pindex + n, domain, req_class | VM_ALLOC_WIRED, m); } Index: sys/vm/vm_kern.c =================================================================== --- sys/vm/vm_kern.c +++ sys/vm/vm_kern.c @@ -648,16 +648,16 @@ pmap_remove(kernel_pmap, addr, addr + size); offset = addr - VM_MIN_KERNEL_ADDRESS; end = offset + size; - VM_OBJECT_WLOCK(object); vm_page_iter_init(&pages, object); - m = vm_page_iter_lookup(&pages, atop(offset)); + VM_OBJECT_WLOCK(object); + m = vm_radix_iter_lookup(&pages, atop(offset)); domain = vm_page_domain(m); if (__predict_true((m->oflags & VPO_KMEM_EXEC) == 0)) arena = vm_dom[domain].vmd_kernel_arena; else arena = vm_dom[domain].vmd_kernel_rwx_arena; for (; offset < end; offset += PAGE_SIZE, - m = vm_page_iter_lookup(&pages, atop(offset))) { + m = vm_radix_iter_lookup(&pages, atop(offset))) { vm_page_xbusy_claim(m); vm_page_unwire_noq(m); vm_page_iter_free(&pages, m); Index: sys/vm/vm_map.c =================================================================== --- sys/vm/vm_map.c +++ sys/vm/vm_map.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -2674,6 +2675,7 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags) { + struct pctrie_iter pages; vm_offset_t start; vm_page_t p, p_start; vm_pindex_t mask, psize, threshold, tmpidx; @@ -2706,19 +2708,13 @@ p_start = NULL; threshold = MAX_INIT_PT; - p = vm_page_find_least(object, pindex); - /* - * Assert: the variable p is either (1) the page with the - * least pindex greater than or equal to the parameter pindex - * or (2) NULL. 
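[Editorial note] The vm_fault_dontneed(), kmem_free() and vm_map_pmap_enter() hunks around here replace the vm_page_find_least() + TAILQ_NEXT() pattern with a bounded iterator walk. A minimal sketch of that idiom follows, assuming the VM_RADIX_FOREACH_FROM macro this series relies on and that the object lock is held; the body is reduced to the deactivate case for brevity.

/*
 * Sketch only: bounded range walk over an object's resident pages.
 * The iterator stops by itself once pindex reaches 'end'.
 */
static void
deactivate_range(vm_object_t obj, vm_pindex_t start, vm_pindex_t end)
{
	struct pctrie_iter pages;
	vm_page_t m;

	vm_page_iter_limit_init(&pages, obj, end);
	VM_RADIX_FOREACH_FROM(m, pages, start) {
		if (!vm_page_all_valid(m) || vm_page_busied(m))
			continue;
		vm_page_deactivate_noreuse(m);
	}
}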
- */ - for (; - p != NULL && (tmpidx = p->pindex - pindex) < psize; - p = TAILQ_NEXT(p, listq)) { + vm_page_iter_limit_init(&pages, object, pindex + psize); + VM_RADIX_FOREACH_FROM(p, pages, pindex) { /* * don't allow an madvise to blow away our really * free pages allocating pv entries. */ + tmpidx = p->pindex - pindex; if (((flags & MAP_PREFAULT_MADVISE) != 0 && vm_page_count_severe()) || ((flags & MAP_PREFAULT_PARTIAL) != 0 && Index: sys/vm/vm_object.h =================================================================== --- sys/vm/vm_object.h +++ sys/vm/vm_object.h @@ -98,7 +98,6 @@ TAILQ_ENTRY(vm_object) object_list; /* list of all objects */ LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ - struct pglist memq; /* list of resident pages */ struct vm_radix rtree; /* root of the resident page radix trie*/ vm_pindex_t size; /* Object size */ struct domainset_ref domain; /* NUMA policy. */ Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c +++ sys/vm/vm_object.c @@ -110,8 +110,8 @@ SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0, "Use old (insecure) msync behavior"); -static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, - int pagerflags, int flags, boolean_t *allclean, +static int vm_object_page_collect_flush(struct pctrie_iter *pages, + vm_page_t p, int pagerflags, int flags, boolean_t *allclean, boolean_t *eio); static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *allclean); @@ -181,8 +181,6 @@ object = (vm_object_t)mem; KASSERT(object->ref_count == 0, ("object %p ref_count = %d", object, object->ref_count)); - KASSERT(TAILQ_EMPTY(&object->memq), - ("object %p has resident pages in its memq", object)); KASSERT(vm_radix_is_empty(&object->rtree), ("object %p has resident pages in its trie", object)); #if VM_NRESERVLEVEL > 0 @@ -235,8 +233,6 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, u_short flags, vm_object_t object, void *handle) { - - TAILQ_INIT(&object->memq); LIST_INIT(&object->shadow_head); object->type = type; @@ -336,7 +332,7 @@ if (object->type == OBJT_DEAD) return (KERN_INVALID_ARGUMENT); - if (!TAILQ_EMPTY(&object->memq)) + if (!vm_radix_is_empty(&object->rtree)) return (KERN_FAILURE); object->memattr = memattr; @@ -922,7 +918,6 @@ vm_radix_reclaim_callback(&object->rtree, vm_object_terminate_single_page, object); - TAILQ_INIT(&object->memq); object->resident_page_count = 0; if (object->type == OBJT_VNODE) vdrop(object->handle); @@ -1032,6 +1027,7 @@ vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end, int flags) { + struct pctrie_iter pages; vm_page_t np, p; vm_pindex_t pi, tend, tstart; int curgeneration, n, pagerflags; @@ -1050,30 +1046,35 @@ tend = (end == 0) ? 
object->size : OFF_TO_IDX(end + PAGE_MASK); allclean = tstart == 0 && tend >= object->size; res = TRUE; + vm_page_iter_init(&pages, object); rescan: curgeneration = object->generation; - for (p = vm_page_find_least(object, tstart); p != NULL; p = np) { + for (p = vm_radix_iter_lookup_ge(&pages, tstart); p != NULL; p = np) { pi = p->pindex; if (pi >= tend) break; - np = TAILQ_NEXT(p, listq); - if (vm_page_none_valid(p)) + if (vm_page_none_valid(p)) { + np = vm_radix_iter_step(&pages); continue; - if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0) { + } + if (!vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL)) { + pctrie_iter_reset(&pages); if (object->generation != curgeneration && (flags & OBJPC_SYNC) != 0) goto rescan; - np = vm_page_find_least(object, pi); + np = vm_radix_iter_lookup_ge(&pages, pi); continue; } if (!vm_object_page_remove_write(p, flags, &allclean)) { + np = vm_radix_iter_step(&pages); vm_page_xunbusy(p); continue; } if (object->type == OBJT_VNODE) { - n = vm_object_page_collect_flush(object, p, pagerflags, + pctrie_iter_reset(&pages); + n = vm_object_page_collect_flush(&pages, p, pagerflags, flags, &allclean, &eio); if (eio) { res = FALSE; @@ -1103,7 +1104,7 @@ n = 1; vm_page_xunbusy(p); } - np = vm_page_find_least(object, pi + n); + np = vm_radix_iter_lookup_ge(&pages, pi + n); } #if 0 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0); @@ -1120,37 +1121,37 @@ } static int -vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, - int flags, boolean_t *allclean, boolean_t *eio) +vm_object_page_collect_flush(struct pctrie_iter *pages, vm_page_t p, + int pagerflags, int flags, boolean_t *allclean, boolean_t *eio) { - vm_page_t ma[2 * vm_pageout_page_count - 1], tp; + vm_page_t ma[2 * vm_pageout_page_count - 1]; int base, count, runlen; vm_page_lock_assert(p, MA_NOTOWNED); vm_page_assert_xbusied(p); - VM_OBJECT_ASSERT_WLOCKED(object); base = nitems(ma) / 2; ma[base] = p; - for (count = 1, tp = p; count < vm_pageout_page_count; count++) { - tp = vm_page_next(tp); - if (tp == NULL || vm_page_tryxbusy(tp) == 0) + for (count = 1; count < vm_pageout_page_count; count++) { + p = vm_radix_iter_next(pages); + if (p == NULL || vm_page_tryxbusy(p) == 0) break; - if (!vm_object_page_remove_write(tp, flags, allclean)) { - vm_page_xunbusy(tp); + if (!vm_object_page_remove_write(p, flags, allclean)) { + vm_page_xunbusy(p); break; } - ma[base + count] = tp; + ma[base + count] = p; } - for (tp = p; count < vm_pageout_page_count; count++) { - tp = vm_page_prev(tp); - if (tp == NULL || vm_page_tryxbusy(tp) == 0) + pages->index -= count; + for (; count < vm_pageout_page_count; count++) { + p = vm_radix_iter_prev(pages); + if (p == NULL || vm_page_tryxbusy(p) == 0) break; - if (!vm_object_page_remove_write(tp, flags, allclean)) { - vm_page_xunbusy(tp); + if (!vm_object_page_remove_write(p, flags, allclean)) { + vm_page_xunbusy(p); break; } - ma[--base] = tp; + ma[--base] = p; } vm_pageout_flush(&ma[base], count, pagerflags, nitems(ma) / 2 - base, @@ -1326,6 +1327,7 @@ vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end, int advice) { + struct pctrie_iter pages; vm_pindex_t tpindex; vm_object_t backing_object, tobject; vm_page_t m, tm; @@ -1333,13 +1335,15 @@ if (object == NULL) return; + vm_page_iter_init(&pages, object); relookup: VM_OBJECT_WLOCK(object); if (!vm_object_advice_applies(object, advice)) { VM_OBJECT_WUNLOCK(object); return; } - for (m = vm_page_find_least(object, pindex); pindex < end; pindex++) { + for (m = 
vm_radix_iter_lookup_ge(&pages, pindex); pindex < end; + pindex++) { tobject = object; /* @@ -1388,7 +1392,7 @@ } else { next_page: tm = m; - m = TAILQ_NEXT(m, listq); + m = vm_radix_iter_step(&pages); } /* @@ -1414,6 +1418,7 @@ } if (!vm_page_busy_sleep(tm, "madvpo", 0)) VM_OBJECT_WUNLOCK(tobject); + pctrie_iter_reset(&pages); goto relookup; } vm_page_advise(tm, advice); @@ -1577,8 +1582,7 @@ vm_page_iter_limit_init(&pages, orig_object, offidxstart + size); retry: pctrie_iter_reset(&pages); - for (m = vm_page_iter_lookup_ge(&pages, offidxstart); m != NULL; - m = vm_radix_iter_step(&pages)) { + VM_RADIX_FOREACH_FROM(m, pages, offidxstart) { /* * We must wait for pending I/O to complete before we can * rename the page. @@ -1638,8 +1642,8 @@ * and new_object's locks are released and reacquired. */ swap_pager_copy(orig_object, new_object, offidxstart, 0); - - TAILQ_FOREACH(m, &new_object->memq, listq) + vm_page_iter_init(&pages, new_object); + VM_RADIX_FOREACH(m, pages) vm_page_xunbusy(m); vm_object_clear_flag(orig_object, OBJ_SPLIT); @@ -1681,7 +1685,7 @@ } VM_OBJECT_WLOCK(backing_object); vm_page_iter_init(pages, backing_object); - return (vm_page_iter_lookup_ge(pages, 0)); + return (vm_radix_iter_lookup_ge(pages, 0)); } static void @@ -1702,8 +1706,7 @@ * Our scan */ vm_page_iter_init(&pages, backing_object); - for (p = vm_page_iter_lookup_ge(&pages, 0); p != NULL; p = next) { - next = TAILQ_NEXT(p, listq); + for (p = vm_radix_iter_lookup_ge(&pages, 0); p != NULL; p = next) { new_pindex = p->pindex - backing_offset_index; /* @@ -1999,9 +2002,7 @@ vm_object_pip_add(object, 1); vm_page_iter_limit_init(&pages, object, end); again: - pctrie_iter_reset(&pages); - for (p = vm_page_iter_lookup_ge(&pages, start); p != NULL; - p = vm_radix_iter_step(&pages)) { + VM_RADIX_FOREACH_FROM(p, pages, start) { /* * Skip invalid pages if asked to do so. Try to avoid acquiring * the busy lock, as some consumers rely on this to avoid @@ -2028,6 +2029,7 @@ if (vm_page_tryxbusy(p) == 0) { if (vm_page_busy_sleep(p, "vmopar", 0)) VM_OBJECT_WLOCK(object); + pctrie_iter_reset(&pages); goto again; } if ((options & OBJPR_VALIDONLY) != 0 && vm_page_none_valid(p)) { @@ -2089,23 +2091,18 @@ void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { - vm_page_t p, next; + struct pctrie_iter pages; + vm_page_t p; VM_OBJECT_ASSERT_LOCKED(object); KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0, ("vm_object_page_noreuse: illegal object %p", object)); if (object->resident_page_count == 0) return; - p = vm_page_find_least(object, start); - /* - * Here, the variable "p" is either (1) the page with the least pindex - * greater than or equal to the parameter "start" or (2) NULL. 
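[Editorial note] Several hunks in this patch (sgx_enclave_remove(), the device pager, vm_object_page_clean(), vm_object_madvise(), vm_object_page_remove()) share a retry idiom: when busying a page drops the object lock, the iterator's cached trie path may be stale, so it is reset with pctrie_iter_reset() before the scan restarts. A minimal sketch, assuming the object write lock and the patch's iterator macros; the per-page work is elided.

/*
 * Sketch only: restart a resident-page scan after the busy acquire
 * released and reacquired the object lock.
 */
static void
scan_and_busy(vm_object_t obj)
{
	struct pctrie_iter pages;
	vm_page_t m;

	vm_page_iter_init(&pages, obj);
	VM_OBJECT_WLOCK(obj);
again:
	VM_RADIX_FOREACH(m, pages) {
		if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) {
			/* Lock was dropped; forget the cached trie path. */
			pctrie_iter_reset(&pages);
			goto again;
		}
		/* ... operate on the busied page ... */
		vm_page_xunbusy(m);
	}
	VM_OBJECT_WUNLOCK(obj);
}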
- */ - for (; p != NULL && (p->pindex < end || end == 0); p = next) { - next = TAILQ_NEXT(p, listq); + vm_page_iter_limit_init(&pages, object, end); + VM_RADIX_FOREACH_FROM(p, pages, start) vm_page_deactivate_noreuse(p); - } } /* @@ -2121,6 +2118,7 @@ boolean_t vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { + struct pctrie_iter pages; vm_page_t m; vm_pindex_t pindex; int rv; @@ -2137,11 +2135,10 @@ */ } if (pindex > start) { - m = vm_page_lookup(object, start); - while (m != NULL && m->pindex < pindex) { + vm_page_iter_limit_init(&pages, object, pindex); + for (m = vm_radix_iter_lookup(&pages, start); + m != NULL; m = vm_radix_iter_step(&pages)) vm_page_xunbusy(m); - m = TAILQ_NEXT(m, listq); - } } return (pindex == end); } @@ -2280,6 +2277,7 @@ vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length, uint8_t queue) { + struct pctrie_iter pages; vm_object_t tobject, t1object; vm_page_t m, tm; vm_pindex_t end_pindex, pindex, tpindex; @@ -2294,10 +2292,11 @@ return; pindex = OFF_TO_IDX(offset); end_pindex = pindex + atop(length); + vm_page_iter_init(&pages, object); again: locked_depth = 1; VM_OBJECT_RLOCK(object); - m = vm_page_find_least(object, pindex); + m = vm_radix_iter_lookup_ge(&pages, pindex); while (pindex < end_pindex) { if (m == NULL || pindex < m->pindex) { /* @@ -2325,7 +2324,7 @@ NULL); } else { tm = m; - m = TAILQ_NEXT(m, listq); + m = vm_radix_iter_step(&pages); } if (vm_page_trysbusy(tm) == 0) { for (tobject = object; locked_depth >= 1; @@ -2339,6 +2338,7 @@ if (!vm_page_busy_sleep(tm, "unwbo", VM_ALLOC_IGN_SBUSY)) VM_OBJECT_RUNLOCK(tobject); + pctrie_iter_reset(&pages); goto again; } vm_page_unwire(tm, queue); @@ -2419,6 +2419,7 @@ static int vm_object_list_handler(struct sysctl_req *req, bool swap_only) { + struct pctrie_iter pages; struct kinfo_vmobject *kvo; char *fullpath, *freepath; struct vnode *vp; @@ -2480,7 +2481,8 @@ kvo->kvo_inactive = 0; kvo->kvo_flags = 0; if (!swap_only) { - TAILQ_FOREACH(m, &obj->memq, listq) { + vm_page_iter_init(&pages, obj); + VM_RADIX_FOREACH(m, pages) { /* * A page may belong to the object but be * dequeued and set to PQ_NONE while the @@ -2692,6 +2694,7 @@ */ DB_SHOW_COMMAND(object, vm_object_print_static) { + struct pctrie_iter pages; /* XXX convert args. 
*/ vm_object_t object = (vm_object_t)addr; boolean_t full = have_addr; @@ -2721,7 +2724,8 @@ db_indent += 2; count = 0; - TAILQ_FOREACH(p, &object->memq, listq) { + vm_page_iter_init(&pages, object); + VM_RADIX_FOREACH(p, pages) { if (count == 0) db_iprintf("memory:="); else if (count == 6) { @@ -2759,10 +2763,11 @@ DB_SHOW_COMMAND_FLAGS(vmopag, vm_object_print_pages, DB_CMD_MEMSAFE) { + struct pctrie_iter pages; vm_object_t object; vm_pindex_t fidx; vm_paddr_t pa; - vm_page_t m, prev_m; + vm_page_t m; int rcount; TAILQ_FOREACH(object, &vm_object_list, object_list) { @@ -2773,19 +2778,17 @@ rcount = 0; fidx = 0; pa = -1; - TAILQ_FOREACH(m, &object->memq, listq) { - if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL && - prev_m->pindex + 1 != m->pindex) { - if (rcount) { - db_printf(" index(%ld)run(%d)pa(0x%lx)\n", - (long)fidx, rcount, (long)pa); - if (db_pager_quit) - return; - rcount = 0; - } + vm_page_iter_init(&pages, object); + VM_RADIX_FOREACH(m, pages) { + if (rcount && fidx + 1 != m->pindex) { + db_printf(" index(%ld)run(%d)pa(0x%lx)\n", + (long)fidx, rcount, (long)pa); + if (db_pager_quit) + return; + rcount = 0; } if (rcount && - (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { + (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { ++rcount; continue; } Index: sys/vm/vm_page.h =================================================================== --- sys/vm/vm_page.h +++ sys/vm/vm_page.h @@ -78,10 +78,6 @@ * A radix tree used to quickly * perform object/offset lookups * - * A list of all pages for a given object, - * so they can be quickly deactivated at - * time of deallocation. - * * An ordered list of pages due for pageout. * * In addition, the structure contains the object @@ -219,6 +215,13 @@ } vm_page_astate_t; struct vm_page { + union { + TAILQ_ENTRY(vm_page) listq; /* freed pages (F) */ + struct { + vm_pindex_t pindex; /* offset into object (O,P) */ + vm_object_t object; /* which object am I in (O) */ + }; + }; union { TAILQ_ENTRY(vm_page) q; /* page queue or free list (Q) */ struct { @@ -233,9 +236,6 @@ void *zone; } uma; } plinks; - TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ - vm_object_t object; /* which object am I in (O) */ - vm_pindex_t pindex; /* offset into object (O,P) */ vm_paddr_t phys_addr; /* physical address of page (C) */ struct md_page md; /* machine dependent stuff */ u_int ref_count; /* page references (A) */ @@ -541,8 +541,8 @@ #define VM_ALLOC_NORECLAIM 0x0080 /* (c) Do not reclaim after failure */ #define VM_ALLOC_NOFREE 0x0100 /* (an) Page will never be released */ #define VM_ALLOC_NOBUSY 0x0200 /* (acgp) Do not excl busy the page */ -#define VM_ALLOC_NOCREAT 0x0400 /* (gp) Don't create a page */ -#define VM_ALLOC_AVAIL1 0x0800 +#define VM_ALLOC_NOCREAT 0x0400 /* (gp) Don't create page; ret NULL */ +#define VM_ALLOC_NOCREAT_LT 0x0800 /* (gp) Don't create page; ret pred */ #define VM_ALLOC_IGN_SBUSY 0x1000 /* (gp) Ignore shared busy flag */ #define VM_ALLOC_NODUMP 0x2000 /* (ag) don't include in dump */ #define VM_ALLOC_SBUSY 0x4000 /* (acgp) Shared busy the page */ @@ -606,7 +606,6 @@ void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int advice); -vm_page_t vm_page_mpred(vm_object_t, vm_pindex_t); vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int); vm_page_t vm_page_alloc_domain_after(vm_object_t, vm_pindex_t, int, int, vm_page_t); @@ -641,8 +640,6 @@ void vm_page_deactivate_noreuse(vm_page_t); void vm_page_dequeue(vm_page_t m); void vm_page_dequeue_deferred(vm_page_t m); -vm_page_t vm_page_find_least(vm_object_t, 
vm_pindex_t); -vm_page_t vm_page_iter_lookup_ge(struct pctrie_iter *, vm_pindex_t); void vm_page_free_invalid(vm_page_t); vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); @@ -653,19 +650,16 @@ void vm_page_iter_free(struct pctrie_iter *pages, vm_page_t m); void vm_page_iter_init(struct pctrie_iter *, vm_object_t); void vm_page_iter_limit_init(struct pctrie_iter *, vm_object_t, vm_pindex_t); -vm_page_t vm_page_iter_lookup(struct pctrie_iter *, vm_pindex_t); bool vm_page_iter_remove(struct pctrie_iter *pages, vm_page_t m); bool vm_page_iter_rename(struct pctrie_iter *old_pages, vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex); void vm_page_launder(vm_page_t m); vm_page_t vm_page_lookup(vm_object_t, vm_pindex_t); vm_page_t vm_page_lookup_unlocked(vm_object_t, vm_pindex_t); -vm_page_t vm_page_next(vm_page_t m); void vm_page_pqbatch_drain(void); void vm_page_pqbatch_submit(vm_page_t m, uint8_t queue); bool vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new); -vm_page_t vm_page_prev(vm_page_t m); bool vm_page_ps_test(vm_page_t m, int psind, int flags, vm_page_t skip_m); void vm_page_putfake(vm_page_t m); void vm_page_readahead_finish(vm_page_t m); Index: sys/vm/vm_page.c =================================================================== --- sys/vm/vm_page.c +++ sys/vm/vm_page.c @@ -173,10 +173,7 @@ static bool vm_page_free_prep(vm_page_t m); static void vm_page_free_toq(vm_page_t m); static void vm_page_init(void *dummy); -static int vm_page_insert_after(vm_page_t m, vm_object_t object, - vm_pindex_t pindex, vm_page_t mpred); -static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, - vm_page_t mpred); +static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object); static void vm_page_mvqueue(vm_page_t m, const uint8_t queue, const uint16_t nflag); static int vm_page_reclaim_run(int req_class, int domain, u_long npages, @@ -1475,19 +1472,14 @@ } /* - * Insert the given page into the given object at the given pindex. mpred is - * used for memq linkage. From vm_page_insert, lookup is true, mpred is - * initially NULL, and this procedure looks it up. From vm_page_insert_after - * and vm_page_iter_insert, lookup is false and mpred is known to the caller - * to be valid, and may be NULL if this will be the page with the lowest - * pindex. + * Insert the given page into the given object at the given pindex. * * The procedure is marked __always_inline to suggest to the compiler to * eliminate the lookup parameter and the associated alternate branch. */ static __always_inline int vm_page_insert_lookup(vm_page_t m, vm_object_t object, vm_pindex_t pindex, - struct pctrie_iter *pages, bool iter, vm_page_t mpred, bool lookup) + struct pctrie_iter *pages, bool iter) { int error; @@ -1503,14 +1495,10 @@ m->ref_count |= VPRC_OBJREF; /* - * Add this page to the object's radix tree, and look up mpred if - * needed. + * Add this page to the object's radix tree. */ - if (iter) { - KASSERT(!lookup, ("%s: cannot lookup mpred", __func__)); + if (iter) error = vm_radix_iter_insert(pages, m); - } else if (lookup) - error = vm_radix_insert_lookup_lt(&object->rtree, m, &mpred); else error = vm_radix_insert(&object->rtree, m); if (__predict_false(error != 0)) { @@ -1523,7 +1511,7 @@ /* * Now link into the object's ordered list of backed pages. 
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -173,10 +173,7 @@
 static bool vm_page_free_prep(vm_page_t m);
 static void vm_page_free_toq(vm_page_t m);
 static void vm_page_init(void *dummy);
-static int vm_page_insert_after(vm_page_t m, vm_object_t object,
-    vm_pindex_t pindex, vm_page_t mpred);
-static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
-    vm_page_t mpred);
+static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object);
 static void vm_page_mvqueue(vm_page_t m, const uint8_t queue,
     const uint16_t nflag);
 static int vm_page_reclaim_run(int req_class, int domain, u_long npages,
@@ -1475,19 +1472,14 @@
 }
 
 /*
- * Insert the given page into the given object at the given pindex. mpred is
- * used for memq linkage. From vm_page_insert, lookup is true, mpred is
- * initially NULL, and this procedure looks it up. From vm_page_insert_after
- * and vm_page_iter_insert, lookup is false and mpred is known to the caller
- * to be valid, and may be NULL if this will be the page with the lowest
- * pindex.
+ * Insert the given page into the given object at the given pindex.
  *
  * The procedure is marked __always_inline to suggest to the compiler to
- * eliminate the lookup parameter and the associated alternate branch.
+ * eliminate the iter parameter and the associated alternate branch.
  */
 static __always_inline int
 vm_page_insert_lookup(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
-    struct pctrie_iter *pages, bool iter, vm_page_t mpred, bool lookup)
+    struct pctrie_iter *pages, bool iter)
 {
 	int error;
 
@@ -1503,14 +1495,10 @@
 	m->ref_count |= VPRC_OBJREF;
 
 	/*
-	 * Add this page to the object's radix tree, and look up mpred if
-	 * needed.
+	 * Add this page to the object's radix tree.
 	 */
-	if (iter) {
-		KASSERT(!lookup, ("%s: cannot lookup mpred", __func__));
+	if (iter)
 		error = vm_radix_iter_insert(pages, m);
-	} else if (lookup)
-		error = vm_radix_insert_lookup_lt(&object->rtree, m, &mpred);
 	else
 		error = vm_radix_insert(&object->rtree, m);
 	if (__predict_false(error != 0)) {
@@ -1523,7 +1511,7 @@
 
 	/*
-	 * Now link into the object's ordered list of backed pages.
+	 * Now finish the insertion and account for the new resident page.
 	 */
-	vm_page_insert_radixdone(m, object, mpred);
+	vm_page_insert_radixdone(m, object);
 	vm_pager_page_inserted(object, m);
 	return (0);
 }
@@ -1538,26 +1526,7 @@
 int
 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 {
-	return (vm_page_insert_lookup(m, object, pindex, NULL, false, NULL,
-	    true));
-}
-
-/*
- * vm_page_insert_after:
- *
- *	Inserts the page "m" into the specified object at offset "pindex".
- *
- *	The page "mpred" must immediately precede the offset "pindex" within
- *	the specified object.
- *
- *	The object must be locked.
- */
-static int
-vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
-    vm_page_t mpred)
-{
-	return (vm_page_insert_lookup(m, object, pindex, NULL, false, mpred,
-	    false));
+	return (vm_page_insert_lookup(m, object, pindex, NULL, false));
 }
 
 /*
@@ -1567,17 +1536,13 @@
  *	"pindex" using the iterator "pages".  Returns 0 if the insertion was
  *	successful.
  *
- *	The page "mpred" must immediately precede the offset "pindex" within
- *	the specified object.
- *
 *	The object must be locked.
 */
 static int
 vm_page_iter_insert(struct pctrie_iter *pages, vm_page_t m, vm_object_t object,
-    vm_pindex_t pindex, vm_page_t mpred)
+    vm_pindex_t pindex)
 {
-	return (vm_page_insert_lookup(m, object, pindex, pages, true, mpred,
-	    false));
+	return (vm_page_insert_lookup(m, object, pindex, pages, true));
 }
 
 /*
@@ -1586,13 +1551,10 @@
  *	Complete page "m" insertion into the specified object after the
  *	radix trie hooking.
  *
- *	The page "mpred" must precede the offset "m->pindex" within the
- *	specified object.
- *
 *	The object must be locked.
 */
 static void
-vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred)
+vm_page_insert_radixdone(vm_page_t m, vm_object_t object)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
@@ -1600,24 +1562,6 @@
 	    ("vm_page_insert_radixdone: page %p has inconsistent object", m));
 	KASSERT((m->ref_count & VPRC_OBJREF) != 0,
 	    ("vm_page_insert_radixdone: page %p is missing object ref", m));
-	if (mpred != NULL) {
-		KASSERT(mpred->object == object,
-		    ("vm_page_insert_radixdone: object doesn't contain mpred"));
-		KASSERT(mpred->pindex < m->pindex,
-		    ("vm_page_insert_radixdone: mpred doesn't precede pindex"));
-		KASSERT(TAILQ_NEXT(mpred, listq) == NULL ||
-		    m->pindex < TAILQ_NEXT(mpred, listq)->pindex,
-		    ("vm_page_insert_radixdone: pindex doesn't precede msucc"));
-	} else {
-		KASSERT(TAILQ_EMPTY(&object->memq) ||
-		    m->pindex < TAILQ_FIRST(&object->memq)->pindex,
-		    ("vm_page_insert_radixdone: no mpred but not first page"));
-	}
-
-	if (mpred != NULL)
-		TAILQ_INSERT_AFTER(&object->memq, mpred, m, listq);
-	else
-		TAILQ_INSERT_HEAD(&object->memq, m, listq);
 
 	/*
 	 * Show that the object has one more resident page.
@@ -1665,11 +1609,6 @@
 	vm_pager_page_removed(object, m);
 	m->object = NULL;
 
-	/*
-	 * Now remove from the object's list of backed pages.
-	 */
-	TAILQ_REMOVE(&object->memq, m, listq);
-
 	/*
 	 * And show that the object has one fewer resident page.
 	 */
@@ -1839,21 +1778,6 @@
 	vm_radix_iter_limit_init(pages, &object->rtree, limit);
 }
 
-/*
- * vm_page_iter_lookup:
- *
- *	Returns the page associated with the object/offset pair specified, and
- *	stores the path to its position; if none is found, NULL is returned.
- *
- *	The iter pctrie must be locked.
- */
-vm_page_t
-vm_page_iter_lookup(struct pctrie_iter *pages, vm_pindex_t pindex)
-{
-
-	return (vm_radix_iter_lookup(pages, pindex));
-}
-
 /*
  * vm_page_lookup_unlocked:
  *
@@ -1919,81 +1843,6 @@
 	}
 }
 
-/*
- * vm_page_find_least:
- *
- *	Returns the page associated with the object with least pindex
- *	greater than or equal to the parameter pindex, or NULL.
- *
- *	The object must be locked.
- */
-vm_page_t
-vm_page_find_least(vm_object_t object, vm_pindex_t pindex)
-{
-	vm_page_t m;
-
-	VM_OBJECT_ASSERT_LOCKED(object);
-	if ((m = TAILQ_FIRST(&object->memq)) != NULL && m->pindex < pindex)
-		m = vm_radix_lookup_ge(&object->rtree, pindex);
-	return (m);
-}
-
-/*
- * vm_page_iter_lookup_ge:
- *
- *	Returns the page associated with the object with least pindex
- *	greater than or equal to the parameter pindex, or NULL.  Initializes the
- *	iterator to point to that page.
- *
- *	The iter pctrie must be locked.
- */
-vm_page_t
-vm_page_iter_lookup_ge(struct pctrie_iter *pages, vm_pindex_t pindex)
-{
-
-	return (vm_radix_iter_lookup_ge(pages, pindex));
-}
-
-/*
- * Returns the given page's successor (by pindex) within the object if it is
- * resident; if none is found, NULL is returned.
- *
- * The object must be locked.
- */
-vm_page_t
-vm_page_next(vm_page_t m)
-{
-	vm_page_t next;
-
-	VM_OBJECT_ASSERT_LOCKED(m->object);
-	if ((next = TAILQ_NEXT(m, listq)) != NULL) {
-		MPASS(next->object == m->object);
-		if (next->pindex != m->pindex + 1)
-			next = NULL;
-	}
-	return (next);
-}
-
-/*
- * Returns the given page's predecessor (by pindex) within the object if it is
- * resident; if none is found, NULL is returned.
- *
- * The object must be locked.
- */
-vm_page_t
-vm_page_prev(vm_page_t m)
-{
-	vm_page_t prev;
-
-	VM_OBJECT_ASSERT_LOCKED(m->object);
-	if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL) {
-		MPASS(prev->object == m->object);
-		if (prev->pindex != m->pindex - 1)
-			prev = NULL;
-	}
-	return (prev);
-}
-
 /*
  * Uses the page mnew as a replacement for an existing page at index
  * pindex which must be already present in the object.
@@ -2033,9 +1882,6 @@
 	    (mnew->oflags & VPO_UNMANAGED),
 	    ("vm_page_replace: mismatched VPO_UNMANAGED"));
 
-	/* Keep the resident page list in sorted order. */
-	TAILQ_INSERT_AFTER(&object->memq, mold, mnew, listq);
-	TAILQ_REMOVE(&object->memq, mold, listq);
 	mold->object = NULL;
 
 	/*
@@ -2088,7 +1934,6 @@
 vm_page_iter_rename(struct pctrie_iter *old_pages, vm_page_t m,
     vm_object_t new_object, vm_pindex_t new_pindex)
 {
-	vm_page_t mpred;
 	vm_pindex_t opidx;
 
 	KASSERT((m->ref_count & VPRC_OBJREF) != 0,
@@ -2103,15 +1948,11 @@
 	 */
 	opidx = m->pindex;
 	m->pindex = new_pindex;
-	if (vm_radix_insert_lookup_lt(&new_object->rtree, m, &mpred) != 0) {
+	if (vm_radix_insert(&new_object->rtree, m) != 0) {
 		m->pindex = opidx;
 		return (false);
 	}
 
-	/*
-	 * The operation cannot fail anymore.  The removal must happen before
-	 * the listq iterator is tainted.
-	 */
 	m->pindex = opidx;
 	vm_radix_iter_remove(old_pages);
 	vm_page_remove_radixdone(m);
@@ -2120,24 +1961,12 @@
 	m->pindex = new_pindex;
 	m->object = new_object;
 
-	vm_page_insert_radixdone(m, new_object, mpred);
+	vm_page_insert_radixdone(m, new_object);
 	vm_page_dirty(m);
 	vm_pager_page_inserted(new_object, m);
 	return (true);
 }
 
-/*
- * vm_page_mpred:
- *
- *	Return the greatest page of the object with index <= pindex,
- *	or NULL, if there is none.  Assumes object lock is held.
- */
-vm_page_t
-vm_page_mpred(vm_object_t object, vm_pindex_t pindex)
-{
-	return (vm_radix_lookup_le(&object->rtree, pindex));
-}
-
 /*
  * vm_page_alloc:
  *
@@ -2165,7 +1994,7 @@
 {
 	return (vm_page_alloc_after(object, pindex, req,
-	    vm_page_mpred(object, pindex)));
+	    vm_radix_lookup_le(&object->rtree, pindex)));
 }
 
 /*
@@ -2352,7 +2181,7 @@
 	}
 	m->a.act_count = 0;
 
-	if (vm_page_insert_after(m, object, pindex, mpred)) {
+	if (vm_page_insert(m, object, pindex)) {
 		if (req & VM_ALLOC_WIRED) {
 			vm_wire_sub(1);
 			m->ref_count = 0;
@@ -2557,7 +2386,7 @@
 		m->ref_count = 1;
 		m->a.act_count = 0;
 		m->oflags = oflags;
-		if (vm_page_iter_insert(&pages, m, object, pindex, mpred)) {
+		if (vm_page_iter_insert(&pages, m, object, pindex)) {
 			if ((req & VM_ALLOC_WIRED) != 0)
 				vm_wire_sub(npages);
 			KASSERT(m->object == NULL,
@@ -2824,6 +2653,7 @@
 vm_page_alloc_check(vm_page_t m)
 {
 
+	m->object = NULL;
 	KASSERT(m->object == NULL, ("page %p has object", m));
 	KASSERT(m->a.queue == PQ_NONE &&
 	    (m->a.flags & PGA_QUEUE_STATE_MASK) == 0,
@@ -4872,7 +4702,8 @@
 	vm_page_grab_check(allocflags);
 
retrylookup:
-	if ((m = vm_page_lookup(object, pindex)) != NULL) {
+	if ((m = vm_radix_lookup_le(&object->rtree, pindex)) != NULL &&
+	    m->pindex == pindex) {
 		if (!vm_page_tryacquire(m, allocflags)) {
 			if (vm_page_grab_sleep(object, m, pindex, "pgrbwt",
 			    allocflags, true))
@@ -4881,9 +4712,13 @@
 		}
 		goto out;
 	}
-	if ((allocflags & VM_ALLOC_NOCREAT) != 0)
+	if ((allocflags & (VM_ALLOC_NOCREAT | VM_ALLOC_NOCREAT_LT)) != 0) {
+		if ((allocflags & VM_ALLOC_NOCREAT_LT) != 0)
+			return (m);
 		return (NULL);
-	m = vm_page_alloc(object, pindex, vm_page_grab_pflags(allocflags));
+	}
+	m = vm_page_alloc_after(object, pindex, vm_page_grab_pflags(allocflags),
+	    m);
 	if (m == NULL) {
 		if ((allocflags & (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL)) != 0)
 			return (NULL);
@@ -4980,6 +4815,7 @@
 int
 vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int allocflags)
 {
+	struct pctrie_iter pages;
 	vm_page_t m;
 	vm_page_t ma[VM_INITIAL_PAGEIN];
 	int after, i, pflags, rv;
@@ -4994,9 +4830,11 @@
 	pflags = allocflags & ~(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY |
 	    VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY);
 	pflags |= VM_ALLOC_WAITFAIL;
+	vm_page_iter_init(&pages, object);
 
retrylookup:
-	if ((m = vm_page_lookup(object, pindex)) != NULL) {
+	if ((m = vm_radix_iter_lookup_le(&pages, pindex)) != NULL &&
+	    m->pindex == pindex) {
 		/*
 		 * If the page is fully valid it can only become invalid
 		 * with the object lock held.  If it is not valid it can
@@ -5010,6 +4848,7 @@
 		    vm_page_all_valid(m) ? allocflags : 0)) {
 			(void)vm_page_grab_sleep(object, m, pindex, "pgrbwt",
 			    allocflags, true);
+			pctrie_iter_reset(&pages);
 			goto retrylookup;
 		}
 		if (vm_page_all_valid(m))
@@ -5022,7 +4861,8 @@
 	} else if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
 		*mp = NULL;
 		return (VM_PAGER_FAIL);
-	} else if ((m = vm_page_alloc(object, pindex, pflags)) == NULL) {
+	} else if ((m = vm_page_alloc_after(
+	    object, pindex, pflags, m)) == NULL) {
 		if (!vm_pager_can_alloc_page(object, pindex)) {
 			*mp = NULL;
 			return (VM_PAGER_AGAIN);
@@ -5037,13 +4877,14 @@
 	after = MAX(after, 1);
 	ma[0] = m;
 	for (i = 1; i < after; i++) {
-		if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) {
+		ma[i] = vm_radix_iter_lookup(&pages, pindex + i);
+		if (ma[i] != NULL) {
 			if (vm_page_any_valid(ma[i]) ||
 			    !vm_page_tryxbusy(ma[i]))
 				break;
 		} else {
-			ma[i] = vm_page_alloc(object, m->pindex + i,
-			    VM_ALLOC_NORMAL);
+			ma[i] = vm_page_alloc_after(object, pindex + i,
+			    VM_ALLOC_NORMAL, ma[i - 1]);
 			if (ma[i] == NULL)
 				break;
 		}
@@ -5163,7 +5004,8 @@
 vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count)
 {
-	vm_page_t m, mpred;
+	struct pctrie_iter pages;
+	vm_page_t m;
 	int pflags;
 	int i;
 
@@ -5175,27 +5017,25 @@
 	vm_page_grab_check(allocflags);
 
 	pflags = vm_page_grab_pflags(allocflags);
+	vm_page_iter_init(&pages, object);
 	i = 0;
retrylookup:
-	m = vm_page_mpred(object, pindex + i);
-	if (m == NULL || m->pindex != pindex + i) {
-		mpred = m;
-		m = NULL;
-	} else
-		mpred = TAILQ_PREV(m, pglist, listq);
 	for (; i < count; i++) {
-		if (m != NULL) {
+		m = vm_radix_iter_lookup_le(&pages, pindex + i);
+		if (m != NULL && m->pindex == pindex + i) {
 			if (!vm_page_tryacquire(m, allocflags)) {
 				if (vm_page_grab_sleep(object, m, pindex + i,
-				    "grbmaw", allocflags, true))
+				    "grbmaw", allocflags, true)) {
+					pctrie_iter_reset(&pages);
 					goto retrylookup;
+				}
 				break;
 			}
 		} else {
 			if ((allocflags & VM_ALLOC_NOCREAT) != 0)
 				break;
 			m = vm_page_alloc_after(object, pindex + i,
-			    pflags | VM_ALLOC_COUNT(count - i), mpred);
+			    pflags | VM_ALLOC_COUNT(count - i), m);
 			if (m == NULL) {
 				if ((allocflags & (VM_ALLOC_NOWAIT |
 				    VM_ALLOC_WAITFAIL)) != 0)
@@ -5210,8 +5050,7 @@
 			vm_page_valid(m);
 		}
 		vm_page_grab_release(m, allocflags);
-		ma[i] = mpred = m;
-		m = vm_page_next(m);
+		ma[i] = m;
 	}
 	return (i);
 }
@@ -5226,7 +5065,7 @@
 {
 	vm_page_t m;
 	int flags;
-	int i;
+	int i, num_fetched;
 
 	KASSERT(count > 0,
 	    ("vm_page_grab_pages_unlocked: invalid page count %d", count));
@@ -5238,22 +5077,10 @@
 	 */
 	flags = allocflags & ~VM_ALLOC_NOBUSY;
 	vm_page_grab_check(flags);
-	m = NULL;
-	for (i = 0; i < count; i++, pindex++) {
-		/*
-		 * We may see a false NULL here because the previous page has
-		 * been removed or just inserted and the list is loaded without
-		 * barriers.  Switch to radix to verify.
-		 */
-		if (m == NULL || QMD_IS_TRASHED(m) || m->pindex != pindex ||
-		    atomic_load_ptr(&m->object) != object) {
-			/*
-			 * This guarantees the result is instantaneously
-			 * correct.
-			 */
-			m = NULL;
-		}
-		m = vm_page_acquire_unlocked(object, pindex, m, flags);
+	num_fetched = vm_radix_lookup_range_unlocked(&object->rtree, pindex,
+	    ma, count);
+	for (i = 0; i < num_fetched; i++, pindex++) {
+		m = vm_page_acquire_unlocked(object, pindex, ma[i], flags);
 		if (m == PAGE_NOT_ACQUIRED)
 			return (i);
 		if (m == NULL)
@@ -5265,8 +5092,8 @@
 		}
 		/* m will still be wired or busy according to flags. */
 		vm_page_grab_release(m, allocflags);
+		/* vm_page_acquire_unlocked may not return ma[i]. */
 		ma[i] = m;
-		m = TAILQ_NEXT(m, listq);
 	}
 	if (i == count || (allocflags & VM_ALLOC_NOCREAT) != 0)
 		return (i);
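Reviewer sketch (not part of the patch): an assumed use of the new VM_ALLOC_NOCREAT_LT flag, based on its definition in vm_page.h and its handling in vm_page_grab() above. If the requested page is resident it is grabbed (busied) as usual; otherwise the resident predecessor (or NULL) is returned unbusied, and it can seed vm_page_alloc_after() without a second radix lookup.

static vm_page_t
grab_or_alloc(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_ASSERT_WLOCKED(object);
	m = vm_page_grab(object, pindex, VM_ALLOC_NOCREAT_LT);
	if (m != NULL && m->pindex == pindex)
		return (m);		/* resident page, now busied */
	/* "m" is the predecessor (or NULL); use it as the insertion hint. */
	return (vm_page_alloc_after(object, pindex, VM_ALLOC_NORMAL, m));
}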
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -108,6 +108,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -366,6 +367,7 @@
 static int
 vm_pageout_cluster(vm_page_t m)
 {
+	struct pctrie_iter pages;
 	vm_page_t mc[2 * vm_pageout_page_count - 1];
 	int alignment, num_ends, page_base, pageout_count;
 
@@ -373,7 +375,9 @@
 	vm_page_assert_xbusied(m);
 
-	alignment = m->pindex % vm_pageout_page_count;
+	vm_page_iter_init(&pages, m->object);
+	pages.index = m->pindex;
+	alignment = m->pindex % vm_pageout_page_count + 1;
 	num_ends = 0;
 	page_base = nitems(mc) / 2;
 	pageout_count = 1;
@@ -387,19 +391,13 @@
 	 * holes).  To solve this problem we do the reverse scan
 	 * first and attempt to align our cluster, then do a
 	 * forward scan if room remains.
+	 *
+	 * If we are at an alignment boundary, stop here, and switch directions.
 	 */
-more:
-	m = mc[page_base];
-	while (pageout_count < vm_pageout_page_count) {
-		/*
-		 * If we are at an alignment boundary, and haven't reached the
-		 * last flushable page forward, stop here, and switch
-		 * directions.
-		 */
-		if (alignment == pageout_count - 1 && num_ends == 0)
-			break;
-
-		m = vm_page_prev(m);
+	while (pageout_count < vm_pageout_page_count &&
+	    pageout_count != alignment) {
+more:
+		m = vm_radix_iter_prev(&pages);
 		if (m == NULL || !vm_pageout_flushable(m)) {
 			num_ends++;
 			break;
@@ -407,14 +405,15 @@
 		mc[--page_base] = m;
 		++pageout_count;
 	}
-	m = mc[page_base + pageout_count - 1];
+	pages.index = mc[page_base + pageout_count - 1]->pindex;
 	while (num_ends != 2 && pageout_count < vm_pageout_page_count) {
-		m = vm_page_next(m);
+		m = vm_radix_iter_next(&pages);
 		if (m == NULL || !vm_pageout_flushable(m)) {
-			if (num_ends++ == 0)
-				/* Resume the reverse scan. */
-				goto more;
-			break;
+			if (num_ends++ == 1)
+				break;
+			/* Resume the reverse scan. */
+			pages.index = mc[page_base]->pindex;
+			goto more;
 		}
 		mc[page_base + pageout_count] = m;
 		++pageout_count;
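Reviewer sketch (not part of the patch): the vm_pageout_cluster() rework above walks backward and then forward from the pivot page with a single iterator, re-seeding pages.index before changing direction. The same pattern in isolation, counting the pivot's immediately adjacent resident neighbors; vm_radix_iter_prev()/vm_radix_iter_next() step by exactly one pindex, so each loop stops at the first gap.

static int
count_adjacent(vm_page_t pivot)
{
	struct pctrie_iter pages;
	int n;

	VM_OBJECT_ASSERT_LOCKED(pivot->object);
	vm_page_iter_init(&pages, pivot->object);
	pages.index = pivot->pindex;
	n = 0;
	while (vm_radix_iter_prev(&pages) != NULL)	/* pindex - 1, - 2, ... */
		n++;
	pages.index = pivot->pindex;			/* re-seed for the forward scan */
	while (vm_radix_iter_next(&pages) != NULL)	/* pindex + 1, + 2, ... */
		n++;
	return (n);
}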
Index: sys/vm/vm_radix.h
===================================================================
--- sys/vm/vm_radix.h
+++ sys/vm/vm_radix.h
@@ -121,6 +121,18 @@
 	return (VM_RADIX_PCTRIE_LOOKUP_UNLOCKED(&rtree->rt_trie, index));
 }
 
+/*
+ * Returns the number of contiguous, non-NULL pages read into the ma[]
+ * array, without requiring an external lock.
+ */
+static __inline int
+vm_radix_lookup_range_unlocked(struct vm_radix *rtree, vm_pindex_t index,
+    vm_page_t ma[], int count)
+{
+	return (VM_RADIX_PCTRIE_LOOKUP_RANGE_UNLOCKED(&rtree->rt_trie, index,
+	    ma, count));
+}
+
 /*
  * Initialize an iterator for vm_radix.
 */
@@ -257,6 +269,12 @@
 	return (VM_RADIX_PCTRIE_ITER_STEP_GE(pages));
 }
 
+#define	VM_RADIX_FOREACH_FROM(m, pages, start)				\
+	for (m = vm_radix_iter_lookup_ge(&pages, start); m != NULL;	\
+	    m = vm_radix_iter_step(&pages))
+
+#define	VM_RADIX_FOREACH(m, pages)	VM_RADIX_FOREACH_FROM(m, pages, 0)
+
 /*
 * Initialize an iterator pointing to the page with the greatest pindex that is
 * less than or equal to the specified pindex, or NULL if there are no such
@@ -282,6 +300,12 @@
 	return (VM_RADIX_PCTRIE_ITER_NEXT(pages));
 }
 
+#define	VM_RADIX_FORALL_FROM(m, pages, start)				\
+	for (m = vm_radix_iter_lookup(&pages, start); m != NULL;	\
+	    m = vm_radix_iter_next(&pages))
+
+#define	VM_RADIX_FORALL(m, pages)	VM_RADIX_FORALL_FROM(m, pages, 0)
+
 /*
 * Update the iterator to point to the page with the pindex that is one less
 * than the current pindex, or NULL if there is no such page.  Return the page.
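Reviewer sketch (not part of the patch): vm_radix_lookup_range_unlocked(), added above, copies the pages resident at index, index + 1, ... into ma[] and stops at the first missing index, returning the count. Because no lock is held, the result is only a hint; callers such as vm_page_grab_pages_unlocked() must still acquire and re-validate each page.

static int
peek_run_unlocked(vm_object_t object, vm_pindex_t pindex, vm_page_t ma[],
    int count)
{
	int i, n;

	n = vm_radix_lookup_range_unlocked(&object->rtree, pindex, ma, count);
	for (i = 0; i < n; i++) {
		/* Unlocked: the page may already have been freed or moved. */
		if (atomic_load_ptr(&ma[i]->object) != object)
			return (i);
	}
	return (n);
}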
Index: sys/vm/vm_reserv.c
===================================================================
--- sys/vm/vm_reserv.c
+++ sys/vm/vm_reserv.c
@@ -523,9 +523,9 @@
 		rv = vm_reserv_from_page(mpred);
 		if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
 			goto found;
-		msucc = TAILQ_NEXT(mpred, listq);
+		msucc = vm_radix_lookup_ge(&object->rtree, mpred->pindex + 1);
 	} else
-		msucc = TAILQ_FIRST(&object->memq);
+		msucc = vm_radix_lookup_ge(&object->rtree, 0);
 	if (msucc != NULL) {
 		KASSERT(msucc->pindex > pindex,
 		    ("vm_reserv_from_object: msucc doesn't succeed pindex"));
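Reviewer sketch (not part of the patch): a pctrie_iter caches a path into the radix trie, so whenever the object lock is dropped, for example to sleep on a busy page, the cached path may be stale and must be discarded with pctrie_iter_reset() before the iterator is used again; the vm_page_grab_valid() and vm_page_grab_pages() hunks earlier follow this rule. A condensed version of the pattern:

static vm_page_t
lookup_and_busy(vm_object_t object, vm_pindex_t pindex)
{
	struct pctrie_iter pages;
	vm_page_t m;

	VM_OBJECT_ASSERT_WLOCKED(object);
	vm_page_iter_init(&pages, object);
	for (;;) {
		m = vm_radix_iter_lookup(&pages, pindex);
		if (m == NULL || vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL))
			return (m);
		/* The object lock was dropped while sleeping; the path is stale. */
		pctrie_iter_reset(&pages);
	}
}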
Index: sys/vm/vm_swapout.c
===================================================================
--- sys/vm/vm_swapout.c
+++ sys/vm/vm_swapout.c
@@ -106,6 +106,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -170,6 +171,7 @@
 vm_swapout_object_deactivate(pmap_t pmap, vm_object_t first_object,
     long desired)
 {
+	struct pctrie_iter pages;
 	vm_object_t backing_object, object;
 	vm_page_t m;
 	bool unmap;
@@ -192,7 +194,8 @@
 		/*
-		 * Scan the object's entire memory queue.
+		 * Scan all of the object's resident pages.
 		 */
-		TAILQ_FOREACH(m, &object->memq, listq) {
+		vm_page_iter_init(&pages, object);
+		VM_RADIX_FOREACH(m, pages) {
 			if (pmap_resident_count(pmap) <= desired)
 				goto unlock_return;
 			if (should_yield())
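Reviewer sketch (not part of the patch): bounded walks can use vm_page_iter_limit_init(), declared in the vm_page.h hunk earlier, which makes the iterator return NULL once it would reach the limit; the limit is assumed here to be exclusive, matching its other uses in this diff. Combined with VM_RADIX_FOREACH_FROM this walks only a pindex window:

static u_long
count_window(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	struct pctrie_iter pages;
	vm_page_t m;
	u_long n;

	VM_OBJECT_ASSERT_LOCKED(object);
	vm_page_iter_limit_init(&pages, object, end);	/* stop before "end" */
	n = 0;
	VM_RADIX_FOREACH_FROM(m, pages, start)
		n++;
	return (n);
}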
Index: sys/vm/vnode_pager.c
===================================================================
--- sys/vm/vnode_pager.c
+++ sys/vm/vnode_pager.c
@@ -80,6 +80,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -1045,14 +1046,14 @@
 		vm_page_t p;
 
 		VM_OBJECT_WLOCK(object);
-		startpindex = m[0]->pindex - rbehind;
-		if ((p = TAILQ_PREV(m[0], pglist, listq)) != NULL &&
+		tpindex = m[0]->pindex - 1;
+		startpindex = tpindex - rbehind + 1;
+		if ((p = vm_radix_lookup_le(&object->rtree, tpindex)) != NULL &&
 		    p->pindex >= startpindex)
 			startpindex = p->pindex + 1;
 
 		/* tpindex is unsigned; beware of numeric underflow. */
-		for (tpindex = m[0]->pindex - 1;
-		    tpindex >= startpindex && tpindex < m[0]->pindex;
+		for (; tpindex >= startpindex && tpindex < m[0]->pindex;
 		    tpindex--, i++) {
 			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
 			if (p == NULL) {
@@ -1082,15 +1083,15 @@
 		if (!VM_OBJECT_WOWNED(object))
 			VM_OBJECT_WLOCK(object);
-		endpindex = m[count - 1]->pindex + rahead + 1;
-		if ((p = TAILQ_NEXT(m[count - 1], listq)) != NULL &&
+		tpindex = m[count - 1]->pindex + 1;
+		endpindex = tpindex + rahead;
+		if ((p = vm_radix_lookup_ge(&object->rtree, tpindex)) != NULL &&
 		    p->pindex < endpindex)
 			endpindex = p->pindex;
 
 		if (endpindex > object->size)
 			endpindex = object->size;
 
-		for (tpindex = m[count - 1]->pindex + 1;
-		    tpindex < endpindex; i++, tpindex++) {
+		for (; tpindex < endpindex; i++, tpindex++) {
 			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
 			if (p == NULL)
 				break;
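Reviewer sketch (not part of the patch): the iommu hunks below use VM_RADIX_FORALL, which starts at pindex 0 and advances by exactly one index, so it stops at the first hole. That matches the old vm_page_lookup(obj, 0)/vm_page_next() loops because page-table objects are populated densely from index 0. For sparse objects, VM_RADIX_FOREACH is the gap-tolerant form; the two differ only in how gaps are treated:

static void
forall_vs_foreach(vm_object_t object)
{
	struct pctrie_iter pages;
	vm_page_t m;
	u_long dense, all;

	VM_OBJECT_ASSERT_LOCKED(object);
	dense = all = 0;
	vm_page_iter_init(&pages, object);
	VM_RADIX_FORALL(m, pages)	/* stops at the first non-resident pindex */
		dense++;
	vm_page_iter_init(&pages, object);
	VM_RADIX_FOREACH(m, pages)	/* skips holes, visits every resident page */
		all++;
	KASSERT(dense <= all, ("FORALL visited more pages than FOREACH"));
}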
Index: sys/x86/iommu/amd_idpgtbl.c
===================================================================
--- sys/x86/iommu/amd_idpgtbl.c
+++ sys/x86/iommu/amd_idpgtbl.c
@@ -54,6 +54,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -103,6 +104,7 @@
 void
 amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain)
 {
+	struct pctrie_iter pages;
 	vm_object_t obj;
 	vm_page_t m;
 
@@ -118,7 +120,8 @@
 	/* Obliterate ref_counts */
 	VM_OBJECT_ASSERT_WLOCKED(obj);
-	for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m))
+	vm_page_iter_init(&pages, obj);
+	VM_RADIX_FORALL(m, pages)
 		vm_page_clearref(m);
 	VM_OBJECT_WUNLOCK(obj);
 	vm_object_deallocate(obj);
Index: sys/x86/iommu/intel_idpgtbl.c
===================================================================
--- sys/x86/iommu/intel_idpgtbl.c
+++ sys/x86/iommu/intel_idpgtbl.c
@@ -56,6 +56,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -165,6 +166,7 @@
 vm_object_t
 dmar_get_idmap_pgtbl(struct dmar_domain *domain, iommu_gaddr_t maxaddr)
 {
+	struct pctrie_iter pages;
 	struct dmar_unit *unit;
 	struct idpgtbl *tbl;
 	vm_object_t res;
@@ -260,9 +262,9 @@
 	 */
 	unit = domain->dmar;
 	if (!DMAR_IS_COHERENT(unit)) {
+		vm_page_iter_init(&pages, res);
 		VM_OBJECT_WLOCK(res);
-		for (m = vm_page_lookup(res, 0); m != NULL;
-		    m = vm_page_next(m))
+		VM_RADIX_FORALL(m, pages)
 			pmap_invalidate_cache_pages(&m, 1);
 		VM_OBJECT_WUNLOCK(res);
 	}
@@ -707,6 +709,7 @@
 void
 dmar_domain_free_pgtbl(struct dmar_domain *domain)
 {
+	struct pctrie_iter pages;
 	vm_object_t obj;
 	vm_page_t m;
 
@@ -728,7 +731,8 @@
 	/* Obliterate ref_counts */
 	VM_OBJECT_ASSERT_WLOCKED(obj);
-	for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m)) {
+	vm_page_iter_init(&pages, obj);
+	VM_RADIX_FORALL(m, pages) {
 		vm_page_clearref(m);
 		vm_wire_sub(1);
 	}