Index: sys/kern/subr_pctrie.c =================================================================== --- sys/kern/subr_pctrie.c +++ sys/kern/subr_pctrie.c @@ -1111,7 +1111,6 @@ pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode) { struct pctrie_node *child, *node, *parent; - uint64_t *m; int slot; DEBUG_POISON_POINTER(parent); @@ -1126,12 +1125,11 @@ node = NULL; child = pctrie_root_load(it->ptree, NULL, PCTRIE_LOCKED); } - m = pctrie_match_value(child, it->index); - if (m != NULL) + if (pctrie_isleaf(child) && pctrie_toval(child) != NULL) pctrie_remove(it->ptree, it->index, parent, node, freenode); if (*freenode != NULL) --it->top; - return (m); + return (pctrie_toval(child)); } /* Index: sys/vm/vm_kern.c =================================================================== --- sys/vm/vm_kern.c +++ sys/vm/vm_kern.c @@ -634,8 +634,9 @@ static struct vmem * _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) { + struct pctrie_iter pages; struct vmem *arena; - vm_page_t m, next; + vm_page_t m; vm_offset_t end, offset; int domain; @@ -648,17 +649,18 @@ offset = addr - VM_MIN_KERNEL_ADDRESS; end = offset + size; VM_OBJECT_WLOCK(object); - m = vm_page_lookup(object, atop(offset)); + vm_page_iter_limit_init(&pages, object, atop(end)); + m = vm_page_iter_lookup(&pages, atop(offset)); domain = vm_page_domain(m); if (__predict_true((m->oflags & VPO_KMEM_EXEC) == 0)) arena = vm_dom[domain].vmd_kernel_arena; else arena = vm_dom[domain].vmd_kernel_rwx_arena; - for (; offset < end; offset += PAGE_SIZE, m = next) { - next = vm_page_next(m); + for (; m != NULL; m = vm_radix_iter_next(&pages)) { vm_page_xbusy_claim(m); vm_page_unwire_noq(m); - vm_page_free(m); + vm_page_free_notree(m); + vm_radix_iter_remove(&pages); } VM_OBJECT_WUNLOCK(object); Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c +++ sys/vm/vm_object.c @@ -1517,9 +1517,10 @@ void vm_object_split(vm_map_entry_t entry) { - vm_page_t m, m_next; + struct pctrie_iter pages; + vm_page_t m; vm_object_t orig_object, new_object, backing_object; - vm_pindex_t idx, offidxstart; + vm_pindex_t offidxstart; vm_size_t size; orig_object = entry->object.vm_object; @@ -1570,17 +1571,11 @@ * that the object is in transition. */ vm_object_set_flag(orig_object, OBJ_SPLIT); -#ifdef INVARIANTS - idx = 0; -#endif + vm_page_iter_limit_init(&pages, orig_object, offidxstart + size); retry: - m = vm_page_find_least(orig_object, offidxstart); - KASSERT(m == NULL || idx <= m->pindex - offidxstart, - ("%s: object %p was repopulated", __func__, orig_object)); - for (; m != NULL && (idx = m->pindex - offidxstart) < size; - m = m_next) { - m_next = TAILQ_NEXT(m, listq); - + pctrie_iter_reset(&pages); + for (m = vm_page_iter_lookup_ge(&pages, offidxstart); m != NULL; + m = vm_radix_iter_step(&pages)) { /* * We must wait for pending I/O to complete before we can * rename the page. @@ -1601,13 +1596,14 @@ * an incomplete fault. Just remove and ignore. */ if (vm_page_none_valid(m)) { - if (vm_page_remove(m)) - vm_page_free(m); + vm_radix_iter_remove(&pages); + if (vm_page_remove_notree(m)) + vm_page_free_notree(m); continue; } /* vm_page_rename() will dirty the page. */ - if (vm_page_rename(m, new_object, idx)) { + if (vm_page_rename(m, new_object, m->pindex - offidxstart)) { vm_page_xunbusy(m); VM_OBJECT_WUNLOCK(new_object); VM_OBJECT_WUNLOCK(orig_object); @@ -1632,6 +1628,7 @@ */ vm_reserv_rename(m, new_object, orig_object, offidxstart); #endif + vm_radix_iter_remove(&pages); } /* @@ -1653,7 +1650,8 @@ } static vm_page_t -vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p) +vm_object_collapse_scan_wait(struct pctrie_iter *pages, vm_object_t object, + vm_page_t p) { vm_object_t backing_object; @@ -1680,7 +1678,8 @@ VM_OBJECT_WLOCK(object); } VM_OBJECT_WLOCK(backing_object); - return (TAILQ_FIRST(&backing_object->memq)); + vm_page_iter_init(pages, backing_object); + return (vm_page_iter_lookup_ge(pages, 0)); } static bool @@ -1773,6 +1772,7 @@ static void vm_object_collapse_scan(vm_object_t object) { + struct pctrie_iter pages; vm_object_t backing_object; vm_page_t next, p, pp; vm_pindex_t backing_offset_index, new_pindex; @@ -1786,7 +1786,8 @@ /* * Our scan */ - for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) { + vm_page_iter_init(&pages, backing_object); + for (p = vm_page_iter_lookup_ge(&pages, 0); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); new_pindex = p->pindex - backing_offset_index; @@ -1794,7 +1795,7 @@ * Check for busy page */ if (vm_page_tryxbusy(p) == 0) { - next = vm_object_collapse_scan_wait(object, p); + next = vm_object_collapse_scan_wait(&pages, object, p); continue; } @@ -1811,16 +1812,20 @@ KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (vm_page_remove(p)) - vm_page_free(p); + if (vm_page_remove_notree(p)) + vm_page_free_notree(p); + vm_radix_iter_remove(&pages); + next = vm_radix_iter_step(&pages); continue; } if (!vm_page_all_valid(p)) { KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (vm_page_remove(p)) - vm_page_free(p); + if (vm_page_remove_notree(p)) + vm_page_free_notree(p); + vm_radix_iter_remove(&pages); + next = vm_radix_iter_step(&pages); continue; } @@ -1833,7 +1838,7 @@ * busy bit owner, we can't tell whether it shadows the * original page. */ - next = vm_object_collapse_scan_wait(object, pp); + next = vm_object_collapse_scan_wait(&pages, object, pp); continue; } @@ -1859,10 +1864,12 @@ vm_pager_freespace(backing_object, p->pindex, 1); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (vm_page_remove(p)) - vm_page_free(p); if (pp != NULL) vm_page_xunbusy(pp); + if (vm_page_remove_notree(p)) + vm_page_free_notree(p); + vm_radix_iter_remove(&pages); + next = vm_radix_iter_step(&pages); continue; } @@ -1875,7 +1882,8 @@ */ if (vm_page_rename(p, object, new_pindex)) { vm_page_xunbusy(p); - next = vm_object_collapse_scan_wait(object, NULL); + next = vm_object_collapse_scan_wait(&pages, object, + NULL); continue; } @@ -1891,6 +1899,8 @@ backing_offset_index); #endif vm_page_xunbusy(p); + vm_radix_iter_remove(&pages); + next = vm_radix_iter_step(&pages); } return; } @@ -2065,7 +2075,8 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options) { - vm_page_t p, next; + struct pctrie_iter pages; + vm_page_t p; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_UNMANAGED) == 0 || @@ -2074,16 +2085,11 @@ if (object->resident_page_count == 0) return; vm_object_pip_add(object, 1); + vm_page_iter_limit_init(&pages, object, end); again: - p = vm_page_find_least(object, start); - - /* - * Here, the variable "p" is either (1) the page with the least pindex - * greater than or equal to the parameter "start" or (2) NULL. - */ - for (; p != NULL && (p->pindex < end || end == 0); p = next) { - next = TAILQ_NEXT(p, listq); - + pctrie_iter_reset(&pages); + for (p = vm_page_iter_lookup_ge(&pages, start); p != NULL; + p = vm_radix_iter_step(&pages)) { /* * Skip invalid pages if asked to do so. Try to avoid acquiring * the busy lock, as some consumers rely on this to avoid @@ -2144,7 +2150,8 @@ if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0 && !vm_page_try_remove_all(p)) goto wired; - vm_page_free(p); + vm_page_free_notree(p); + vm_radix_iter_remove(&pages); } vm_object_pip_wakeup(object); Index: sys/vm/vm_page.h =================================================================== --- sys/vm/vm_page.h +++ sys/vm/vm_page.h @@ -602,6 +602,7 @@ void vm_page_busy_sleep_unlocked(vm_object_t obj, vm_page_t m, vm_pindex_t pindex, const char *wmesg, int allocflags); void vm_page_free(vm_page_t m); +void vm_page_free_notree(vm_page_t m); void vm_page_free_zero(vm_page_t m); void vm_page_activate (vm_page_t); @@ -680,6 +681,7 @@ void vm_page_release_locked(vm_page_t m, int flags); vm_page_t vm_page_relookup(vm_object_t, vm_pindex_t); bool vm_page_remove(vm_page_t); +bool vm_page_remove_notree(vm_page_t); bool vm_page_remove_xbusy(vm_page_t); int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t); void vm_page_replace(vm_page_t mnew, vm_object_t object, Index: sys/vm/vm_page.c =================================================================== --- sys/vm/vm_page.c +++ sys/vm/vm_page.c @@ -168,8 +168,8 @@ vm_pindex_t pindex, const char *wmesg, int allocflags, bool locked); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(vm_page_t m, uint8_t queue); -static bool vm_page_free_prep(vm_page_t m); -static void vm_page_free_toq(vm_page_t m); +static bool vm_page_free_prep(vm_page_t m, bool do_remove); +static void vm_page_free_toq(vm_page_t m, bool do_remove); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); @@ -1381,7 +1381,20 @@ { m->flags &= ~PG_ZERO; - vm_page_free_toq(m); + vm_page_free_toq(m, true); +} + +/* + * vm_page_free_notree: + * + * Free a page. Don't change radix tree status. + */ +void +vm_page_free_notree(vm_page_t m) +{ + + m->flags &= ~PG_ZERO; + vm_page_free_toq(m, false); } /* @@ -1394,7 +1407,7 @@ { m->flags |= PG_ZERO; - vm_page_free_toq(m); + vm_page_free_toq(m, true); } /* @@ -1618,7 +1631,6 @@ vm_page_object_remove(vm_page_t m) { vm_object_t object; - vm_page_t mrem __diagused; vm_page_assert_xbusied(m); object = m->object; @@ -1631,10 +1643,7 @@ vm_pager_page_unswapped(m); vm_pager_page_removed(object, m); - m->object = NULL; - mrem = vm_radix_remove(&object->rtree, m->pindex); - KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m)); /* * Now remove from the object's list of backed pages. @@ -1676,6 +1685,41 @@ return (dropped); } +/* + * vm_page_remove_notree: + * + * Just like vm_page_remove, but does not change the status of the page in + * the radix tree. + */ +bool +vm_page_remove_notree(vm_page_t m) +{ + bool dropped; + + vm_page_object_remove(m); + dropped = (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF); + vm_page_xunbusy(m); + + return (dropped); +} + +/* + * vm_page_radix_remove + * + * Conditionally removes the specified page from the radix tree. + */ +static void +vm_page_radix_remove(vm_page_t m, bool do_remove) +{ + vm_page_t mrem __diagused; + + if (do_remove) { + mrem = vm_radix_remove(&m->object->rtree, m->pindex); + KASSERT(mrem == m, + ("removed page %p, expected page %p", mrem, m)); + } +} + /* * vm_page_remove_xbusy * @@ -1686,6 +1730,7 @@ vm_page_remove_xbusy(vm_page_t m) { + vm_page_radix_remove(m, true); vm_page_object_remove(m); return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF); } @@ -2000,11 +2045,9 @@ * The operation cannot fail anymore. The removal must happen before * the listq iterator is tainted. */ - m->pindex = opidx; vm_page_object_remove(m); /* Return back to the new pindex to complete vm_page_insert(). */ - m->pindex = new_pindex; m->object = new_object; vm_page_insert_radixdone(m, new_object, mpred); @@ -2245,7 +2288,7 @@ m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. */ - vm_page_free_toq(m); + vm_page_free_toq(m, true); if (req & VM_ALLOC_WAITFAIL) { VM_OBJECT_WUNLOCK(object); vm_radix_wait(); @@ -2452,7 +2495,7 @@ m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. */ - vm_page_free_toq(m); + vm_page_free_toq(m, true); } if (req & VM_ALLOC_WAITFAIL) { VM_OBJECT_WUNLOCK(object); @@ -3091,7 +3134,7 @@ vm_page_dequeue(m); if (vm_page_replace_hold(m_new, object, m->pindex, m) && - vm_page_free_prep(m)) + vm_page_free_prep(m, true)) SLIST_INSERT_HEAD(&free, m, plinks.s.ss); @@ -3103,7 +3146,7 @@ } else { m->flags &= ~PG_ZERO; vm_page_dequeue(m); - if (vm_page_free_prep(m)) + if (vm_page_free_prep(m, true)) SLIST_INSERT_HEAD(&free, m, plinks.s.ss); KASSERT(m->dirty == 0, @@ -4042,7 +4085,7 @@ * page must be unmapped. */ static bool -vm_page_free_prep(vm_page_t m) +vm_page_free_prep(vm_page_t m, bool do_remove) { /* @@ -4089,6 +4132,7 @@ m->ref_count == VPRC_OBJREF, ("vm_page_free_prep: page %p has unexpected ref_count %u", m, m->ref_count)); + vm_page_radix_remove(m, do_remove); vm_page_object_remove(m); m->ref_count -= VPRC_OBJREF; } else @@ -4151,12 +4195,12 @@ * belongs to an object. */ static void -vm_page_free_toq(vm_page_t m) +vm_page_free_toq(vm_page_t m, bool do_remove) { struct vm_domain *vmd; uma_zone_t zone; - if (!vm_page_free_prep(m)) + if (!vm_page_free_prep(m, do_remove)) return; vmd = vm_pagequeue_domain(m); @@ -4191,7 +4235,7 @@ while ((m = SLIST_FIRST(free)) != NULL) { count++; SLIST_REMOVE_HEAD(free, plinks.s.ss); - vm_page_free_toq(m); + vm_page_free_toq(m, true); } if (update_wire_count)