Index: sys/kern/subr_pctrie.c
===================================================================
--- sys/kern/subr_pctrie.c
+++ sys/kern/subr_pctrie.c
@@ -1106,7 +1106,6 @@
 pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode)
 {
 	struct pctrie_node *child, *node, *parent;
-	uint64_t *m;
 	int slot;
 
 	DEBUG_POISON_POINTER(parent);
@@ -1121,12 +1120,11 @@
 		node = NULL;
 		child = pctrie_root_load(it->ptree, NULL, PCTRIE_LOCKED);
 	}
-	m = pctrie_match_value(child, it->index);
-	if (m != NULL)
+	if (pctrie_isleaf(child) && pctrie_toval(child) != NULL)
 		pctrie_remove(it->ptree, it->index, parent, node, freenode);
 	if (*freenode != NULL)
 		--it->top;
-	return (m);
+	return (pctrie_toval(child));
 }
 
 /*
Index: sys/vm/vm_kern.c
===================================================================
--- sys/vm/vm_kern.c
+++ sys/vm/vm_kern.c
@@ -634,8 +634,9 @@
 static struct vmem *
 _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 {
+	struct pctrie_iter pages;
 	struct vmem *arena;
-	vm_page_t m, next;
+	vm_page_t m;
 	vm_offset_t end, offset;
 	int domain;
 
@@ -648,17 +649,18 @@
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	end = offset + size;
 	VM_OBJECT_WLOCK(object);
-	m = vm_page_lookup(object, atop(offset));
+	vm_page_iter_init(&pages, object);
+	m = vm_page_iter_lookup(&pages, atop(offset));
 	domain = vm_page_domain(m);
 	if (__predict_true((m->oflags & VPO_KMEM_EXEC) == 0))
 		arena = vm_dom[domain].vmd_kernel_arena;
 	else
 		arena = vm_dom[domain].vmd_kernel_rwx_arena;
-	for (; offset < end; offset += PAGE_SIZE, m = next) {
-		next = vm_page_next(m);
+	for (; offset < end; offset += PAGE_SIZE,
+	    m = vm_page_iter_lookup(&pages, atop(offset))) {
 		vm_page_xbusy_claim(m);
 		vm_page_unwire_noq(m);
-		vm_page_free(m);
+		vm_page_iter_free(&pages);
 	}
 	VM_OBJECT_WUNLOCK(object);
 
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -1520,9 +1520,10 @@
 void
 vm_object_split(vm_map_entry_t entry)
 {
-	vm_page_t m, m_next;
+	struct pctrie_iter pages;
+	vm_page_t m;
 	vm_object_t orig_object, new_object, backing_object;
-	vm_pindex_t idx, offidxstart;
+	vm_pindex_t offidxstart;
 	vm_size_t size;
 
 	orig_object = entry->object.vm_object;
@@ -1573,17 +1574,11 @@
 	 * that the object is in transition.
 	 */
 	vm_object_set_flag(orig_object, OBJ_SPLIT);
-#ifdef INVARIANTS
-	idx = 0;
-#endif
+	vm_page_iter_limit_init(&pages, orig_object, offidxstart + size);
 retry:
-	m = vm_page_find_least(orig_object, offidxstart);
-	KASSERT(m == NULL || idx <= m->pindex - offidxstart,
-	    ("%s: object %p was repopulated", __func__, orig_object));
-	for (; m != NULL && (idx = m->pindex - offidxstart) < size;
-	    m = m_next) {
-		m_next = TAILQ_NEXT(m, listq);
-
+	pctrie_iter_reset(&pages);
+	for (m = vm_page_iter_lookup_ge(&pages, offidxstart); m != NULL;
+	    m = vm_radix_iter_step(&pages)) {
 		/*
 		 * We must wait for pending I/O to complete before we can
 		 * rename the page.
@@ -1604,13 +1599,13 @@
 		 * an incomplete fault.  Just remove and ignore.
 		 */
 		if (vm_page_none_valid(m)) {
-			if (vm_page_remove(m))
+			if (vm_page_iter_remove(&pages))
 				vm_page_free(m);
 			continue;
 		}
 
 		/* vm_page_rename() will dirty the page. */
-		if (vm_page_rename(m, new_object, idx)) {
+		if (vm_page_rename(m, new_object, m->pindex - offidxstart)) {
 			vm_page_xunbusy(m);
 			VM_OBJECT_WUNLOCK(new_object);
 			VM_OBJECT_WUNLOCK(orig_object);
@@ -1635,6 +1630,7 @@
 		 */
 		vm_reserv_rename(m, new_object, orig_object, offidxstart);
 #endif
+		vm_radix_iter_remove(&pages);
 	}
 
 	/*
@@ -1656,7 +1652,8 @@
 }
 
 static vm_page_t
-vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p)
+vm_object_collapse_scan_wait(struct pctrie_iter *pages, vm_object_t object,
+    vm_page_t p)
 {
 	vm_object_t backing_object;
 
@@ -1683,12 +1680,14 @@
 		VM_OBJECT_WLOCK(object);
 	}
 	VM_OBJECT_WLOCK(backing_object);
-	return (TAILQ_FIRST(&backing_object->memq));
+	vm_page_iter_init(pages, backing_object);
+	return (vm_page_iter_lookup_ge(pages, 0));
 }
 
 static void
 vm_object_collapse_scan(vm_object_t object)
 {
+	struct pctrie_iter pages;
 	vm_object_t backing_object;
 	vm_page_t next, p, pp;
 	vm_pindex_t backing_offset_index, new_pindex;
@@ -1702,7 +1701,8 @@
 	/*
 	 * Our scan
 	 */
-	for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) {
+	vm_page_iter_init(&pages, backing_object);
+	for (p = vm_page_iter_lookup_ge(&pages, 0); p != NULL; p = next) {
 		next = TAILQ_NEXT(p, listq);
 		new_pindex = p->pindex - backing_offset_index;
 
@@ -1710,7 +1710,7 @@
 		 * Check for busy page
 		 */
 		if (vm_page_tryxbusy(p) == 0) {
-			next = vm_object_collapse_scan_wait(object, p);
+			next = vm_object_collapse_scan_wait(&pages, object, p);
 			continue;
 		}
 
@@ -1727,16 +1727,18 @@
 
 			KASSERT(!pmap_page_is_mapped(p),
 			    ("freeing mapped page %p", p));
-			if (vm_page_remove(p))
+			if (vm_page_iter_remove(&pages))
 				vm_page_free(p);
+			next = vm_radix_iter_step(&pages);
 			continue;
 		}
 
 		if (!vm_page_all_valid(p)) {
 			KASSERT(!pmap_page_is_mapped(p),
 			    ("freeing mapped page %p", p));
-			if (vm_page_remove(p))
+			if (vm_page_iter_remove(&pages))
 				vm_page_free(p);
+			next = vm_radix_iter_step(&pages);
 			continue;
 		}
 
@@ -1749,7 +1751,7 @@
 			 * busy bit owner, we can't tell whether it shadows the
 			 * original page.
 			 */
-			next = vm_object_collapse_scan_wait(object, pp);
+			next = vm_object_collapse_scan_wait(&pages, object, pp);
 			continue;
 		}
 
@@ -1775,10 +1777,11 @@
 			vm_pager_freespace(backing_object, p->pindex, 1);
 			KASSERT(!pmap_page_is_mapped(p),
 			    ("freeing mapped page %p", p));
-			if (vm_page_remove(p))
-				vm_page_free(p);
 			if (pp != NULL)
 				vm_page_xunbusy(pp);
+			if (vm_page_iter_remove(&pages))
+				vm_page_free(p);
+			next = vm_radix_iter_step(&pages);
 			continue;
 		}
 
@@ -1791,7 +1794,8 @@
 		 */
 		if (vm_page_rename(p, object, new_pindex)) {
 			vm_page_xunbusy(p);
-			next = vm_object_collapse_scan_wait(object, NULL);
+			next = vm_object_collapse_scan_wait(&pages, object,
+			    NULL);
 			continue;
 		}
 
@@ -1807,6 +1811,8 @@
 		    backing_offset_index);
 #endif
 		vm_page_xunbusy(p);
+		vm_radix_iter_remove(&pages);
+		next = vm_radix_iter_step(&pages);
 	}
 	return;
 }
@@ -1981,7 +1987,8 @@
 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
     int options)
 {
-	vm_page_t p, next;
+	struct pctrie_iter pages;
+	vm_page_t p;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
@@ -1990,16 +1997,11 @@
 	if (object->resident_page_count == 0)
 		return;
 	vm_object_pip_add(object, 1);
+	vm_page_iter_limit_init(&pages, object, end);
 again:
-	p = vm_page_find_least(object, start);
-
-	/*
-	 * Here, the variable "p" is either (1) the page with the least pindex
-	 * greater than or equal to the parameter "start" or (2) NULL.
-	 */
-	for (; p != NULL && (p->pindex < end || end == 0); p = next) {
-		next = TAILQ_NEXT(p, listq);
-
+	pctrie_iter_reset(&pages);
+	for (p = vm_page_iter_lookup_ge(&pages, start); p != NULL;
+	    p = vm_radix_iter_step(&pages)) {
 		/*
 		 * Skip invalid pages if asked to do so.  Try to avoid acquiring
 		 * the busy lock, as some consumers rely on this to avoid
@@ -2060,7 +2062,7 @@
 		if ((options & OBJPR_NOTMAPPED) == 0 &&
 		    object->ref_count != 0 && !vm_page_try_remove_all(p))
 			goto wired;
-		vm_page_free(p);
+		vm_page_iter_free(&pages);
 	}
 	vm_object_pip_wakeup(object);
 
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -602,6 +602,7 @@
 void vm_page_busy_sleep_unlocked(vm_object_t obj, vm_page_t m,
     vm_pindex_t pindex, const char *wmesg, int allocflags);
 void vm_page_free(vm_page_t m);
+void vm_page_iter_free(struct pctrie_iter *);
 void vm_page_free_zero(vm_page_t m);
 
 void vm_page_activate (vm_page_t);
@@ -680,6 +681,7 @@
 void vm_page_release_locked(vm_page_t m, int flags);
 vm_page_t vm_page_relookup(vm_object_t, vm_pindex_t);
 bool vm_page_remove(vm_page_t);
+bool vm_page_iter_remove(struct pctrie_iter *);
 bool vm_page_remove_xbusy(vm_page_t);
 int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t);
 void vm_page_replace(vm_page_t mnew, vm_object_t object,
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -168,8 +168,8 @@
     vm_pindex_t pindex, const char *wmesg, int allocflags, bool locked);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_enqueue(vm_page_t m, uint8_t queue);
-static bool vm_page_free_prep(vm_page_t m);
-static void vm_page_free_toq(vm_page_t m);
+static bool vm_page_free_prep(vm_page_t m, bool do_remove);
+static void vm_page_free_toq(vm_page_t m, bool do_remove);
 static void vm_page_init(void *dummy);
 static int vm_page_insert_after(vm_page_t m, vm_object_t object,
     vm_pindex_t pindex, vm_page_t mpred);
@@ -1381,7 +1381,23 @@
 {
 
 	m->flags &= ~PG_ZERO;
-	vm_page_free_toq(m);
+	vm_page_free_toq(m, true);
+}
+
+/*
+ *	vm_page_iter_free:
+ *
+ *	Free a page.  Use iter to remove it from radix tree.
+ */
+void
+vm_page_iter_free(struct pctrie_iter *pages)
+{
+	vm_page_t m;
+
+	m = vm_radix_iter_page(pages);
+	vm_radix_iter_remove(pages);
+	m->flags &= ~PG_ZERO;
+	vm_page_free_toq(m, false);
 }
 
 /*
@@ -1394,7 +1410,7 @@
 {
 
 	m->flags |= PG_ZERO;
-	vm_page_free_toq(m);
+	vm_page_free_toq(m, true);
 }
 
 /*
@@ -1611,14 +1627,18 @@
 }
 
 /*
- * Do the work to remove a page from its object.  The caller is responsible for
- * updating the page's fields to reflect this removal.
+ *	vm_page_remove_radixdone
+ *
+ *	Complete page "m" removal from the specified object after the radix trie
+ *	unhooking.
+ *
+ *	The caller is responsible for updating the page's fields to reflect this
+ *	removal.
  */
 static void
-vm_page_object_remove(vm_page_t m)
+vm_page_remove_radixdone(vm_page_t m)
 {
 	vm_object_t object;
-	vm_page_t mrem __diagused;
 
 	vm_page_assert_xbusied(m);
 	object = m->object;
@@ -1631,10 +1651,7 @@
 		vm_pager_page_unswapped(m);
 	vm_pager_page_removed(object, m);
 
 	m->object = NULL;
-	mrem = vm_radix_remove(&object->rtree, m->pindex);
-	KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));
-
 	/*
 	 * Now remove from the object's list of backed pages.
@@ -1676,6 +1693,41 @@
 	return (dropped);
 }
 
+/*
+ *	vm_page_iter_remove:
+ *
+ *	Just like vm_page_remove, but using an iterator.
+ */
+bool
+vm_page_iter_remove(struct pctrie_iter *pages)
+{
+	vm_page_t m;
+	bool dropped;
+
+	m = vm_radix_iter_page(pages);
+	vm_radix_iter_remove(pages);
+	vm_page_remove_radixdone(m);
+	dropped = (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF);
+	vm_page_xunbusy(m);
+
+	return (dropped);
+}
+
+/*
+ *	vm_page_radix_remove
+ *
+ *	Removes the specified page from the radix tree.
+ */
+static void
+vm_page_radix_remove(vm_page_t m)
+{
+	vm_page_t mrem __diagused;
+
+	mrem = vm_radix_remove(&m->object->rtree, m->pindex);
+	KASSERT(mrem == m,
+	    ("removed page %p, expected page %p", mrem, m));
+}
+
 /*
  * vm_page_remove_xbusy
  *
@@ -1686,7 +1738,8 @@
 vm_page_remove_xbusy(vm_page_t m)
 {
 
-	vm_page_object_remove(m);
+	vm_page_radix_remove(m);
+	vm_page_remove_radixdone(m);
 	return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF);
 }
 
@@ -1998,11 +2051,9 @@
 	 * The operation cannot fail anymore.  The removal must happen before
 	 * the listq iterator is tainted.
 	 */
-	m->pindex = opidx;
-	vm_page_object_remove(m);
+	vm_page_remove_radixdone(m);
 
 	/* Return back to the new pindex to complete vm_page_insert(). */
-	m->pindex = new_pindex;
 	m->object = new_object;
 	vm_page_insert_radixdone(m, new_object, mpred);
 
@@ -2246,7 +2297,7 @@
 			m->oflags = VPO_UNMANAGED;
 			m->busy_lock = VPB_UNBUSIED;
 			/* Don't change PG_ZERO. */
-			vm_page_free_toq(m);
+			vm_page_free_toq(m, true);
 			if (req & VM_ALLOC_WAITFAIL) {
 				VM_OBJECT_WUNLOCK(object);
 				vm_radix_wait();
@@ -2453,7 +2504,7 @@
 			m->oflags = VPO_UNMANAGED;
 			m->busy_lock = VPB_UNBUSIED;
 			/* Don't change PG_ZERO. */
-			vm_page_free_toq(m);
+			vm_page_free_toq(m, true);
 		}
 		if (req & VM_ALLOC_WAITFAIL) {
 			VM_OBJECT_WUNLOCK(object);
@@ -3092,7 +3143,7 @@
 		vm_page_dequeue(m);
 		if (vm_page_replace_hold(m_new, object,
 		    m->pindex, m) &&
-		    vm_page_free_prep(m))
+		    vm_page_free_prep(m, true))
 			SLIST_INSERT_HEAD(&free, m,
 			    plinks.s.ss);
 
@@ -3104,7 +3155,7 @@
 		} else {
 			m->flags &= ~PG_ZERO;
 			vm_page_dequeue(m);
-			if (vm_page_free_prep(m))
+			if (vm_page_free_prep(m, true))
 				SLIST_INSERT_HEAD(&free, m,
 				    plinks.s.ss);
 			KASSERT(m->dirty == 0,
@@ -4043,7 +4094,7 @@
  * page must be unmapped.
  */
 static bool
-vm_page_free_prep(vm_page_t m)
+vm_page_free_prep(vm_page_t m, bool do_remove)
 {
 
 	/*
@@ -4090,7 +4141,9 @@
 		    m->ref_count == VPRC_OBJREF,
 		    ("vm_page_free_prep: page %p has unexpected ref_count %u",
 		    m, m->ref_count));
-		vm_page_object_remove(m);
+		if (do_remove)
+			vm_page_radix_remove(m);
+		vm_page_remove_radixdone(m);
 		m->ref_count -= VPRC_OBJREF;
 	} else
 		vm_page_assert_unbusied(m);
@@ -4152,12 +4205,12 @@
  * belongs to an object.
  */
 static void
-vm_page_free_toq(vm_page_t m)
+vm_page_free_toq(vm_page_t m, bool do_remove)
 {
 	struct vm_domain *vmd;
 	uma_zone_t zone;
 
-	if (!vm_page_free_prep(m))
+	if (!vm_page_free_prep(m, do_remove))
 		return;
 
 	vmd = vm_pagequeue_domain(m);
@@ -4192,7 +4245,7 @@
 	while ((m = SLIST_FIRST(free)) != NULL) {
 		count++;
 		SLIST_REMOVE_HEAD(free, plinks.s.ss);
-		vm_page_free_toq(m);
+		vm_page_free_toq(m, true);
 	}
 
 	if (update_wire_count)