Index: sys/vm/vm_kern.c =================================================================== --- sys/vm/vm_kern.c +++ sys/vm/vm_kern.c @@ -634,8 +634,9 @@ static struct vmem * _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) { + struct pctrie_iter pages; struct vmem *arena; - vm_page_t m, next; + vm_page_t m; vm_offset_t end, offset; int domain; @@ -648,17 +649,18 @@ offset = addr - VM_MIN_KERNEL_ADDRESS; end = offset + size; VM_OBJECT_WLOCK(object); - m = vm_page_lookup(object, atop(offset)); + vm_page_iter_init(&pages, object); + m = vm_page_iter_lookup(&pages, atop(offset)); domain = vm_page_domain(m); if (__predict_true((m->oflags & VPO_KMEM_EXEC) == 0)) arena = vm_dom[domain].vmd_kernel_arena; else arena = vm_dom[domain].vmd_kernel_rwx_arena; - for (; offset < end; offset += PAGE_SIZE, m = next) { - next = vm_page_next(m); + for (; offset < end; offset += PAGE_SIZE, + m = vm_page_iter_lookup(&pages, atop(offset))) { vm_page_xbusy_claim(m); vm_page_unwire_noq(m); - vm_page_free(m); + vm_page_iter_free(&pages); } VM_OBJECT_WUNLOCK(object); Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c +++ sys/vm/vm_object.c @@ -1520,9 +1520,10 @@ void vm_object_split(vm_map_entry_t entry) { - vm_page_t m, m_next; + struct pctrie_iter pages; + vm_page_t m; vm_object_t orig_object, new_object, backing_object; - vm_pindex_t idx, offidxstart; + vm_pindex_t offidxstart; vm_size_t size; orig_object = entry->object.vm_object; @@ -1573,17 +1574,11 @@ * that the object is in transition. 
*/ vm_object_set_flag(orig_object, OBJ_SPLIT); -#ifdef INVARIANTS - idx = 0; -#endif + vm_page_iter_limit_init(&pages, orig_object, offidxstart + size); retry: - m = vm_page_find_least(orig_object, offidxstart); - KASSERT(m == NULL || idx <= m->pindex - offidxstart, - ("%s: object %p was repopulated", __func__, orig_object)); - for (; m != NULL && (idx = m->pindex - offidxstart) < size; - m = m_next) { - m_next = TAILQ_NEXT(m, listq); - + pctrie_iter_reset(&pages); + for (m = vm_page_iter_lookup_ge(&pages, offidxstart); m != NULL; + m = vm_radix_iter_step(&pages)) { /* * We must wait for pending I/O to complete before we can * rename the page. @@ -1604,13 +1599,13 @@ * an incomplete fault. Just remove and ignore. */ if (vm_page_none_valid(m)) { - if (vm_page_remove(m)) + if (vm_page_iter_remove(&pages)) vm_page_free(m); continue; } /* vm_page_rename() will dirty the page. */ - if (vm_page_rename(m, new_object, idx)) { + if (vm_page_rename(&pages, new_object, m->pindex - offidxstart)) { vm_page_xunbusy(m); VM_OBJECT_WUNLOCK(new_object); VM_OBJECT_WUNLOCK(orig_object); @@ -1656,7 +1651,8 @@ } static vm_page_t -vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p) +vm_object_collapse_scan_wait(struct pctrie_iter *pages, vm_object_t object, + vm_page_t p) { vm_object_t backing_object; @@ -1683,12 +1679,14 @@ VM_OBJECT_WLOCK(object); } VM_OBJECT_WLOCK(backing_object); - return (TAILQ_FIRST(&backing_object->memq)); + vm_page_iter_init(pages, backing_object); + return (vm_page_iter_lookup_ge(pages, 0)); } static void vm_object_collapse_scan(vm_object_t object) { + struct pctrie_iter pages; vm_object_t backing_object; vm_page_t next, p, pp; vm_pindex_t backing_offset_index, new_pindex; @@ -1702,7 +1700,8 @@ /* * Our scan */ - for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) { + vm_page_iter_init(&pages, backing_object); + for (p = vm_page_iter_lookup_ge(&pages, 0); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); new_pindex = p->pindex - 
backing_offset_index; @@ -1710,7 +1709,7 @@ * Check for busy page */ if (vm_page_tryxbusy(p) == 0) { - next = vm_object_collapse_scan_wait(object, p); + next = vm_object_collapse_scan_wait(&pages, object, p); continue; } @@ -1727,16 +1726,18 @@ KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (vm_page_remove(p)) + if (vm_page_iter_remove(&pages)) vm_page_free(p); + next = vm_radix_iter_step(&pages); continue; } if (!vm_page_all_valid(p)) { KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (vm_page_remove(p)) + if (vm_page_iter_remove(&pages)) vm_page_free(p); + next = vm_radix_iter_step(&pages); continue; } @@ -1749,7 +1750,7 @@ * busy bit owner, we can't tell whether it shadows the * original page. */ - next = vm_object_collapse_scan_wait(object, pp); + next = vm_object_collapse_scan_wait(&pages, object, pp); continue; } @@ -1775,10 +1776,11 @@ vm_pager_freespace(backing_object, p->pindex, 1); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (vm_page_remove(p)) - vm_page_free(p); if (pp != NULL) vm_page_xunbusy(pp); + if (vm_page_iter_remove(&pages)) + vm_page_free(p); + next = vm_radix_iter_step(&pages); continue; } @@ -1789,9 +1791,10 @@ * If the page was mapped to a process, it can remain mapped * through the rename. vm_page_rename() will dirty the page. 
*/ - if (vm_page_rename(p, object, new_pindex)) { + if (vm_page_rename(&pages, object, new_pindex)) { vm_page_xunbusy(p); - next = vm_object_collapse_scan_wait(object, NULL); + next = vm_object_collapse_scan_wait(&pages, object, + NULL); continue; } @@ -1807,6 +1810,7 @@ backing_offset_index); #endif vm_page_xunbusy(p); + next = vm_radix_iter_step(&pages); } return; } @@ -1981,7 +1985,8 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options) { - vm_page_t p, next; + struct pctrie_iter pages; + vm_page_t p; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_UNMANAGED) == 0 || @@ -1990,16 +1995,11 @@ if (object->resident_page_count == 0) return; vm_object_pip_add(object, 1); + vm_page_iter_limit_init(&pages, object, end); again: - p = vm_page_find_least(object, start); - - /* - * Here, the variable "p" is either (1) the page with the least pindex - * greater than or equal to the parameter "start" or (2) NULL. - */ - for (; p != NULL && (p->pindex < end || end == 0); p = next) { - next = TAILQ_NEXT(p, listq); - + pctrie_iter_reset(&pages); + for (p = vm_page_iter_lookup_ge(&pages, start); p != NULL; + p = vm_radix_iter_step(&pages)) { /* * Skip invalid pages if asked to do so. 
Try to avoid acquiring * the busy lock, as some consumers rely on this to avoid @@ -2060,7 +2060,7 @@ if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0 && !vm_page_try_remove_all(p)) goto wired; - vm_page_free(p); + vm_page_iter_free(&pages); } vm_object_pip_wakeup(object); Index: sys/vm/vm_page.h =================================================================== --- sys/vm/vm_page.h +++ sys/vm/vm_page.h @@ -602,6 +602,7 @@ void vm_page_busy_sleep_unlocked(vm_object_t obj, vm_page_t m, vm_pindex_t pindex, const char *wmesg, int allocflags); void vm_page_free(vm_page_t m); +void vm_page_iter_free(struct pctrie_iter *); void vm_page_free_zero(vm_page_t m); void vm_page_activate (vm_page_t); @@ -680,8 +681,9 @@ void vm_page_release_locked(vm_page_t m, int flags); vm_page_t vm_page_relookup(vm_object_t, vm_pindex_t); bool vm_page_remove(vm_page_t); +bool vm_page_iter_remove(struct pctrie_iter *); bool vm_page_remove_xbusy(vm_page_t); -int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t); +int vm_page_rename(struct pctrie_iter *, vm_object_t, vm_pindex_t); void vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, vm_page_t mold); int vm_page_sbusied(vm_page_t m); Index: sys/vm/vm_page.c =================================================================== --- sys/vm/vm_page.c +++ sys/vm/vm_page.c @@ -170,6 +170,7 @@ static void vm_page_enqueue(vm_page_t m, uint8_t queue); static bool vm_page_free_prep(vm_page_t m); static void vm_page_free_toq(vm_page_t m); +static void vm_page_free_toq_impl(vm_page_t m, bool do_remove); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); @@ -1637,14 +1638,18 @@ } /* - * Do the work to remove a page from its object. The caller is responsible for - * updating the page's fields to reflect this removal. 
+ * vm_page_remove_radixdone + * + * Complete page "m" removal from the specified object after the radix trie + * unhooking. + * + * The caller is responsible for updating the page's fields to reflect this + * removal. */ static void -vm_page_object_remove(vm_page_t m) +vm_page_remove_radixdone(vm_page_t m) { vm_object_t object; - vm_page_t mrem __diagused; vm_page_assert_xbusied(m); object = m->object; @@ -1657,10 +1662,7 @@ vm_pager_page_unswapped(m); vm_pager_page_removed(object, m); - m->object = NULL; - mrem = vm_radix_remove(&object->rtree, m->pindex); - KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m)); /* * Now remove from the object's list of backed pages. @@ -1679,6 +1681,52 @@ vdrop(object->handle); } +/* + * vm_page_free_object_prep: + * + * Disassociates the given page from its VM object. + * + * The object must be locked, and the page must be xbusy. + */ +static void +vm_page_free_object_prep(vm_page_t m) +{ + KASSERT(((m->oflags & VPO_UNMANAGED) != 0) == + ((m->object->flags & OBJ_UNMANAGED) != 0), + ("%s: managed flag mismatch for page %p", + __func__, m)); + vm_page_assert_xbusied(m); + + /* + * The object reference can be released without an atomic + * operation. + */ + KASSERT((m->flags & PG_FICTITIOUS) != 0 || + m->ref_count == VPRC_OBJREF, + ("%s: page %p has unexpected ref_count %u", + __func__, m, m->ref_count)); + vm_page_remove_radixdone(m); + m->ref_count -= VPRC_OBJREF; +} + +/* + * vm_page_iter_free: + * + * Free the current page, as identified by the iterator. + */ +void +vm_page_iter_free(struct pctrie_iter *pages) +{ + vm_page_t m; + + m = vm_radix_iter_page(pages); + vm_radix_iter_remove(pages); + vm_page_free_object_prep(m); + vm_page_xunbusy(m); + m->flags &= ~PG_ZERO; + vm_page_free_toq(m); +} + /* * vm_page_remove: * @@ -1702,6 +1750,42 @@ return (dropped); } +/* + * vm_page_iter_remove: + * + * Remove the current page, as identified by the iterator, from its object + * and from the radix tree. 
+ */ +bool +vm_page_iter_remove(struct pctrie_iter *pages) +{ + vm_page_t m; + bool dropped; + + m = vm_radix_iter_page(pages); + vm_radix_iter_remove(pages); + vm_page_remove_radixdone(m); + dropped = (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF); + vm_page_xunbusy(m); + + return (dropped); +} + +/* + * vm_page_radix_remove + * + * Removes the specified page from the radix tree. + */ +static void +vm_page_radix_remove(vm_page_t m) +{ + vm_page_t mrem __diagused; + + mrem = vm_radix_remove(&m->object->rtree, m->pindex); + KASSERT(mrem == m, + ("removed page %p, expected page %p", mrem, m)); +} + /* * vm_page_remove_xbusy * @@ -1712,7 +1796,8 @@ vm_page_remove_xbusy(vm_page_t m) { - vm_page_object_remove(m); + vm_page_radix_remove(m); + vm_page_remove_radixdone(m); return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF); } @@ -1983,8 +2068,8 @@ /* * vm_page_rename: * - * Move the given memory entry from its - * current object to the specified target object/offset. + * Move the current page, as identified by iterator, from its current + * object to the specified target object/offset. * * Note: swap associated with the page must be invalidated by the move. We * have to do this for several reasons: (1) we aren't freeing the @@ -1999,13 +2084,15 @@ * The objects must be locked. */ int -vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) +vm_page_rename(struct pctrie_iter *pages, + vm_object_t new_object, vm_pindex_t new_pindex) { - vm_page_t mpred; + vm_page_t m, mpred; vm_pindex_t opidx; VM_OBJECT_ASSERT_WLOCKED(new_object); + m = vm_radix_iter_page(pages); KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m)); /* @@ -2025,7 +2112,8 @@ * the listq iterator is tainted. */ m->pindex = opidx; - vm_page_object_remove(m); + vm_radix_iter_remove(pages); + vm_page_remove_radixdone(m); /* Return back to the new pindex to complete vm_page_insert(). 
*/ m->pindex = new_pindex; @@ -4104,22 +4192,8 @@ VM_CNT_INC(v_tfree); if (m->object != NULL) { - KASSERT(((m->oflags & VPO_UNMANAGED) != 0) == - ((m->object->flags & OBJ_UNMANAGED) != 0), - ("vm_page_free_prep: managed flag mismatch for page %p", - m)); - vm_page_assert_xbusied(m); - - /* - * The object reference can be released without an atomic - * operation. - */ - KASSERT((m->flags & PG_FICTITIOUS) != 0 || - m->ref_count == VPRC_OBJREF, - ("vm_page_free_prep: page %p has unexpected ref_count %u", - m, m->ref_count)); - vm_page_object_remove(m); - m->ref_count -= VPRC_OBJREF; + vm_page_radix_remove(m); + vm_page_free_object_prep(m); } else vm_page_assert_unbusied(m);