Index: sys/kern/subr_pctrie.c =================================================================== --- sys/kern/subr_pctrie.c +++ sys/kern/subr_pctrie.c @@ -901,6 +901,96 @@ callback, keyoff, arg)); } +/* + * Find first leaf >= index, and fill iter with the path to the parent of that + * leaf. + */ +static uint64_t * +pctrie_iter_index(struct pctrie_iter *iter, struct pctrie_node *node, + uint64_t index) +{ + uint64_t *m; + int slot; + + /* Seek a node that matches index. */ + while (!pctrie_isleaf(node) && + !pctrie_keybarr(node, index, &slot)) { + iter->path[iter->top++] = node; + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + + /* + * If no such node was found, and instead this path leads only to nodes + * < index, back up to find a subtrie with the least value > index. + */ + if (pctrie_isleaf(node) ? + (m = pctrie_toval(node)) == NULL || *m < index : + node->pn_owner < index) { + /* Climb the path to find a node with a descendant > index. */ + do { + if (iter->top == 0) + return (NULL); + node = iter->path[--iter->top]; + slot = pctrie_slot(node, index) + 1; + } while ((node->pn_popmap >> slot) == 0); + + /* Step to the least child with a descendant > index. */ + slot += ffs(node->pn_popmap >> slot) - 1; + iter->top++; + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + /* Descend to the least leaf of the subtrie. */ + while (!pctrie_isleaf(node)) { + slot = ffs(node->pn_popmap) - 1; + iter->path[iter->top++] = node; + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + m = pctrie_toval(node); + iter->index = *m; + return (m); +} + +/* + * Find the first leaf with value at least 'step' greater than the previous + * leaf. + */ +uint64_t * +pctrie_iter_step(struct pctrie_iter *iter, uint64_t step) +{ + struct pctrie_node *node; + uint64_t index = iter->index + step; + int slot; + + /* Climb the path to find a node that matches a prefix of index. */ + do { + if (iter->top == 0) + return (NULL); + node = iter->path[--iter->top]; + } while (pctrie_keybarr(node, index, &slot)); + + /* Complete the search for index from node. */ + iter->top++; + node = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED); + return (pctrie_iter_index(iter, node, index)); +} + +/* + * Find first leaf >= index, and initialize iter with the path to the parent of + * that leaf. + */ +uint64_t * +pctrie_iter_start(struct pctrie_iter *iter, + struct pctrie *ptree, uint64_t index) +{ + struct pctrie_node *node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); + + iter->top = 0; + return (pctrie_iter_index(iter, node, index)); +} + /* * Replace an existing value in the trie with another one. * Panics if there is not an old value in the trie at the new value's index. Index: sys/kern/subr_rangeset.c =================================================================== --- sys/kern/subr_rangeset.c +++ sys/kern/subr_rangeset.c @@ -272,8 +272,8 @@ int rangeset_copy(struct rangeset *dst_rs, struct rangeset *src_rs) { + struct pctrie_iter iter; struct rs_el *src_r, *dst_r; - uint64_t cursor; int error; MPASS(pctrie_is_empty(&dst_rs->rs_trie)); @@ -281,10 +281,8 @@ MPASS(dst_rs->rs_dup_data == src_rs->rs_dup_data); error = 0; - for (cursor = 0;; cursor = src_r->re_start + 1) { - src_r = RANGESET_PCTRIE_LOOKUP_GE(&src_rs->rs_trie, cursor); - if (src_r == NULL) - break; + for (src_r = RANGESET_PCTRIE_ITER_START(&iter, &src_rs->rs_trie, 0); + src_r != NULL; src_r = RANGESET_PCTRIE_ITER_STEP(&iter, 1)) { dst_r = dst_rs->rs_dup_data(dst_rs->rs_data_ctx, src_r); if (dst_r == NULL) { error = ENOMEM; @@ -303,13 +301,11 @@ static void rangeset_check(struct rangeset *rs) { + struct pctrie_iter iter; struct rs_el *r, *rp; - uint64_t cursor; - for (cursor = 0, rp = NULL;; cursor = r->re_start + 1, rp = r) { - r = RANGESET_PCTRIE_LOOKUP_GE(&rs->rs_trie, cursor); - if (r == NULL) - break; + for (rp = NULL, r = RANGESET_PCTRIE_ITER_START(&iter, &rs->rs_trie, 0); + r != NULL; rp = r, r = RANGESET_PCTRIE_ITER_STEP(&iter, 1)) { KASSERT(r->re_start < r->re_end, ("invalid interval rs %p elem %p (%#jx, %#jx)", rs, r, (uintmax_t)r->re_start, (uintmax_t)r->re_end)); @@ -332,9 +328,9 @@ DB_SHOW_COMMAND(rangeset, rangeset_show_fn) { + struct pctrie_iter iter; struct rangeset *rs; struct rs_el *r; - uint64_t cursor; if (!have_addr) { db_printf("show rangeset addr\n"); @@ -343,10 +339,8 @@ rs = (struct rangeset *)addr; db_printf("rangeset %p\n", rs); - for (cursor = 0;; cursor = r->re_start + 1) { - r = RANGESET_PCTRIE_LOOKUP_GE(&rs->rs_trie, cursor); - if (r == NULL) - break; + for (r = RANGESET_PCTRIE_ITER_START(&iter, &rs->rs_trie, 0); + r != NULL; r = RANGESET_PCTRIE_ITER_STEP(&iter, 1)) { db_printf(" el %p start %#jx end %#jx\n", r, r->re_start, r->re_end); } Index: sys/sys/pctrie.h =================================================================== --- sys/sys/pctrie.h +++ sys/sys/pctrie.h @@ -240,6 +240,21 @@ } \ \ static __inline __unused struct type * \ +name##_PCTRIE_ITER_STEP(struct pctrie_iter *iter, int64_t step) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_step(iter, step)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_START(struct pctrie_iter *iter, \ + struct pctrie *ptree, int64_t index) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_start(iter, ptree, index)); \ +} \ + \ +static __inline __unused struct type * \ name##_PCTRIE_REPLACE(struct pctrie *ptree, struct type *ptr) \ { \ \ @@ -295,6 +310,10 @@ pctrie_cb_t callback, int keyoff, void *arg); struct pctrie_node *pctrie_reclaim_resume_cb(struct pctrie_node **pnode, pctrie_cb_t callback, int keyoff, void *arg); +struct pctrie_iter; +uint64_t *pctrie_iter_step(struct pctrie_iter *iter, uint64_t step); +uint64_t *pctrie_iter_start(struct pctrie_iter *iter, + struct pctrie *ptree, uint64_t index); uint64_t *pctrie_remove_lookup(struct pctrie *ptree, uint64_t index, struct pctrie_node **killnode); uint64_t *pctrie_replace(struct pctrie *ptree, uint64_t *newval); @@ -343,5 +362,11 @@ #define PCTRIE_COUNT (1 << PCTRIE_WIDTH) +struct pctrie_iter { + struct pctrie_node *path[sizeof(uint64_t) * NBBY / PCTRIE_WIDTH]; + uint64_t index; + int top; +}; + #endif /* _KERNEL */ #endif /* !_SYS_PCTRIE_H_ */ Index: sys/vm/swap_pager.c =================================================================== --- sys/vm/swap_pager.c +++ sys/vm/swap_pager.c @@ -1797,8 +1797,8 @@ u_long swap_pager_swapped_pages(vm_object_t object) { + struct pctrie_iter iter; struct swblk *sb; - vm_pindex_t pi; u_long res; int i; @@ -1807,13 +1807,14 @@ if (pctrie_is_empty(&object->un_pager.swp.swp_blks)) return (0); - for (res = 0, pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( - &object->un_pager.swp.swp_blks, pi)) != NULL; - pi = sb->p + SWAP_META_PAGES) { + res = 0; + sb = SWAP_PCTRIE_ITER_START(&iter, &object->un_pager.swp.swp_blks, 0); + while (sb != NULL) { for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) res++; } + sb = SWAP_PCTRIE_ITER_STEP(&iter, SWAP_META_PAGES); } return (res); } @@ -1827,17 +1828,17 @@ static void swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object) { + struct pctrie_iter iter; struct swblk *sb; vm_page_t m; - vm_pindex_t pi; daddr_t blk; int i, nv, rahead, rv; KASSERT((object->flags & OBJ_SWAP) != 0, ("%s: Object not swappable", __func__)); - for (pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( - &object->un_pager.swp.swp_blks, pi)) != NULL; ) { + sb = SWAP_PCTRIE_ITER_START(&iter, &object->un_pager.swp.swp_blks, 0); + while (sb != NULL) { if ((object->flags & OBJ_DEAD) != 0) { /* * Make sure that pending writes finish before @@ -1915,7 +1916,7 @@ break; } if (i == SWAP_META_PAGES) - pi = sb->p + SWAP_META_PAGES; + sb = SWAP_PCTRIE_ITER_STEP(&iter, SWAP_META_PAGES); } } @@ -2144,6 +2145,7 @@ swp_pager_meta_transfer(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t pindex, vm_pindex_t count, vm_size_t *moved) { + struct pctrie_iter iter; struct page_range range; struct swblk *sb; vm_page_t m; @@ -2162,11 +2164,9 @@ swp_pager_init_freerange(&range); offset = pindex; last = pindex + count; - for (;;) { - sb = SWAP_PCTRIE_LOOKUP_GE(&srcobject->un_pager.swp.swp_blks, - rounddown(pindex, SWAP_META_PAGES)); - if (sb == NULL || sb->p >= last) - break; + sb = SWAP_PCTRIE_ITER_START(&iter, &srcobject->un_pager.swp.swp_blks, + rounddown(pindex, SWAP_META_PAGES)); + while (sb != NULL && sb->p < last) { start = pindex > sb->p ? pindex - sb->p : 0; limit = last - sb->p < SWAP_META_PAGES ? last - sb->p : SWAP_META_PAGES; @@ -2195,6 +2195,7 @@ sb->p); uma_zfree(swblk_zone, sb); } + sb = SWAP_PCTRIE_ITER_STEP(&iter, SWAP_META_PAGES); } swp_pager_freeswapspace(&range); out: @@ -2292,6 +2293,7 @@ vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex) { + struct pctrie_iter iter; struct swblk *sb; int i; @@ -2300,7 +2302,7 @@ if (pctrie_is_empty(&object->un_pager.swp.swp_blks)) return (object->size); - sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, + sb = SWAP_PCTRIE_ITER_START(&iter, &object->un_pager.swp.swp_blks, rounddown(pindex, SWAP_META_PAGES)); if (sb == NULL) return (object->size); @@ -2309,8 +2311,7 @@ if (sb->d[i] != SWAPBLK_NONE) return (sb->p + i); } - sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, - roundup(pindex, SWAP_META_PAGES)); + sb = SWAP_PCTRIE_ITER_STEP(&iter, SWAP_META_PAGES); if (sb == NULL) return (object->size); }