Index: sys/kern/subr_pctrie.c =================================================================== --- sys/kern/subr_pctrie.c +++ sys/kern/subr_pctrie.c @@ -738,42 +738,25 @@ } #endif -/* - * Remove the specified index from the tree, and return the value stored at - * that index. If the index is not present, return NULL. - */ -uint64_t * -pctrie_remove_lookup(struct pctrie *ptree, uint64_t index, - struct pctrie_node **freenode) +static void +pctrie_remove(struct pctrie *ptree, uint64_t index, struct pctrie_node *parent, + struct pctrie_node *node, struct pctrie_node **freenode) { - struct pctrie_node *child, *node, *parent; - uint64_t *m; + struct pctrie_node *child; int slot; - *freenode = node = NULL; - child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); - for (;;) { - if (pctrie_isleaf(child)) - break; - parent = node; - node = child; - slot = pctrie_slot(node, index); - child = pctrie_node_load(&node->pn_child[slot], NULL, - PCTRIE_LOCKED); - } - if ((m = pctrie_toval(child)) == NULL || *m != index) - return (NULL); if (node == NULL) { pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED); - return (m); + return; } + slot = pctrie_slot(node, index); KASSERT((node->pn_popmap & (1 << slot)) != 0, ("%s: bad popmap slot %d in node %p", __func__, slot, node)); node->pn_popmap ^= 1 << slot; pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED); if (!powerof2(node->pn_popmap)) - return (m); + return; KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__)); slot = ffs(node->pn_popmap) - 1; child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED); @@ -795,6 +778,34 @@ */ pctrie_node_put(node); *freenode = node; +} + +/* + * Remove the specified index from the tree, and return the value stored at + * that index. If the index is not present, return NULL. + */ +uint64_t * +pctrie_remove_lookup(struct pctrie *ptree, uint64_t index, + struct pctrie_node **freenode) +{ + struct pctrie_node *child, *node, *parent; + uint64_t *m; + int slot; + + *freenode = node = NULL; + child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); + for (;;) { + if (pctrie_isleaf(child)) + break; + parent = node; + node = child; + slot = pctrie_slot(node, index); + child = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + if ((m = pctrie_toval(child)) == NULL || *m != index) + return (NULL); + pctrie_remove(ptree, index, parent, node, freenode); return (m); } @@ -901,6 +912,138 @@ callback, keyoff, arg)); } +/* + * Find first leaf >= index, and fill iter with the path to the parent of that + * leaf. + */ +static uint64_t * +pctrie_iter_index(struct pctrie_iter *iter, struct pctrie_node *node, + uint64_t index) +{ + uint64_t *m; + int slot; + + /* Seek a node that matches index. */ + while (!pctrie_isleaf(node) && + !pctrie_keybarr(node, index, &slot)) { + iter->path[iter->top++] = node; + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + + /* + * If no such node was found, and instead this path leads only to nodes + * < index, back up to find a subtrie with the least value > index. + */ + if (pctrie_isleaf(node) ? + (m = pctrie_toval(node)) == NULL || *m < index : + node->pn_owner < index) { + /* Climb the path to find a node with a descendant > index. */ + do { + if (iter->top == 0) + return (NULL); + node = iter->path[--iter->top]; + slot = pctrie_slot(node, index) + 1; + } while ((node->pn_popmap >> slot) == 0); + + /* Step to the least child with a descendant > index. */ + slot += ffs(node->pn_popmap >> slot) - 1; + iter->top++; + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + /* Descend to the least leaf of the subtrie. */ + while (!pctrie_isleaf(node)) { + slot = ffs(node->pn_popmap) - 1; + iter->path[iter->top++] = node; + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + m = pctrie_toval(node); + iter->index = *m; + return (m); +} + +/* + * Find the first leaf with value at least 'step' greater than the previous + * leaf. + */ +uint64_t * +pctrie_iter_step(struct pctrie_iter *iter, uint64_t step) +{ + struct pctrie_node *node; + uint64_t index = iter->index + step; + int slot; + + if (iter->top == 0) { + uint64_t *m; + node = pctrie_root_load(iter->ptree, NULL, PCTRIE_UNSERIALIZED); + if (!pctrie_isleaf(node)) + /* + * If there have been insertions since the last + * iteration, the root may not be a leaf; in that case, + * put the root on the path. + */ + iter->path[iter->top++] = node; + else if ((m = pctrie_toval(node)) != NULL && *m >= index) { + /* If the root is the only value in the trie and it is + * larger than the current index value, it was promoted + * to root via the removal of another, and should be + * considered as the last iteration. + */ + iter->index = *m; + return (m); + } else + return (NULL); + } + + /* Climb the path to find a node that matches a prefix of index. */ + do { + if (iter->top == 0) + return (NULL); + node = iter->path[--iter->top]; + } while (pctrie_keybarr(node, index, &slot)); + + /* Complete the search for index from node. */ + iter->top++; + node = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED); + return (pctrie_iter_index(iter, node, index)); +} + +/* + * Find first leaf >= index, and initialize iter with the path to the parent of + * that leaf. + */ +uint64_t * +pctrie_iter_start(struct pctrie_iter *iter, + struct pctrie *ptree, uint64_t index) +{ + struct pctrie_node *node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); + + iter->ptree = ptree; + iter->top = 0; + return (pctrie_iter_index(iter, node, index)); +} + +/* + * Remove from the trie the leaf last chosen by the iterator, and + * adjust the path if it's last member is to be freed. + */ +void +pctrie_iter_remove(struct pctrie_iter *iter, struct pctrie_node **freenode) +{ + struct pctrie_node *node = NULL, *parent = NULL; + + if (iter->top >= 1) + node = iter->path[iter->top - 1]; + if (iter->top >= 2) + parent = iter->path[iter->top - 2]; + *freenode = NULL; + pctrie_remove(iter->ptree, iter->index, parent, node, freenode); + if (*freenode != NULL) + --iter->top; +} + /* * Replace an existing value in the trie with another one. * Panics if there is not an old value in the trie at the new value's index. Index: sys/kern/subr_rangeset.c =================================================================== --- sys/kern/subr_rangeset.c +++ sys/kern/subr_rangeset.c @@ -272,8 +272,8 @@ int rangeset_copy(struct rangeset *dst_rs, struct rangeset *src_rs) { + struct pctrie_iter iter; struct rs_el *src_r, *dst_r; - uint64_t cursor; int error; MPASS(pctrie_is_empty(&dst_rs->rs_trie)); @@ -281,10 +281,8 @@ MPASS(dst_rs->rs_dup_data == src_rs->rs_dup_data); error = 0; - for (cursor = 0;; cursor = src_r->re_start + 1) { - src_r = RANGESET_PCTRIE_LOOKUP_GE(&src_rs->rs_trie, cursor); - if (src_r == NULL) - break; + for (src_r = RANGESET_PCTRIE_ITER_START(&iter, &src_rs->rs_trie, 0); + src_r != NULL; src_r = RANGESET_PCTRIE_ITER_STEP(&iter, 1)) { dst_r = dst_rs->rs_dup_data(dst_rs->rs_data_ctx, src_r); if (dst_r == NULL) { error = ENOMEM; @@ -303,13 +301,11 @@ static void rangeset_check(struct rangeset *rs) { + struct pctrie_iter iter; struct rs_el *r, *rp; - uint64_t cursor; - for (cursor = 0, rp = NULL;; cursor = r->re_start + 1, rp = r) { - r = RANGESET_PCTRIE_LOOKUP_GE(&rs->rs_trie, cursor); - if (r == NULL) - break; + for (rp = NULL, r = RANGESET_PCTRIE_ITER_START(&iter, &rs->rs_trie, 0); + r != NULL; rp = r, r = RANGESET_PCTRIE_ITER_STEP(&iter, 1)) { KASSERT(r->re_start < r->re_end, ("invalid interval rs %p elem %p (%#jx, %#jx)", rs, r, (uintmax_t)r->re_start, (uintmax_t)r->re_end)); @@ -332,9 +328,9 @@ DB_SHOW_COMMAND(rangeset, rangeset_show_fn) { + struct pctrie_iter iter; struct rangeset *rs; struct rs_el *r; - uint64_t cursor; if (!have_addr) { db_printf("show rangeset addr\n"); @@ -343,10 +339,8 @@ rs = (struct rangeset *)addr; db_printf("rangeset %p\n", rs); - for (cursor = 0;; cursor = r->re_start + 1) { - r = RANGESET_PCTRIE_LOOKUP_GE(&rs->rs_trie, cursor); - if (r == NULL) - break; + for (r = RANGESET_PCTRIE_ITER_START(&iter, &rs->rs_trie, 0); + r != NULL; r = RANGESET_PCTRIE_ITER_STEP(&iter, 1)) { db_printf(" el %p start %#jx end %#jx\n", r, r->re_start, r->re_end); } Index: sys/sys/pctrie.h =================================================================== --- sys/sys/pctrie.h +++ sys/sys/pctrie.h @@ -240,6 +240,31 @@ } \ \ static __inline __unused struct type * \ +name##_PCTRIE_ITER_STEP(struct pctrie_iter *iter, int64_t step) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_step(iter, step)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_START(struct pctrie_iter *iter, \ + struct pctrie *ptree, int64_t index) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_start(iter, ptree, index)); \ +} \ + \ +static __inline __unused void \ +name##_PCTRIE_ITER_REMOVE(struct pctrie_iter *iter) \ +{ \ + struct pctrie_node *freenode; \ + \ + pctrie_iter_remove(iter, &freenode); \ + if (freenode != NULL) \ + freefn(iter->ptree, freenode); \ +} \ + \ +static __inline __unused struct type * \ name##_PCTRIE_REPLACE(struct pctrie *ptree, struct type *ptr) \ { \ \ @@ -295,6 +320,12 @@ pctrie_cb_t callback, int keyoff, void *arg); struct pctrie_node *pctrie_reclaim_resume_cb(struct pctrie_node **pnode, pctrie_cb_t callback, int keyoff, void *arg); +struct pctrie_iter; +uint64_t *pctrie_iter_step(struct pctrie_iter *iter, uint64_t step); +uint64_t *pctrie_iter_start(struct pctrie_iter *iter, + struct pctrie *ptree, uint64_t index); +void pctrie_iter_remove(struct pctrie_iter *iter, + struct pctrie_node **freenode); uint64_t *pctrie_remove_lookup(struct pctrie *ptree, uint64_t index, struct pctrie_node **killnode); uint64_t *pctrie_replace(struct pctrie *ptree, uint64_t *newval); @@ -343,5 +374,12 @@ #define PCTRIE_COUNT (1 << PCTRIE_WIDTH) +struct pctrie_iter { + struct pctrie_node *path[sizeof(uint64_t) * NBBY / PCTRIE_WIDTH]; + struct pctrie *ptree; + uint64_t index; + int top; +}; + #endif /* _KERNEL */ #endif /* !_SYS_PCTRIE_H_ */ Index: sys/vm/swap_pager.c =================================================================== --- sys/vm/swap_pager.c +++ sys/vm/swap_pager.c @@ -1797,8 +1797,8 @@ u_long swap_pager_swapped_pages(vm_object_t object) { + struct pctrie_iter iter; struct swblk *sb; - vm_pindex_t pi; u_long res; int i; @@ -1807,9 +1807,10 @@ if (pctrie_is_empty(&object->un_pager.swp.swp_blks)) return (0); - for (res = 0, pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( - &object->un_pager.swp.swp_blks, pi)) != NULL; - pi = sb->p + SWAP_META_PAGES) { + res = 0; + for (sb = SWAP_PCTRIE_ITER_START(&iter, + &object->un_pager.swp.swp_blks, 0); sb != NULL; + sb = SWAP_PCTRIE_ITER_STEP(&iter, SWAP_META_PAGES)) { for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) res++; @@ -1827,17 +1828,17 @@ static void swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object) { + struct pctrie_iter iter; struct swblk *sb; vm_page_t m; - vm_pindex_t pi; daddr_t blk; int i, nv, rahead, rv; KASSERT((object->flags & OBJ_SWAP) != 0, ("%s: Object not swappable", __func__)); - for (pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( - &object->un_pager.swp.swp_blks, pi)) != NULL; ) { + for (sb = SWAP_PCTRIE_ITER_START(&iter, + &object->un_pager.swp.swp_blks, 0); sb != NULL;) { if ((object->flags & OBJ_DEAD) != 0) { /* * Make sure that pending writes finish before @@ -1914,8 +1915,8 @@ */ break; } - if (i == SWAP_META_PAGES) - pi = sb->p + SWAP_META_PAGES; + sb = SWAP_PCTRIE_ITER_STEP(&iter, + rounddown(i, SWAP_META_PAGES)); } } @@ -2144,6 +2145,8 @@ swp_pager_meta_transfer(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t pindex, vm_pindex_t count, vm_size_t *moved) { + struct pctrie_iter iter; + struct pctrie *ptree; struct page_range range; struct swblk *sb; vm_page_t m; @@ -2156,17 +2159,17 @@ mc = 0; m = NULL; - if (count == 0 || pctrie_is_empty(&srcobject->un_pager.swp.swp_blks)) + ptree = &srcobject->un_pager.swp.swp_blks; + if (count == 0 || pctrie_is_empty(ptree)) goto out; swp_pager_init_freerange(&range); offset = pindex; last = pindex + count; - for (;;) { - sb = SWAP_PCTRIE_LOOKUP_GE(&srcobject->un_pager.swp.swp_blks, - rounddown(pindex, SWAP_META_PAGES)); - if (sb == NULL || sb->p >= last) - break; + sb = SWAP_PCTRIE_ITER_START(&iter, ptree, + rounddown(pindex, SWAP_META_PAGES)); + for (; sb != NULL && sb->p < last; + sb = SWAP_PCTRIE_ITER_STEP(&iter, SWAP_META_PAGES)) { start = pindex > sb->p ? pindex - sb->p : 0; limit = last - sb->p < SWAP_META_PAGES ? last - sb->p : SWAP_META_PAGES; @@ -2191,8 +2194,7 @@ pindex = sb->p + SWAP_META_PAGES; if (swp_pager_swblk_empty(sb, 0, start) && swp_pager_swblk_empty(sb, limit, SWAP_META_PAGES)) { - SWAP_PCTRIE_REMOVE(&srcobject->un_pager.swp.swp_blks, - sb->p); + SWAP_PCTRIE_ITER_REMOVE(&iter); uma_zfree(swblk_zone, sb); } } @@ -2292,6 +2294,7 @@ vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex) { + struct pctrie_iter iter; struct swblk *sb; int i; @@ -2300,7 +2303,7 @@ if (pctrie_is_empty(&object->un_pager.swp.swp_blks)) return (object->size); - sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, + sb = SWAP_PCTRIE_ITER_START(&iter, &object->un_pager.swp.swp_blks, rounddown(pindex, SWAP_META_PAGES)); if (sb == NULL) return (object->size); @@ -2309,8 +2312,7 @@ if (sb->d[i] != SWAPBLK_NONE) return (sb->p + i); } - sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, - roundup(pindex, SWAP_META_PAGES)); + sb = SWAP_PCTRIE_ITER_STEP(&iter, SWAP_META_PAGES); if (sb == NULL) return (object->size); }