Index: sys/kern/subr_pctrie.c =================================================================== --- sys/kern/subr_pctrie.c +++ sys/kern/subr_pctrie.c @@ -62,9 +62,6 @@ #include #endif -#define PCTRIE_MASK (PCTRIE_COUNT - 1) -#define PCTRIE_LIMIT (howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1) - #if PCTRIE_WIDTH == 3 typedef uint8_t pn_popmap_t; #elif PCTRIE_WIDTH == 4 @@ -87,18 +84,13 @@ smr_pctnode_t pn_child[PCTRIE_COUNT]; /* Child nodes. */ }; -enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED }; - -static __inline void pctrie_node_store(smr_pctnode_t *p, void *val, - enum pctrie_access access); - /* * Map index to an array position for the children of node, */ static __inline int pctrie_slot(struct pctrie_node *node, uint64_t index) { - return ((index >> node->pn_clev) & PCTRIE_MASK); + return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1)); } /* @@ -137,6 +129,8 @@ #endif } +enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED }; + /* * Fetch a node pointer from a slot. */ @@ -476,6 +470,21 @@ pctrie_node_store(parentp, parent, PCTRIE_LOCKED); } +/* + * Return the value associated with the node, if the node is a leaf that matches + * the index; otherwise NULL. + */ +static __always_inline uint64_t * +pctrie_match_value(struct pctrie_node *node, uint64_t index) +{ + uint64_t *m; + + if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL || + *m != index) + m = NULL; + return (m); +} + /* * Returns the value stored at the index. If the index is not present, * NULL is returned. @@ -485,21 +494,13 @@ enum pctrie_access access) { struct pctrie_node *node; - uint64_t *m; int slot; node = pctrie_root_load(ptree, smr, access); - for (;;) { - if (pctrie_isleaf(node)) { - if ((m = pctrie_toval(node)) != NULL && *m == index) - return (m); - break; - } - if (pctrie_keybarr(node, index, &slot)) - break; + /* Seek a node that matches index. */ + while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) node = pctrie_node_load(&node->pn_child[slot], smr, access); - } - return (NULL); + return (pctrie_match_value(node, index)); } /* @@ -530,6 +531,146 @@ return (res); } +/* + * Returns the last node examined in the search for the index, and updates the + * search path to that node. + */ +static __always_inline struct pctrie_node * +_pctrie_iter_lookup_node(struct pctrie_iter *iter, struct pctrie_node *node, + uint64_t index, smr_t smr, enum pctrie_access access) +{ + int slot; + + /* Seek a node that matches index. */ + while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) { + KASSERT(iter->top < nitems(iter->path), + ("%s: path overflow in trie %p", __func__, iter->ptree)); + iter->path[iter->top++] = node; + node = pctrie_node_load(&node->pn_child[slot], smr, access); + } + return (node); +} + +/* + * Returns the value stored at the index, and preserves the search path to that + * leaf. If the index is not present, NULL is returned. + */ +static __always_inline uint64_t * +_pctrie_iter_lookup(struct pctrie_iter *iter, struct pctrie *ptree, + uint64_t index, smr_t smr, enum pctrie_access access) +{ + struct pctrie_node *node; + + iter->ptree = ptree; + iter->top = 0; + iter->index = index; + node = pctrie_root_load(ptree, smr, access); + node = _pctrie_iter_lookup_node(iter, node, index, smr, access); + return (pctrie_match_value(node, index)); +} + +/* + * Returns the value stored at the index, assuming access is externally + * synchronized by a lock, and preserves the search path to that leaf. + * + * If the index is not present, NULL is returned. + */ +uint64_t * +pctrie_iter_lookup(struct pctrie_iter *iter, struct pctrie *ptree, + uint64_t index) +{ + return (_pctrie_iter_lookup(iter, ptree, index, NULL, PCTRIE_LOCKED)); +} + +/* + * Returns the value stored at the index without requiring an external lock, + * and preserves the search path to that leaf. + * + * If the index is not present, NULL is returned. + */ +uint64_t * +pctrie_iter_lookup_unlocked(struct pctrie_iter *iter, + struct pctrie *ptree, uint64_t index, smr_t smr) +{ + uint64_t *res; + + smr_enter(smr); + res = _pctrie_iter_lookup(iter, ptree, index, smr, PCTRIE_SMR); + smr_exit(smr); + return (res); +} + +/* + * Climb the search path to find the lowest node from which to start the search + * for a value matching 'index'. + */ +static struct pctrie_node * +pctrie_iter_climb(struct pctrie_iter *iter, uint64_t index, smr_t smr, + enum pctrie_access access) +{ + struct pctrie_node *node; + int slot; + + do { + if (iter->top == 0) + return (pctrie_root_load(iter->ptree, smr, access)); + node = iter->path[--iter->top]; + } while (powerof2(node->pn_popmap) || + pctrie_keybarr(node, index, &slot)); + iter->top++; + return (pctrie_node_load(&node->pn_child[slot], smr, access)); +} + +/* + * Returns the value stored at a fixed offset from the current index value, + * possibly NULL. + */ +static __always_inline uint64_t * +pctrie_iter_fixed_step(struct pctrie_iter *iter, int step, smr_t smr, + enum pctrie_access access) +{ + struct pctrie_node *node; + uint64_t index = iter->index + step; + + /* Detect step overflow. */ + if ((step > 0) != (index > iter->index)) + return (NULL); + iter->index = index; + node = pctrie_iter_climb(iter, index, smr, access); + node = _pctrie_iter_lookup_node(iter, node, index, smr, access); + return (pctrie_match_value(node, index)); +} + +/* + * Returns the value stored at one more than the current index value, possibly + * NULL, assuming access is externally synchronized by a lock. + */ +uint64_t * +pctrie_iter_incr(struct pctrie_iter *iter) +{ + return (pctrie_iter_fixed_step(iter, 1, NULL, PCTRIE_LOCKED)); +} + +/* + * Returns the value stored at one more than the current index value, possibly + * NULL, without serialization. + */ +uint64_t * +pctrie_iter_incr_unserialized(struct pctrie_iter *iter) +{ + return (pctrie_iter_fixed_step(iter, 1, NULL, PCTRIE_UNSERIALIZED)); +} + +/* + * Returns the value stored at one less than the current index value, possibly + * NULL, assuming access is externally synchronized by a lock. + */ +uint64_t * +pctrie_iter_decr(struct pctrie_iter *iter) +{ + return (pctrie_iter_fixed_step(iter, -1, NULL, PCTRIE_LOCKED)); +} + /* * Returns the value with the least index that is greater than or equal to the * specified index, or NULL if there are no such values. @@ -633,6 +774,147 @@ return (pctrie_lookup_ge_node(node, index + 1)); } +/* + * Find first leaf >= index, and fill iter with the path to the parent of that + * leaf. Return NULL if there is no such leaf less than limit. + */ +static __always_inline uint64_t * +_pctrie_iter_lookup_ge_node(struct pctrie_iter *iter, struct pctrie_node *node, + uint64_t index, uint64_t limit) +{ + uint64_t *m; + int slot; + + /* Seek a node that matches index. */ + node = _pctrie_iter_lookup_node(iter, node, index, NULL, PCTRIE_LOCKED); + + /* + * If no such node was found, and instead this path leads only to nodes + * < index, back up to find a subtrie with the least value > index. + */ + if (pctrie_isleaf(node) ? + (m = pctrie_toval(node)) == NULL || *m < index : + node->pn_owner < index) { + /* Climb the path to find a node with a descendant > index. */ + do { + if (iter->top == 0) + return (NULL); + node = iter->path[--iter->top]; + slot = pctrie_slot(node, index) + 1; + } while ((node->pn_popmap >> slot) == 0); + + /* Step to the least child with a descendant > index. */ + slot += ffs(node->pn_popmap >> slot) - 1; + iter->top++; + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + /* Descend to the least leaf of the subtrie. */ + while (!pctrie_isleaf(node)) { + if (limit != 0 && node->pn_owner >= limit) + return (NULL); + slot = ffs(node->pn_popmap) - 1; + KASSERT(iter->top < nitems(iter->path), + ("%s: path overflow in trie %p", __func__, iter->ptree)); + iter->path[iter->top++] = node; + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + m = pctrie_toval(node); + if (limit != 0 && *m >= limit) + return (NULL); + iter->index = *m; + return (m); +} + +/* + * Find first leaf >= index, and fill iter with the path to the parent of that + * leaf. + */ +static uint64_t * +pctrie_iter_lookup_ge_node(struct pctrie_iter *iter, + struct pctrie_node *node, uint64_t index) +{ + return (_pctrie_iter_lookup_ge_node(iter, node, index, 0)); +} + +/* + * Find first leaf >= index, and fill iter with the path to the parent of that + * leaf. Return NULL if such a leaf is not less than limit. + */ +static uint64_t * +pctrie_iter_lookup_ge_node_limit(struct pctrie_iter *iter, + struct pctrie_node *node, uint64_t index, uint64_t limit) +{ + return (_pctrie_iter_lookup_ge_node(iter, node, index, limit)); +} + +/* + * Find the first leaf with value at least 'step' greater than the previous + * leaf. + */ +uint64_t * +pctrie_iter_step(struct pctrie_iter *iter, int64_t step) +{ + struct pctrie_node *node; + uint64_t index = iter->index + step; + + /* Detect step overflow. */ + if ((step > 0) != (index > iter->index)) + return (NULL); + node = pctrie_iter_climb(iter, index, NULL, PCTRIE_LOCKED); + return (pctrie_iter_lookup_ge_node(iter, node, index)); +} + +/* + * Find first leaf >= index, and initialize iter with the path to the parent of + * that leaf. + */ +uint64_t * +pctrie_iter_start(struct pctrie_iter *iter, + struct pctrie *ptree, uint64_t index) +{ + struct pctrie_node *node; + + iter->ptree = ptree; + iter->top = 0; + node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); + return (pctrie_iter_lookup_ge_node(iter, node, index)); +} + +/* + * Find the first leaf with value at least 'step' greater than the previous + * leaf. Return NULL if that value is >= limit. + */ +uint64_t * +pctrie_iter_step_limit(struct pctrie_iter *iter, int64_t step, uint64_t limit) +{ + struct pctrie_node *node; + uint64_t index = iter->index + step; + + /* Detect step overflow. */ + if ((step > 0) != (index > iter->index)) + return (NULL); + node = pctrie_iter_climb(iter, index, NULL, PCTRIE_LOCKED); + return (pctrie_iter_lookup_ge_node_limit(iter, node, index, limit)); +} + +/* + * Find first leaf >= index, and initialize iter with the path to the parent of + * that leaf. Return NULL if the first value is >= limit. + */ +uint64_t * +pctrie_iter_start_limit(struct pctrie_iter *iter, + struct pctrie *ptree, uint64_t index, uint64_t limit) +{ + struct pctrie_node *node; + + iter->ptree = ptree; + iter->top = 0; + node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); + return (pctrie_iter_lookup_ge_node_limit(iter, node, index, limit)); +} + #ifdef INVARIANTS void pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, uint64_t index, @@ -738,42 +1020,25 @@ } #endif -/* - * Remove the specified index from the tree, and return the value stored at - * that index. If the index is not present, return NULL. - */ -uint64_t * -pctrie_remove_lookup(struct pctrie *ptree, uint64_t index, - struct pctrie_node **freenode) +static void +pctrie_remove(struct pctrie *ptree, uint64_t index, struct pctrie_node *parent, + struct pctrie_node *node, struct pctrie_node **freenode) { - struct pctrie_node *child, *node, *parent; - uint64_t *m; + struct pctrie_node *child; int slot; - *freenode = node = NULL; - child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); - for (;;) { - if (pctrie_isleaf(child)) - break; - parent = node; - node = child; - slot = pctrie_slot(node, index); - child = pctrie_node_load(&node->pn_child[slot], NULL, - PCTRIE_LOCKED); - } - if ((m = pctrie_toval(child)) == NULL || *m != index) - return (NULL); if (node == NULL) { pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED); - return (m); + return; } + slot = pctrie_slot(node, index); KASSERT((node->pn_popmap & (1 << slot)) != 0, ("%s: bad popmap slot %d in node %p", __func__, slot, node)); node->pn_popmap ^= 1 << slot; pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED); if (!powerof2(node->pn_popmap)) - return (m); + return; KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__)); slot = ffs(node->pn_popmap) - 1; child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED); @@ -795,9 +1060,87 @@ */ pctrie_node_put(node); *freenode = node; +} + +/* + * Remove the specified index from the tree, and return the value stored at + * that index. If the index is not present, return NULL. + */ +uint64_t * +pctrie_remove_lookup(struct pctrie *ptree, uint64_t index, + struct pctrie_node **freenode) +{ + struct pctrie_node *child, *node, *parent; + uint64_t *m; + int slot; + + parent = (struct pctrie_node *)0xdeadbeef; + *freenode = node = NULL; + child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); + while (!pctrie_isleaf(child)) { + parent = node; + node = child; + slot = pctrie_slot(node, index); + child = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + m = pctrie_match_value(child, index); + if (m != NULL) + pctrie_remove(ptree, index, parent, node, freenode); + return (m); +} + +/* + * Remove from the trie the leaf last chosen by the iterator, and + * adjust the path if it's last member is to be freed. + */ +uint64_t * +pctrie_iter_remove(struct pctrie_iter *iter, struct pctrie_node **freenode) +{ + struct pctrie_node *child, *node, *parent; + uint64_t *m; + int slot; + + *freenode = NULL; + parent = (struct pctrie_node *)0xdeadbeef; + if (iter->top >= 1) { + parent = (iter->top >= 2) ? iter->path[iter->top - 2] : NULL; + node = iter->path[iter->top - 1]; + slot = pctrie_slot(node, iter->index); + child = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } else { + node = NULL; + child = pctrie_root_load(iter->ptree, NULL, PCTRIE_LOCKED); + } + m = pctrie_match_value(child, iter->index); + if (m != NULL) + pctrie_remove(iter->ptree, iter->index, parent, node, freenode); return (m); } +/* + * Return the current leaf, assuming access is externally synchronized by a + * lock. + */ +uint64_t * +pctrie_iter_value(struct pctrie_iter *iter) +{ + struct pctrie_node *node; + int slot; + + if (iter->top == 0) + node = pctrie_root_load(iter->ptree, NULL, + PCTRIE_UNSERIALIZED); + else { + node = iter->path[iter->top - 1]; + slot = pctrie_slot(node, iter->index); + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + return (pctrie_toval(node)); +} + /* * Walk the subtrie rooted at *pnode in order, invoking callback on leaves and * using the leftmost child pointer for path reversal, until an interior node Index: sys/kern/subr_rangeset.c =================================================================== --- sys/kern/subr_rangeset.c +++ sys/kern/subr_rangeset.c @@ -272,8 +272,8 @@ int rangeset_copy(struct rangeset *dst_rs, struct rangeset *src_rs) { + struct pctrie_iter iter; struct rs_el *src_r, *dst_r; - uint64_t cursor; int error; MPASS(pctrie_is_empty(&dst_rs->rs_trie)); @@ -281,10 +281,8 @@ MPASS(dst_rs->rs_dup_data == src_rs->rs_dup_data); error = 0; - for (cursor = 0;; cursor = src_r->re_start + 1) { - src_r = RANGESET_PCTRIE_LOOKUP_GE(&src_rs->rs_trie, cursor); - if (src_r == NULL) - break; + for (src_r = RANGESET_PCTRIE_ITER_START(&iter, &src_rs->rs_trie, 0); + src_r != NULL; src_r = RANGESET_PCTRIE_ITER_STEP(&iter, 1)) { dst_r = dst_rs->rs_dup_data(dst_rs->rs_data_ctx, src_r); if (dst_r == NULL) { error = ENOMEM; @@ -303,13 +301,11 @@ static void rangeset_check(struct rangeset *rs) { + struct pctrie_iter iter; struct rs_el *r, *rp; - uint64_t cursor; - for (cursor = 0, rp = NULL;; cursor = r->re_start + 1, rp = r) { - r = RANGESET_PCTRIE_LOOKUP_GE(&rs->rs_trie, cursor); - if (r == NULL) - break; + for (rp = NULL, r = RANGESET_PCTRIE_ITER_START(&iter, &rs->rs_trie, 0); + r != NULL; rp = r, r = RANGESET_PCTRIE_ITER_STEP(&iter, 1)) { KASSERT(r->re_start < r->re_end, ("invalid interval rs %p elem %p (%#jx, %#jx)", rs, r, (uintmax_t)r->re_start, (uintmax_t)r->re_end)); @@ -332,9 +328,9 @@ DB_SHOW_COMMAND(rangeset, rangeset_show_fn) { + struct pctrie_iter iter; struct rangeset *rs; struct rs_el *r; - uint64_t cursor; if (!have_addr) { db_printf("show rangeset addr\n"); @@ -343,10 +339,8 @@ rs = (struct rangeset *)addr; db_printf("rangeset %p\n", rs); - for (cursor = 0;; cursor = r->re_start + 1) { - r = RANGESET_PCTRIE_LOOKUP_GE(&rs->rs_trie, cursor); - if (r == NULL) - break; + for (r = RANGESET_PCTRIE_ITER_START(&iter, &rs->rs_trie, 0); + r != NULL; r = RANGESET_PCTRIE_ITER_STEP(&iter, 1)) { db_printf(" el %p start %#jx end %#jx\n", r, r->re_start, r->re_end); } Index: sys/sys/pctrie.h =================================================================== --- sys/sys/pctrie.h +++ sys/sys/pctrie.h @@ -48,6 +48,15 @@ return name##_PCTRIE_VAL2PTR(pctrie_lookup_unlocked(ptree, \ key, (smr))); \ } \ + \ +static __inline struct type * \ +name##_PCTRIE_ITER_LOOKUP_UNLOCKED(struct pctrie_iter *iter, \ + struct pctrie *ptree, uint64_t key) \ +{ \ + \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_lookup_unlocked(iter, \ + ptree, key, (smr))); \ +} \ #ifdef INVARIANTS void pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, @@ -195,6 +204,15 @@ return name##_PCTRIE_VAL2PTR(pctrie_lookup(ptree, key)); \ } \ \ +static __inline struct type * \ +name##_PCTRIE_ITER_LOOKUP(struct pctrie_iter *iter, \ + struct pctrie *ptree, uint64_t key) \ +{ \ + \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_lookup(iter, ptree, \ + key)); \ +} \ + \ static __inline __unused struct type * \ name##_PCTRIE_LOOKUP_LE(struct pctrie *ptree, uint64_t key) \ { \ @@ -240,6 +258,80 @@ } \ \ static __inline __unused struct type * \ +name##_PCTRIE_ITER_INCR(struct pctrie_iter *iter) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_incr(iter)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_DECR(struct pctrie_iter *iter) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_decr(iter)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_VALUE(struct pctrie_iter *iter) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_value(iter)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_STEP(struct pctrie_iter *iter, int64_t step) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_step(iter, step)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_NEXT(struct pctrie_iter *iter) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_step(iter, 1)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_START(struct pctrie_iter *iter, \ + struct pctrie *ptree, uint64_t index) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_start(iter, ptree, index)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_STEP_LIMIT(struct pctrie_iter *iter, int64_t step, \ + uint64_t limit) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_step_limit(iter, step, limit)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_NEXT_LIMIT(struct pctrie_iter *iter, uint64_t limit) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_step_limit(iter, 1, limit)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_START_LIMIT(struct pctrie_iter *iter, \ + struct pctrie *ptree, uint64_t index, uint64_t limit) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_start_limit(iter, ptree, index, limit)); \ +} \ + \ +static __inline __unused void \ +name##_PCTRIE_ITER_REMOVE(struct pctrie_iter *iter) \ +{ \ + uint64_t *val; \ + struct pctrie_node *freenode; \ + \ + val = pctrie_iter_remove(iter, &freenode); \ + if (val == NULL) \ + panic("%s: key not found", __func__); \ + if (freenode != NULL) \ + freefn(iter->ptree, freenode); \ +} \ + \ +static __inline __unused struct type * \ name##_PCTRIE_REPLACE(struct pctrie *ptree, struct type *ptr) \ { \ \ @@ -272,6 +364,7 @@ return name##_PCTRIE_VAL2PTR(val); \ } +struct pctrie_iter; void *pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val, uint64_t **found_out); void *pctrie_insert_lookup_gt(struct pctrie *ptree, uint64_t *val, @@ -283,10 +376,28 @@ void pctrie_insert_node(void *parentp, struct pctrie_node *parent, uint64_t *val); uint64_t *pctrie_lookup(struct pctrie *ptree, uint64_t key); -uint64_t *pctrie_lookup_ge(struct pctrie *ptree, uint64_t key); -uint64_t *pctrie_lookup_le(struct pctrie *ptree, uint64_t key); uint64_t *pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t key, smr_t smr); +uint64_t *pctrie_iter_lookup(struct pctrie_iter *iter, + struct pctrie *ptree, uint64_t index); +uint64_t *pctrie_iter_lookup_unlocked(struct pctrie_iter *iter, + struct pctrie *ptree, uint64_t index, smr_t smr); +uint64_t *pctrie_iter_incr(struct pctrie_iter *iter); +uint64_t *pctrie_iter_incr_unserialized(struct pctrie_iter *iter); +uint64_t *pctrie_iter_decr(struct pctrie_iter *iter); +uint64_t *pctrie_lookup_ge(struct pctrie *ptree, uint64_t key); +uint64_t *pctrie_subtree_lookup_gt(struct pctrie_node *node, + uint64_t key); +uint64_t *pctrie_iter_step(struct pctrie_iter *iter, int64_t step); +uint64_t *pctrie_iter_start(struct pctrie_iter *iter, + struct pctrie *ptree, uint64_t index); +uint64_t *pctrie_iter_step_limit(struct pctrie_iter *iter, int64_t step, + uint64_t limit); +uint64_t *pctrie_iter_start_limit(struct pctrie_iter *iter, + struct pctrie *ptree, uint64_t index, uint64_t limit); +uint64_t *pctrie_lookup_le(struct pctrie *ptree, uint64_t key); +uint64_t *pctrie_subtree_lookup_lt(struct pctrie_node *node, + uint64_t key); struct pctrie_node *pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree); struct pctrie_node *pctrie_reclaim_resume(struct pctrie_node **pnode); @@ -297,11 +408,10 @@ pctrie_cb_t callback, int keyoff, void *arg); uint64_t *pctrie_remove_lookup(struct pctrie *ptree, uint64_t index, struct pctrie_node **killnode); +uint64_t *pctrie_iter_remove(struct pctrie_iter *iter, + struct pctrie_node **freenode); +uint64_t *pctrie_iter_value(struct pctrie_iter *iter); uint64_t *pctrie_replace(struct pctrie *ptree, uint64_t *newval); -uint64_t *pctrie_subtree_lookup_gt(struct pctrie_node *node, - uint64_t key); -uint64_t *pctrie_subtree_lookup_lt(struct pctrie_node *node, - uint64_t key); size_t pctrie_node_size(void); int pctrie_zone_init(void *mem, int size, int flags); @@ -342,6 +452,14 @@ #endif #define PCTRIE_COUNT (1 << PCTRIE_WIDTH) +#define PCTRIE_LIMIT howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) + +struct pctrie_iter { + struct pctrie_node *path[PCTRIE_LIMIT]; + struct pctrie *ptree; + uint64_t index; + int top; +}; #endif /* _KERNEL */ #endif /* !_SYS_PCTRIE_H_ */ Index: sys/vm/swap_pager.c =================================================================== --- sys/vm/swap_pager.c +++ sys/vm/swap_pager.c @@ -491,7 +491,7 @@ static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t, vm_size_t *); static void swp_pager_meta_transfer(vm_object_t src, vm_object_t dst, - vm_pindex_t pindex, vm_pindex_t count, vm_size_t *freed); + vm_pindex_t pindex, vm_pindex_t count); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_lookup(vm_object_t, vm_pindex_t); @@ -1084,8 +1084,7 @@ /* * Transfer source to destination. */ - swp_pager_meta_transfer(srcobject, dstobject, offset, dstobject->size, - NULL); + swp_pager_meta_transfer(srcobject, dstobject, offset, dstobject->size); /* * Free left over swap blocks in source. @@ -1776,8 +1775,8 @@ u_long swap_pager_swapped_pages(vm_object_t object) { + struct pctrie_iter iter; struct swblk *sb; - vm_pindex_t pi; u_long res; int i; @@ -1786,9 +1785,10 @@ if (pctrie_is_empty(&object->un_pager.swp.swp_blks)) return (0); - for (res = 0, pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( - &object->un_pager.swp.swp_blks, pi)) != NULL; - pi = sb->p + SWAP_META_PAGES) { + res = 0; + for (sb = SWAP_PCTRIE_ITER_START(&iter, + &object->un_pager.swp.swp_blks, 0); sb != NULL; + sb = SWAP_PCTRIE_ITER_STEP(&iter, SWAP_META_PAGES)) { for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) res++; @@ -1806,6 +1806,7 @@ static void swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object) { + struct pctrie_iter iter; struct page_range range; struct swblk *sb; vm_page_t m; @@ -1822,29 +1823,28 @@ i = 0; swp_pager_init_freerange(&range); for (;;) { - if (i == 0 && (object->flags & OBJ_DEAD) != 0) { - /* - * Make sure that pending writes finish before - * returning. - */ - vm_object_pip_wait(object, "swpoff"); - swp_pager_meta_free_all(object); - break; - } - - if (i == SWAP_META_PAGES) { + if (i == 0) { + if ((object->flags & OBJ_DEAD) != 0) { + /* + * Make sure that pending writes finish before + * returning. + */ + vm_object_pip_wait(object, "swpoff"); + swp_pager_meta_free_all(object); + break; + } + sb = SWAP_PCTRIE_ITER_START(&iter, + &object->un_pager.swp.swp_blks, pi); + } else if (i == SWAP_META_PAGES) { pi = sb->p + SWAP_META_PAGES; if (sb_empty) { - SWAP_PCTRIE_REMOVE( - &object->un_pager.swp.swp_blks, sb->p); + SWAP_PCTRIE_ITER_REMOVE(&iter); uma_zfree(swblk_zone, sb); } + sb = SWAP_PCTRIE_ITER_STEP(&iter, SWAP_META_PAGES); i = 0; } - if (i == 0) { - sb = SWAP_PCTRIE_LOOKUP_GE( - &object->un_pager.swp.swp_blks, pi); if (sb == NULL) break; sb_empty = true; @@ -2142,31 +2142,27 @@ } /* - * SWP_PAGER_META_TRANSFER() - free a range of blocks in the srcobject's swap - * metadata, or transfer it into dstobject. + * SWP_PAGER_META_TRANSFER() - transfer a range of blocks in the srcobject's + * swap metadata into dstobject. * * This routine will free swap metadata structures as they are cleaned * out. */ static void swp_pager_meta_transfer(vm_object_t srcobject, vm_object_t dstobject, - vm_pindex_t pindex, vm_pindex_t count, vm_size_t *moved) + vm_pindex_t pindex, vm_pindex_t count) { struct page_range range; struct swblk *sb; daddr_t blk; - vm_page_t m; vm_pindex_t offset, last; - vm_size_t mc; int i, limit, start; VM_OBJECT_ASSERT_WLOCKED(srcobject); - MPASS(moved == NULL || dstobject == NULL); + VM_OBJECT_ASSERT_WLOCKED(dstobject); - mc = 0; - m = NULL; if (count == 0 || pctrie_is_empty(&srcobject->un_pager.swp.swp_blks)) - goto out; + return; swp_pager_init_freerange(&range); offset = pindex; @@ -2180,13 +2176,11 @@ limit = last - sb->p < SWAP_META_PAGES ? last - sb->p : SWAP_META_PAGES; for (i = start; i < limit; i++) { - blk = sb->d[i]; - if (blk == SWAPBLK_NONE) + if (sb->d[i] == SWAPBLK_NONE) continue; - if (dstobject == NULL || - (blk = swp_pager_meta_build(dstobject, - sb->p + i - offset, blk, true), - blk != sb->d[i] && blk != SWAPBLK_NONE)) + blk = swp_pager_meta_build(dstobject, + sb->p + i - offset, sb->d[i], true); + if (blk != sb->d[i] && blk != SWAPBLK_NONE) swp_pager_update_freerange(&range, sb->d[i]); else if (blk == sb->d[i]) { /* @@ -2197,17 +2191,9 @@ */ VM_OBJECT_WUNLOCK(srcobject); swp_pager_meta_build(dstobject, - sb->p + i - offset, blk, false); + sb->p + i - offset, sb->d[i], false); VM_OBJECT_WLOCK(srcobject); } - if (moved != NULL) { - if (m != NULL && m->pindex != pindex + i - 1) - m = NULL; - m = m != NULL ? vm_page_next(m) : - vm_page_lookup(srcobject, pindex + i); - if (m == NULL || vm_page_none_valid(m)) - mc++; - } sb->d[i] = SWAPBLK_NONE; } pindex = sb->p + SWAP_META_PAGES; @@ -2219,9 +2205,6 @@ } } swp_pager_freeswapspace(&range); -out: - if (moved != NULL) - *moved = mc; } /* @@ -2238,7 +2221,56 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count, vm_size_t *freed) { - swp_pager_meta_transfer(object, NULL, pindex, count, freed); + struct pctrie_iter iter; + struct pctrie *ptree; + struct page_range range; + struct swblk *sb; + vm_page_t m; + vm_pindex_t last; + vm_size_t fc; + int i, limit, start; + + VM_OBJECT_ASSERT_WLOCKED(object); + + fc = 0; + m = NULL; + ptree = &object->un_pager.swp.swp_blks; + if (count == 0 || pctrie_is_empty(ptree)) + goto out; + + swp_pager_init_freerange(&range); + last = pindex + count; + for (sb = SWAP_PCTRIE_ITER_START_LIMIT(&iter, ptree, + rounddown(pindex, SWAP_META_PAGES), last); sb != NULL; + sb = SWAP_PCTRIE_ITER_STEP_LIMIT(&iter, SWAP_META_PAGES, last)) { + start = pindex > sb->p ? pindex - sb->p : 0; + limit = last - sb->p < SWAP_META_PAGES ? last - sb->p : + SWAP_META_PAGES; + for (i = start; i < limit; i++) { + if (sb->d[i] == SWAPBLK_NONE) + continue; + swp_pager_update_freerange(&range, sb->d[i]); + if (freed != NULL) { + if (m != NULL && m->pindex != pindex + i - 1) + m = NULL; + m = m != NULL ? vm_page_next(m) : + vm_page_lookup(object, pindex + i); + if (m == NULL || vm_page_none_valid(m)) + fc++; + } + sb->d[i] = SWAPBLK_NONE; + } + pindex = sb->p + SWAP_META_PAGES; + if (swp_pager_swblk_empty(sb, 0, start) && + swp_pager_swblk_empty(sb, limit, SWAP_META_PAGES)) { + SWAP_PCTRIE_ITER_REMOVE(&iter); + uma_zfree(swblk_zone, sb); + } + } + swp_pager_freeswapspace(&range); +out: + if (freed != NULL) + *freed = fc; } static void @@ -2314,6 +2346,7 @@ vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex) { + struct pctrie_iter iter; struct swblk *sb; int i; @@ -2322,7 +2355,7 @@ if (pctrie_is_empty(&object->un_pager.swp.swp_blks)) return (object->size); - sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, + sb = SWAP_PCTRIE_ITER_START(&iter, &object->un_pager.swp.swp_blks, rounddown(pindex, SWAP_META_PAGES)); if (sb == NULL) return (object->size); @@ -2331,8 +2364,7 @@ if (sb->d[i] != SWAPBLK_NONE) return (sb->p + i); } - sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, - roundup(pindex, SWAP_META_PAGES)); + sb = SWAP_PCTRIE_ITER_STEP(&iter, SWAP_META_PAGES); if (sb == NULL) return (object->size); } @@ -2776,6 +2808,8 @@ long vmspace_swap_count(struct vmspace *vmspace) { + struct pctrie_iter iter; + struct pctrie *ptree; vm_map_t map; vm_map_entry_t cur; vm_object_t object; @@ -2796,13 +2830,12 @@ VM_OBJECT_RLOCK(object); if ((object->flags & OBJ_SWAP) == 0) goto unlock; + ptree = &object->un_pager.swp.swp_blks; pi = OFF_TO_IDX(cur->offset); e = pi + OFF_TO_IDX(cur->end - cur->start); - for (;; pi = sb->p + SWAP_META_PAGES) { - sb = SWAP_PCTRIE_LOOKUP_GE( - &object->un_pager.swp.swp_blks, pi); - if (sb == NULL || sb->p >= e) - break; + for (sb = SWAP_PCTRIE_ITER_START_LIMIT(&iter, ptree, pi, e); + sb != NULL; sb = + SWAP_PCTRIE_ITER_STEP_LIMIT(&iter, SWAP_META_PAGES, e)) { for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->p + i < e && sb->d[i] != SWAPBLK_NONE)