Index: sys/amd64/amd64/pmap.c =================================================================== --- sys/amd64/amd64/pmap.c +++ sys/amd64/amd64/pmap.c @@ -11464,8 +11464,8 @@ static bool pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t *pte) { - struct pmap_pkru_range *next_ppr, *ppr; - vm_offset_t va; + struct pctrie_iter ranges; + struct pmap_pkru_range *ppr; u_int keyidx; PMAP_LOCK_ASSERT(pmap, MA_OWNED); @@ -11476,20 +11476,14 @@ sva >= VM_MAXUSER_ADDRESS) return (true); MPASS(eva <= VM_MAXUSER_ADDRESS); - ppr = rangeset_lookup(&pmap->pm_pkru, sva); - if (ppr == NULL) { - ppr = rangeset_next(&pmap->pm_pkru, sva); - return (ppr == NULL || - ppr->pkru_rs_el.re_start >= eva); - } + ppr = rangeset_lookup_iter(&ranges, &pmap->pm_pkru, sva); + if (ppr == NULL) + return (rangeset_empty_iter(&ranges, sva, eva)); keyidx = ppr->pkru_keyidx; - while ((va = ppr->pkru_rs_el.re_end) < eva) { - next_ppr = rangeset_next(&pmap->pm_pkru, va); - if (next_ppr == NULL || - va != next_ppr->pkru_rs_el.re_start || - keyidx != next_ppr->pkru_keyidx) + while (ppr->pkru_rs_el.re_end < eva) { + if ((ppr = rangeset_next_iter(&ranges)) == NULL || + keyidx != ppr->pkru_keyidx) return (false); - ppr = next_ppr; } *pte |= X86_PG_PKU(keyidx); return (true); Index: sys/arm64/arm64/pmap.c =================================================================== --- sys/arm64/arm64/pmap.c +++ sys/arm64/arm64/pmap.c @@ -9365,8 +9365,8 @@ static bool pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t *pte) { - struct rs_el *next_rs, *rs; - vm_offset_t va; + struct pctrie_iter ranges; + struct rs_el *rs; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(ADDR_IS_CANONICAL(sva), @@ -9383,18 +9383,12 @@ if (pmap->pm_bti == NULL) return (true); PMAP_ASSERT_STAGE1(pmap); - rs = rangeset_lookup(pmap->pm_bti, sva); - if (rs == NULL) { - rs = rangeset_next(pmap->pm_bti, sva); - return (rs == NULL || - rs->re_start >= eva); - } - while ((va = rs->re_end) < eva) { - next_rs = rangeset_next(pmap->pm_bti, va); - if (next_rs == NULL || - va != next_rs->re_start) + rs = rangeset_lookup_iter(&ranges, &pmap->pm_bti, sva); + if (rs == NULL) + return (rangeset_empty_iter(&ranges, sva, eva)); + while (rs->re_end < eva) { + if ((rs = rangeset_next_iter(&ranges)) == NULL) return (false); - rs = next_rs; } *pte |= ATTR_S1_GP; return (true); Index: sys/fs/tmpfs/tmpfs_vnops.c =================================================================== --- sys/fs/tmpfs/tmpfs_vnops.c +++ sys/fs/tmpfs/tmpfs_vnops.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -2070,11 +2071,13 @@ static off_t tmpfs_seek_data_locked(vm_object_t obj, off_t noff) { + struct pctrie_iter blks; vm_page_t m; vm_pindex_t p, p_m, p_swp; p = OFF_TO_IDX(noff); m = vm_page_find_least(obj, p); + swblk_iter_init(&blks, obj); /* * Microoptimize the most common case for SEEK_DATA, where @@ -2083,7 +2086,7 @@ if (m != NULL && vm_page_any_valid(m) && m->pindex == p) return (noff); - p_swp = swap_pager_find_least(obj, p); + p_swp = swap_pager_find_least(&blks, p, obj->size); if (p_swp == p) return (noff); @@ -2111,9 +2114,11 @@ static off_t tmpfs_seek_hole_locked(vm_object_t obj, off_t noff) { + struct pctrie_iter blks; vm_page_t m; vm_pindex_t p, p_swp; + swblk_iter_init(&blks, obj); for (;; noff = tmpfs_seek_next(noff)) { /* * Walk over the largest sequential run of the valid pages. @@ -2128,7 +2133,7 @@ * there is a hole in the swap at the same place. */ p = OFF_TO_IDX(noff); - p_swp = swap_pager_find_least(obj, p); + p_swp = swap_pager_find_least(&blks, p, obj->size); if (p_swp != p) { noff = IDX_TO_OFF(p); break; Index: sys/kern/subr_pctrie.c =================================================================== --- sys/kern/subr_pctrie.c +++ sys/kern/subr_pctrie.c @@ -62,9 +62,6 @@ #include #endif -#define PCTRIE_MASK (PCTRIE_COUNT - 1) -#define PCTRIE_LIMIT (howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1) - #if PCTRIE_WIDTH == 3 typedef uint8_t pn_popmap_t; #elif PCTRIE_WIDTH == 4 @@ -87,18 +84,13 @@ smr_pctnode_t pn_child[PCTRIE_COUNT]; /* Child nodes. */ }; -enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED }; - -static __inline void pctrie_node_store(smr_pctnode_t *p, void *val, - enum pctrie_access access); - /* * Map index to an array position for the children of node, */ static __inline int pctrie_slot(struct pctrie_node *node, uint64_t index) { - return ((index >> node->pn_clev) & PCTRIE_MASK); + return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1)); } /* @@ -137,6 +129,8 @@ #endif } +enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED }; + /* * Fetch a node pointer from a slot. */ @@ -476,6 +470,21 @@ pctrie_node_store(parentp, parent, PCTRIE_LOCKED); } +/* + * Return the value associated with the node, if the node is a leaf that matches + * the index; otherwise NULL. + */ +static __always_inline uint64_t * +pctrie_match_value(struct pctrie_node *node, uint64_t index) +{ + uint64_t *m; + + if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL || + *m != index) + m = NULL; + return (m); +} + /* * Returns the value stored at the index. If the index is not present, * NULL is returned. @@ -485,21 +494,13 @@ enum pctrie_access access) { struct pctrie_node *node; - uint64_t *m; int slot; node = pctrie_root_load(ptree, smr, access); - for (;;) { - if (pctrie_isleaf(node)) { - if ((m = pctrie_toval(node)) != NULL && *m == index) - return (m); - break; - } - if (pctrie_keybarr(node, index, &slot)) - break; + /* Seek a node that matches index. */ + while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) node = pctrie_node_load(&node->pn_child[slot], smr, access); - } - return (NULL); + return (pctrie_match_value(node, index)); } /* @@ -530,6 +531,122 @@ return (res); } +/* + * Returns the last node examined in the search for the index, and updates the + * search path to that node. + */ +static __always_inline struct pctrie_node * +_pctrie_iter_lookup_node(struct pctrie_iter *it, uint64_t index, smr_t smr, + enum pctrie_access access) +{ + struct pctrie_node *node; + int slot; + + /* + * Climb the search path to find the lowest node from which to start the + * search for a value matching 'index'. + */ + while (it->top != 0) { + node = it->path[it->top - 1]; + if (!pctrie_keybarr(node, index, &slot)) { + node = pctrie_node_load( + &node->pn_child[slot], smr, access); + break; + } + --it->top; + } + if (it->top == 0) + node = pctrie_root_load(it->ptree, smr, access); + + /* Seek a node that matches index. */ + while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) { + KASSERT(it->top < nitems(it->path), + ("%s: path overflow in trie %p", __func__, it->ptree)); + it->path[it->top++] = node; + node = pctrie_node_load(&node->pn_child[slot], smr, access); + } + return (node); +} + +/* + * Returns the value stored at a given index value, possibly NULL. + */ +static __always_inline uint64_t * +_pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index, smr_t smr, + enum pctrie_access access) +{ + struct pctrie_node *node; + + it->index = index; + node = _pctrie_iter_lookup_node(it, index, smr, access); + return (pctrie_match_value(node, index)); +} + +/* + * Returns the value stored at a given index value, possibly NULL. + */ +uint64_t * +pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index) +{ + return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED)); +} + +/* + * Returns the value stored at a fixed offset from the current index value, + * possibly NULL. + */ +static __always_inline uint64_t * +_pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr, + enum pctrie_access access) +{ + uint64_t index = it->index + stride; + + /* Detect stride overflow. */ + if ((stride > 0) != (index > it->index)) + return (NULL); + return (_pctrie_iter_lookup(it, index, smr, access)); +} + +/* + * Returns the value stored at a fixed offset from the current index value, + * possibly NULL. + */ +uint64_t * +pctrie_iter_stride(struct pctrie_iter *it, int stride) +{ + return (_pctrie_iter_stride(it, stride, NULL, PCTRIE_LOCKED)); +} + +/* + * Returns the value stored at one more than the current index value, possibly + * NULL, assuming access is externally synchronized by a lock. + */ +uint64_t * +pctrie_iter_next(struct pctrie_iter *it) +{ + return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_LOCKED)); +} + +/* + * Returns the value stored at one more than the current index value, possibly + * NULL, without serialization. + */ +uint64_t * +pctrie_iter_next_unserialized(struct pctrie_iter *it) +{ + return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_UNSERIALIZED)); +} + +/* + * Returns the value stored at one less than the current index value, possibly + * NULL, assuming access is externally synchronized by a lock. + */ +uint64_t * +pctrie_iter_prev(struct pctrie_iter *it) +{ + return (_pctrie_iter_stride(it, -1, NULL, PCTRIE_LOCKED)); +} + /* * Returns the value with the least index that is greater than or equal to the * specified index, or NULL if there are no such values. @@ -633,6 +750,115 @@ return (pctrie_lookup_ge_node(node, index + 1)); } +/* + * Find first leaf >= index, and fill iter with the path to the parent of that + * leaf. Return NULL if there is no such leaf less than limit. + */ +static __always_inline uint64_t * +_pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index, uint64_t limit) +{ + struct pctrie_node *node; + uint64_t *m; + int slot; + + /* Seek a node that matches index. */ + node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED); + + /* + * If no such node was found, and instead this path leads only to nodes + * < index, back up to find a subtrie with the least value > index. + */ + if (pctrie_isleaf(node) ? + (m = pctrie_toval(node)) == NULL || *m < index : + node->pn_owner < index) { + /* Climb the path to find a node with a descendant > index. */ + while (it->top != 0) { + node = it->path[it->top - 1]; + slot = pctrie_slot(node, index) + 1; + if ((node->pn_popmap >> slot) != 0) + break; + --it->top; + } + if (it->top == 0) + return (NULL); + + /* Step to the least child with a descendant > index. */ + slot += ffs(node->pn_popmap >> slot) - 1; + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + /* Descend to the least leaf of the subtrie. */ + while (!pctrie_isleaf(node)) { + if (limit != 0 && node->pn_owner >= limit) + return (NULL); + slot = ffs(node->pn_popmap) - 1; + KASSERT(it->top < nitems(it->path), + ("%s: path overflow in trie %p", __func__, it->ptree)); + it->path[it->top++] = node; + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + m = pctrie_toval(node); + if (limit != 0 && *m >= limit) + return (NULL); + it->index = *m; + return (m); +} + +/* + * Find first leaf >= index, and fill iter with the path to the parent of that + * leaf. + */ +uint64_t * +pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index) +{ + return (_pctrie_iter_lookup_ge(it, index, 0)); +} + +/* + * Find first leaf >= index, and fill iter with the path to the parent of that + * leaf. Return NULL if such a leaf is not less than limit. + */ +uint64_t * +pctrie_iter_lookup_ge_limit(struct pctrie_iter *it, uint64_t index, + uint64_t limit) +{ + return (_pctrie_iter_lookup_ge(it, index, limit)); +} + +/* + * Find the least leaf with value 'jump' or more greater than the previous + * leaf. + */ +uint64_t * +pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump) +{ + uint64_t index = it->index + jump; + + /* Detect jump overflow. */ + if ((jump > 0) != (index > it->index)) + return (NULL); + return (pctrie_iter_lookup_ge(it, index)); +} + +/* + * Find the first leaf with value at least 'jump' greater than the previous + * leaf. Return NULL if that value is >= limit. + */ +uint64_t * +pctrie_iter_jump_ge_limit(struct pctrie_iter *it, int64_t jump, + uint64_t limit) +{ + uint64_t index = it->index + jump; + + /* Detect jump overflow. */ + if ((jump > 0) != (index > it->index)) + return (NULL); + if (limit != 0 && index >= limit) + return (NULL); + return (pctrie_iter_lookup_ge_limit(it, index, limit)); +} + #ifdef INVARIANTS void pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, uint64_t index, @@ -720,6 +946,115 @@ return (pctrie_lookup_le_node(node, index - 1)); } +/* + * Find first leaf <= index, and fill iter with the path to the parent of that + * leaf. Return NULL if there is no such leaf greater than limit. + */ +static __always_inline uint64_t * +_pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index, uint64_t limit) +{ + struct pctrie_node *node; + uint64_t *m; + int slot; + + /* Seek a node that matches index. */ + node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED); + + /* + * If no such node was found, and instead this path leads only to nodes + * > index, back up to find a subtrie with the least value > index. + */ + if (pctrie_isleaf(node) ? + (m = pctrie_toval(node)) == NULL || *m > index : + node->pn_owner > index) { + /* Climb the path to find a node with a descendant < index. */ + while (it->top != 0) { + node = it->path[it->top - 1]; + slot = pctrie_slot(node, index); + if ((node->pn_popmap & ((1 << slot) - 1)) != 0) + break; + --it->top; + } + if (it->top == 0) + return (NULL); + + /* Step to the greatest child with a descendant < index. */ + slot = ilog2(node->pn_popmap & ((1 << slot) - 1)); + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + /* Descend to the greatest leaf of the subtrie. */ + while (!pctrie_isleaf(node)) { + if (limit != 0 && limit >= + node->pn_owner + (PCTRIE_COUNT << node->pn_clev) - 1) + return (NULL); + slot = ilog2(node->pn_popmap); + KASSERT(it->top < nitems(it->path), + ("%s: path overflow in trie %p", __func__, it->ptree)); + it->path[it->top++] = node; + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + m = pctrie_toval(node); + if (limit != 0 && *m <= limit) + return (NULL); + it->index = *m; + return (m); +} + +/* + * Find first leaf <= index, and fill iter with the path to the parent of that + * leaf. + */ +uint64_t * +pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index) +{ + return (_pctrie_iter_lookup_le(it, index, 0)); +} + +/* + * Find first leaf <= index, and fill iter with the path to the parent of that + * leaf. Return NULL if such a leaf is not more than limit. + */ +uint64_t * +pctrie_iter_lookup_le_limit(struct pctrie_iter *it, uint64_t index, + uint64_t limit) +{ + return (_pctrie_iter_lookup_le(it, index, limit)); +} + +/* + * Find the greatest leaf with value 'jump' or more less than the previous leaf. + */ +uint64_t * +pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump) +{ + uint64_t index = it->index - jump; + + /* Detect jump overflow. */ + if ((jump > 0) != (index < it->index)) + return (NULL); + return (pctrie_iter_lookup_le(it, index)); +} + +/* + * Find the first leaf with value at most 'jump' less than the previous + * leaf. Return NULL if that value is <= limit. + */ +uint64_t * +pctrie_iter_jump_le_limit(struct pctrie_iter *it, int64_t jump, + uint64_t limit) +{ + uint64_t index = it->index - jump; + + /* Detect jump overflow. */ + if ((jump > 0) != (index < it->index)) + return (NULL); + if (limit != 0 && index <= limit) + return (NULL); + return (pctrie_iter_lookup_le_limit(it, index, limit)); +} + #ifdef INVARIANTS void pctrie_subtree_lookup_lt_assert(struct pctrie_node *node, uint64_t index, @@ -738,42 +1073,25 @@ } #endif -/* - * Remove the specified index from the tree, and return the value stored at - * that index. If the index is not present, return NULL. - */ -uint64_t * -pctrie_remove_lookup(struct pctrie *ptree, uint64_t index, - struct pctrie_node **freenode) +static void +pctrie_remove(struct pctrie *ptree, uint64_t index, struct pctrie_node *parent, + struct pctrie_node *node, struct pctrie_node **freenode) { - struct pctrie_node *child, *node, *parent; - uint64_t *m; + struct pctrie_node *child; int slot; - *freenode = node = NULL; - child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); - for (;;) { - if (pctrie_isleaf(child)) - break; - parent = node; - node = child; - slot = pctrie_slot(node, index); - child = pctrie_node_load(&node->pn_child[slot], NULL, - PCTRIE_LOCKED); - } - if ((m = pctrie_toval(child)) == NULL || *m != index) - return (NULL); if (node == NULL) { pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED); - return (m); + return; } + slot = pctrie_slot(node, index); KASSERT((node->pn_popmap & (1 << slot)) != 0, ("%s: bad popmap slot %d in node %p", __func__, slot, node)); node->pn_popmap ^= 1 << slot; pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED); if (!powerof2(node->pn_popmap)) - return (m); + return; KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__)); slot = ffs(node->pn_popmap) - 1; child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED); @@ -795,9 +1113,89 @@ */ pctrie_node_put(node); *freenode = node; +} + +/* + * Remove the specified index from the tree, and return the value stored at + * that index. If the index is not present, return NULL. + */ +uint64_t * +pctrie_remove_lookup(struct pctrie *ptree, uint64_t index, + struct pctrie_node **freenode) +{ + struct pctrie_node *child, *node, *parent; + uint64_t *m; + int slot; + + DEBUG_POISON_POINTER(parent); + *freenode = node = NULL; + child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); + while (!pctrie_isleaf(child)) { + parent = node; + node = child; + slot = pctrie_slot(node, index); + child = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + m = pctrie_match_value(child, index); + if (m != NULL) + pctrie_remove(ptree, index, parent, node, freenode); return (m); } +/* + * Remove from the trie the leaf last chosen by the iterator, and + * adjust the path if it's last member is to be freed. + */ +uint64_t * +pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode) +{ + struct pctrie_node *child, *node, *parent; + uint64_t *m; + int slot; + + DEBUG_POISON_POINTER(parent); + *freenode = NULL; + if (it->top >= 1) { + parent = (it->top >= 2) ? it->path[it->top - 2] : NULL; + node = it->path[it->top - 1]; + slot = pctrie_slot(node, it->index); + child = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } else { + node = NULL; + child = pctrie_root_load(it->ptree, NULL, PCTRIE_LOCKED); + } + m = pctrie_match_value(child, it->index); + if (m != NULL) + pctrie_remove(it->ptree, it->index, parent, node, freenode); + if (*freenode != NULL) + --it->top; + return (m); +} + +/* + * Return the current leaf, assuming access is externally synchronized by a + * lock. + */ +uint64_t * +pctrie_iter_value(struct pctrie_iter *it) +{ + struct pctrie_node *node; + int slot; + + if (it->top == 0) + node = pctrie_root_load(it->ptree, NULL, + PCTRIE_UNSERIALIZED); + else { + node = it->path[it->top - 1]; + slot = pctrie_slot(node, it->index); + node = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + } + return (pctrie_toval(node)); +} + /* * Walk the subtrie rooted at *pnode in order, invoking callback on leaves and * using the leftmost child pointer for path reversal, until an interior node Index: sys/kern/subr_rangeset.c =================================================================== --- sys/kern/subr_rangeset.c +++ sys/kern/subr_rangeset.c @@ -262,18 +262,40 @@ } void * -rangeset_next(struct rangeset *rs, uint64_t place) +rangeset_lookup_iter(struct pctrie_iter *ranges, struct rangeset *rs, + uint64_t start) { + struct rs_el *r; rangeset_check(rs); - return (RANGESET_PCTRIE_LOOKUP_GE(&rs->rs_trie, place)); + pctrie_iter_init(ranges, &rs->rs_trie); + r = RANGESET_PCTRIE_ITER_LOOKUP_LE(ranges, start); + if (r != NULL && r->re_end < start) + return (r); + return (NULL); +} + +bool +rangeset_empty_iter(struct pctrie_iter *ranges, uint64_t start, uint64_t end) +{ + return (RANGESET_PCTRIE_ITER_LOOKUP_GE_LIMIT( + ranges, start + 1, end) == NULL); +} + +void * +rangeset_next_iter(struct pctrie_iter *ranges) +{ + struct rs_el *r; + + r = RANGESET_PCTRIE_ITER_VALUE(ranges); + return (RANGESET_PCTRIE_ITER_LOOKUP(ranges, r->re_end)); } int rangeset_copy(struct rangeset *dst_rs, struct rangeset *src_rs) { + struct pctrie_iter ranges; struct rs_el *src_r, *dst_r; - uint64_t cursor; int error; MPASS(pctrie_is_empty(&dst_rs->rs_trie)); @@ -281,10 +303,9 @@ MPASS(dst_rs->rs_dup_data == src_rs->rs_dup_data); error = 0; - for (cursor = 0;; cursor = src_r->re_start + 1) { - src_r = RANGESET_PCTRIE_LOOKUP_GE(&src_rs->rs_trie, cursor); - if (src_r == NULL) - break; + pctrie_iter_init(&ranges, &src_rs->rs_trie); + for (src_r = RANGESET_PCTRIE_ITER_LOOKUP_GE(&ranges, 0); + src_r != NULL; src_r = RANGESET_PCTRIE_ITER_STEP_GE(&ranges)) { dst_r = dst_rs->rs_dup_data(dst_rs->rs_data_ctx, src_r); if (dst_r == NULL) { error = ENOMEM; @@ -303,13 +324,13 @@ static void rangeset_check(struct rangeset *rs) { + struct pctrie_iter ranges; struct rs_el *r, *rp; - uint64_t cursor; - for (cursor = 0, rp = NULL;; cursor = r->re_start + 1, rp = r) { - r = RANGESET_PCTRIE_LOOKUP_GE(&rs->rs_trie, cursor); - if (r == NULL) - break; + pctrie_iter_init(&ranges, &rs->rs_trie); + for (rp = NULL, + r = RANGESET_PCTRIE_ITER_LOOKUP_GE(&ranges, 0); + r != NULL; rp = r, r = RANGESET_PCTRIE_ITER_STEP_GE(&ranges)) { KASSERT(r->re_start < r->re_end, ("invalid interval rs %p elem %p (%#jx, %#jx)", rs, r, (uintmax_t)r->re_start, (uintmax_t)r->re_end)); @@ -332,9 +353,9 @@ DB_SHOW_COMMAND(rangeset, rangeset_show_fn) { + struct pctrie_iter ranges; struct rangeset *rs; struct rs_el *r; - uint64_t cursor; if (!have_addr) { db_printf("show rangeset addr\n"); @@ -343,10 +364,9 @@ rs = (struct rangeset *)addr; db_printf("rangeset %p\n", rs); - for (cursor = 0;; cursor = r->re_start + 1) { - r = RANGESET_PCTRIE_LOOKUP_GE(&rs->rs_trie, cursor); - if (r == NULL) - break; + pctrie_iter_init(&ranges, &rs->rs_trie); + for (r = RANGESET_PCTRIE_ITER_LOOKUP_GE(&ranges, 0); + r != NULL; r = RANGESET_PCTRIE_ITER_STEP_GE(&ranges)) { db_printf(" el %p start %#jx end %#jx\n", r, r->re_start, r->re_end); } Index: sys/sys/pctrie.h =================================================================== --- sys/sys/pctrie.h +++ sys/sys/pctrie.h @@ -41,7 +41,7 @@ #define PCTRIE_DEFINE_SMR(name, type, field, allocfn, freefn, smr) \ PCTRIE_DEFINE(name, type, field, allocfn, freefn) \ \ -static __inline struct type * \ +static __inline __unused struct type * \ name##_PCTRIE_LOOKUP_UNLOCKED(struct pctrie *ptree, uint64_t key) \ { \ \ @@ -240,6 +240,140 @@ } \ \ static __inline __unused struct type * \ +name##_PCTRIE_ITER_LOOKUP(struct pctrie_iter *it, uint64_t index) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_lookup(it, index)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_STRIDE(struct pctrie_iter *it, int stride) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_stride(it, stride)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_NEXT(struct pctrie_iter *it) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_next(it)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_NEXT_UNSERIALIZED(struct pctrie_iter *it) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_next_unserialized(it)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_PREV(struct pctrie_iter *it) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_prev(it)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_VALUE(struct pctrie_iter *it) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_value(it)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_LOOKUP_GE(struct pctrie_iter *it, uint64_t index) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_lookup_ge(it, index)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_JUMP_GE(struct pctrie_iter *it, int64_t jump) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_jump_ge(it, jump)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_STEP_GE(struct pctrie_iter *it) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_jump_ge(it, 1)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_LOOKUP_GE_LIMIT(struct pctrie_iter *it, \ + uint64_t index, uint64_t limit) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_lookup_ge_limit(it, index, limit)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_JUMP_GE_LIMIT(struct pctrie_iter *it, int64_t jump, \ + uint64_t limit) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_jump_ge_limit(it, jump, limit)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_STEP_GE_LIMIT(struct pctrie_iter *it, \ + uint64_t limit) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_jump_ge_limit(it, 1, limit)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_LOOKUP_LE(struct pctrie_iter *it, uint64_t index) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_lookup_le(it, index)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_JUMP_LE(struct pctrie_iter *it, int64_t jump) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_jump_le(it, jump)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_STEP_LE(struct pctrie_iter *it) \ +{ \ + return name##_PCTRIE_VAL2PTR(pctrie_iter_jump_le(it, 1)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_LOOKUP_LE_LIMIT(struct pctrie_iter *it, \ + uint64_t index, uint64_t limit) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_lookup_le_limit(it, index, limit)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_JUMP_LE_LIMIT(struct pctrie_iter *it, int64_t jump, \ + uint64_t limit) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_jump_le_limit(it, jump, limit)); \ +} \ + \ +static __inline __unused struct type * \ +name##_PCTRIE_ITER_STEP_LE_LIMIT(struct pctrie_iter *it, \ + uint64_t limit) \ +{ \ + return name##_PCTRIE_VAL2PTR( \ + pctrie_iter_jump_le_limit(it, 1, limit)); \ +} \ + \ +static __inline __unused void \ +name##_PCTRIE_ITER_REMOVE(struct pctrie_iter *it) \ +{ \ + uint64_t *val; \ + struct pctrie_node *freenode; \ + \ + val = pctrie_iter_remove(it, &freenode); \ + if (val == NULL) \ + panic("%s: key not found", __func__); \ + if (freenode != NULL) \ + freefn(it->ptree, freenode); \ +} \ + \ +static __inline __unused struct type * \ name##_PCTRIE_REPLACE(struct pctrie *ptree, struct type *ptr) \ { \ \ @@ -272,6 +406,7 @@ return name##_PCTRIE_VAL2PTR(val); \ } +struct pctrie_iter; void *pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val, uint64_t **found_out); void *pctrie_insert_lookup_gt(struct pctrie *ptree, uint64_t *val, @@ -283,10 +418,31 @@ void pctrie_insert_node(void *parentp, struct pctrie_node *parent, uint64_t *val); uint64_t *pctrie_lookup(struct pctrie *ptree, uint64_t key); -uint64_t *pctrie_lookup_ge(struct pctrie *ptree, uint64_t key); -uint64_t *pctrie_lookup_le(struct pctrie *ptree, uint64_t key); uint64_t *pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t key, smr_t smr); +uint64_t *pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index); +uint64_t *pctrie_iter_stride(struct pctrie_iter *it, int stride); +uint64_t *pctrie_iter_next(struct pctrie_iter *it); +uint64_t *pctrie_iter_next_unserialized(struct pctrie_iter *it); +uint64_t *pctrie_iter_prev(struct pctrie_iter *it); +uint64_t *pctrie_lookup_ge(struct pctrie *ptree, uint64_t key); +uint64_t *pctrie_subtree_lookup_gt(struct pctrie_node *node, + uint64_t key); +uint64_t *pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index); +uint64_t *pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump); +uint64_t *pctrie_iter_lookup_ge_limit(struct pctrie_iter *it, + uint64_t index, uint64_t limit); +uint64_t *pctrie_iter_jump_ge_limit(struct pctrie_iter *it, + int64_t jump, uint64_t limit); +uint64_t *pctrie_lookup_le(struct pctrie *ptree, uint64_t key); +uint64_t *pctrie_subtree_lookup_lt(struct pctrie_node *node, + uint64_t key); +uint64_t *pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index); +uint64_t *pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump); +uint64_t *pctrie_iter_lookup_le_limit(struct pctrie_iter *it, + uint64_t index, uint64_t limit); +uint64_t *pctrie_iter_jump_le_limit(struct pctrie_iter *it, + int64_t jump, uint64_t limit); struct pctrie_node *pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree); struct pctrie_node *pctrie_reclaim_resume(struct pctrie_node **pnode); @@ -297,11 +453,10 @@ pctrie_cb_t callback, int keyoff, void *arg); uint64_t *pctrie_remove_lookup(struct pctrie *ptree, uint64_t index, struct pctrie_node **killnode); +uint64_t *pctrie_iter_remove(struct pctrie_iter *it, + struct pctrie_node **freenode); +uint64_t *pctrie_iter_value(struct pctrie_iter *it); uint64_t *pctrie_replace(struct pctrie *ptree, uint64_t *newval); -uint64_t *pctrie_subtree_lookup_gt(struct pctrie_node *node, - uint64_t key); -uint64_t *pctrie_subtree_lookup_lt(struct pctrie_node *node, - uint64_t key); size_t pctrie_node_size(void); int pctrie_zone_init(void *mem, int size, int flags); @@ -342,6 +497,21 @@ #endif #define PCTRIE_COUNT (1 << PCTRIE_WIDTH) +#define PCTRIE_LIMIT howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) + +struct pctrie_iter { + struct pctrie_node *path[PCTRIE_LIMIT]; + struct pctrie *ptree; + uint64_t index; + int top; +}; + +static __inline void +pctrie_iter_init(struct pctrie_iter *it, struct pctrie *ptree) +{ + it->ptree = ptree; + it->top = 0; +} #endif /* _KERNEL */ #endif /* !_SYS_PCTRIE_H_ */ Index: sys/sys/rangeset.h =================================================================== --- sys/sys/rangeset.h +++ sys/sys/rangeset.h @@ -34,6 +34,7 @@ #ifdef _KERNEL #include +struct pctrie_iter; typedef bool (*rs_pred_t)(void *ctx, void *r); @@ -75,10 +76,23 @@ void *rangeset_lookup(struct rangeset *rs, uint64_t place); /* - * Finds the first range that begins at or after place. + * Returns the range that includes 'place', if any, and saves in iter the path + * to that place.. */ -void *rangeset_next(struct rangeset *rs, uint64_t place); +void *rangeset_lookup_iter(struct pctrie_iter *ranges, struct rangeset *rs, + uint64_t start); +/* + * After lookup_iter fails, report whether the interval from start to end is + * empty. + */ +bool rangeset_empty_iter(struct pctrie_iter *ranges, uint64_t sva, + uint64_t eva); + +/* + * Finds the range that abuts the current one to the right, if any. + */ +void *rangeset_next_iter(struct pctrie_iter *ranges); /* * Copies src_rs entries into dst_rs. dst_rs must be empty. * Leaves dst_rs empty on failure. Index: sys/vm/swap_pager.h =================================================================== --- sys/vm/swap_pager.h +++ sys/vm/swap_pager.h @@ -40,6 +40,7 @@ #include struct buf; +struct pctrie_iter; struct swdevt; struct thread; typedef void sw_strategy_t(struct buf *, struct swdevt *); @@ -74,7 +75,9 @@ struct xswdev; int swap_dev_info(int name, struct xswdev *xs, char *devname, size_t len); void swap_pager_copy(vm_object_t, vm_object_t, vm_pindex_t, int); -vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex); +void swblk_iter_init(struct pctrie_iter *blks, vm_object_t object); +vm_pindex_t swap_pager_find_least(struct pctrie_iter *blks, vm_pindex_t pindex, + vm_pindex_t size); void swap_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size, vm_size_t *freed); void swap_pager_swap_init(void); Index: sys/vm/swap_pager.c =================================================================== --- sys/vm/swap_pager.c +++ sys/vm/swap_pager.c @@ -491,7 +491,7 @@ static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t, vm_size_t *); static void swp_pager_meta_transfer(vm_object_t src, vm_object_t dst, - vm_pindex_t pindex, vm_pindex_t count, vm_size_t *freed); + vm_pindex_t pindex, vm_pindex_t count); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_lookup(vm_object_t, vm_pindex_t); @@ -531,6 +531,77 @@ PCTRIE_DEFINE(SWAP, swblk, p, swblk_trie_alloc, swblk_trie_free); +void +swblk_iter_init(struct pctrie_iter *blks, vm_object_t object) +{ + VM_OBJECT_ASSERT_LOCKED(object); + MPASS((object->flags & OBJ_SWAP) != 0); + pctrie_iter_init(blks, &object->un_pager.swp.swp_blks); +} + +static struct swblk * +swblk_lookup(vm_object_t object, vm_pindex_t pindex) +{ + return (SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, pindex)); +} + +static struct swblk * +swblk_start(struct pctrie_iter *blks, vm_pindex_t pindex) +{ + return (SWAP_PCTRIE_ITER_LOOKUP_GE(blks, + rounddown(pindex, SWAP_META_PAGES))); +} + +static struct swblk * +swblk_next(struct pctrie_iter *blks) +{ + return (SWAP_PCTRIE_ITER_JUMP_GE(blks, SWAP_META_PAGES)); +} + +static struct swblk * +swblk_start_limit(struct pctrie_iter *blks, vm_pindex_t pindex, + vm_pindex_t limit) +{ + return (SWAP_PCTRIE_ITER_LOOKUP_GE_LIMIT(blks, + rounddown(pindex, SWAP_META_PAGES), limit)); +} + +static struct swblk * +swblk_next_limit(struct pctrie_iter *blks, vm_pindex_t limit) +{ + return (SWAP_PCTRIE_ITER_JUMP_GE_LIMIT(blks, SWAP_META_PAGES, limit)); +} + +static struct swblk * +swblk_restart(struct pctrie_iter *blks, vm_pindex_t pindex) +{ + return (SWAP_PCTRIE_ITER_LOOKUP(blks, pindex)); +} + +static void +swblk_remove(struct pctrie_iter *blks) +{ + SWAP_PCTRIE_ITER_REMOVE(blks); +} + +static void +swblk_lookup_remove(vm_object_t object, vm_pindex_t pindex) +{ + SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, pindex); +} + +static int +swblk_lookup_insert(vm_object_t object, struct swblk *sb) +{ + return (SWAP_PCTRIE_INSERT(&object->un_pager.swp.swp_blks, sb)); +} + +static bool +swblk_is_empty(vm_object_t object) +{ + return (pctrie_is_empty(&object->un_pager.swp.swp_blks)); +} + /* * SWP_SIZECHECK() - update swap_pager_full indication * @@ -1084,8 +1155,7 @@ /* * Transfer source to destination. */ - swp_pager_meta_transfer(srcobject, dstobject, offset, dstobject->size, - NULL); + swp_pager_meta_transfer(srcobject, dstobject, offset, dstobject->size); /* * Free left over swap blocks in source. @@ -1218,8 +1288,7 @@ } swap_pager_unswapped_acct(m); - sb = SWAP_PCTRIE_LOOKUP(&m->object->un_pager.swp.swp_blks, - rounddown(m->pindex, SWAP_META_PAGES)); + sb = swblk_lookup(m->object, rounddown(m->pindex, SWAP_META_PAGES)); if (sb == NULL) return; range.start = sb->d[m->pindex % SWAP_META_PAGES]; @@ -1776,19 +1845,19 @@ u_long swap_pager_swapped_pages(vm_object_t object) { + struct pctrie_iter blks; struct swblk *sb; - vm_pindex_t pi; u_long res; int i; VM_OBJECT_ASSERT_LOCKED(object); - if (pctrie_is_empty(&object->un_pager.swp.swp_blks)) + if (swblk_is_empty(object)) return (0); - for (res = 0, pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( - &object->un_pager.swp.swp_blks, pi)) != NULL; - pi = sb->p + SWAP_META_PAGES) { + res = 0; + swblk_iter_init(&blks, object); + for (sb = swblk_start(&blks, 0); sb != NULL; sb = swblk_next(&blks)) { for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) res++; @@ -1806,6 +1875,7 @@ static void swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object) { + struct pctrie_iter blks; struct page_range range; struct swblk *sb; vm_page_t m; @@ -1822,31 +1892,33 @@ i = 0; swp_pager_init_freerange(&range); for (;;) { - if (i == 0 && (object->flags & OBJ_DEAD) != 0) { - /* - * Make sure that pending writes finish before - * returning. - */ - vm_object_pip_wait(object, "swpoff"); - swp_pager_meta_free_all(object); - break; - } - - if (i == SWAP_META_PAGES) { + if (i == 0) { + if ((object->flags & OBJ_DEAD) != 0) { + /* + * Make sure that pending writes finish before + * returning. + */ + vm_object_pip_wait(object, "swpoff"); + swp_pager_meta_free_all(object); + break; + } + VM_OBJECT_NORELEASE(object); + swblk_iter_init(&blks, object); + sb = swblk_start(&blks, pi); + } else if (i == SWAP_META_PAGES) { pi = sb->p + SWAP_META_PAGES; if (sb_empty) { - SWAP_PCTRIE_REMOVE( - &object->un_pager.swp.swp_blks, sb->p); + swblk_remove(&blks); uma_zfree(swblk_zone, sb); } + sb = swblk_next(&blks); i = 0; } - if (i == 0) { - sb = SWAP_PCTRIE_LOOKUP_GE( - &object->un_pager.swp.swp_blks, pi); - if (sb == NULL) + if (sb == NULL) { + VM_OBJECT_RELEASEOK(object); break; + } sb_empty = true; m = NULL; } @@ -1868,6 +1940,7 @@ m = m != NULL ? vm_page_next(m) : vm_page_lookup(object, sb->p + i); if (m != NULL && (m->oflags & VPO_SWAPINPROG) != 0) { + VM_OBJECT_RELEASEOK(object); m->oflags |= VPO_SWAPSLEEP; VM_OBJECT_SLEEP(object, &object->handle, PSWP, "swpoff", 0); @@ -1897,6 +1970,7 @@ continue; /* Get the page from swap, mark it dirty, restart the scan. */ + VM_OBJECT_RELEASEOK(object); vm_object_pip_add(object, 1); rahead = SWAP_META_PAGES; rv = swap_pager_getpages_locked(object, &m, 1, NULL, &rahead); @@ -2020,7 +2094,7 @@ { if (swp_pager_swblk_empty(sb, 0, SWAP_META_PAGES)) { - SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); + swblk_lookup_remove(object, sb->p); uma_zfree(swblk_zone, sb); } } @@ -2050,7 +2124,7 @@ VM_OBJECT_ASSERT_WLOCKED(object); rdpi = rounddown(pindex, SWAP_META_PAGES); - sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi); + sb = swblk_lookup(object, rdpi); if (sb == NULL) { if (swapblk == SWAPBLK_NONE) return (SWAPBLK_NONE); @@ -2079,8 +2153,7 @@ } else uma_zwait(swblk_zone); VM_OBJECT_WLOCK(object); - sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, - rdpi); + sb = swblk_lookup(object, rdpi); if (sb != NULL) /* * Somebody swapped out a nearby page, @@ -2090,8 +2163,7 @@ goto allocated; } for (;;) { - error = SWAP_PCTRIE_INSERT( - &object->un_pager.swp.swp_blks, sb); + error = swblk_lookup_insert(object, sb); if (error == 0) { if (atomic_cmpset_int(&swpctrie_zone_exhausted, 1, 0)) @@ -2113,8 +2185,7 @@ } else uma_zwait(swpctrie_zone); VM_OBJECT_WLOCK(object); - sb1 = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, - rdpi); + sb1 = swblk_lookup(object, rdpi); if (sb1 != NULL) { uma_zfree(swblk_zone, sb); sb = sb1; @@ -2142,85 +2213,76 @@ } /* - * SWP_PAGER_META_TRANSFER() - free a range of blocks in the srcobject's swap - * metadata, or transfer it into dstobject. + * SWP_PAGER_META_TRANSFER() - transfer a range of blocks in the srcobject's + * swap metadata into dstobject. * * This routine will free swap metadata structures as they are cleaned * out. */ static void swp_pager_meta_transfer(vm_object_t srcobject, vm_object_t dstobject, - vm_pindex_t pindex, vm_pindex_t count, vm_size_t *moved) + vm_pindex_t pindex, vm_pindex_t count) { + struct pctrie_iter blks; struct page_range range; struct swblk *sb; daddr_t blk; - vm_page_t m; - vm_pindex_t offset, last; - vm_size_t mc; + vm_pindex_t last; int i, limit, start; + bool reset; VM_OBJECT_ASSERT_WLOCKED(srcobject); - MPASS(moved == NULL || dstobject == NULL); + VM_OBJECT_ASSERT_WLOCKED(dstobject); - mc = 0; - m = NULL; - if (count == 0 || pctrie_is_empty(&srcobject->un_pager.swp.swp_blks)) - goto out; + if (count == 0 || swblk_is_empty(srcobject)) + return; swp_pager_init_freerange(&range); - offset = pindex; + VM_OBJECT_NORELEASE(srcobject); + swblk_iter_init(&blks, srcobject); last = pindex + count; - for (;;) { - sb = SWAP_PCTRIE_LOOKUP_GE(&srcobject->un_pager.swp.swp_blks, - rounddown(pindex, SWAP_META_PAGES)); - if (sb == NULL || sb->p >= last) - break; - start = pindex > sb->p ? pindex - sb->p : 0; - limit = last - sb->p < SWAP_META_PAGES ? last - sb->p : - SWAP_META_PAGES; + reset = false; + for (sb = swblk_start_limit(&blks, pindex, last), + start = (sb != NULL && sb->p < pindex) ? pindex - sb->p : 0; + sb != NULL; sb = swblk_next_limit(&blks, last), start = 0) { + limit = MIN(last - sb->p, SWAP_META_PAGES); for (i = start; i < limit; i++) { - blk = sb->d[i]; - if (blk == SWAPBLK_NONE) + if (sb->d[i] == SWAPBLK_NONE) continue; - if (dstobject == NULL || - (blk = swp_pager_meta_build(dstobject, - sb->p + i - offset, blk, true), - blk != sb->d[i] && blk != SWAPBLK_NONE)) - swp_pager_update_freerange(&range, sb->d[i]); - else if (blk == sb->d[i]) { + blk = swp_pager_meta_build(dstobject, + sb->p + i - pindex, sb->d[i], true); + if (blk == sb->d[i]) { /* * Destination has no swapblk and is not * resident, so transfer source. * swp_pager_meta_build() failed memory * allocation already, likely to sleep in retry. */ + VM_OBJECT_RELEASEOK(srcobject); VM_OBJECT_WUNLOCK(srcobject); swp_pager_meta_build(dstobject, - sb->p + i - offset, blk, false); + sb->p + i - pindex, sb->d[i], false); VM_OBJECT_WLOCK(srcobject); - } - if (moved != NULL) { - m = (m != NULL && m->pindex == sb->p + i - 1) ? - vm_page_next(m) : - vm_page_lookup(srcobject, sb->p + i); - if (m == NULL || vm_page_none_valid(m)) - mc++; - } + VM_OBJECT_NORELEASE(srcobject); + reset = true; + } else if (blk != SWAPBLK_NONE) + swp_pager_update_freerange(&range, sb->d[i]); sb->d[i] = SWAPBLK_NONE; } - pindex = sb->p + SWAP_META_PAGES; + if (reset) { + /* Rebuild search path after losing object lock. */ + reset = false; + swblk_iter_init(&blks, srcobject); + swblk_restart(&blks, sb->p); + } if (swp_pager_swblk_empty(sb, 0, start) && swp_pager_swblk_empty(sb, limit, SWAP_META_PAGES)) { - SWAP_PCTRIE_REMOVE(&srcobject->un_pager.swp.swp_blks, - sb->p); + swblk_remove(&blks); uma_zfree(swblk_zone, sb); } } + VM_OBJECT_RELEASEOK(srcobject); swp_pager_freeswapspace(&range); -out: - if (moved != NULL) - *moved = mc; } /* @@ -2237,7 +2299,53 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count, vm_size_t *freed) { - swp_pager_meta_transfer(object, NULL, pindex, count, freed); + struct pctrie_iter blks; + struct page_range range; + struct swblk *sb; + vm_page_t m; + vm_pindex_t last; + vm_size_t fc; + int i, limit, start; + + VM_OBJECT_ASSERT_WLOCKED(object); + + fc = 0; + m = NULL; + if (count == 0 || swblk_is_empty(object)) + goto out; + + swp_pager_init_freerange(&range); + last = pindex + count; + VM_OBJECT_NORELEASE(object); + swblk_iter_init(&blks, object); + for (sb = swblk_start_limit(&blks, pindex, last), + start = (sb != NULL && sb->p < pindex) ? pindex - sb->p : 0; + sb != NULL; sb = swblk_next_limit(&blks, last), start = 0) { + limit = MIN(last - sb->p, SWAP_META_PAGES); + for (i = start; i < limit; i++) { + if (sb->d[i] == SWAPBLK_NONE) + continue; + swp_pager_update_freerange(&range, sb->d[i]); + if (freed != NULL) { + m = (m != NULL && m->pindex == sb->p + i - 1) ? + vm_page_next(m) : + vm_page_lookup(object, sb->p + 1); + if (m == NULL || vm_page_none_valid(m)) + fc++; + } + sb->d[i] = SWAPBLK_NONE; + } + if (swp_pager_swblk_empty(sb, 0, start) && + swp_pager_swblk_empty(sb, limit, SWAP_META_PAGES)) { + swblk_remove(&blks); + uma_zfree(swblk_zone, sb); + } + } + VM_OBJECT_RELEASEOK(object); + swp_pager_freeswapspace(&range); +out: + if (freed != NULL) + *freed = fc; } static void @@ -2296,44 +2404,33 @@ KASSERT((object->flags & OBJ_SWAP) != 0, ("Lookup object not swappable")); - sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, - rounddown(pindex, SWAP_META_PAGES)); + sb = swblk_lookup(object, rounddown(pindex, SWAP_META_PAGES)); if (sb == NULL) return (SWAPBLK_NONE); return (sb->d[pindex % SWAP_META_PAGES]); } /* - * Returns the least page index which is greater than or equal to the - * parameter pindex and for which there is a swap block allocated. - * Returns object's size if the object's type is not swap or if there - * are no allocated swap blocks for the object after the requested - * pindex. + * Returns the least page index which is greater than or equal to the parameter + * pindex and for which there is a swap block allocated. Returns size if are no + * allocated swap blocks for the object after the requested pindex. */ vm_pindex_t -swap_pager_find_least(vm_object_t object, vm_pindex_t pindex) +swap_pager_find_least(struct pctrie_iter *blks, vm_pindex_t pindex, + vm_pindex_t size) { struct swblk *sb; int i; - VM_OBJECT_ASSERT_LOCKED(object); - MPASS((object->flags & OBJ_SWAP) != 0); - - if (pctrie_is_empty(&object->un_pager.swp.swp_blks)) - return (object->size); - sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, - rounddown(pindex, SWAP_META_PAGES)); - if (sb == NULL) - return (object->size); + if ((sb = swblk_start(blks, pindex)) == NULL) + return (size); if (sb->p < pindex) { for (i = pindex % SWAP_META_PAGES; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) return (sb->p + i); } - sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, - roundup(pindex, SWAP_META_PAGES)); - if (sb == NULL) - return (object->size); + if ((sb = swblk_next(blks)) == NULL) + return (size); } for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) @@ -2345,7 +2442,7 @@ * doesn't map any blocks. */ MPASS(0); - return (object->size); + return (size); } /* @@ -2775,6 +2872,7 @@ long vmspace_swap_count(struct vmspace *vmspace) { + struct pctrie_iter blks; vm_map_t map; vm_map_entry_t cur; vm_object_t object; @@ -2797,11 +2895,9 @@ goto unlock; pi = OFF_TO_IDX(cur->offset); e = pi + OFF_TO_IDX(cur->end - cur->start); - for (;; pi = sb->p + SWAP_META_PAGES) { - sb = SWAP_PCTRIE_LOOKUP_GE( - &object->un_pager.swp.swp_blks, pi); - if (sb == NULL || sb->p >= e) - break; + swblk_iter_init(&blks, object); + for (sb = swblk_start_limit(&blks, pi, e); + sb != NULL; sb = swblk_next_limit(&blks, e)) { for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->p + i < e && sb->d[i] != SWAPBLK_NONE) Index: sys/vm/vm_object.h =================================================================== --- sys/vm/vm_object.h +++ sys/vm/vm_object.h @@ -269,6 +269,8 @@ lock_class_rw.lc_unlock(&(object)->lock.lock_object) #define VM_OBJECT_PICKUP(object, state) \ lock_class_rw.lc_lock(&(object)->lock.lock_object, (state)) +#define VM_OBJECT_NORELEASE(object) WITNESS_NORELEASE(&(object)->lock) +#define VM_OBJECT_RELEASEOK(object) WITNESS_RELEASEOK(&(object)->lock) #define VM_OBJECT_ASSERT_PAGING(object) \ KASSERT(blockcount_read(&(object)->paging_in_progress) != 0, \ Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c +++ sys/vm/vm_object.c @@ -1681,9 +1681,10 @@ static bool vm_object_scan_all_shadowed(vm_object_t object) { + struct pctrie_iter blks; vm_object_t backing_object; vm_page_t p, pp; - vm_pindex_t backing_offset_index, new_pindex, pi, ps; + vm_pindex_t backing_offset_index, new_pindex, pi, ps, size; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object->backing_object); @@ -1695,7 +1696,9 @@ pi = backing_offset_index = OFF_TO_IDX(object->backing_object_offset); p = vm_page_find_least(backing_object, pi); - ps = swap_pager_find_least(backing_object, pi); + size = backing_object->size; + swblk_iter_init(&blks, backing_object); + ps = swap_pager_find_least(&blks, pi, size); /* * Only check pages inside the parent object's range and @@ -1705,8 +1708,8 @@ if (p != NULL && p->pindex < pi) p = TAILQ_NEXT(p, listq); if (ps < pi) - ps = swap_pager_find_least(backing_object, pi); - if (p == NULL && ps >= backing_object->size) + ps = swap_pager_find_least(&blks, pi, size); + if (p == NULL && ps >= size) break; else if (p == NULL) pi = ps;