Index: sys/kern/subr_pctrie.c
===================================================================
--- sys/kern/subr_pctrie.c
+++ sys/kern/subr_pctrie.c
@@ -80,13 +80,10 @@
     "pn_popmap_t too wide");
 
 /* Flag bits stored in node pointers. */
-#define	PCTRIE_ISLEAF	0x1
 #define	PCTRIE_FLAGS	0x1
 #define	PCTRIE_PAD	PCTRIE_FLAGS
 
-/* Returns one unit associated with specified level. */
-#define	PCTRIE_UNITLEVEL(lev)	\
-	((uint64_t)1 << ((lev) * PCTRIE_WIDTH))
+enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };
 
 struct pctrie_node;
 typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;
@@ -94,29 +91,22 @@
 struct pctrie_node {
 	uint64_t	pn_owner;			/* Owner of record. */
 	pn_popmap_t	pn_popmap;			/* Valid children. */
-	uint8_t		pn_clev;			/* Current level. */
+	uint8_t		pn_clev;			/* Level * WIDTH. */
 	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
 };
-
-enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };
+#define	PCTRIE_NULL	(struct pctrie_node *)PCTRIE_ISLEAF
 
 static __inline void pctrie_node_store(smr_pctnode_t *p, void *val,
     enum pctrie_access access);
 
 /*
- * Return the position in the array for a given level.
+ * Map index to an array position for the children of node.
+ * Returns a value >= PCTRIE_COUNT if there is no such index.
  */
-static __inline int
-pctrie_slot(uint64_t index, uint16_t level)
-{
-	return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK);
-}
-
-/* Computes the key (index) with the low-order 'level' radix-digits zeroed. */
 static __inline uint64_t
-pctrie_trimkey(uint64_t index, uint16_t level)
+pctrie_slot(uint64_t index, struct pctrie_node *node)
 {
-	return (index & -PCTRIE_UNITLEVEL(level));
+	return ((index - node->pn_owner) >> node->pn_clev);
 }
 
 /*
@@ -125,7 +115,7 @@
  */
 static struct pctrie_node *
 pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t index,
-    uint16_t clevel)
+    uint64_t newind)
 {
 	struct pctrie_node *node;
 
@@ -140,11 +130,22 @@
 	 */
 	if (node->pn_popmap != 0) {
 		pctrie_node_store(&node->pn_child[ffs(node->pn_popmap) - 1],
-		    NULL, PCTRIE_UNSERIALIZED);
+		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
 		node->pn_popmap = 0;
 	}
-	node->pn_owner = pctrie_trimkey(index, clevel + 1);
-	node->pn_clev = clevel;
+
+	/*
+	 * From the highest-order bit where the indexes differ,
+	 * compute the highest level in the trie where they differ.  Then,
+	 * compute the least index of this subtrie.
+	 */
+	KASSERT(index != newind, ("%s: passing the same key value %jx",
+	    __func__, (uintmax_t)index));
+	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
+	    "uint64 too wide");
+	node->pn_clev = rounddown(flsll(index ^ newind) - 1, PCTRIE_WIDTH);
+	node->pn_owner = PCTRIE_COUNT;
+	node->pn_owner = index & -(node->pn_owner << node->pn_clev);
 	return (node);
 }
 
@@ -165,7 +166,8 @@
 		if ((node->pn_popmap & (1 << slot)) != 0)
 			continue;
 		KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
-		    NULL, ("pctrie_node_put: node %p has a child", node));
+		    PCTRIE_NULL,
+		    ("pctrie_node_put: node %p has a child", node));
 	}
 #endif
 	freefn(ptree, node);
@@ -177,20 +179,29 @@
 static __inline struct pctrie_node *
 pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
 {
+	struct pctrie_node *node;
+
 	switch (access) {
 	case PCTRIE_UNSERIALIZED:
-		return (smr_unserialized_load(p, true));
+		node = smr_unserialized_load(p, true);
+		break;
 	case PCTRIE_LOCKED:
-		return (smr_serialized_load(p, true));
+		node = smr_serialized_load(p, true);
+		break;
 	case PCTRIE_SMR:
-		return (smr_entered_load(p, smr));
+		node = smr_entered_load(p, smr);
+		break;
+	default:
+		__assert_unreachable();
 	}
-	__assert_unreachable();
+	KASSERT(node != NULL, ("%s: NULL node", __func__));
+	return (node);
 }
 
 static __inline void
 pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
 {
+	KASSERT(v != NULL, ("%s: NULL node", __func__));
 	switch (access) {
 	case PCTRIE_UNSERIALIZED:
 		smr_unserialized_store(p, v, true);
@@ -259,52 +270,18 @@
  * Make 'child' a child of 'node'.
  */
 static __inline void
-pctrie_addnode(struct pctrie_node *node, uint64_t index, uint16_t clev,
+pctrie_addnode(struct pctrie_node *node, uint64_t index,
    struct pctrie_node *child, enum pctrie_access access)
 {
 	int slot;
 
-	slot = pctrie_slot(index, clev);
+	slot = pctrie_slot(index, node);
 	pctrie_node_store(&node->pn_child[slot], child, access);
 	node->pn_popmap ^= 1 << slot;
 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
 }
 
-/*
- * Returns the level where two keys differ.
- * It cannot accept 2 equal keys.
- */
-static __inline uint16_t
-pctrie_keydiff(uint64_t index1, uint64_t index2)
-{
-
-	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
-	    __func__, (uintmax_t)index1));
-	CTASSERT(sizeof(long long) >= sizeof(uint64_t));
-
-	/*
-	 * From the highest-order bit where the indexes differ,
-	 * compute the highest level in the trie where they differ.
-	 */
-	return ((flsll(index1 ^ index2) - 1) / PCTRIE_WIDTH);
-}
-
-/*
- * Returns TRUE if it can be determined that key does not belong to the
- * specified node.  Otherwise, returns FALSE.
- */
-static __inline bool
-pctrie_keybarr(struct pctrie_node *node, uint64_t idx)
-{
-
-	if (node->pn_clev < PCTRIE_LIMIT) {
-		idx = pctrie_trimkey(idx, node->pn_clev + 1);
-		return (idx != node->pn_owner);
-	}
-	return (false);
-}
-
 /*
  * Internal helper for pctrie_reclaim_allnodes().
  * This function is recursive.
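The arithmetic added to pctrie_node_get() above, and the new pctrie_slot(), are compact enough to check in isolation. Below is a minimal userland sketch of both computations; WIDTH and COUNT stand in for PCTRIE_WIDTH and PCTRIE_COUNT at their LP64 values, the other names are local to the sketch, and flsll() is the one declared in <strings.h> on FreeBSD.

#include <assert.h>
#include <stdint.h>
#include <strings.h>

#define	WIDTH	4			/* PCTRIE_WIDTH on LP64 */
#define	COUNT	((uint64_t)1 << WIDTH)	/* PCTRIE_COUNT */
#define	ROUNDDOWN(x, y)	(((x) / (y)) * (y))

int
main(void)
{
	uint64_t index = 0x1234, newind = 0x1298, owner;
	int clev;

	/* Highest differing bit, rounded down to a whole radix digit. */
	clev = ROUNDDOWN(flsll(index ^ newind) - 1, WIDTH);
	assert(clev == 4);
	/*
	 * Least index of the subtrie.  COUNT is 64-bit, as pn_owner is in
	 * the patch, so for a top-level node COUNT << clev wraps to 0 and
	 * the mask clears every bit, giving owner 0.
	 */
	owner = index & -(COUNT << clev);
	assert(owner == 0x1200);

	/* pctrie_slot(): in-range keys map to slots 0..COUNT-1... */
	assert((index - owner) >> clev == 3);
	assert((newind - owner) >> clev == 9);
	/*
	 * ...and any out-of-range key wraps modulo 2^64 to a value
	 * >= COUNT, which is what replaces the old keybarr() test.
	 */
	assert((owner - 1 - owner) >> clev >= COUNT);
	return (0);
}

With keys 0x1234 and 0x1298, the highest differing bit is bit 7, so the joining node sits at level 1 (pn_clev 4) and owns [0x1200, 0x1300); the two keys land in slots 3 and 9 of that node.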
@@ -320,12 +297,13 @@
 		slot = ffs(node->pn_popmap) - 1;
 		child = pctrie_node_load(&node->pn_child[slot], NULL,
 		    PCTRIE_UNSERIALIZED);
-		KASSERT(child != NULL, ("%s: bad popmap slot %d in node %p",
+		KASSERT(child != PCTRIE_NULL,
+		    ("%s: bad popmap slot %d in node %p",
 		    __func__, slot, node));
 		if (!pctrie_isleaf(child))
 			pctrie_reclaim_allnodes_int(ptree, child, freefn);
 		node->pn_popmap ^= 1 << slot;
-		pctrie_node_store(&node->pn_child[slot], NULL,
+		pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
 		    PCTRIE_UNSERIALIZED);
 	}
 	pctrie_node_put(ptree, node, freefn);
@@ -341,7 +319,9 @@
 
 	node = mem;
 	node->pn_popmap = 0;
-	memset(node->pn_child, 0, sizeof(node->pn_child));
+	for (int i = 0; i < nitems(node->pn_child); i++)
+		pctrie_node_store(&node->pn_child[i], PCTRIE_NULL,
+		    PCTRIE_UNSERIALIZED);
 	return (0);
 }
 
@@ -360,10 +340,9 @@
 pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
 {
 	uint64_t index, newind;
-	struct pctrie_node *leaf, *node, *tmp;
+	struct pctrie_node *leaf, *node, *parent;
 	smr_pctnode_t *parentp;
-	int slot;
-	uint16_t clev;
+	uint64_t slot;
 
 	index = *val;
 	leaf = pctrie_toleaf(val);
@@ -373,51 +352,47 @@
 	 * will never be used.
 	 */
 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
-	if (node == NULL) {
+	if (node == PCTRIE_NULL) {
 		ptree->pt_root = (uintptr_t)leaf;
 		return (0);
 	}
-	for (parentp = (smr_pctnode_t *)&ptree->pt_root;; node = tmp) {
-		if (pctrie_isleaf(node)) {
-			newind = *pctrie_toval(node);
-			if (newind == index)
-				panic("%s: key %jx is already present",
-				    __func__, (uintmax_t)index);
-			break;
-		} else if (pctrie_keybarr(node, index)) {
-			newind = node->pn_owner;
-			break;
-		}
-		slot = pctrie_slot(index, node->pn_clev);
-		parentp = &node->pn_child[slot];
-		tmp = pctrie_node_load(parentp, NULL, PCTRIE_LOCKED);
-		if (tmp == NULL) {
-			pctrie_addnode(node, index, node->pn_clev, leaf,
-			    PCTRIE_LOCKED);
-			return (0);
-		}
+	parentp = (smr_pctnode_t *)&ptree->pt_root;
+	while (!pctrie_isleaf(node) &&
+	    (slot = pctrie_slot(index, node)) < PCTRIE_COUNT) {
+		parent = node;
+		parentp = &parent->pn_child[slot];
+		node = pctrie_node_load(parentp, NULL, PCTRIE_LOCKED);
+	}
+	if (node == PCTRIE_NULL) {
+		pctrie_addnode(parent, index, leaf, PCTRIE_LOCKED);
+		return (0);
 	}
+	if (pctrie_isleaf(node)) {
+		newind = *pctrie_toval(node);
+		if (newind == index)
+			panic("%s: key %jx is already present",
+			    __func__, (uintmax_t)index);
+	} else
+		newind = node->pn_owner;
 
 	/*
 	 * A new node is needed because the right insertion level is reached.
 	 * Setup the new intermediate node and add the 2 children: the
 	 * new object and the older edge or object.
 	 */
-	clev = pctrie_keydiff(newind, index);
-	tmp = pctrie_node_get(ptree, allocfn, index, clev);
-	if (tmp == NULL)
+	parent = pctrie_node_get(ptree, allocfn, index, newind);
+	if (parent == NULL)
 		return (ENOMEM);
 
 	/* These writes are not yet visible due to ordering. */
-	pctrie_addnode(tmp, index, clev, leaf, PCTRIE_UNSERIALIZED);
-	pctrie_addnode(tmp, newind, clev, node, PCTRIE_UNSERIALIZED);
+	pctrie_addnode(parent, index, leaf, PCTRIE_UNSERIALIZED);
+	pctrie_addnode(parent, newind, node, PCTRIE_UNSERIALIZED);
 	/* Synchronize to make the above visible. */
-	pctrie_node_store(parentp, tmp, PCTRIE_LOCKED);
+	pctrie_node_store(parentp, parent, PCTRIE_LOCKED);
 
 	return (0);
 }
 
 /*
- * Returns the value stored at the index.  If the index is not present,
- * NULL is returned.
+ * Returns the value stored at the index, or NULL if there is no such value.
  */
 static __always_inline uint64_t *
 _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
@@ -425,29 +400,22 @@
 {
 	struct pctrie_node *node;
 	uint64_t *m;
-	int slot;
+	uint64_t slot;
 
 	node = pctrie_root_load(ptree, smr, access);
-	while (node != NULL) {
-		if (pctrie_isleaf(node)) {
-			m = pctrie_toval(node);
-			if (*m == index)
-				return (m);
-			break;
-		}
-		if (pctrie_keybarr(node, index))
-			break;
-		slot = pctrie_slot(index, node->pn_clev);
+	while (!pctrie_isleaf(node) &&
+	    (slot = pctrie_slot(index, node)) < PCTRIE_COUNT)
 		node = pctrie_node_load(&node->pn_child[slot], smr, access);
-	}
+	if (pctrie_isleaf(node) && (m = pctrie_toval(node)) != NULL &&
+	    *m == index)
+		return (m);
 	return (NULL);
 }
 
 /*
- * Returns the value stored at the index, assuming access is externally
- * synchronized by a lock.
+ * Returns the value stored at the index, or NULL if there is no such value.
  *
- * If the index is not present, NULL is returned.
+ * Requires that access be externally synchronized by a lock.
  */
 uint64_t *
 pctrie_lookup(struct pctrie *ptree, uint64_t index)
@@ -456,9 +424,9 @@
 }
 
 /*
- * Returns the value stored at the index without requiring an external lock.
- *
- * If the index is not present, NULL is returned.
+ * Returns the value stored at the index, or NULL if there is no such value.
+ *
+ * Does not require access synchronization.
 */
 uint64_t *
 pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
@@ -482,7 +450,7 @@
 {
 	struct pctrie_node *node, *succ;
 	uint64_t *m;
-	int slot;
+	uint64_t slot;
 
 	/*
 	 * Descend the trie as if performing an ordinary lookup for the
@@ -499,24 +467,8 @@
 	 */
 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
 	succ = NULL;
-	while (node != NULL) {
-		if (pctrie_isleaf(node)) {
-			m = pctrie_toval(node);
-			if (*m >= index)
-				return (m);
-			break;
-		}
-		if (pctrie_keybarr(node, index)) {
-			/*
-			 * If all values in this subtree are > index, then the
-			 * least value in this subtree is the answer.
-			 */
-			if (node->pn_owner > index)
-				succ = node;
-			break;
-		}
-		slot = pctrie_slot(index, node->pn_clev);
-
+	while (!pctrie_isleaf(node) &&
+	    (slot = pctrie_slot(index, node)) < PCTRIE_COUNT) {
 		/*
 		 * Just in case the next search step leads to a subtree of all
 		 * values < index, check popmap to see if a next bigger step, to
@@ -528,6 +480,18 @@
 			succ = node;
 		node = pctrie_node_load(&node->pn_child[slot], NULL,
 		    PCTRIE_LOCKED);
 	}
+	if (pctrie_isleaf(node)) {
+		if ((m = pctrie_toval(node)) != NULL &&
+		    *m >= index)
+			return (m);
+	} else {
+		/*
+		 * If all values in this subtree are > index, then the least
+		 * value in this subtree is the answer.
+		 */
+		if (node->pn_owner > index)
+			succ = node;
+	}
 
 	/*
 	 * Restart the search from the last place visited in the subtree that
@@ -540,10 +504,10 @@
 		 * Take a step to the next bigger sibling of the node chosen
 		 * last time.  In that subtree, all values > index.
 		 */
-		slot = pctrie_slot(index, succ->pn_clev) + 1;
+		slot = pctrie_slot(index, succ) + 1;
 		KASSERT((succ->pn_popmap >> slot) != 0,
 		    ("%s: no popmap siblings past slot %d in node %p",
-		    __func__, slot, succ));
+		    __func__, (int)slot, succ));
 		slot += ffs(succ->pn_popmap >> slot) - 1;
 		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
 		    PCTRIE_LOCKED);
@@ -573,38 +537,33 @@
 {
 	struct pctrie_node *node, *pred;
 	uint64_t *m;
-	int slot;
+	uint64_t slot;
 
 	/*
 	 * Mirror the implementation of pctrie_lookup_ge, described above.
	 */
 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
 	pred = NULL;
-	while (node != NULL) {
-		if (pctrie_isleaf(node)) {
-			m = pctrie_toval(node);
-			if (*m <= index)
-				return (m);
-			break;
-		}
-		if (pctrie_keybarr(node, index)) {
-			if (node->pn_owner < index)
-				pred = node;
-			break;
-		}
-		slot = pctrie_slot(index, node->pn_clev);
+	while (!pctrie_isleaf(node) &&
+	    (slot = pctrie_slot(index, node)) < PCTRIE_COUNT) {
 		if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
 			pred = node;
 		node = pctrie_node_load(&node->pn_child[slot], NULL,
 		    PCTRIE_LOCKED);
 	}
+	if (pctrie_isleaf(node)) {
+		if ((m = pctrie_toval(node)) != NULL &&
+		    *m <= index)
+			return (m);
+	} else if (node->pn_owner < index)
+		pred = node;
 	if (pred == NULL)
 		return (NULL);
 	if (pred != node) {
-		slot = pctrie_slot(index, pred->pn_clev);
+		slot = pctrie_slot(index, pred);
 		KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0,
 		    ("%s: no popmap siblings before slot %d in node %p",
-		    __func__, slot, pred));
+		    __func__, (int)slot, pred));
 		slot = fls(pred->pn_popmap & ((1 << slot) - 1)) - 1;
 		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
 		    PCTRIE_LOCKED);
@@ -626,66 +585,57 @@
 void
 pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn)
 {
-	struct pctrie_node *node, *parent, *tmp;
+	struct pctrie_node *gparent, *node, *parent;
 	uint64_t *m;
-	int slot;
+	uint64_t slot;
 
 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
 	if (pctrie_isleaf(node)) {
-		m = pctrie_toval(node);
-		if (*m != index)
-			panic("%s: invalid key found", __func__);
-		pctrie_root_store(ptree, NULL, PCTRIE_LOCKED);
-		return;
+		if ((m = pctrie_toval(node)) != NULL && *m == index) {
+			pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED);
+			return;
+		}
+		panic("%s: key not found", __func__);
 	}
 	parent = NULL;
-	for (;;) {
-		if (node == NULL)
-			panic("pctrie_remove: impossible to locate the key");
-		slot = pctrie_slot(index, node->pn_clev);
-		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
-		    PCTRIE_LOCKED);
-		if (pctrie_isleaf(tmp)) {
-			m = pctrie_toval(tmp);
-			if (*m != index)
-				panic("%s: invalid key found", __func__);
-			KASSERT((node->pn_popmap & (1 << slot)) != 0,
-			    ("%s: bad popmap slot %d in node %p",
-			    __func__, slot, node));
-			node->pn_popmap ^= 1 << slot;
-			pctrie_node_store(&node->pn_child[slot], NULL,
-			    PCTRIE_LOCKED);
-			if (!powerof2(node->pn_popmap))
-				break;
-			KASSERT(node->pn_popmap != 0,
-			    ("%s: bad popmap all zeroes", __func__));
-			slot = ffs(node->pn_popmap) - 1;
-			tmp = pctrie_node_load(&node->pn_child[slot],
-			    NULL, PCTRIE_LOCKED);
-			KASSERT(tmp != NULL,
-			    ("%s: bad popmap slot %d in node %p",
-			    __func__, slot, node));
-			if (parent == NULL)
-				pctrie_root_store(ptree, tmp, PCTRIE_LOCKED);
-			else {
-				slot = pctrie_slot(index, parent->pn_clev);
-				KASSERT(pctrie_node_load(
-				    &parent->pn_child[slot], NULL,
-				    PCTRIE_LOCKED) == node,
-				    ("%s: invalid child value", __func__));
-				pctrie_node_store(&parent->pn_child[slot], tmp,
-				    PCTRIE_LOCKED);
-			}
-			/*
-			 * The child is still valid and we can not zero the
-			 * pointer until all SMR references are gone.
-			 */
-			pctrie_node_put(ptree, node, freefn);
-			break;
-		}
+	while (!pctrie_isleaf(node) &&
+	    (slot = pctrie_slot(index, node)) < PCTRIE_COUNT) {
+		gparent = parent;
 		parent = node;
-		node = tmp;
+		node = pctrie_node_load(&parent->pn_child[slot], NULL,
+		    PCTRIE_LOCKED);
 	}
+	if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL ||
+	    *m != index)
+		panic("%s: key not found", __func__);
+	KASSERT((parent->pn_popmap & (1 << slot)) != 0,
+	    ("%s: bad popmap slot %d in node %p",
+	    __func__, (int)slot, parent));
+	parent->pn_popmap ^= 1 << slot;
+	pctrie_node_store(&parent->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED);
+	if (!powerof2(parent->pn_popmap))
+		return;
+	KASSERT(parent->pn_popmap != 0,
+	    ("%s: bad popmap all zeroes", __func__));
+	slot = ffs(parent->pn_popmap) - 1;
+	node = pctrie_node_load(&parent->pn_child[slot], NULL, PCTRIE_LOCKED);
+	KASSERT(node != PCTRIE_NULL,
+	    ("%s: bad popmap slot %d in node %p", __func__, (int)slot, parent));
+	if (gparent == NULL)
+		pctrie_root_store(ptree, node, PCTRIE_LOCKED);
+	else {
+		slot = pctrie_slot(index, gparent);
+		KASSERT(parent ==
+		    pctrie_node_load(&gparent->pn_child[slot], NULL,
+		    PCTRIE_LOCKED), ("%s: invalid child value", __func__));
+		pctrie_node_store(&gparent->pn_child[slot], node,
+		    PCTRIE_LOCKED);
+	}
+	/*
+	 * The child is still valid and we can not zero the
+	 * pointer until all SMR references are gone.
+	 */
+	pctrie_node_put(ptree, parent, freefn);
 }
 
 /*
@@ -699,9 +649,9 @@
 	struct pctrie_node *root;
 
 	root = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
-	if (root == NULL)
+	if (root == PCTRIE_NULL)
 		return;
-	pctrie_root_store(ptree, NULL, PCTRIE_UNSERIALIZED);
+	pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_UNSERIALIZED);
 	if (!pctrie_isleaf(root))
 		pctrie_reclaim_allnodes_int(ptree, root, freefn);
 }
Index: sys/sys/pctrie.h
===================================================================
--- sys/sys/pctrie.h
+++ sys/sys/pctrie.h
@@ -134,19 +134,20 @@
 		    pctrie_free_t freefn);
 size_t		pctrie_node_size(void);
 int		pctrie_zone_init(void *mem, int size, int flags);
+#define	PCTRIE_ISLEAF	0x1
 
 static __inline void
 pctrie_init(struct pctrie *ptree)
 {
-	ptree->pt_root = 0;
+	ptree->pt_root = PCTRIE_ISLEAF;
 }
 
 static __inline bool
 pctrie_is_empty(struct pctrie *ptree)
 {
-	return (ptree->pt_root == 0);
+	return (ptree->pt_root == PCTRIE_ISLEAF);
 }
 
 /*
Index: sys/vm/vm_radix.h
===================================================================
--- sys/vm/vm_radix.h
+++ sys/vm/vm_radix.h
@@ -47,19 +47,20 @@
 vm_page_t	vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index);
 vm_page_t	vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage);
 void		vm_radix_zinit(void);
+#define	VM_RADIX_ISLEAF	0x1
 
 static __inline void
 vm_radix_init(struct vm_radix *rtree)
 {
-	rtree->rt_root = 0;
+	rtree->rt_root = VM_RADIX_ISLEAF;
 }
 
 static __inline bool
 vm_radix_is_empty(struct vm_radix *rtree)
 {
-	return (rtree->rt_root == 0);
+	return (rtree->rt_root == (uintptr_t)VM_RADIX_ISLEAF);
 }
 
 #endif /* _KERNEL */
Index: sys/vm/vm_radix.c
===================================================================
--- sys/vm/vm_radix.c
+++ sys/vm/vm_radix.c
@@ -104,14 +104,9 @@
     "rn_popmap_t too wide");
 
 /* Flag bits stored in node pointers. */
-#define	VM_RADIX_ISLEAF	0x1
 #define	VM_RADIX_FLAGS	0x1
 #define	VM_RADIX_PAD	VM_RADIX_FLAGS
 
-/* Returns one unit associated with specified level. */
-#define	VM_RADIX_UNITLEVEL(lev)	\
-	((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))
-
 enum vm_radix_access { SMR, LOCKED, UNSERIALIZED };
 
 struct vm_radix_node;
@@ -120,9 +115,10 @@
 struct vm_radix_node {
 	vm_pindex_t	rn_owner;			/* Owner of record. */
 	rn_popmap_t	rn_popmap;			/* Valid children. */
-	uint8_t		rn_clev;			/* Current level. */
+	uint8_t		rn_clev;			/* Level * WIDTH. */
 	smrnode_t	rn_child[VM_RADIX_COUNT];	/* Child nodes. */
 };
+#define	VM_RADIX_NULL	(struct vm_radix_node *)VM_RADIX_ISLEAF
 
 static uma_zone_t vm_radix_node_zone;
 static smr_t vm_radix_smr;
@@ -131,26 +127,20 @@
 	    enum vm_radix_access access);
 
 /*
- * Return the position in the array for a given level.
+ * Map index to an array position for the children of rnode.
+ * Returns a value >= VM_RADIX_COUNT if there is no such index.
  */
-static __inline int
-vm_radix_slot(vm_pindex_t index, uint16_t level)
-{
-	return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
-}
-
-/* Computes the key (index) with the low-order 'level' radix-digits zeroed. */
 static __inline vm_pindex_t
-vm_radix_trimkey(vm_pindex_t index, uint16_t level)
+vm_radix_slot(vm_pindex_t index, struct vm_radix_node *rnode)
 {
-	return (index & -VM_RADIX_UNITLEVEL(level));
+	return ((index - rnode->rn_owner) >> rnode->rn_clev);
 }
 
 /*
  * Allocate a radix node.
  */
 static struct vm_radix_node *
-vm_radix_node_get(vm_pindex_t index, uint16_t clevel)
+vm_radix_node_get(vm_pindex_t index, vm_pindex_t newind)
 {
 	struct vm_radix_node *rnode;
 
@@ -165,11 +155,22 @@
 	 */
 	if (rnode->rn_popmap != 0) {
 		vm_radix_node_store(&rnode->rn_child[ffs(rnode->rn_popmap) - 1],
-		    NULL, UNSERIALIZED);
+		    VM_RADIX_NULL, UNSERIALIZED);
 		rnode->rn_popmap = 0;
 	}
-	rnode->rn_owner = vm_radix_trimkey(index, clevel + 1);
-	rnode->rn_clev = clevel;
+
+	/*
+	 * From the highest-order bit where the indexes differ,
+	 * compute the highest level in the trie where they differ.  Then,
+	 * compute the least index of this subtrie.
+	 */
+	KASSERT(index != newind, ("%s: passing the same key value %jx",
+	    __func__, (uintmax_t)index));
+	_Static_assert(sizeof(long long) >= sizeof(vm_pindex_t),
+	    "vm_pindex_t too wide");
+	rnode->rn_clev = rounddown(flsll(index ^ newind) - 1, VM_RADIX_WIDTH);
+	rnode->rn_owner = VM_RADIX_COUNT;
+	rnode->rn_owner = index & -(rnode->rn_owner << rnode->rn_clev);
 	return (rnode);
 }
 
@@ -189,35 +190,43 @@
 		if ((rnode->rn_popmap & (1 << slot)) != 0)
 			continue;
 		KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) ==
-		    NULL, ("vm_radix_node_put: rnode %p has a child", rnode));
+		    VM_RADIX_NULL,
+		    ("vm_radix_node_put: rnode %p has a child", rnode));
 	}
 #endif
 	uma_zfree_smr(vm_radix_node_zone, rnode);
 }
 
 /*
- * Fetch a node pointer from a slot in another node.
+ * Fetch a node pointer from a slot.
  */
 static __inline struct vm_radix_node *
 vm_radix_node_load(smrnode_t *p, enum vm_radix_access access)
 {
+	struct vm_radix_node *rnode;
 
 	switch (access) {
 	case UNSERIALIZED:
-		return (smr_unserialized_load(p, true));
+		rnode = smr_unserialized_load(p, true);
+		break;
 	case LOCKED:
-		return (smr_serialized_load(p, true));
+		rnode = smr_serialized_load(p, true);
+		break;
 	case SMR:
-		return (smr_entered_load(p, vm_radix_smr));
+		rnode = smr_entered_load(p, vm_radix_smr);
+		break;
+	default:
+		__assert_unreachable();
 	}
-	__assert_unreachable();
+	KASSERT(rnode != NULL, ("%s: NULL node", __func__));
+	return (rnode);
 }
 
 static __inline void
 vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
     enum vm_radix_access access)
 {
-
+	KASSERT(v != NULL, ("%s: NULL node", __func__));
 	switch (access) {
 	case UNSERIALIZED:
 		smr_unserialized_store(p, v, true);
@@ -284,52 +293,18 @@
  * Make 'child' a child of 'rnode'.
  */
 static __inline void
-vm_radix_addnode(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
+vm_radix_addnode(struct vm_radix_node *rnode, vm_pindex_t index,
    struct vm_radix_node *child, enum vm_radix_access access)
 {
 	int slot;
 
-	slot = vm_radix_slot(index, clev);
+	slot = vm_radix_slot(index, rnode);
 	vm_radix_node_store(&rnode->rn_child[slot], child, access);
 	rnode->rn_popmap ^= 1 << slot;
 	KASSERT((rnode->rn_popmap & (1 << slot)) != 0,
 	    ("%s: bad popmap slot %d in rnode %p", __func__, slot, rnode));
 }
 
-/*
- * Returns the level where two keys differ.
- * It cannot accept 2 equal keys.
- */
-static __inline uint16_t
-vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
-{
-
-	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
-	    __func__, (uintmax_t)index1));
-	CTASSERT(sizeof(long long) >= sizeof(vm_pindex_t));
-
-	/*
-	 * From the highest-order bit where the indexes differ,
-	 * compute the highest level in the trie where they differ.
-	 */
-	return ((flsll(index1 ^ index2) - 1) / VM_RADIX_WIDTH);
-}
-
-/*
- * Returns TRUE if it can be determined that key does not belong to the
- * specified rnode.  Otherwise, returns FALSE.
- */
-static __inline bool
-vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
-{
-
-	if (rnode->rn_clev < VM_RADIX_LIMIT) {
-		idx = vm_radix_trimkey(idx, rnode->rn_clev + 1);
-		return (idx != rnode->rn_owner);
-	}
-	return (false);
-}
-
 /*
  * Internal helper for vm_radix_reclaim_allnodes().
  * This function is recursive.
  */
@@ -344,17 +319,34 @@
 		slot = ffs(rnode->rn_popmap) - 1;
 		child = vm_radix_node_load(&rnode->rn_child[slot],
 		    UNSERIALIZED);
-		KASSERT(child != NULL, ("%s: bad popmap slot %d in rnode %p",
+		KASSERT(child != VM_RADIX_NULL,
+		    ("%s: bad popmap slot %d in rnode %p",
 		    __func__, slot, rnode));
 		if (!vm_radix_isleaf(child))
 			vm_radix_reclaim_allnodes_int(child);
 		rnode->rn_popmap ^= 1 << slot;
-		vm_radix_node_store(&rnode->rn_child[slot], NULL,
+		vm_radix_node_store(&rnode->rn_child[slot], VM_RADIX_NULL,
 		    UNSERIALIZED);
 	}
 	vm_radix_node_put(rnode);
 }
 
+/*
+ * radix node zone initializer.
+ */
+static int
+vm_radix_zone_init(void *mem, int size, int flags)
+{
+	struct vm_radix_node *rnode;
+
+	rnode = mem;
+	rnode->rn_popmap = 0;
+	for (int i = 0; i < nitems(rnode->rn_child); i++)
+		vm_radix_node_store(&rnode->rn_child[i], VM_RADIX_NULL,
+		    UNSERIALIZED);
+	return (0);
+}
+
 #ifndef UMA_MD_SMALL_ALLOC
 void vm_radix_reserve_kva(void);
 /*
@@ -387,8 +379,8 @@
 {
 
 	vm_radix_node_zone = uma_zcreate("RADIX NODE",
-	    sizeof(struct vm_radix_node), NULL, NULL, NULL, NULL,
-	    VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_SMR | UMA_ZONE_ZINIT);
+	    sizeof(struct vm_radix_node), NULL, NULL, vm_radix_zone_init, NULL,
+	    VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_SMR);
 	vm_radix_smr = uma_zone_get_smr(vm_radix_node_zone);
 }
 
@@ -400,10 +392,9 @@
 vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
 {
 	vm_pindex_t index, newind;
-	struct vm_radix_node *leaf, *rnode, *tmp;
+	struct vm_radix_node *leaf, *parent, *rnode;
 	smrnode_t *parentp;
-	int slot;
-	uint16_t clev;
+	vm_pindex_t slot;
 
 	index = page->pindex;
 	leaf = vm_radix_toleaf(page);
@@ -413,51 +404,47 @@
 	 * will never be used.
 	 */
 	rnode = vm_radix_root_load(rtree, LOCKED);
-	if (rnode == NULL) {
+	if (rnode == VM_RADIX_NULL) {
 		rtree->rt_root = (uintptr_t)leaf;
 		return (0);
 	}
-	for (parentp = (smrnode_t *)&rtree->rt_root;; rnode = tmp) {
-		if (vm_radix_isleaf(rnode)) {
-			newind = vm_radix_topage(rnode)->pindex;
-			if (newind == index)
-				panic("%s: key %jx is already present",
-				    __func__, (uintmax_t)index);
-			break;
-		} else if (vm_radix_keybarr(rnode, index)) {
-			newind = rnode->rn_owner;
-			break;
-		}
-		slot = vm_radix_slot(index, rnode->rn_clev);
-		parentp = &rnode->rn_child[slot];
-		tmp = vm_radix_node_load(parentp, LOCKED);
-		if (tmp == NULL) {
-			vm_radix_addnode(rnode, index, rnode->rn_clev, leaf,
-			    LOCKED);
-			return (0);
-		}
+	parentp = (smrnode_t *)&rtree->rt_root;
+	while (!vm_radix_isleaf(rnode) &&
+	    (slot = vm_radix_slot(index, rnode)) < VM_RADIX_COUNT) {
+		parent = rnode;
+		parentp = &parent->rn_child[slot];
+		rnode = vm_radix_node_load(parentp, LOCKED);
+	}
+	if (rnode == VM_RADIX_NULL) {
+		vm_radix_addnode(parent, index, leaf, LOCKED);
+		return (0);
 	}
+	if (vm_radix_isleaf(rnode)) {
+		newind = vm_radix_topage(rnode)->pindex;
+		if (newind == index)
+			panic("%s: key %jx is already present",
+			    __func__, (uintmax_t)index);
+	} else
+		newind = rnode->rn_owner;
 
 	/*
 	 * A new node is needed because the right insertion level is reached.
 	 * Setup the new intermediate node and add the 2 children: the
 	 * new object and the older edge or object.
 	 */
-	clev = vm_radix_keydiff(newind, index);
-	tmp = vm_radix_node_get(index, clev);
-	if (tmp == NULL)
+	parent = vm_radix_node_get(index, newind);
+	if (parent == NULL)
 		return (ENOMEM);
 
 	/* These writes are not yet visible due to ordering. */
-	vm_radix_addnode(tmp, index, clev, leaf, UNSERIALIZED);
-	vm_radix_addnode(tmp, newind, clev, rnode, UNSERIALIZED);
+	vm_radix_addnode(parent, index, leaf, UNSERIALIZED);
+	vm_radix_addnode(parent, newind, rnode, UNSERIALIZED);
 	/* Serializing write to make the above visible. */
-	vm_radix_node_store(parentp, tmp, LOCKED);
+	vm_radix_node_store(parentp, parent, LOCKED);
 
 	return (0);
 }
 
 /*
- * Returns the value stored at the index.  If the index is not present,
- * NULL is returned.
+ * Returns the page stored at the pindex, or NULL if there is no such page.
  */
 static __always_inline vm_page_t
 _vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index,
@@ -465,40 +452,35 @@
 {
 	struct vm_radix_node *rnode;
 	vm_page_t m;
-	int slot;
+	vm_pindex_t slot;
 
 	rnode = vm_radix_root_load(rtree, access);
-	while (rnode != NULL) {
-		if (vm_radix_isleaf(rnode)) {
-			m = vm_radix_topage(rnode);
-			if (m->pindex == index)
-				return (m);
-			break;
-		}
-		if (vm_radix_keybarr(rnode, index))
-			break;
-		slot = vm_radix_slot(index, rnode->rn_clev);
+	while (!vm_radix_isleaf(rnode) &&
+	    (slot = vm_radix_slot(index, rnode)) < VM_RADIX_COUNT)
 		rnode = vm_radix_node_load(&rnode->rn_child[slot], access);
-	}
+	if (vm_radix_isleaf(rnode) &&
+	    (m = vm_radix_topage(rnode)) != NULL &&
+	    m->pindex == index)
+		return (m);
 	return (NULL);
 }
 
 /*
- * Returns the value stored at the index assuming there is an external lock.
+ * Returns the page stored at the pindex, or NULL if there is no such page.
 *
- * If the index is not present, NULL is returned.
+ * Requires that access be externally synchronized by a lock.
 */
 vm_page_t
 vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
 {
 
-	return _vm_radix_lookup(rtree, index, LOCKED);
+	return (_vm_radix_lookup(rtree, index, LOCKED));
 }
 
 /*
- * Returns the value stored at the index without requiring an external lock.
+ * Returns the page stored at the pindex, or NULL if there is no such page.
 *
- * If the index is not present, NULL is returned.
+ * Does not require access synchronization.
 */
 vm_page_t
 vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
@@ -523,7 +505,7 @@
 {
 	struct vm_radix_node *rnode, *succ;
 	vm_page_t m;
-	int slot;
+	vm_pindex_t slot;
 
 	/*
 	 * Descend the trie as if performing an ordinary lookup for the page
@@ -540,25 +522,8 @@
 	 */
 	rnode = vm_radix_root_load(rtree, LOCKED);
 	succ = NULL;
-	while (rnode != NULL) {
-		if (vm_radix_isleaf(rnode)) {
-			m = vm_radix_topage(rnode);
-			if (m->pindex >= index)
-				return (m);
-			break;
-		}
-		if (vm_radix_keybarr(rnode, index)) {
-			/*
-			 * If all pages in this subtree have pindex > index,
-			 * then the page in this subtree with the least pindex
-			 * is the answer.
-			 */
-			if (rnode->rn_owner > index)
-				succ = rnode;
-			break;
-		}
-		slot = vm_radix_slot(index, rnode->rn_clev);
-
+	while (!vm_radix_isleaf(rnode) &&
+	    (slot = vm_radix_slot(index, rnode)) < VM_RADIX_COUNT) {
 		/*
 		 * Just in case the next search step leads to a subtree of all
 		 * pages with pindex < index, check popmap to see if a next
@@ -569,6 +534,18 @@
 			succ = rnode;
 		rnode = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
 	}
+	if (vm_radix_isleaf(rnode)) {
+		if ((m = vm_radix_topage(rnode)) != NULL &&
+		    m->pindex >= index)
+			return (m);
+	} else {
+		/*
+		 * If all pages in this subtree have pindex > index, then the
+		 * page in this subtree with the least pindex is the answer.
+		 */
+		if (rnode->rn_owner > index)
+			succ = rnode;
+	}
 
 	/*
 	 * Restart the search from the last place visited in the subtree that
@@ -581,10 +558,10 @@
 		 * Take a step to the next bigger sibling of the node chosen
 		 * last time.  In that subtree, all pages have pindex > index.
 		 */
-		slot = vm_radix_slot(index, succ->rn_clev) + 1;
+		slot = vm_radix_slot(index, succ) + 1;
 		KASSERT((succ->rn_popmap >> slot) != 0,
 		    ("%s: no popmap siblings past slot %d in node %p",
-		    __func__, slot, succ));
+		    __func__, (int)slot, succ));
 		slot += ffs(succ->rn_popmap >> slot) - 1;
 		succ = vm_radix_node_load(&succ->rn_child[slot], LOCKED);
 	}
@@ -612,37 +589,32 @@
 {
 	struct vm_radix_node *pred, *rnode;
 	vm_page_t m;
-	int slot;
+	vm_pindex_t slot;
 
 	/*
 	 * Mirror the implementation of vm_radix_lookup_ge, described above.
 	 */
 	rnode = vm_radix_root_load(rtree, LOCKED);
 	pred = NULL;
-	while (rnode != NULL) {
-		if (vm_radix_isleaf(rnode)) {
-			m = vm_radix_topage(rnode);
-			if (m->pindex <= index)
-				return (m);
-			break;
-		}
-		if (vm_radix_keybarr(rnode, index)) {
-			if (rnode->rn_owner < index)
-				pred = rnode;
-			break;
-		}
-		slot = vm_radix_slot(index, rnode->rn_clev);
+	while (!vm_radix_isleaf(rnode) &&
+	    (slot = vm_radix_slot(index, rnode)) < VM_RADIX_COUNT) {
 		if ((rnode->rn_popmap & ((1 << slot) - 1)) != 0)
 			pred = rnode;
 		rnode = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
 	}
+	if (vm_radix_isleaf(rnode)) {
+		if ((m = vm_radix_topage(rnode)) != NULL &&
+		    m->pindex <= index)
+			return (m);
+	} else if (rnode->rn_owner < index)
+		pred = rnode;
 	if (pred == NULL)
 		return (NULL);
 	if (pred != rnode) {
-		slot = vm_radix_slot(index, pred->rn_clev);
+		slot = vm_radix_slot(index, pred);
 		KASSERT((pred->rn_popmap & ((1 << slot) - 1)) != 0,
 		    ("%s: no popmap siblings before slot %d in node %p",
-		    __func__, slot, pred));
+		    __func__, (int)slot, pred));
 		slot = fls(pred->rn_popmap & ((1 << slot) - 1)) - 1;
 		pred = vm_radix_node_load(&pred->rn_child[slot], LOCKED);
 	}
@@ -662,63 +634,55 @@
 vm_page_t
 vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
 {
-	struct vm_radix_node *rnode, *parent, *tmp;
+	struct vm_radix_node *gparent, *rnode, *parent;
 	vm_page_t m;
-	int slot;
+	vm_pindex_t slot;
 
 	rnode = vm_radix_root_load(rtree, LOCKED);
 	if (vm_radix_isleaf(rnode)) {
-		m = vm_radix_topage(rnode);
-		if (m->pindex != index)
-			return (NULL);
-		vm_radix_root_store(rtree, NULL, LOCKED);
-		return (m);
-	}
-	parent = NULL;
-	for (;;) {
-		if (rnode == NULL)
-			return (NULL);
-		slot = vm_radix_slot(index, rnode->rn_clev);
-		tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
-		if (vm_radix_isleaf(tmp)) {
-			m = vm_radix_topage(tmp);
-			if (m->pindex != index)
-				return (NULL);
-			KASSERT((rnode->rn_popmap & (1 << slot)) != 0,
-			    ("%s: bad popmap slot %d in rnode %p",
-			    __func__, slot, rnode));
-			rnode->rn_popmap ^= 1 << slot;
-			vm_radix_node_store(
-			    &rnode->rn_child[slot], NULL, LOCKED);
-			if (!powerof2(rnode->rn_popmap))
-				return (m);
-			KASSERT(rnode->rn_popmap != 0,
-			    ("%s: bad popmap all zeroes", __func__));
-			slot = ffs(rnode->rn_popmap) - 1;
-			tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
-			KASSERT(tmp != NULL,
-			    ("%s: bad popmap slot %d in rnode %p",
-			    __func__, slot, rnode));
-			if (parent == NULL)
-				vm_radix_root_store(rtree, tmp, LOCKED);
-			else {
-				slot = vm_radix_slot(index, parent->rn_clev);
-				KASSERT(vm_radix_node_load(
-				    &parent->rn_child[slot], LOCKED) == rnode,
-				    ("%s: invalid child value", __func__));
-				vm_radix_node_store(&parent->rn_child[slot],
-				    tmp, LOCKED);
-			}
-			/*
-			 * The child is still valid and we can not zero the
-			 * pointer until all smr references are gone.
-			 */
-			vm_radix_node_put(rnode);
+		if ((m = vm_radix_topage(rnode)) != NULL &&
+		    m->pindex == index) {
+			vm_radix_root_store(rtree, VM_RADIX_NULL, LOCKED);
 			return (m);
 		}
+		return (NULL);
+	}
+	parent = NULL;
+	while (!vm_radix_isleaf(rnode) &&
+	    (slot = vm_radix_slot(index, rnode)) < VM_RADIX_COUNT) {
+		gparent = parent;
 		parent = rnode;
-		rnode = tmp;
+		rnode = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
 	}
+	if (!vm_radix_isleaf(rnode) || (m = vm_radix_topage(rnode)) == NULL ||
+	    m->pindex != index)
+		return (NULL);
+	KASSERT((parent->rn_popmap & (1 << slot)) != 0,
+	    ("%s: bad popmap slot %d in rnode %p", __func__, (int)slot, parent));
+	parent->rn_popmap ^= 1 << slot;
+	vm_radix_node_store(&parent->rn_child[slot], VM_RADIX_NULL, LOCKED);
+	if (!powerof2(parent->rn_popmap))
+		return (m);
+	KASSERT(parent->rn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
+	slot = ffs(parent->rn_popmap) - 1;
+	rnode = vm_radix_node_load(&parent->rn_child[slot], LOCKED);
+	KASSERT(rnode != VM_RADIX_NULL,
+	    ("%s: bad popmap slot %d in rnode %p", __func__, (int)slot, parent));
+	if (gparent == NULL)
+		vm_radix_root_store(rtree, rnode, LOCKED);
+	else {
+		slot = vm_radix_slot(index, gparent);
+		KASSERT(parent ==
+		    vm_radix_node_load(&gparent->rn_child[slot], LOCKED),
+		    ("%s: invalid child value", __func__));
+		vm_radix_node_store(&gparent->rn_child[slot], rnode, LOCKED);
+	}
+	/*
+	 * The child is still valid and we can not zero the
+	 * pointer until all smr references are gone.
+	 */
+	vm_radix_node_put(parent);
+	return (m);
 }
 
 /*
@@ -732,9 +696,9 @@
 	struct vm_radix_node *root;
 
 	root = vm_radix_root_load(rtree, LOCKED);
-	if (root == NULL)
+	if (root == VM_RADIX_NULL)
 		return;
-	vm_radix_root_store(rtree, NULL, UNSERIALIZED);
+	vm_radix_root_store(rtree, VM_RADIX_NULL, UNSERIALIZED);
 	if (!vm_radix_isleaf(root))
 		vm_radix_reclaim_allnodes_int(root);
 }
@@ -746,36 +710,30 @@
 vm_page_t
 vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
 {
-	struct vm_radix_node *rnode, *tmp;
+	struct vm_radix_node *leaf, *parent, *rnode;
 	vm_page_t m;
 	vm_pindex_t index;
-	int slot;
+	vm_pindex_t slot;
 
+	leaf = vm_radix_toleaf(newpage);
 	index = newpage->pindex;
 	rnode = vm_radix_root_load(rtree, LOCKED);
-	if (rnode == NULL)
-		panic("%s: replacing page on an empty trie", __func__);
 	if (vm_radix_isleaf(rnode)) {
-		m = vm_radix_topage(rnode);
-		if (m->pindex != index)
-			panic("%s: original replacing root key not found",
-			    __func__);
-		rtree->rt_root = (uintptr_t)vm_radix_toleaf(newpage);
-		return (m);
-	}
-	for (;;) {
-		slot = vm_radix_slot(index, rnode->rn_clev);
-		tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
-		if (vm_radix_isleaf(tmp)) {
-			m = vm_radix_topage(tmp);
-			if (m->pindex != index)
-				break;
-			vm_radix_node_store(&rnode->rn_child[slot],
-			    vm_radix_toleaf(newpage), LOCKED);
+		if ((m = vm_radix_topage(rnode)) != NULL && m->pindex == index) {
+			rtree->rt_root = (uintptr_t)leaf;
 			return (m);
-		} else if (tmp == NULL || vm_radix_keybarr(tmp, index))
-			break;
-		rnode = tmp;
+		}
+		panic("%s: original replacing page not found at root", __func__);
+	}
+	while (!vm_radix_isleaf(rnode) &&
+	    (slot = vm_radix_slot(index, rnode)) < VM_RADIX_COUNT) {
+		parent = rnode;
+		rnode = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
+	}
+	if (vm_radix_isleaf(rnode) && (m = vm_radix_topage(rnode)) != NULL &&
+	    m->pindex == index) {
+		vm_radix_node_store(&parent->rn_child[slot], leaf, LOCKED);
+		return (m);
 	}
 	panic("%s: original replacing page not found", __func__);
 }
@@ -801,14 +759,14 @@
 	rnode = (struct vm_radix_node *)addr;
 	db_printf("radixnode %p, owner %jx, children popmap %04x, level %u:\n",
 	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_popmap,
-	    rnode->rn_clev);
+	    rnode->rn_clev / VM_RADIX_WIDTH);
 	for (popmap = rnode->rn_popmap; popmap != 0; popmap ^= 1 << slot) {
 		slot = ffs(popmap) - 1;
 		tmp = vm_radix_node_load(&rnode->rn_child[slot], UNSERIALIZED);
 		db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
 		    slot, (void *)tmp,
 		    vm_radix_isleaf(tmp) ? vm_radix_topage(tmp) : NULL,
-		    rnode->rn_clev);
+		    rnode->rn_clev / VM_RADIX_WIDTH);
 	}
 }
 #endif /* DDB */
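A note on the sentinel encoding used throughout both files: PCTRIE_NULL and VM_RADIX_NULL are tagged leaves whose value bits are all zero, so a descent loop stops at them exactly as it stops at a real leaf, and the post-loop "(m = pctrie_toval(node)) != NULL" and "(m = vm_radix_topage(rnode)) != NULL" tests reject them without a separate empty-slot check. Below is a minimal sketch of that encoding, assuming only that stored pointers are at least 2-byte aligned so bit 0 is free for the tag; all names are local to the sketch.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define	ISLEAF	0x1	/* PCTRIE_ISLEAF / VM_RADIX_ISLEAF */
#define	FLAGS	0x1	/* PCTRIE_FLAGS / VM_RADIX_FLAGS */

struct node;
#define	TRIE_NULL	((struct node *)ISLEAF)

static int
isleaf(const struct node *n)
{
	return (((uintptr_t)n & ISLEAF) != 0);
}

static uint64_t *
toval(const struct node *n)
{
	/* Strip the flag bits; TRIE_NULL decodes to plain NULL. */
	return ((uint64_t *)((uintptr_t)n & ~(uintptr_t)FLAGS));
}

int
main(void)
{
	uint64_t val = 42;
	struct node *leaf = (struct node *)((uintptr_t)&val | ISLEAF);

	/* The sentinel looks like a leaf to the descent loop... */
	assert(isleaf(TRIE_NULL));
	/* ...but decodes to NULL, unlike a leaf that holds a value. */
	assert(toval(TRIE_NULL) == NULL);
	assert(isleaf(leaf) && *toval(leaf) == 42);
	return (0);
}

This is also why pctrie_init() and vm_radix_init() now seed the root with the ISLEAF bit rather than 0, and why the zone initializers fill every child slot with the sentinel instead of calling memset().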