Index: sys/kern/subr_pctrie.c =================================================================== --- sys/kern/subr_pctrie.c +++ sys/kern/subr_pctrie.c @@ -55,6 +55,9 @@ #include #include #include +#include /* smr.h depends on struct thread. */ +#include +#include #ifdef DDB #include @@ -72,18 +75,27 @@ #define PCTRIE_UNITLEVEL(lev) \ ((uint64_t)1 << ((lev) * PCTRIE_WIDTH)) +struct pctrie_node; +typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t; + struct pctrie_node { - uint64_t pn_owner; /* Owner of record. */ - uint16_t pn_count; /* Valid children. */ - uint16_t pn_clev; /* Current level. */ - void *pn_child[PCTRIE_COUNT]; /* Child nodes. */ + uint64_t pn_owner; /* Owner of record. */ + uint16_t pn_count; /* Valid children. */ + uint8_t pn_clev; /* Current level. */ + int8_t pn_last; /* Zero last ptr. */ + smr_pctnode_t pn_child[PCTRIE_COUNT]; /* Child nodes. */ }; +enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED }; + +static __inline void pctrie_node_store(smr_pctnode_t *p, void *val, + enum pctrie_access access); + /* * Allocate a node. Pre-allocation should ensure that the request * will always be satisfied. */ -static __inline struct pctrie_node * +static struct pctrie_node * pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t owner, uint16_t count, uint16_t clevel) { @@ -92,6 +104,11 @@ node = allocfn(ptree); if (node == NULL) return (NULL); + if (node->pn_last != 0) { + pctrie_node_store(&node->pn_child[node->pn_last - 1], NULL, + PCTRIE_UNSERIALIZED); + node->pn_last = 0; + } node->pn_owner = owner; node->pn_count = count; node->pn_clev = clevel; @@ -104,7 +121,7 @@ */ static __inline void pctrie_node_put(struct pctrie *ptree, struct pctrie_node *node, - pctrie_free_t freefn) + pctrie_free_t freefn, int8_t last) { #ifdef INVARIANTS int slot; @@ -112,10 +129,14 @@ KASSERT(node->pn_count == 0, ("pctrie_node_put: node %p has %d children", node, node->pn_count)); - for (slot = 0; slot < PCTRIE_COUNT; slot++) - KASSERT(node->pn_child[slot] == NULL, - ("pctrie_node_put: node %p has a child", node)); + for (slot = 0; slot < PCTRIE_COUNT; slot++) { + if (slot == last) + continue; + KASSERT(smr_unserialized_load(&node->pn_child[slot], true) == + NULL, ("pctrie_node_put: node %p has a child", node)); + } #endif + node->pn_last = last + 1; freefn(ptree, node); } @@ -143,24 +164,59 @@ return (ret); } +/* + * Fetch a node pointer from a slot. + */ +static __inline struct pctrie_node * +pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access) +{ + switch (access) { + case PCTRIE_UNSERIALIZED: + return (smr_unserialized_load(p, true)); + case PCTRIE_LOCKED: + return (smr_serialized_load(p, true)); + case PCTRIE_SMR: + return (smr_entered_load(p, smr)); + } + __assert_unreachable(); +} + +static __inline void +pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access) +{ + switch (access) { + case PCTRIE_UNSERIALIZED: + smr_unserialized_store(p, v, true); + break; + case PCTRIE_LOCKED: + smr_serialized_store(p, v, true); + break; + case PCTRIE_SMR: + panic("%s: Not supported in SMR section.", __func__); + break; + default: + __assert_unreachable(); + break; + } +} + /* * Get the root node for a tree. */ static __inline struct pctrie_node * -pctrie_getroot(struct pctrie *ptree) +pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access) { - - return ((struct pctrie_node *)ptree->pt_root); + return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access)); } /* * Set the root node for a tree. */ static __inline void -pctrie_setroot(struct pctrie *ptree, struct pctrie_node *node) +pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node, + enum pctrie_access access) { - - ptree->pt_root = (uintptr_t)node; + pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access); } /* @@ -188,12 +244,13 @@ */ static __inline void pctrie_addval(struct pctrie_node *node, uint64_t index, uint16_t clev, - uint64_t *val) + uint64_t *val, enum pctrie_access access) { int slot; slot = pctrie_slot(index, clev); - node->pn_child[slot] = (void *)((uintptr_t)val | PCTRIE_ISLEAF); + pctrie_node_store(&node->pn_child[slot], + (void *)((uintptr_t)val | PCTRIE_ISLEAF), access); } /* @@ -237,20 +294,23 @@ pctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node, pctrie_free_t freefn) { + struct pctrie_node *child; int slot; KASSERT(node->pn_count <= PCTRIE_COUNT, ("pctrie_reclaim_allnodes_int: bad count in node %p", node)); for (slot = 0; node->pn_count != 0; slot++) { - if (node->pn_child[slot] == NULL) + child = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_UNSERIALIZED); + if (child == NULL) continue; - if (!pctrie_isleaf(node->pn_child[slot])) - pctrie_reclaim_allnodes_int(ptree, - node->pn_child[slot], freefn); - node->pn_child[slot] = NULL; + if (!pctrie_isleaf(child)) + pctrie_reclaim_allnodes_int(ptree, child, freefn); + pctrie_node_store(&node->pn_child[slot], NULL, + PCTRIE_UNSERIALIZED); node->pn_count--; } - pctrie_node_put(ptree, node, freefn); + pctrie_node_put(ptree, node, freefn, -1); } /* @@ -262,6 +322,7 @@ struct pctrie_node *node; node = mem; + node->pn_last = 0; memset(node->pn_child, 0, sizeof(node->pn_child)); return (0); } @@ -281,8 +342,8 @@ pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn) { uint64_t index, newind; - void **parentp; struct pctrie_node *node, *tmp; + smr_pctnode_t *parentp; uint64_t *m; int slot; uint16_t clev; @@ -293,12 +354,12 @@ * The owner of record for root is not really important because it * will never be used. */ - node = pctrie_getroot(ptree); + node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); if (node == NULL) { ptree->pt_root = (uintptr_t)val | PCTRIE_ISLEAF; return (0); } - parentp = (void **)&ptree->pt_root; + parentp = (smr_pctnode_t *)&ptree->pt_root; for (;;) { if (pctrie_isleaf(node)) { m = pctrie_toval(node); @@ -310,20 +371,25 @@ pctrie_trimkey(index, clev + 1), 2, clev); if (tmp == NULL) return (ENOMEM); - *parentp = tmp; - pctrie_addval(tmp, index, clev, val); - pctrie_addval(tmp, *m, clev, m); + /* These writes are not yet visible due to ordering. */ + pctrie_addval(tmp, index, clev, val, + PCTRIE_UNSERIALIZED); + pctrie_addval(tmp, *m, clev, m, PCTRIE_UNSERIALIZED); + /* Synchronize to make leaf visible. */ + pctrie_node_store(parentp, tmp, PCTRIE_LOCKED); return (0); } else if (pctrie_keybarr(node, index)) break; slot = pctrie_slot(index, node->pn_clev); - if (node->pn_child[slot] == NULL) { + parentp = &node->pn_child[slot]; + tmp = pctrie_node_load(parentp, NULL, PCTRIE_LOCKED); + if (tmp == NULL) { node->pn_count++; - pctrie_addval(node, index, node->pn_clev, val); + pctrie_addval(node, index, node->pn_clev, val, + PCTRIE_LOCKED); return (0); } - parentp = &node->pn_child[slot]; - node = node->pn_child[slot]; + node = tmp; } /* @@ -337,10 +403,12 @@ pctrie_trimkey(index, clev + 1), 2, clev); if (tmp == NULL) return (ENOMEM); - *parentp = tmp; - pctrie_addval(tmp, index, clev, val); slot = pctrie_slot(newind, clev); - tmp->pn_child[slot] = node; + /* These writes are not yet visible due to ordering. */ + pctrie_addval(tmp, index, clev, val, PCTRIE_UNSERIALIZED); + pctrie_node_store(&tmp->pn_child[slot], node, PCTRIE_UNSERIALIZED); + /* Synchronize to make the above visible. */ + pctrie_node_store(parentp, tmp, PCTRIE_LOCKED); return (0); } @@ -349,31 +417,61 @@ * Returns the value stored at the index. If the index is not present, * NULL is returned. */ -uint64_t * -pctrie_lookup(struct pctrie *ptree, uint64_t index) +static __always_inline uint64_t * +_pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr, + enum pctrie_access access) { struct pctrie_node *node; uint64_t *m; int slot; - node = pctrie_getroot(ptree); + node = pctrie_root_load(ptree, smr, access); while (node != NULL) { if (pctrie_isleaf(node)) { m = pctrie_toval(node); if (*m == index) return (m); - else - break; - } else if (pctrie_keybarr(node, index)) + break; + } + if (pctrie_keybarr(node, index)) break; slot = pctrie_slot(index, node->pn_clev); - node = node->pn_child[slot]; + node = pctrie_node_load(&node->pn_child[slot], smr, access); } return (NULL); } /* - * Look up the nearest entry at a position bigger than or equal to index. + * Returns the value stored at the index, assuming access is externally + * synchronized by a lock. + * + * If the index is not present, NULL is returned. + */ +uint64_t * +pctrie_lookup(struct pctrie *ptree, uint64_t index) +{ + return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED)); +} + +/* + * Returns the value stored at the index without requiring an external lock. + * + * If the index is not present, NULL is returned. + */ +uint64_t * +pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr) +{ + uint64_t *res; + + smr_enter(smr); + res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR); + smr_exit(smr); + return (res); +} + +/* + * Look up the nearest entry at a position bigger than or equal to index, + * assuming access is externally synchronized by a lock. */ uint64_t * pctrie_lookup_ge(struct pctrie *ptree, uint64_t index) @@ -388,7 +486,7 @@ unsigned tos; int slot; - node = pctrie_getroot(ptree); + node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); if (node == NULL) return (NULL); else if (pctrie_isleaf(node)) { @@ -404,7 +502,7 @@ * If the keys differ before the current bisection node, * then the search key might rollback to the earliest * available bisection node or to the smallest key - * in the current node (if the owner is bigger than the + * in the current node (if the owner is greater than the * search key). */ if (pctrie_keybarr(node, index)) { @@ -439,7 +537,8 @@ ("pctrie_lookup_ge: keybarr failed")); } slot = pctrie_slot(index, node->pn_clev); - child = node->pn_child[slot]; + child = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); if (pctrie_isleaf(child)) { m = pctrie_toval(child); if (*m >= index) @@ -457,7 +556,8 @@ do { index += inc; slot++; - child = node->pn_child[slot]; + child = pctrie_node_load(&node->pn_child[slot], + NULL, PCTRIE_LOCKED); if (pctrie_isleaf(child)) { m = pctrie_toval(child); if (*m >= index) @@ -470,7 +570,7 @@ ("pctrie_lookup_ge: child is radix node")); /* - * If a value or edge bigger than the search slot is not found + * If a value or edge greater than the search slot is not found * in the current node, ascend to the next higher-level node. */ goto ascend; @@ -485,7 +585,8 @@ } /* - * Look up the nearest entry at a position less than or equal to index. + * Look up the nearest entry at a position less than or equal to index, + * assuming access is externally synchronized by a lock. */ uint64_t * pctrie_lookup_le(struct pctrie *ptree, uint64_t index) @@ -500,7 +601,7 @@ unsigned tos; int slot; - node = pctrie_getroot(ptree); + node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); if (node == NULL) return (NULL); else if (pctrie_isleaf(node)) { @@ -553,7 +654,8 @@ ("pctrie_lookup_le: keybarr failed")); } slot = pctrie_slot(index, node->pn_clev); - child = node->pn_child[slot]; + child = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); if (pctrie_isleaf(child)) { m = pctrie_toval(child); if (*m <= index) @@ -571,7 +673,8 @@ do { index -= inc; slot--; - child = node->pn_child[slot]; + child = pctrie_node_load(&node->pn_child[slot], + NULL, PCTRIE_LOCKED); if (pctrie_isleaf(child)) { m = pctrie_toval(child); if (*m <= index) @@ -605,16 +708,16 @@ void pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn) { - struct pctrie_node *node, *parent; + struct pctrie_node *node, *parent, *tmp; uint64_t *m; int i, slot; - node = pctrie_getroot(ptree); + node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); if (pctrie_isleaf(node)) { m = pctrie_toval(node); if (*m != index) panic("%s: invalid key found", __func__); - pctrie_setroot(ptree, NULL); + pctrie_root_store(ptree, NULL, PCTRIE_LOCKED); return; } parent = NULL; @@ -622,34 +725,46 @@ if (node == NULL) panic("pctrie_remove: impossible to locate the key"); slot = pctrie_slot(index, node->pn_clev); - if (pctrie_isleaf(node->pn_child[slot])) { - m = pctrie_toval(node->pn_child[slot]); + tmp = pctrie_node_load(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); + if (pctrie_isleaf(tmp)) { + m = pctrie_toval(tmp); if (*m != index) panic("%s: invalid key found", __func__); - node->pn_child[slot] = NULL; + pctrie_node_store(&node->pn_child[slot], NULL, + PCTRIE_LOCKED); node->pn_count--; if (node->pn_count > 1) break; - for (i = 0; i < PCTRIE_COUNT; i++) - if (node->pn_child[i] != NULL) + for (i = 0; i < PCTRIE_COUNT; i++) { + tmp = pctrie_node_load(&node->pn_child[i], + NULL, PCTRIE_LOCKED); + if (tmp != NULL) break; + } KASSERT(i != PCTRIE_COUNT, ("%s: invalid node configuration", __func__)); if (parent == NULL) - pctrie_setroot(ptree, node->pn_child[i]); + pctrie_root_store(ptree, tmp, PCTRIE_LOCKED); else { slot = pctrie_slot(index, parent->pn_clev); - KASSERT(parent->pn_child[slot] == node, + KASSERT(pctrie_node_load( + &parent->pn_child[slot], NULL, + PCTRIE_LOCKED) == node, ("%s: invalid child value", __func__)); - parent->pn_child[slot] = node->pn_child[i]; + pctrie_node_store(&parent->pn_child[slot], tmp, + PCTRIE_LOCKED); } + /* + * The child is still valid and we can not zero the + * pointer until all SMR references are gone. + */ node->pn_count--; - node->pn_child[i] = NULL; - pctrie_node_put(ptree, node, freefn); + pctrie_node_put(ptree, node, freefn, i); break; } parent = node; - node = node->pn_child[slot]; + node = tmp; } } @@ -663,10 +778,10 @@ { struct pctrie_node *root; - root = pctrie_getroot(ptree); + root = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED); if (root == NULL) return; - pctrie_setroot(ptree, NULL); + pctrie_root_store(ptree, NULL, PCTRIE_UNSERIALIZED); if (!pctrie_isleaf(root)) pctrie_reclaim_allnodes_int(ptree, root, freefn); } @@ -677,7 +792,7 @@ */ DB_SHOW_COMMAND(pctrienode, db_show_pctrienode) { - struct pctrie_node *node; + struct pctrie_node *node, *tmp; int i; if (!have_addr) @@ -686,12 +801,14 @@ db_printf("node %p, owner %jx, children count %u, level %u:\n", (void *)node, (uintmax_t)node->pn_owner, node->pn_count, node->pn_clev); - for (i = 0; i < PCTRIE_COUNT; i++) - if (node->pn_child[i] != NULL) + for (i = 0; i < PCTRIE_COUNT; i++) { + tmp = pctrie_node_load(&node->pn_child[i], NULL, + PCTRIE_UNSERIALIZED); + if (tmp != NULL) db_printf("slot: %d, val: %p, value: %p, clev: %d\n", - i, (void *)node->pn_child[i], - pctrie_isleaf(node->pn_child[i]) ? - pctrie_toval(node->pn_child[i]) : NULL, + i, (void *)tmp, + pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL, node->pn_clev); + } } #endif /* DDB */ Index: sys/sys/pctrie.h =================================================================== --- sys/sys/pctrie.h +++ sys/sys/pctrie.h @@ -34,9 +34,21 @@ #define _SYS_PCTRIE_H_ #include +#include #ifdef _KERNEL +#define PCTRIE_DEFINE_SMR(name, type, field, allocfn, freefn, smr) \ + PCTRIE_DEFINE(name, type, field, allocfn, freefn) \ + \ +static __inline struct type * \ +name##_PCTRIE_LOOKUP_UNLOCKED(struct pctrie *ptree, uint64_t key) \ +{ \ + \ + return name##_PCTRIE_VAL2PTR(pctrie_lookup_unlocked(ptree, \ + key, (smr))); \ +} \ + #define PCTRIE_DEFINE(name, type, field, allocfn, freefn) \ \ CTASSERT(sizeof(((struct type *)0)->field) == sizeof(uint64_t)); \ @@ -114,6 +126,8 @@ uint64_t *pctrie_lookup(struct pctrie *ptree, uint64_t key); uint64_t *pctrie_lookup_ge(struct pctrie *ptree, uint64_t key); uint64_t *pctrie_lookup_le(struct pctrie *ptree, uint64_t key); +uint64_t *pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t key, + smr_t smr); void pctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn); void pctrie_remove(struct pctrie *ptree, uint64_t key,