Index: sys/vm/vm_radix.h
===================================================================
--- sys/vm/vm_radix.h
+++ sys/vm/vm_radix.h
@@ -43,6 +43,7 @@
 vm_page_t vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index);
 vm_page_t vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index);
 vm_page_t vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index);
+vm_page_t vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index);
 void vm_radix_reclaim_allnodes(struct vm_radix *rtree);
 vm_page_t vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index);
 vm_page_t vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage);
Index: sys/vm/vm_radix.c
===================================================================
--- sys/vm/vm_radix.c
+++ sys/vm/vm_radix.c
@@ -58,11 +58,14 @@
 #include
 #include
 #include
+#include
 #include
+#include
 
 #include
 #include
 #include
+#include
 #include
 #include
@@ -95,6 +98,8 @@
 #define VM_RADIX_UNITLEVEL(lev)					\
     ((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))
 
+enum vm_radix_access { ATOMIC, PLAIN };
+
 struct vm_radix_node {
     vm_pindex_t rn_owner;           /* Owner of record. */
     uint16_t    rn_count;           /* Valid children. */
@@ -103,6 +108,7 @@
 };
 static uma_zone_t vm_radix_node_zone;
+static smr_t vm_radix_smr;
 
 /*
  * Allocate a radix node.
  */
@@ -112,7 +118,7 @@
 {
     struct vm_radix_node *rnode;
 
-    rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT);
+    rnode = uma_zalloc_smr(vm_radix_node_zone, M_NOWAIT);
     if (rnode == NULL)
         return (NULL);
     rnode->rn_owner = owner;
@@ -128,7 +134,7 @@
 vm_radix_node_put(struct vm_radix_node *rnode)
 {
 
-    uma_zfree(vm_radix_node_zone, rnode);
+    uma_zfree_smr(vm_radix_node_zone, rnode);
 }
 
 /*
@@ -155,14 +161,29 @@
     return (ret);
 }
 
+/*
+ * Fetch a node pointer from a slot in another node.
+ */
+static __inline struct vm_radix_node *
+vm_radix_getnode(void **rnode, enum vm_radix_access access)
+{
+
+    switch (access) {
+    case PLAIN:
+        return (*rnode);
+    case ATOMIC:
+        return ((void *)atomic_load_acq_ptr((uintptr_t *)rnode));
+    }
+}
+
 /*
  * Get the root node for a radix tree.
  */
 static __inline struct vm_radix_node *
-vm_radix_getroot(struct vm_radix *rtree)
+vm_radix_getroot(struct vm_radix *rtree, enum vm_radix_access access)
 {
 
-    return ((struct vm_radix_node *)rtree->rt_root);
+    return (vm_radix_getnode((void **)&rtree->rt_root, access));
 }
 
 /*
@@ -172,7 +193,7 @@
 vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
 {
 
-    rtree->rt_root = (uintptr_t)rnode;
+    atomic_store_rel_ptr(&rtree->rt_root, (uintptr_t)rnode);
 }
 
 /*
@@ -283,16 +304,6 @@
 }
 #endif
 
-static int
-vm_radix_node_zone_init(void *mem, int size __unused, int flags __unused)
-{
-    struct vm_radix_node *rnode;
-
-    rnode = mem;
-    bzero(rnode, sizeof(*rnode));
-    return (0);
-}
-
 #ifndef UMA_MD_SMALL_ALLOC
 void vm_radix_reserve_kva(void);
 /*
@@ -331,7 +342,9 @@
 #else
     NULL,
 #endif
-    vm_radix_node_zone_init, NULL, VM_RADIX_PAD, UMA_ZONE_VM);
+    NULL, NULL, VM_RADIX_PAD,
+    UMA_ZONE_VM | UMA_ZONE_SMR | UMA_ZONE_ZINIT);
+	vm_radix_smr = uma_zone_get_smr(vm_radix_node_zone);
 }
 
 /*
@@ -342,7 +355,7 @@
 vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
 {
     vm_pindex_t index, newind;
-    void **parentp;
+    volatile uintptr_t *parentp;
     struct vm_radix_node *rnode, *tmp;
     vm_page_t m;
     int slot;
@@ -354,12 +367,12 @@
      * The owner of record for root is not really important because it
      * will never be used.
      */
-    rnode = vm_radix_getroot(rtree);
+    rnode = vm_radix_getroot(rtree, PLAIN);
     if (rnode == NULL) {
         rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
         return (0);
     }
-    parentp = (void **)&rtree->rt_root;
+    parentp = (uintptr_t *)&rtree->rt_root;
     for (;;) {
         if (vm_radix_isleaf(rnode)) {
             m = vm_radix_topage(rnode);
@@ -371,9 +384,10 @@
                 clev + 1), 2, clev);
             if (tmp == NULL)
                 return (ENOMEM);
-            *parentp = tmp;
             vm_radix_addpage(tmp, index, clev, page);
             vm_radix_addpage(tmp, m->pindex, clev, m);
+            /* synchronize to keep leaf visible. */
+            atomic_store_rel_ptr(parentp, (uintptr_t)tmp);
             return (0);
         } else if (vm_radix_keybarr(rnode, index))
             break;
@@ -383,7 +397,7 @@
             vm_radix_addpage(rnode, index, rnode->rn_clev, page);
             return (0);
         }
-        parentp = &rnode->rn_child[slot];
+        parentp = (uintptr_t *)&rnode->rn_child[slot];
         rnode = rnode->rn_child[slot];
     }
 
@@ -397,10 +411,12 @@
     tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev);
     if (tmp == NULL)
         return (ENOMEM);
-    *parentp = tmp;
     vm_radix_addpage(tmp, index, clev, page);
     slot = vm_radix_slot(newind, clev);
     tmp->rn_child[slot] = rnode;
+    /* Synchronize with lookup to keep the original entry visible. */
+    atomic_store_rel_ptr(parentp, (uintptr_t)tmp);
+
     return (0);
 }
 
@@ -413,7 +429,7 @@
 {
     struct vm_radix_node *rnode;
 
-    rnode = vm_radix_getroot(rtree);
+    rnode = vm_radix_getroot(rtree, PLAIN);
     if (rnode == NULL)
         return (FALSE);
     return (vm_radix_isleaf(rnode));
@@ -423,31 +439,76 @@
  * Returns the value stored at the index.  If the index is not present,
  * NULL is returned.
  */
-vm_page_t
-vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
+static __always_inline vm_page_t
+_vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index,
+    enum vm_radix_access access)
 {
     struct vm_radix_node *rnode;
     vm_page_t m;
     int slot;
 
-    rnode = vm_radix_getroot(rtree);
+    rnode = vm_radix_getroot(rtree, access);
     while (rnode != NULL) {
         if (vm_radix_isleaf(rnode)) {
             m = vm_radix_topage(rnode);
             if (m->pindex == index)
                 return (m);
-            else
-                break;
-        } else if (vm_radix_keybarr(rnode, index))
+            break;
+        }
+        if (vm_radix_keybarr(rnode, index))
             break;
         slot = vm_radix_slot(index, rnode->rn_clev);
-        rnode = rnode->rn_child[slot];
+        rnode = vm_radix_getnode(&rnode->rn_child[slot], access);
     }
     return (NULL);
 }
 
 /*
- * Look up the nearest entry at a position bigger than or equal to index.
+ * Returns the value stored at the index assuming there is an external lock.
+ *
+ * If the index is not present, NULL is returned.
+ */
+vm_page_t
+vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
+{
+
+    return _vm_radix_lookup(rtree, index, PLAIN);
+}
+
+/*
+ * Returns the value stored at the index without requiring an external lock.
+ *
+ * If the index is not present, NULL is returned.
+ */
+vm_page_t
+vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
+{
+    struct vm_object *obj;
+    vm_page_t m;
+
+    smr_enter(vm_radix_smr);
+    for (;;) {
+        m = _vm_radix_lookup(rtree, index, ATOMIC);
+        if (m == NULL)
+            break;
+
+        /*
+         * vm_page_object_remove removes from the tree before
+         * clearing the pointer so it is safe to loop here. We
+         * may still return a stale result however. The caller
+         * is responsible for the final reference and verification.
+         */
+        obj = m->object;
+        if (__predict_true(obj != NULL && &obj->rtree == rtree))
+            break;
+    }
+    smr_exit(vm_radix_smr);
+
+    return (m);
+}
+
+/*
+ * Look up the nearest entry at a position greater than or equal to index.
  */
 vm_page_t
 vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
@@ -461,7 +522,7 @@
 #endif
     int slot, tos;
 
-    rnode = vm_radix_getroot(rtree);
+    rnode = vm_radix_getroot(rtree, PLAIN);
     if (rnode == NULL)
         return (NULL);
     else if (vm_radix_isleaf(rnode)) {
@@ -477,7 +538,7 @@
          * If the keys differ before the current bisection node,
          * then the search key might rollback to the earliest
          * available bisection node or to the smallest key
-         * in the current node (if the owner is bigger than the
+         * in the current node (if the owner is greater than the
          * search key).
          */
         if (vm_radix_keybarr(rnode, index)) {
@@ -512,7 +573,7 @@
                 ("vm_radix_lookup_ge: keybarr failed"));
         }
         slot = vm_radix_slot(index, rnode->rn_clev);
-        child = rnode->rn_child[slot];
+        child = vm_radix_getnode(&rnode->rn_child[slot], PLAIN);
         if (vm_radix_isleaf(child)) {
             m = vm_radix_topage(child);
             if (m->pindex >= index)
@@ -530,7 +591,8 @@
         do {
             index += inc;
             slot++;
-            child = rnode->rn_child[slot];
+            child = vm_radix_getnode(&rnode->rn_child[slot],
+                PLAIN);
             if (vm_radix_isleaf(child)) {
                 m = vm_radix_topage(child);
                 if (m->pindex >= index)
@@ -543,7 +605,7 @@
                 ("vm_radix_lookup_ge: child is radix node"));
 
         /*
-         * If a page or edge bigger than the search slot is not found
+         * If a page or edge greater than the search slot is not found
          * in the current node, ascend to the next higher-level node.
          */
         goto ascend;
@@ -572,7 +634,7 @@
 #endif
     int slot, tos;
 
-    rnode = vm_radix_getroot(rtree);
+    rnode = vm_radix_getroot(rtree, PLAIN);
     if (rnode == NULL)
         return (NULL);
     else if (vm_radix_isleaf(rnode)) {
@@ -625,7 +687,7 @@
                 ("vm_radix_lookup_le: keybarr failed"));
         }
         slot = vm_radix_slot(index, rnode->rn_clev);
-        child = rnode->rn_child[slot];
+        child = vm_radix_getnode(&rnode->rn_child[slot], PLAIN);
         if (vm_radix_isleaf(child)) {
             m = vm_radix_topage(child);
             if (m->pindex <= index)
@@ -643,7 +705,8 @@
         do {
             index -= inc;
             slot--;
-            child = rnode->rn_child[slot];
+            child = vm_radix_getnode(&rnode->rn_child[slot],
+                PLAIN);
             if (vm_radix_isleaf(child)) {
                 m = vm_radix_topage(child);
                 if (m->pindex <= index)
@@ -681,7 +744,7 @@
     vm_page_t m;
     int i, slot;
 
-    rnode = vm_radix_getroot(rtree);
+    rnode = vm_radix_getroot(rtree, PLAIN);
     if (vm_radix_isleaf(rnode)) {
         m = vm_radix_topage(rnode);
         if (m->pindex != index)
@@ -713,7 +776,9 @@
             slot = vm_radix_slot(index, parent->rn_clev);
             KASSERT(parent->rn_child[slot] == rnode,
                 ("%s: invalid child value", __func__));
-            parent->rn_child[slot] = rnode->rn_child[i];
+            atomic_store_rel_ptr(
+                (uintptr_t *)&parent->rn_child[slot],
+                (uintptr_t)rnode->rn_child[i]);
         }
         rnode->rn_count--;
         rnode->rn_child[i] = NULL;
@@ -735,7 +800,7 @@
 {
     struct vm_radix_node *root;
 
-    root = vm_radix_getroot(rtree);
+    root = vm_radix_getroot(rtree, PLAIN);
     if (root == NULL)
         return;
     vm_radix_setroot(rtree, NULL);
@@ -756,7 +821,7 @@
     int slot;
 
     index = newpage->pindex;
-    rnode = vm_radix_getroot(rtree);
+    rnode = vm_radix_getroot(rtree, PLAIN);
     if (rnode == NULL)
         panic("%s: replacing page on an empty trie", __func__);
     if (vm_radix_isleaf(rnode)) {
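The change pairs release stores on the mutating side (atomic_store_rel_ptr() in the insert and remove paths) with acquire loads on the lockless side (the ATOMIC case of vm_radix_getnode()), so a concurrent reader sees either the old child pointer or a fully initialized replacement, while uma_zfree_smr() keeps reclaimed nodes from being reused until readers inside smr_enter()/smr_exit() sections have drained. The sketch below is only an illustration of how a caller might consume the new vm_radix_lookup_unlocked(); vm_fast_lookup() is a hypothetical helper that is not part of this patch, and a real consumer would normally busy, wire, or otherwise reference the page rather than fall back to the locked lookup.

/*
 * Hypothetical consumer sketch (illustration only, not part of this patch):
 * translate an object/pindex pair to a page without the object lock, then
 * re-validate the possibly stale result under the lock.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rwlock.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

static vm_page_t
vm_fast_lookup(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	/* Speculative, lock-free lookup; the result may already be stale. */
	m = vm_radix_lookup_unlocked(&object->rtree, pindex);
	if (m == NULL)
		return (NULL);

	/*
	 * vm_radix_lookup_unlocked() itself dereferences the page it found,
	 * so doing the same here follows the patch's assumption; the page's
	 * identity still has to be confirmed before it is trusted.
	 */
	VM_OBJECT_RLOCK(object);
	if (m->object != object || m->pindex != pindex)
		m = vm_radix_lookup(&object->rtree, pindex);
	VM_OBJECT_RUNLOCK(object);
	return (m);
}

This mirrors the validation loop inside vm_radix_lookup_unlocked(): the lockless lookup only promises a page that was recently at that index, and it is the caller's re-check of m->object and m->pindex, here under the object lock, that makes the result authoritative.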