Page Menu · Home · FreeBSD

D23446.id68399.diff
No One (Temporary)

D23446.id68399.diff

Index: sys/vm/vm_radix.h
===================================================================
--- sys/vm/vm_radix.h
+++ sys/vm/vm_radix.h
@@ -43,6 +43,7 @@
vm_page_t vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index);
vm_page_t vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index);
vm_page_t vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index);
+vm_page_t vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index);
void vm_radix_reclaim_allnodes(struct vm_radix *rtree);
vm_page_t vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index);
vm_page_t vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage);
Index: sys/vm/vm_radix.c
===================================================================
--- sys/vm/vm_radix.c
+++ sys/vm/vm_radix.c
@@ -58,11 +58,14 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/proc.h>
#include <sys/vmmeter.h>
+#include <sys/smr.h>
#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
+#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>
@@ -95,14 +98,21 @@
#define VM_RADIX_UNITLEVEL(lev) \
((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))
+enum vm_radix_access { SMR, LOCKED, UNSERIALIZED };
+
+struct vm_radix_node;
+SMR_TYPE_DECLARE(smrnode_t, struct vm_radix_node *);
+
struct vm_radix_node {
- vm_pindex_t rn_owner; /* Owner of record. */
- uint16_t rn_count; /* Valid children. */
- uint16_t rn_clev; /* Current level. */
- void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */
+ vm_pindex_t rn_owner; /* Owner of record. */
+ uint16_t rn_count; /* Valid children. */
+ uint8_t rn_clev; /* Current level. */
+ int8_t rn_last; /* zero last ptr. */
+ smrnode_t rn_child[VM_RADIX_COUNT]; /* Child nodes. */
};
static uma_zone_t vm_radix_node_zone;
+static smr_t vm_radix_smr;
/*
* Allocate a radix node.
@@ -112,7 +122,7 @@
{
struct vm_radix_node *rnode;
- rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT);
+ rnode = uma_zalloc_smr(vm_radix_node_zone, M_NOWAIT);
if (rnode == NULL)
return (NULL);
rnode->rn_owner = owner;
@@ -125,10 +135,11 @@
* Free radix node.
*/
static __inline void
-vm_radix_node_put(struct vm_radix_node *rnode)
+vm_radix_node_put(struct vm_radix_node *rnode, int8_t last)
{
- uma_zfree(vm_radix_node_zone, rnode);
+ rnode->rn_last = last;
+ uma_zfree_smr(vm_radix_node_zone, rnode);
}
/*
@@ -155,24 +166,60 @@
return (ret);
}
+/*
+ * Fetch a node pointer from a slot in another node.
+ */
+static __inline struct vm_radix_node *
+vm_radix_node_load(smrnode_t *p, enum vm_radix_access access)
+{
+
+ switch (access) {
+ case UNSERIALIZED:
+ return (smr_unserialized_load(p, true));
+ case LOCKED:
+ return (smr_serialized_load(p, true));
+ case SMR:
+ return (smr_entered_load(p, vm_radix_smr));
+ }
+}
+
+static __inline void
+vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
+ enum vm_radix_access access)
+{
+
+
+ switch (access) {
+ case UNSERIALIZED:
+ smr_unserialized_store(p, v, true);
+ break;
+ case LOCKED:
+ smr_serialized_store(p, v, true);
+ break;
+ case SMR:
+ panic("vm_radix_node_store: Not supported in smr section.");
+ }
+}
+
/*
* Get the root node for a radix tree.
*/
static __inline struct vm_radix_node *
-vm_radix_getroot(struct vm_radix *rtree)
+vm_radix_root_load(struct vm_radix *rtree, enum vm_radix_access access)
{
- return ((struct vm_radix_node *)rtree->rt_root);
+ return (vm_radix_node_load((smrnode_t *)&rtree->rt_root, access));
}
/*
* Set the root node for a radix tree.
*/
static __inline void
-vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
+vm_radix_root_store(struct vm_radix *rtree, struct vm_radix_node *rnode,
+ enum vm_radix_access access)
{
- rtree->rt_root = (uintptr_t)rnode;
+ vm_radix_node_store((smrnode_t *)&rtree->rt_root, rnode, access);
}
/*
@@ -200,12 +247,13 @@
*/
static __inline void
vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
- vm_page_t page)
+ vm_page_t page, enum vm_radix_access access)
{
int slot;
slot = vm_radix_slot(index, clev);
- rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
+ vm_radix_node_store(&rnode->rn_child[slot],
+ (struct vm_radix_node *)((uintptr_t)page | VM_RADIX_ISLEAF), access);
}
/*
@@ -248,22 +296,23 @@
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
+ struct vm_radix_node *child;
int slot;
KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
for (slot = 0; rnode->rn_count != 0; slot++) {
- if (rnode->rn_child[slot] == NULL)
+ child = vm_radix_node_load(&rnode->rn_child[slot], UNSERIALIZED);
+ if (child == NULL)
continue;
- if (!vm_radix_isleaf(rnode->rn_child[slot]))
- vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
- rnode->rn_child[slot] = NULL;
+ if (!vm_radix_isleaf(child))
+ vm_radix_reclaim_allnodes_int(child);
+ vm_radix_node_store(&rnode->rn_child[slot], NULL, UNSERIALIZED);
rnode->rn_count--;
}
- vm_radix_node_put(rnode);
+ vm_radix_node_put(rnode, -1);
}
-#ifdef INVARIANTS
/*
* Radix node zone destructor.
*/
@@ -274,23 +323,23 @@
int slot;
rnode = mem;
+ /*
+ * We want to clear the last child pointer after the final section
+ * has exited so lookup can not return false negatives.
+ */
+ if (rnode->rn_last != -1) {
+ vm_radix_node_store(&rnode->rn_child[rnode->rn_last],
+ NULL, UNSERIALIZED);
+ rnode->rn_last = -1;
+ }
+#ifdef INVARIANTS
KASSERT(rnode->rn_count == 0,
("vm_radix_node_put: rnode %p has %d children", rnode,
rnode->rn_count));
for (slot = 0; slot < VM_RADIX_COUNT; slot++)
- KASSERT(rnode->rn_child[slot] == NULL,
- ("vm_radix_node_put: rnode %p has a child", rnode));
-}
+ KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) ==
+ NULL, ("vm_radix_node_put: rnode %p has a child", rnode));
#endif
-
-static int
-vm_radix_node_zone_init(void *mem, int size __unused, int flags __unused)
-{
- struct vm_radix_node *rnode;
-
- rnode = mem;
- bzero(rnode, sizeof(*rnode));
- return (0);
}
#ifndef UMA_MD_SMALL_ALLOC
@@ -326,12 +375,9 @@
vm_radix_node_zone = uma_zcreate("RADIX NODE",
sizeof(struct vm_radix_node), NULL,
-#ifdef INVARIANTS
- vm_radix_node_zone_dtor,
-#else
- NULL,
-#endif
- vm_radix_node_zone_init, NULL, VM_RADIX_PAD, UMA_ZONE_VM);
+ vm_radix_node_zone_dtor, NULL, NULL, VM_RADIX_PAD,
+ UMA_ZONE_VM | UMA_ZONE_SMR | UMA_ZONE_ZINIT);
+ vm_radix_smr = uma_zone_get_smr(vm_radix_node_zone);
}
/*
@@ -342,8 +388,8 @@
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
vm_pindex_t index, newind;
- void **parentp;
struct vm_radix_node *rnode, *tmp;
+ smrnode_t *parentp;
vm_page_t m;
int slot;
uint16_t clev;
@@ -354,12 +400,12 @@
* The owner of record for root is not really important because it
* will never be used.
*/
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (rnode == NULL) {
rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
return (0);
}
- parentp = (void **)&rtree->rt_root;
+ parentp = (smrnode_t *)&rtree->rt_root;
for (;;) {
if (vm_radix_isleaf(rnode)) {
m = vm_radix_topage(rnode);
@@ -371,20 +417,24 @@
clev + 1), 2, clev);
if (tmp == NULL)
return (ENOMEM);
- *parentp = tmp;
- vm_radix_addpage(tmp, index, clev, page);
- vm_radix_addpage(tmp, m->pindex, clev, m);
+ /* These writes are not yet visible due to ordering. */
+ vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
+ vm_radix_addpage(tmp, m->pindex, clev, m, UNSERIALIZED);
+ /* Synchronize to make leaf visible. */
+ vm_radix_node_store(parentp, tmp, LOCKED);
return (0);
} else if (vm_radix_keybarr(rnode, index))
break;
slot = vm_radix_slot(index, rnode->rn_clev);
- if (rnode->rn_child[slot] == NULL) {
+ parentp = &rnode->rn_child[slot];
+ tmp = vm_radix_node_load(parentp, LOCKED);
+ if (tmp == NULL) {
rnode->rn_count++;
- vm_radix_addpage(rnode, index, rnode->rn_clev, page);
+ vm_radix_addpage(rnode, index, rnode->rn_clev, page,
+ LOCKED);
return (0);
}
- parentp = &rnode->rn_child[slot];
- rnode = rnode->rn_child[slot];
+ rnode = tmp;
}
/*
@@ -397,10 +447,13 @@
tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev);
if (tmp == NULL)
return (ENOMEM);
- *parentp = tmp;
- vm_radix_addpage(tmp, index, clev, page);
slot = vm_radix_slot(newind, clev);
- tmp->rn_child[slot] = rnode;
+ /* These writes are not yet visible due to ordering. */
+ vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
+ vm_radix_node_store(&tmp->rn_child[slot], rnode, UNSERIALIZED);
+ /* Serializing write to make the above visible. */
+ vm_radix_node_store(parentp, tmp, LOCKED);
+
return (0);
}
@@ -413,7 +466,7 @@
{
struct vm_radix_node *rnode;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (rnode == NULL)
return (FALSE);
return (vm_radix_isleaf(rnode));
@@ -423,31 +476,61 @@
* Returns the value stored at the index. If the index is not present,
* NULL is returned.
*/
-vm_page_t
-vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
+static __always_inline vm_page_t
+_vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index,
+ enum vm_radix_access access)
{
struct vm_radix_node *rnode;
vm_page_t m;
int slot;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, access);
while (rnode != NULL) {
if (vm_radix_isleaf(rnode)) {
m = vm_radix_topage(rnode);
if (m->pindex == index)
return (m);
- else
- break;
- } else if (vm_radix_keybarr(rnode, index))
+ break;
+ }
+ if (vm_radix_keybarr(rnode, index))
break;
slot = vm_radix_slot(index, rnode->rn_clev);
- rnode = rnode->rn_child[slot];
+ rnode = vm_radix_node_load(&rnode->rn_child[slot], access);
}
return (NULL);
}
/*
- * Look up the nearest entry at a position bigger than or equal to index.
+ * Returns the value stored at the index assuming there is an external lock.
+ *
+ * If the index is not present, NULL is returned.
+ */
+vm_page_t
+vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
+{
+
+ return _vm_radix_lookup(rtree, index, LOCKED);
+}
+
+/*
+ * Returns the value stored at the index without requiring an external lock.
+ *
+ * If the index is not present, NULL is returned.
+ */
+vm_page_t
+vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
+{
+ vm_page_t m;
+
+ smr_enter(vm_radix_smr);
+ m = _vm_radix_lookup(rtree, index, SMR);
+ smr_exit(vm_radix_smr);
+
+ return (m);
+}
+
+/*
+ * Look up the nearest entry at a position greater than or equal to index.
*/
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
@@ -461,7 +544,7 @@
#endif
int slot, tos;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (rnode == NULL)
return (NULL);
else if (vm_radix_isleaf(rnode)) {
@@ -477,7 +560,7 @@
* If the keys differ before the current bisection node,
* then the search key might rollback to the earliest
* available bisection node or to the smallest key
- * in the current node (if the owner is bigger than the
+ * in the current node (if the owner is greater than the
* search key).
*/
if (vm_radix_keybarr(rnode, index)) {
@@ -512,7 +595,7 @@
("vm_radix_lookup_ge: keybarr failed"));
}
slot = vm_radix_slot(index, rnode->rn_clev);
- child = rnode->rn_child[slot];
+ child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
if (vm_radix_isleaf(child)) {
m = vm_radix_topage(child);
if (m->pindex >= index)
@@ -530,7 +613,8 @@
do {
index += inc;
slot++;
- child = rnode->rn_child[slot];
+ child = vm_radix_node_load(&rnode->rn_child[slot],
+ LOCKED);
if (vm_radix_isleaf(child)) {
m = vm_radix_topage(child);
if (m->pindex >= index)
@@ -543,7 +627,7 @@
("vm_radix_lookup_ge: child is radix node"));
/*
- * If a page or edge bigger than the search slot is not found
+ * If a page or edge greater than the search slot is not found
* in the current node, ascend to the next higher-level node.
*/
goto ascend;
@@ -572,7 +656,7 @@
#endif
int slot, tos;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (rnode == NULL)
return (NULL);
else if (vm_radix_isleaf(rnode)) {
@@ -625,7 +709,7 @@
("vm_radix_lookup_le: keybarr failed"));
}
slot = vm_radix_slot(index, rnode->rn_clev);
- child = rnode->rn_child[slot];
+ child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
if (vm_radix_isleaf(child)) {
m = vm_radix_topage(child);
if (m->pindex <= index)
@@ -643,7 +727,8 @@
do {
index -= inc;
slot--;
- child = rnode->rn_child[slot];
+ child = vm_radix_node_load(&rnode->rn_child[slot],
+ LOCKED);
if (vm_radix_isleaf(child)) {
m = vm_radix_topage(child);
if (m->pindex <= index)
@@ -677,16 +762,16 @@
vm_page_t
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
- struct vm_radix_node *rnode, *parent;
+ struct vm_radix_node *rnode, *parent, *tmp;
vm_page_t m;
int i, slot;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (vm_radix_isleaf(rnode)) {
m = vm_radix_topage(rnode);
if (m->pindex != index)
return (NULL);
- vm_radix_setroot(rtree, NULL);
+ vm_radix_root_store(rtree, NULL, LOCKED);
return (m);
}
parent = NULL;
@@ -694,34 +779,42 @@
if (rnode == NULL)
return (NULL);
slot = vm_radix_slot(index, rnode->rn_clev);
- if (vm_radix_isleaf(rnode->rn_child[slot])) {
- m = vm_radix_topage(rnode->rn_child[slot]);
+ tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
+ if (vm_radix_isleaf(tmp)) {
+ m = vm_radix_topage(tmp);
if (m->pindex != index)
return (NULL);
- rnode->rn_child[slot] = NULL;
+ vm_radix_node_store(&rnode->rn_child[slot], NULL, LOCKED);
rnode->rn_count--;
if (rnode->rn_count > 1)
return (m);
for (i = 0; i < VM_RADIX_COUNT; i++)
- if (rnode->rn_child[i] != NULL)
+ if (vm_radix_node_load(&rnode->rn_child[i],
+ LOCKED) != NULL)
break;
KASSERT(i != VM_RADIX_COUNT,
("%s: invalid node configuration", __func__));
+ tmp = vm_radix_node_load(&rnode->rn_child[i], LOCKED);
if (parent == NULL)
- vm_radix_setroot(rtree, rnode->rn_child[i]);
+ vm_radix_root_store(rtree, tmp, LOCKED);
else {
slot = vm_radix_slot(index, parent->rn_clev);
- KASSERT(parent->rn_child[slot] == rnode,
+ KASSERT(vm_radix_node_load(
+ &parent->rn_child[slot], LOCKED) == rnode,
("%s: invalid child value", __func__));
- parent->rn_child[slot] = rnode->rn_child[i];
+ vm_radix_node_store(&parent->rn_child[slot],
+ tmp, LOCKED);
}
+ /*
+ * The child is still valid and we can not zero the
+ * pointer until all smr references are gone.
+ */
rnode->rn_count--;
- rnode->rn_child[i] = NULL;
- vm_radix_node_put(rnode);
+ vm_radix_node_put(rnode, i);
return (m);
}
parent = rnode;
- rnode = rnode->rn_child[slot];
+ rnode = tmp;
}
}
@@ -735,10 +828,10 @@
{
struct vm_radix_node *root;
- root = vm_radix_getroot(rtree);
+ root = vm_radix_root_load(rtree, LOCKED);
if (root == NULL)
return;
- vm_radix_setroot(rtree, NULL);
+ vm_radix_root_store(rtree, NULL, UNSERIALIZED);
if (!vm_radix_isleaf(root))
vm_radix_reclaim_allnodes_int(root);
}
@@ -750,13 +843,13 @@
vm_page_t
vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
{
- struct vm_radix_node *rnode;
+ struct vm_radix_node *rnode, *tmp;
vm_page_t m;
vm_pindex_t index;
int slot;
index = newpage->pindex;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (rnode == NULL)
panic("%s: replacing page on an empty trie", __func__);
if (vm_radix_isleaf(rnode)) {
@@ -769,19 +862,19 @@
}
for (;;) {
slot = vm_radix_slot(index, rnode->rn_clev);
- if (vm_radix_isleaf(rnode->rn_child[slot])) {
- m = vm_radix_topage(rnode->rn_child[slot]);
+ tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
+ if (vm_radix_isleaf(tmp)) {
+ m = vm_radix_topage(tmp);
if (m->pindex == index) {
- rnode->rn_child[slot] =
- (void *)((uintptr_t)newpage |
- VM_RADIX_ISLEAF);
+ vm_radix_node_store(&rnode->rn_child[slot],
+ (struct vm_radix_node *)((uintptr_t)newpage |
+ VM_RADIX_ISLEAF), LOCKED);
return (m);
} else
break;
- } else if (rnode->rn_child[slot] == NULL ||
- vm_radix_keybarr(rnode->rn_child[slot], index))
+ } else if (tmp == NULL || vm_radix_keybarr(tmp, index))
break;
- rnode = rnode->rn_child[slot];
+ rnode = tmp;
}
panic("%s: original replacing page not found", __func__);
}
@@ -798,7 +891,7 @@
*/
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
- struct vm_radix_node *rnode;
+ struct vm_radix_node *rnode, *tmp;
int i;
if (!have_addr)
@@ -807,12 +900,13 @@
db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
(void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
rnode->rn_clev);
- for (i = 0; i < VM_RADIX_COUNT; i++)
- if (rnode->rn_child[i] != NULL)
+ for (i = 0; i < VM_RADIX_COUNT; i++) {
+ tmp = vm_radix_node_load(&rnode->rn_child[i], UNSERIALIZED);
+ if (tmp != NULL)
db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
- i, (void *)rnode->rn_child[i],
- vm_radix_isleaf(rnode->rn_child[i]) ?
- vm_radix_topage(rnode->rn_child[i]) : NULL,
+ i, (void *)tmp,
+ vm_radix_isleaf(tmp) ? vm_radix_topage(tmp) : NULL,
rnode->rn_clev);
+ }
}
#endif /* DDB */

File Metadata

Mime Type
text/plain
Expires
Thu, Mar 13, 1:21 PM (18 h, 5 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17136238
Default Alt Text
D23446.id68399.diff (17 KB)

Event Timeline