Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F112080830
D23446.id68399.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
17 KB
Referenced Files
None
Subscribers
None
D23446.id68399.diff
View Options
Index: sys/vm/vm_radix.h
===================================================================
--- sys/vm/vm_radix.h
+++ sys/vm/vm_radix.h
@@ -43,6 +43,7 @@
vm_page_t vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index);
vm_page_t vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index);
vm_page_t vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index);
+vm_page_t vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index);
void vm_radix_reclaim_allnodes(struct vm_radix *rtree);
vm_page_t vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index);
vm_page_t vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage);
Index: sys/vm/vm_radix.c
===================================================================
--- sys/vm/vm_radix.c
+++ sys/vm/vm_radix.c
@@ -58,11 +58,14 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/proc.h>
#include <sys/vmmeter.h>
+#include <sys/smr.h>
#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
+#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>
@@ -95,14 +98,21 @@
#define VM_RADIX_UNITLEVEL(lev) \
((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))
+enum vm_radix_access { SMR, LOCKED, UNSERIALIZED };
+
+struct vm_radix_node;
+SMR_TYPE_DECLARE(smrnode_t, struct vm_radix_node *);
+
struct vm_radix_node {
- vm_pindex_t rn_owner; /* Owner of record. */
- uint16_t rn_count; /* Valid children. */
- uint16_t rn_clev; /* Current level. */
- void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */
+ vm_pindex_t rn_owner; /* Owner of record. */
+ uint16_t rn_count; /* Valid children. */
+ uint8_t rn_clev; /* Current level. */
+ int8_t rn_last; /* zero last ptr. */
+ smrnode_t rn_child[VM_RADIX_COUNT]; /* Child nodes. */
};
static uma_zone_t vm_radix_node_zone;
+static smr_t vm_radix_smr;
/*
* Allocate a radix node.
@@ -112,7 +122,7 @@
{
struct vm_radix_node *rnode;
- rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT);
+ rnode = uma_zalloc_smr(vm_radix_node_zone, M_NOWAIT);
if (rnode == NULL)
return (NULL);
rnode->rn_owner = owner;
@@ -125,10 +135,11 @@
* Free radix node.
*/
static __inline void
-vm_radix_node_put(struct vm_radix_node *rnode)
+vm_radix_node_put(struct vm_radix_node *rnode, int8_t last)
{
- uma_zfree(vm_radix_node_zone, rnode);
+ rnode->rn_last = last;
+ uma_zfree_smr(vm_radix_node_zone, rnode);
}
/*
@@ -155,24 +166,60 @@
return (ret);
}
+/*
+ * Fetch a node pointer from a slot in another node.
+ */
+static __inline struct vm_radix_node *
+vm_radix_node_load(smrnode_t *p, enum vm_radix_access access)
+{
+
+ switch (access) {
+ case UNSERIALIZED:
+ return (smr_unserialized_load(p, true));
+ case LOCKED:
+ return (smr_serialized_load(p, true));
+ case SMR:
+ return (smr_entered_load(p, vm_radix_smr));
+ }
+}
+
+static __inline void
+vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
+ enum vm_radix_access access)
+{
+
+
+ switch (access) {
+ case UNSERIALIZED:
+ smr_unserialized_store(p, v, true);
+ break;
+ case LOCKED:
+ smr_serialized_store(p, v, true);
+ break;
+ case SMR:
+ panic("vm_radix_node_store: Not supported in smr section.");
+ }
+}
+
/*
* Get the root node for a radix tree.
*/
static __inline struct vm_radix_node *
-vm_radix_getroot(struct vm_radix *rtree)
+vm_radix_root_load(struct vm_radix *rtree, enum vm_radix_access access)
{
- return ((struct vm_radix_node *)rtree->rt_root);
+ return (vm_radix_node_load((smrnode_t *)&rtree->rt_root, access));
}
/*
* Set the root node for a radix tree.
*/
static __inline void
-vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
+vm_radix_root_store(struct vm_radix *rtree, struct vm_radix_node *rnode,
+ enum vm_radix_access access)
{
- rtree->rt_root = (uintptr_t)rnode;
+ vm_radix_node_store((smrnode_t *)&rtree->rt_root, rnode, access);
}
/*
@@ -200,12 +247,13 @@
*/
static __inline void
vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
- vm_page_t page)
+ vm_page_t page, enum vm_radix_access access)
{
int slot;
slot = vm_radix_slot(index, clev);
- rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
+ vm_radix_node_store(&rnode->rn_child[slot],
+ (struct vm_radix_node *)((uintptr_t)page | VM_RADIX_ISLEAF), access);
}
/*
@@ -248,22 +296,23 @@
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
+ struct vm_radix_node *child;
int slot;
KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
for (slot = 0; rnode->rn_count != 0; slot++) {
- if (rnode->rn_child[slot] == NULL)
+ child = vm_radix_node_load(&rnode->rn_child[slot], UNSERIALIZED);
+ if (child == NULL)
continue;
- if (!vm_radix_isleaf(rnode->rn_child[slot]))
- vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
- rnode->rn_child[slot] = NULL;
+ if (!vm_radix_isleaf(child))
+ vm_radix_reclaim_allnodes_int(child);
+ vm_radix_node_store(&rnode->rn_child[slot], NULL, UNSERIALIZED);
rnode->rn_count--;
}
- vm_radix_node_put(rnode);
+ vm_radix_node_put(rnode, -1);
}
-#ifdef INVARIANTS
/*
* Radix node zone destructor.
*/
@@ -274,23 +323,23 @@
int slot;
rnode = mem;
+ /*
+ * We want to clear the last child pointer after the final section
+ * has exited so lookup can not return false negatives.
+ */
+ if (rnode->rn_last != -1) {
+ vm_radix_node_store(&rnode->rn_child[rnode->rn_last],
+ NULL, UNSERIALIZED);
+ rnode->rn_last = -1;
+ }
+#ifdef INVARIANTS
KASSERT(rnode->rn_count == 0,
("vm_radix_node_put: rnode %p has %d children", rnode,
rnode->rn_count));
for (slot = 0; slot < VM_RADIX_COUNT; slot++)
- KASSERT(rnode->rn_child[slot] == NULL,
- ("vm_radix_node_put: rnode %p has a child", rnode));
-}
+ KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) ==
+ NULL, ("vm_radix_node_put: rnode %p has a child", rnode));
#endif
-
-static int
-vm_radix_node_zone_init(void *mem, int size __unused, int flags __unused)
-{
- struct vm_radix_node *rnode;
-
- rnode = mem;
- bzero(rnode, sizeof(*rnode));
- return (0);
}
#ifndef UMA_MD_SMALL_ALLOC
@@ -326,12 +375,9 @@
vm_radix_node_zone = uma_zcreate("RADIX NODE",
sizeof(struct vm_radix_node), NULL,
-#ifdef INVARIANTS
- vm_radix_node_zone_dtor,
-#else
- NULL,
-#endif
- vm_radix_node_zone_init, NULL, VM_RADIX_PAD, UMA_ZONE_VM);
+ vm_radix_node_zone_dtor, NULL, NULL, VM_RADIX_PAD,
+ UMA_ZONE_VM | UMA_ZONE_SMR | UMA_ZONE_ZINIT);
+ vm_radix_smr = uma_zone_get_smr(vm_radix_node_zone);
}
/*
@@ -342,8 +388,8 @@
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
vm_pindex_t index, newind;
- void **parentp;
struct vm_radix_node *rnode, *tmp;
+ smrnode_t *parentp;
vm_page_t m;
int slot;
uint16_t clev;
@@ -354,12 +400,12 @@
* The owner of record for root is not really important because it
* will never be used.
*/
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (rnode == NULL) {
rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
return (0);
}
- parentp = (void **)&rtree->rt_root;
+ parentp = (smrnode_t *)&rtree->rt_root;
for (;;) {
if (vm_radix_isleaf(rnode)) {
m = vm_radix_topage(rnode);
@@ -371,20 +417,24 @@
clev + 1), 2, clev);
if (tmp == NULL)
return (ENOMEM);
- *parentp = tmp;
- vm_radix_addpage(tmp, index, clev, page);
- vm_radix_addpage(tmp, m->pindex, clev, m);
+ /* These writes are not yet visible due to ordering. */
+ vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
+ vm_radix_addpage(tmp, m->pindex, clev, m, UNSERIALIZED);
+ /* Synchronize to make leaf visible. */
+ vm_radix_node_store(parentp, tmp, LOCKED);
return (0);
} else if (vm_radix_keybarr(rnode, index))
break;
slot = vm_radix_slot(index, rnode->rn_clev);
- if (rnode->rn_child[slot] == NULL) {
+ parentp = &rnode->rn_child[slot];
+ tmp = vm_radix_node_load(parentp, LOCKED);
+ if (tmp == NULL) {
rnode->rn_count++;
- vm_radix_addpage(rnode, index, rnode->rn_clev, page);
+ vm_radix_addpage(rnode, index, rnode->rn_clev, page,
+ LOCKED);
return (0);
}
- parentp = &rnode->rn_child[slot];
- rnode = rnode->rn_child[slot];
+ rnode = tmp;
}
/*
@@ -397,10 +447,13 @@
tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev);
if (tmp == NULL)
return (ENOMEM);
- *parentp = tmp;
- vm_radix_addpage(tmp, index, clev, page);
slot = vm_radix_slot(newind, clev);
- tmp->rn_child[slot] = rnode;
+ /* These writes are not yet visible due to ordering. */
+ vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
+ vm_radix_node_store(&tmp->rn_child[slot], rnode, UNSERIALIZED);
+ /* Serializing write to make the above visible. */
+ vm_radix_node_store(parentp, tmp, LOCKED);
+
return (0);
}
@@ -413,7 +466,7 @@
{
struct vm_radix_node *rnode;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (rnode == NULL)
return (FALSE);
return (vm_radix_isleaf(rnode));
@@ -423,31 +476,61 @@
* Returns the value stored at the index. If the index is not present,
* NULL is returned.
*/
-vm_page_t
-vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
+static __always_inline vm_page_t
+_vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index,
+ enum vm_radix_access access)
{
struct vm_radix_node *rnode;
vm_page_t m;
int slot;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, access);
while (rnode != NULL) {
if (vm_radix_isleaf(rnode)) {
m = vm_radix_topage(rnode);
if (m->pindex == index)
return (m);
- else
- break;
- } else if (vm_radix_keybarr(rnode, index))
+ break;
+ }
+ if (vm_radix_keybarr(rnode, index))
break;
slot = vm_radix_slot(index, rnode->rn_clev);
- rnode = rnode->rn_child[slot];
+ rnode = vm_radix_node_load(&rnode->rn_child[slot], access);
}
return (NULL);
}
/*
- * Look up the nearest entry at a position bigger than or equal to index.
+ * Returns the value stored at the index assuming there is an external lock.
+ *
+ * If the index is not present, NULL is returned.
+ */
+vm_page_t
+vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
+{
+
+ return _vm_radix_lookup(rtree, index, LOCKED);
+}
+
+/*
+ * Returns the value stored at the index without requiring an external lock.
+ *
+ * If the index is not present, NULL is returned.
+ */
+vm_page_t
+vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
+{
+ vm_page_t m;
+
+ smr_enter(vm_radix_smr);
+ m = _vm_radix_lookup(rtree, index, SMR);
+ smr_exit(vm_radix_smr);
+
+ return (m);
+}
+
+/*
+ * Look up the nearest entry at a position greater than or equal to index.
*/
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
@@ -461,7 +544,7 @@
#endif
int slot, tos;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (rnode == NULL)
return (NULL);
else if (vm_radix_isleaf(rnode)) {
@@ -477,7 +560,7 @@
* If the keys differ before the current bisection node,
* then the search key might rollback to the earliest
* available bisection node or to the smallest key
- * in the current node (if the owner is bigger than the
+ * in the current node (if the owner is greater than the
* search key).
*/
if (vm_radix_keybarr(rnode, index)) {
@@ -512,7 +595,7 @@
("vm_radix_lookup_ge: keybarr failed"));
}
slot = vm_radix_slot(index, rnode->rn_clev);
- child = rnode->rn_child[slot];
+ child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
if (vm_radix_isleaf(child)) {
m = vm_radix_topage(child);
if (m->pindex >= index)
@@ -530,7 +613,8 @@
do {
index += inc;
slot++;
- child = rnode->rn_child[slot];
+ child = vm_radix_node_load(&rnode->rn_child[slot],
+ LOCKED);
if (vm_radix_isleaf(child)) {
m = vm_radix_topage(child);
if (m->pindex >= index)
@@ -543,7 +627,7 @@
("vm_radix_lookup_ge: child is radix node"));
/*
- * If a page or edge bigger than the search slot is not found
+ * If a page or edge greater than the search slot is not found
* in the current node, ascend to the next higher-level node.
*/
goto ascend;
@@ -572,7 +656,7 @@
#endif
int slot, tos;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (rnode == NULL)
return (NULL);
else if (vm_radix_isleaf(rnode)) {
@@ -625,7 +709,7 @@
("vm_radix_lookup_le: keybarr failed"));
}
slot = vm_radix_slot(index, rnode->rn_clev);
- child = rnode->rn_child[slot];
+ child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
if (vm_radix_isleaf(child)) {
m = vm_radix_topage(child);
if (m->pindex <= index)
@@ -643,7 +727,8 @@
do {
index -= inc;
slot--;
- child = rnode->rn_child[slot];
+ child = vm_radix_node_load(&rnode->rn_child[slot],
+ LOCKED);
if (vm_radix_isleaf(child)) {
m = vm_radix_topage(child);
if (m->pindex <= index)
@@ -677,16 +762,16 @@
vm_page_t
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
- struct vm_radix_node *rnode, *parent;
+ struct vm_radix_node *rnode, *parent, *tmp;
vm_page_t m;
int i, slot;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (vm_radix_isleaf(rnode)) {
m = vm_radix_topage(rnode);
if (m->pindex != index)
return (NULL);
- vm_radix_setroot(rtree, NULL);
+ vm_radix_root_store(rtree, NULL, LOCKED);
return (m);
}
parent = NULL;
@@ -694,34 +779,42 @@
if (rnode == NULL)
return (NULL);
slot = vm_radix_slot(index, rnode->rn_clev);
- if (vm_radix_isleaf(rnode->rn_child[slot])) {
- m = vm_radix_topage(rnode->rn_child[slot]);
+ tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
+ if (vm_radix_isleaf(tmp)) {
+ m = vm_radix_topage(tmp);
if (m->pindex != index)
return (NULL);
- rnode->rn_child[slot] = NULL;
+ vm_radix_node_store(&rnode->rn_child[slot], NULL, LOCKED);
rnode->rn_count--;
if (rnode->rn_count > 1)
return (m);
for (i = 0; i < VM_RADIX_COUNT; i++)
- if (rnode->rn_child[i] != NULL)
+ if (vm_radix_node_load(&rnode->rn_child[i],
+ LOCKED) != NULL)
break;
KASSERT(i != VM_RADIX_COUNT,
("%s: invalid node configuration", __func__));
+ tmp = vm_radix_node_load(&rnode->rn_child[i], LOCKED);
if (parent == NULL)
- vm_radix_setroot(rtree, rnode->rn_child[i]);
+ vm_radix_root_store(rtree, tmp, LOCKED);
else {
slot = vm_radix_slot(index, parent->rn_clev);
- KASSERT(parent->rn_child[slot] == rnode,
+ KASSERT(vm_radix_node_load(
+ &parent->rn_child[slot], LOCKED) == rnode,
("%s: invalid child value", __func__));
- parent->rn_child[slot] = rnode->rn_child[i];
+ vm_radix_node_store(&parent->rn_child[slot],
+ tmp, LOCKED);
}
+ /*
+ * The child is still valid and we can not zero the
+ * pointer until all smr references are gone.
+ */
rnode->rn_count--;
- rnode->rn_child[i] = NULL;
- vm_radix_node_put(rnode);
+ vm_radix_node_put(rnode, i);
return (m);
}
parent = rnode;
- rnode = rnode->rn_child[slot];
+ rnode = tmp;
}
}
@@ -735,10 +828,10 @@
{
struct vm_radix_node *root;
- root = vm_radix_getroot(rtree);
+ root = vm_radix_root_load(rtree, LOCKED);
if (root == NULL)
return;
- vm_radix_setroot(rtree, NULL);
+ vm_radix_root_store(rtree, NULL, UNSERIALIZED);
if (!vm_radix_isleaf(root))
vm_radix_reclaim_allnodes_int(root);
}
@@ -750,13 +843,13 @@
vm_page_t
vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
{
- struct vm_radix_node *rnode;
+ struct vm_radix_node *rnode, *tmp;
vm_page_t m;
vm_pindex_t index;
int slot;
index = newpage->pindex;
- rnode = vm_radix_getroot(rtree);
+ rnode = vm_radix_root_load(rtree, LOCKED);
if (rnode == NULL)
panic("%s: replacing page on an empty trie", __func__);
if (vm_radix_isleaf(rnode)) {
@@ -769,19 +862,19 @@
}
for (;;) {
slot = vm_radix_slot(index, rnode->rn_clev);
- if (vm_radix_isleaf(rnode->rn_child[slot])) {
- m = vm_radix_topage(rnode->rn_child[slot]);
+ tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
+ if (vm_radix_isleaf(tmp)) {
+ m = vm_radix_topage(tmp);
if (m->pindex == index) {
- rnode->rn_child[slot] =
- (void *)((uintptr_t)newpage |
- VM_RADIX_ISLEAF);
+ vm_radix_node_store(&rnode->rn_child[slot],
+ (struct vm_radix_node *)((uintptr_t)newpage |
+ VM_RADIX_ISLEAF), LOCKED);
return (m);
} else
break;
- } else if (rnode->rn_child[slot] == NULL ||
- vm_radix_keybarr(rnode->rn_child[slot], index))
+ } else if (tmp == NULL || vm_radix_keybarr(tmp, index))
break;
- rnode = rnode->rn_child[slot];
+ rnode = tmp;
}
panic("%s: original replacing page not found", __func__);
}
@@ -798,7 +891,7 @@
*/
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
- struct vm_radix_node *rnode;
+ struct vm_radix_node *rnode, *tmp;
int i;
if (!have_addr)
@@ -807,12 +900,13 @@
db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
(void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
rnode->rn_clev);
- for (i = 0; i < VM_RADIX_COUNT; i++)
- if (rnode->rn_child[i] != NULL)
+ for (i = 0; i < VM_RADIX_COUNT; i++) {
+ tmp = vm_radix_node_load(&rnode->rn_child[i], UNSERIALIZED);
+ if (tmp != NULL)
db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
- i, (void *)rnode->rn_child[i],
- vm_radix_isleaf(rnode->rn_child[i]) ?
- vm_radix_topage(rnode->rn_child[i]) : NULL,
+ i, (void *)tmp,
+ vm_radix_isleaf(tmp) ? vm_radix_topage(tmp) : NULL,
rnode->rn_clev);
+ }
}
#endif /* DDB */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Mar 13, 1:21 PM (18 h, 5 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17136238
Default Alt Text
D23446.id68399.diff (17 KB)
Attached To
Mode
D23446: Add a lockless lookup mechanism that uses a SMR zone.
Attached
Detach File
Event Timeline
Log In to Comment