Index: sys/vm/vm_map.h =================================================================== --- sys/vm/vm_map.h +++ sys/vm/vm_map.h @@ -106,7 +106,6 @@ vm_offset_t start; /* start address */ vm_offset_t end; /* end address */ vm_offset_t next_read; /* vaddr of the next sequential read */ - vm_size_t adj_free; /* amount of adjacent free space */ vm_size_t max_free; /* max free space in subtree */ union vm_map_object object; /* object I point to */ vm_ooffset_t offset; /* offset into object */ Index: sys/vm/vm_map.c =================================================================== --- sys/vm/vm_map.c +++ sys/vm/vm_map.c @@ -865,100 +865,132 @@ static inline void vm_map_entry_set_max_free(vm_map_entry_t entry) { - - entry->max_free = entry->adj_free; - if (entry->left != NULL && entry->left->max_free > entry->max_free) - entry->max_free = entry->left->max_free; - if (entry->right != NULL && entry->right->max_free > entry->max_free) - entry->max_free = entry->right->max_free; + vm_map_entry_t child; + vm_size_t max_left, max_right; + + child = entry->left; + max_left = (child != NULL) ? child->max_free : + entry->start - entry->prev->end; + child = entry->right; + max_right = (child != NULL) ? child->max_free : + entry->next->start - entry->end; + entry->max_free = MAX(max_left, max_right); } /* - * vm_map_entry_splay: - * - * The Sleator and Tarjan top-down splay algorithm with the - * following variation. Max_free must be computed bottom-up, so - * on the downward pass, maintain the left and right spines in - * reverse order. Then, make a second pass up each side to fix - * the pointers and compute max_free. The time bound is O(log n) - * amortized. - * - * The new root is the vm_map_entry containing "addr", or else an - * adjacent entry (lower or higher) if addr is not in the tree. - * - * The map must be locked, and leaves it so. - * - * Returns: the new root. 
+ * Walk down the tree until we find addr or a NULL pointer where addr would go, + * breaking off left and right subtrees of nodes less than, or greater than + * addr. llist and rlist are the two sides in reverse order (bottom-up), with + * llist linked by the right pointer and rlist linked by the left pointer in the + * vm_map_entry. */ static vm_map_entry_t -vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root) +vm_map_entry_splay_split(vm_offset_t addr, vm_size_t length, + vm_map_entry_t root, vm_map_entry_t *out_llist, vm_map_entry_t *out_rlist) { vm_map_entry_t llist, rlist; - vm_map_entry_t ltree, rtree; vm_map_entry_t y; - /* Special case of empty tree. */ - if (root == NULL) - return (root); - - /* - * Pass One: Splay down the tree until we find addr or a NULL - * pointer where addr would go. llist and rlist are the two - * sides in reverse order (bottom-up), with llist linked by - * the right pointer and rlist linked by the left pointer in - * the vm_map_entry. Wait until Pass Two to set max_free on - * the two spines. - */ llist = NULL; rlist = NULL; - for (;;) { - /* root is never NULL in here. */ + while (root != NULL && root->max_free >= length) { if (addr < root->start) { y = root->left; - if (y == NULL) - break; - if (addr < y->start && y->left != NULL) { - /* Rotate right and put y on rlist. */ + if (y != NULL && y->max_free >= length && + addr < y->start) { + /* Rotate right and make y root. */ root->left = y->right; y->right = root; vm_map_entry_set_max_free(root); - root = y->left; - y->left = rlist; - rlist = y; - } else { - /* Put root on rlist. */ - root->left = rlist; - rlist = root; root = y; + y = root->left; } + /* Put root on rlist. */ + root->left = rlist; + rlist = root; + root = y; } else if (addr >= root->end) { y = root->right; - if (y == NULL) - break; - if (addr >= y->end && y->right != NULL) { - /* Rotate left and put y on llist. 
*/ + if (y != NULL && y->max_free >= length && + addr >= y->end) { + /* Rotate left and make y root. */ root->right = y->left; y->left = root; vm_map_entry_set_max_free(root); - root = y->right; - y->right = llist; - llist = y; - } else { - /* Put root on llist. */ - root->right = llist; - llist = root; root = y; + y = root->right; } + /* Put root on llist. */ + root->right = llist; + llist = root; + root = y; } else break; } + *out_llist = llist; + *out_rlist = rlist; + return (root); +} + +static void +vm_map_entry_splay_walk_left_spine(vm_map_entry_t root, vm_map_entry_t *iolist) +{ + vm_map_entry_t list, tree, y; + + tree = root->left; + list = *iolist; + while (tree != NULL) { + if ((y = tree->right) != NULL) { + /* Rotate left. */ + tree->right = y->left; + y->left = tree; + vm_map_entry_set_max_free(tree); + tree = y; + y = tree->right; + } + /* Put tree on list. */ + tree->right = list; + list = tree; + tree = y; + } + *iolist = list; +} + +static void +vm_map_entry_splay_walk_right_spine(vm_map_entry_t root, vm_map_entry_t *iolist) +{ + vm_map_entry_t list, tree, y; + + tree = root->right; + list = *iolist; + while (tree != NULL) { + if ((y = tree->left) != NULL) { + /* Rotate right. */ + tree->left = y->right; + y->right = tree; + vm_map_entry_set_max_free(tree); + tree = y; + y = tree->left; + } + /* Put tree on list. */ + tree->left = list; + list = tree; + tree = y; + } + *iolist = list; +} + +/* + * Walk back up the two spines, flip the pointers and set max_free. The + * subtrees of the root go at the bottom of llist and rlist. + */ +static vm_map_entry_t +vm_map_entry_splay_merge(vm_map_entry_t root, + vm_map_entry_t llist, vm_map_entry_t rlist, + vm_map_entry_t ltree, vm_map_entry_t rtree) +{ + vm_map_entry_t y; - /* - * Pass Two: Walk back up the two spines, flip the pointers - * and set max_free. The subtrees of the root go at the - * bottom of llist and rlist. 
- */ - ltree = root->left; while (llist != NULL) { y = llist->right; llist->right = ltree; @@ -966,7 +998,6 @@ ltree = llist; llist = y; } - rtree = root->right; while (rlist != NULL) { y = rlist->left; rlist->left = rtree; @@ -985,6 +1016,55 @@ return (root); } +/* + * vm_map_entry_splay: + * + * The Sleator and Tarjan top-down splay algorithm with the + * following variation. Max_free must be computed bottom-up, so + * on the downward pass, maintain the left and right spines in + * reverse order. Then, make a second pass up each side to fix + * the pointers and compute max_free. The time bound is O(log n) + * amortized. + * + * The new root is the vm_map_entry containing "addr", or else an + * adjacent entry (lower if possible) if addr is not in the tree. + * + * The map must be locked, and leaves it so. + * + * Returns: the new root. + */ +static vm_map_entry_t +vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root) +{ + vm_map_entry_t llist, rlist; + + root = vm_map_entry_splay_split(addr, 0, root, &llist, &rlist); + if (root != NULL) { + /* do nothing */ + } else if (llist != NULL) { + /* + * Recover the greatest node in the left + * subtree and make it the root. + */ + root = llist; + llist = root->right; + root->right = NULL; + } else if (rlist != NULL) { + /* + * Recover the least node in the right + * subtree and make it the root. + */ + root = rlist; + rlist = root->left; + root->left = NULL; + } else { + /* There is no root. 
*/ + return (NULL); + } + return (vm_map_entry_splay_merge(root, llist, rlist, + root->left, root->right)); +} + /* * vm_map_entry_{un,}link: * @@ -992,41 +1072,23 @@ */ static void vm_map_entry_link(vm_map_t map, - vm_map_entry_t after_where, vm_map_entry_t entry) { + vm_map_entry_t llist, rlist, root; - CTR4(KTR_VM, - "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map, - map->nentries, entry, after_where); + CTR3(KTR_VM, + "vm_map_entry_link: map %p, nentries %d, entry %p", map, + map->nentries, entry); VM_MAP_ASSERT_LOCKED(map); - KASSERT(after_where->end <= entry->start, - ("vm_map_entry_link: prev end %jx new start %jx overlap", - (uintmax_t)after_where->end, (uintmax_t)entry->start)); - KASSERT(entry->end <= after_where->next->start, - ("vm_map_entry_link: new end %jx next start %jx overlap", - (uintmax_t)entry->end, (uintmax_t)after_where->next->start)); - map->nentries++; - entry->prev = after_where; - entry->next = after_where->next; - entry->next->prev = entry; - after_where->next = entry; - - if (after_where != &map->header) { - if (after_where != map->root) - vm_map_entry_splay(after_where->start, map->root); - entry->right = after_where->right; - entry->left = after_where; - after_where->right = NULL; - after_where->adj_free = entry->start - after_where->end; - vm_map_entry_set_max_free(after_where); - } else { - entry->right = map->root; - entry->left = NULL; - } - entry->adj_free = entry->next->start - entry->end; - vm_map_entry_set_max_free(entry); + root = map->root; + root = vm_map_entry_splay_split(entry->start, 0, root, &llist, &rlist); + KASSERT(root == NULL, + ("vm_map_entry_link: link object already mapped")); + entry->prev = (llist == NULL) ? &map->header : llist; + entry->next = (rlist == NULL) ? 
&map->header : rlist; + entry->prev->next = entry->next->prev = entry; + root = vm_map_entry_splay_merge(entry, llist, rlist, NULL, NULL); map->root = entry; } @@ -1034,25 +1096,36 @@ vm_map_entry_unlink(vm_map_t map, vm_map_entry_t entry) { - vm_map_entry_t next, prev, root; + vm_map_entry_t llist, rlist, root; VM_MAP_ASSERT_LOCKED(map); - if (entry != map->root) - vm_map_entry_splay(entry->start, map->root); - if (entry->left == NULL) - root = entry->right; - else { - root = vm_map_entry_splay(entry->start, entry->left); - root->right = entry->right; - root->adj_free = entry->next->start - root->end; - vm_map_entry_set_max_free(root); - } + llist = entry->prev; + rlist = entry->next; + llist->next = rlist; + rlist->prev = llist; + root = map->root; + root = vm_map_entry_splay_split(entry->start, 0, root, &llist, &rlist); + KASSERT(root != NULL, + ("vm_map_entry_unlink: unlink object not mapped")); + vm_map_entry_splay_walk_left_spine(root, &llist); + vm_map_entry_splay_walk_right_spine(root, &rlist); + + if (llist != NULL) { + /* New root is predecessor of found node. */ + root = llist; + llist = root->right; + root->right = NULL; + } else if (rlist != NULL) { + /* New root is successor of found node. */ + root = rlist; + rlist = root->left; + root->left = NULL; + } else + root = NULL; + if (root != NULL) + root = vm_map_entry_splay_merge(root, llist, rlist, + root->left, root->right); map->root = root; - - prev = entry->prev; - next = entry->next; - next->prev = prev; - prev->next = next; map->nentries--; CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map, map->nentries, entry); @@ -1061,27 +1134,34 @@ /* * vm_map_entry_resize_free: * - * Recompute the amount of free space following a vm_map_entry - * and propagate that value up the tree. Call this function after + * Recompute the amount of free space next to a vm_map_entry + * and propagate those values up the tree. 
Call this function after * resizing a map entry in-place, that is, without a call to * vm_map_entry_link() or _unlink(). * * The map must be locked, and leaves it so. */ static void -vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry) +vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry, bool newStart) { + vm_map_entry_t llist, rlist, root; - /* - * Using splay trees without parent pointers, propagating - * max_free up the tree is done by moving the entry to the - * root and making the change there. - */ - if (entry != map->root) - map->root = vm_map_entry_splay(entry->start, map->root); - - entry->adj_free = entry->next->start - entry->end; - vm_map_entry_set_max_free(entry); + VM_MAP_ASSERT_LOCKED(map); + root = map->root; + root = vm_map_entry_splay_split(entry->start, 0, root, &llist, &rlist); + KASSERT(root != NULL, + ("vm_map_entry_resize_free: resize_free object not mapped")); + if (newStart) { + vm_map_entry_splay_walk_left_spine(root, &llist); + root->left = NULL; + } else { + vm_map_entry_splay_walk_right_spine(root, &rlist); + root->right = NULL; + } + map->root = vm_map_entry_splay_merge(root, llist, rlist, + root->left, root->right); + CTR3(KTR_VM, "vm_map_entry_resize_free: map %p, nentries %d, entry %p", map, + map->nentries, entry); } /* @@ -1100,7 +1180,7 @@ vm_offset_t address, vm_map_entry_t *entry) /* OUT */ { - vm_map_entry_t cur; + vm_map_entry_t cur, lbound; boolean_t locked; /* @@ -1108,12 +1188,15 @@ * "address" is the map's header. */ cur = map->root; - if (cur == NULL) + if (cur == NULL) { *entry = &map->header; - else if (address >= cur->start && cur->end > address) { + return (FALSE); + } + if (address >= cur->start && cur->end > address) { *entry = cur; return (TRUE); - } else if ((locked = vm_map_locked(map)) || + } + if ((locked = vm_map_locked(map)) || sx_try_upgrade(&map->lock)) { /* * Splay requires a write lock on the map. However, it only @@ -1130,35 +1213,30 @@ * is that map entry. 
Otherwise, the new root is a map entry * immediately before or after "address". */ - if (address >= cur->start) { + if (address < cur->start) { + *entry = &map->header; + return (FALSE); + } + *entry = cur; + return (address < cur->end); + } + /* + * Since the map is only locked for read access, perform a + * standard binary search tree lookup for "address". + */ + lbound = &map->header; + do { + if (address < cur->start) { + cur = cur->left; + } else if (cur->end <= address) { + lbound = cur; + cur = cur->right; + } else { *entry = cur; - if (cur->end > address) - return (TRUE); - } else - *entry = cur->prev; - } else - /* - * Since the map is only locked for read access, perform a - * standard binary search tree lookup for "address". - */ - for (;;) { - if (address < cur->start) { - if (cur->left == NULL) { - *entry = cur->prev; - break; - } - cur = cur->left; - } else if (cur->end > address) { - *entry = cur; - return (TRUE); - } else { - if (cur->right == NULL) { - *entry = cur; - break; - } - cur = cur->right; - } + return (TRUE); } + } while (cur != NULL); + *entry = lbound; return (FALSE); } @@ -1300,7 +1378,7 @@ if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0) map->size += end - prev_entry->end; prev_entry->end = end; - vm_map_entry_resize_free(map, prev_entry); + vm_map_entry_resize_free(map, prev_entry, false); vm_map_simplify_entry(map, prev_entry); return (KERN_SUCCESS); } @@ -1351,7 +1429,7 @@ /* * Insert the new entry into the list */ - vm_map_entry_link(map, prev_entry, new_entry); + vm_map_entry_link(map, new_entry); if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0) map->size += new_entry->end - new_entry->start; @@ -1377,11 +1455,11 @@ * Find the first fit (lowest VM address) for "length" free bytes * beginning at address >= start in the given map. * - * In a vm_map_entry, "adj_free" is the amount of free space - * adjacent (higher address) to this entry, and "max_free" is the - * maximum amount of contiguous free space in its subtree. 
This - * allows finding a free region in one path down the tree, so - * O(log n) amortized with splay trees. + * In a vm_map_entry, "max_free" is the maximum amount of + * contiguous free space between an entry in its subtree and a + * neighbor of that entry. This allows finding a free region in + * one path down the tree, so O(log n) amortized with splay + * trees. * * The map must be locked, and leaves it so. * @@ -1392,8 +1470,8 @@ vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length, vm_offset_t *addr) /* OUT */ { - vm_map_entry_t entry; - vm_offset_t st; + vm_map_entry_t llist, rlist, root, y; + vm_size_t left_length; /* * Request must fit within min/max VM address and must avoid @@ -1413,8 +1491,21 @@ * After splay, if start comes before root node, then there * must be a gap from start to the root. */ - map->root = vm_map_entry_splay(start, map->root); - if (start + length <= map->root->start) { + root = vm_map_entry_splay_split(start, length, map->root, &llist, &rlist); + if (root != NULL) + start = root->end; + else if (rlist != NULL) { + root = rlist; + rlist = root->left; + root->left = NULL; + } else { + root = llist; + llist = root->right; + root->right = NULL; + } + map->root = vm_map_entry_splay_merge(root, llist, rlist, + root->left, root->right); + if (start + length <= root->start) { *addr = start; return (0); } @@ -1424,34 +1515,78 @@ * start, and this is the last comparison where address * wrap might be a problem. */ - st = (start > map->root->end) ? start : map->root->end; - if (length <= map->root->end + map->root->adj_free - st) { - *addr = st; + if (root->right == NULL && + start + length <= vm_map_max(map)) { + *addr = start; return (0); } /* With max_free, can immediately tell if no solution. 
*/ - entry = map->root->right; - if (entry == NULL || length > entry->max_free) + if (root->right == NULL || length > root->right->max_free) return (1); /* - * Search the right subtree in the order: left subtree, root, - * right subtree (first fit). The previous splay implies that - * all regions in the right subtree have addresses > start. + * Splay for the least large-enough gap in the right subtree. */ - while (entry != NULL) { - if (entry->left != NULL && entry->left->max_free >= length) - entry = entry->left; - else if (entry->adj_free >= length) { - *addr = entry->end; - return (0); - } else - entry = entry->right; + llist = NULL; + rlist = NULL; + for (left_length = 0; ; left_length = root->left != NULL ? + root->left->max_free : root->start - llist->end) { + if (length <= left_length) { + y = root->left; + if (y != NULL && + (length <= (y->left != NULL ? + y->left->max_free : y->start - llist->end))) { + /* Rotate right and make y root. */ + root->left = y->right; + y->right = root; + vm_map_entry_set_max_free(root); + root = y; + y = root->left; + } + /* Put root on rlist. */ + root->left = rlist; + rlist = root; + } else { + y = root->right; + if (y != NULL && + (length > (y->left != NULL ? + y->left->max_free : y->start - root->end))) { + /* Rotate left and make y root. */ + root->right = y->left; + y->left = root; + vm_map_entry_set_max_free(root); + root = y; + y = root->right; + } + /* Put root on llist. */ + root->right = llist; + llist = root; + } + root = y; + if (root == NULL) + break; } - - /* Can't get here, so panic if we do. 
*/ - panic("vm_map_findspace: max_free corrupt"); + root = llist; + llist = root->right; + if ((y = rlist) == NULL) + root->right = NULL; + else { + rlist = y->left; + y->left = NULL; + root->right = y->right; + } + root = vm_map_entry_splay_merge(root, llist, rlist, + root->left, root->right); + if (y != NULL) { + y->right = root->right; + vm_map_entry_set_max_free(y); + root->right = y; + vm_map_entry_set_max_free(root); + } + map->root = root; + *addr = root->end; + return (0); } int @@ -1533,7 +1668,9 @@ VM_MAP_ASSERT_LOCKED(map); free_addr = *addr; KASSERT(!vm_map_findspace(map, free_addr, length, addr) && - free_addr == *addr, ("caller provided insufficient free space")); + free_addr == *addr, + ("caller failed to provide space %#jx at address %p", + (uintmax_t)length, (void *)free_addr)); for (;;) { /* * At the start of every iteration, the free space at address @@ -1827,15 +1964,14 @@ vm_map_entry_unlink(map, prev); entry->start = prev->start; entry->offset = prev->offset; - if (entry->prev != &map->header) - vm_map_entry_resize_free(map, entry->prev); + vm_map_entry_resize_free(map, entry, true); vm_map_merged_neighbor_dispose(map, prev); } next = entry->next; if (vm_map_mergeable_neighbors(entry, next)) { vm_map_entry_unlink(map, next); entry->end = next->end; + vm_map_entry_resize_free(map, entry, false); - vm_map_entry_resize_free(map, entry); vm_map_merged_neighbor_dispose(map, next); } } @@ -1913,7 +2049,7 @@ if (new_entry->cred != NULL) crhold(entry->cred); - vm_map_entry_link(map, entry->prev, new_entry); + vm_map_entry_link(map, new_entry); if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { vm_object_reference(new_entry->object.vm_object); @@ -1995,7 +2131,7 @@ if (new_entry->cred != NULL) crhold(entry->cred); - vm_map_entry_link(map, entry, new_entry); + vm_map_entry_link(map, new_entry); if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { vm_object_reference(new_entry->object.vm_object); @@ -3674,8 +3810,7 @@ * Insert the entry into the new map -- we know 
we're * inserting at the end of the new map. */ - vm_map_entry_link(new_map, new_map->header.prev, - new_entry); + vm_map_entry_link(new_map, new_entry); vmspace_map_entry_forked(vm1, vm2, new_entry); /* @@ -3702,8 +3837,7 @@ new_entry->wired_count = 0; new_entry->object.vm_object = NULL; new_entry->cred = NULL; - vm_map_entry_link(new_map, new_map->header.prev, - new_entry); + vm_map_entry_link(new_map, new_entry); vmspace_map_entry_forked(vm1, vm2, new_entry); vm_map_copy_entry(old_map, new_map, old_entry, new_entry, fork_charge); @@ -3726,8 +3860,7 @@ new_entry->max_protection = old_entry->max_protection; new_entry->inheritance = VM_INHERIT_ZERO; - vm_map_entry_link(new_map, new_map->header.prev, - new_entry); + vm_map_entry_link(new_map, new_entry); vmspace_map_entry_forked(vm1, vm2, new_entry); new_entry->cred = curthread->td_ucred; @@ -4025,7 +4158,7 @@ } else { MPASS(gap_entry->start < gap_entry->end - grow_amount); gap_entry->end -= grow_amount; - vm_map_entry_resize_free(map, gap_entry); + vm_map_entry_resize_free(map, gap_entry, false); gap_deleted = false; } rv = vm_map_insert(map, NULL, 0, grow_start, @@ -4040,7 +4173,7 @@ MPASS(rv1 == KERN_SUCCESS); } else { gap_entry->end += grow_amount; - vm_map_entry_resize_free(map, gap_entry); + vm_map_entry_resize_free(map, gap_entry, false); } } } else { @@ -4062,7 +4195,7 @@ gap_entry->start += grow_amount; stack_entry->end += grow_amount; map->size += grow_amount; - vm_map_entry_resize_free(map, stack_entry); + vm_map_entry_resize_free(map, stack_entry, false); rv = KERN_SUCCESS; } else rv = KERN_FAILURE;