diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h
--- a/sys/vm/swap_pager.h
+++ b/sys/vm/swap_pager.h
@@ -75,6 +75,7 @@
 int swap_dev_info(int name, struct xswdev *xs, char *devname, size_t len);
 void swap_pager_copy(vm_object_t, vm_object_t, vm_pindex_t, int);
 vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex);
+bool swap_pager_scan_all_shadowed(vm_object_t object);
 void swap_pager_freespace(vm_object_t object, vm_pindex_t start,
     vm_size_t size, vm_size_t *freed);
 void swap_pager_swap_init(void);
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -557,6 +557,13 @@
 	return (pctrie_is_empty(&object->un_pager.swp.swp_blks));
 }
 
+static struct swblk *
+swblk_iter_lookup_ge(struct pctrie_iter *blks, vm_pindex_t pindex)
+{
+	return (SWAP_PCTRIE_ITER_LOOKUP_GE(blks,
+	    rounddown(pindex, SWAP_META_PAGES)));
+}
+
 static void
 swblk_iter_init_only(struct pctrie_iter *blks, vm_object_t object)
 {
@@ -571,8 +578,7 @@
     vm_pindex_t pindex)
 {
 	swblk_iter_init_only(blks, object);
-	return (SWAP_PCTRIE_ITER_LOOKUP_GE(blks,
-	    rounddown(pindex, SWAP_META_PAGES)));
+	return (swblk_iter_lookup_ge(blks, pindex));
 }
 
 static struct swblk *
@@ -591,8 +597,7 @@
 	VM_OBJECT_ASSERT_LOCKED(object);
 	MPASS((object->flags & OBJ_SWAP) != 0);
 	pctrie_iter_limit_init(blks, &object->un_pager.swp.swp_blks, limit);
-	return (SWAP_PCTRIE_ITER_LOOKUP_GE(blks,
-	    rounddown(pindex, SWAP_META_PAGES)));
+	return (swblk_iter_lookup_ge(blks, pindex));
 }
 
 static struct swblk *
@@ -2441,26 +2446,25 @@
  * pindex and for which there is a swap block allocated. Returns OBJ_MAX_SIZE
  * if are no allocated swap blocks for the object after the requested pindex.
  */
-vm_pindex_t
-swap_pager_find_least(vm_object_t object, vm_pindex_t pindex)
+static vm_pindex_t
+swap_pager_iter_find_least(struct pctrie_iter *blks, vm_pindex_t pindex)
 {
-	struct pctrie_iter blks;
 	struct swblk *sb;
 	int i;
 
-	if ((sb = swblk_iter_init(&blks, object, pindex)) == NULL)
+	if ((sb = swblk_iter_lookup_ge(blks, pindex)) == NULL)
 		return (OBJ_MAX_SIZE);
-	if (blks.index < pindex) {
+	if (blks->index < pindex) {
 		for (i = pindex % SWAP_META_PAGES; i < SWAP_META_PAGES; i++) {
 			if (sb->d[i] != SWAPBLK_NONE)
-				return (blks.index + i);
+				return (blks->index + i);
 		}
-		if ((sb = swblk_iter_next(&blks)) == NULL)
+		if ((sb = swblk_iter_next(blks)) == NULL)
 			return (OBJ_MAX_SIZE);
 	}
 	for (i = 0; i < SWAP_META_PAGES; i++) {
 		if (sb->d[i] != SWAPBLK_NONE)
-			return (blks.index + i);
+			return (blks->index + i);
 	}
 
 	/*
@@ -2471,6 +2475,118 @@
 	return (OBJ_MAX_SIZE);
 }
 
+/*
+ * Returns the least page index which is greater than or equal to the parameter
+ * pindex and for which there is a swap block allocated. Returns OBJ_MAX_SIZE
+ * if there are no allocated swap blocks for the object after pindex.
+ */
+vm_pindex_t
+swap_pager_find_least(vm_object_t object, vm_pindex_t pindex)
+{
+	struct pctrie_iter blks;
+
+	swblk_iter_init_only(&blks, object);
+	return (swap_pager_iter_find_least(&blks, pindex));
+}
+
+/*
+ * Is every resident or swapped-out page of the backing object unbusy and
+ * shadowed in the parent object by a valid page or an allocated swap block?
+ */
+bool
+swap_pager_scan_all_shadowed(vm_object_t object)
+{
+	struct pctrie_iter backing_blks, backing_pages, pages;
+	vm_object_t backing_object;
+	vm_page_t p, pp;
+	vm_pindex_t backing_offset_index, new_pindex, pi, pi_ubound, ps, pv;
+
+	VM_OBJECT_ASSERT_WLOCKED(object);
+	VM_OBJECT_ASSERT_WLOCKED(object->backing_object);
+
+	backing_object = object->backing_object;
+
+	if ((backing_object->flags & OBJ_ANON) == 0)
+		return (false);
+
+	KASSERT((object->flags & OBJ_ANON) != 0,
+	    ("Shadow object is not anonymous"));
+	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
+	pi_ubound = MIN(backing_object->size,
+	    backing_offset_index + object->size);
+	vm_page_iter_init(&pages, object);
+	vm_page_iter_init(&backing_pages, backing_object);
+	swblk_iter_init_only(&backing_blks, backing_object);
+
+	/*
+	 * Only check pages inside the parent object's range and inside the
+	 * parent object's mapping of the backing object.
+	 */
+	pv = ps = pi = backing_offset_index - 1;
+	for (;;) {
+		if (pi == pv) {
+			p = vm_page_iter_lookup_ge(&backing_pages, pv + 1);
+			pv = p != NULL ? p->pindex : backing_object->size;
+		}
+		if (pi == ps)
+			ps = swap_pager_iter_find_least(&backing_blks, ps + 1);
+		pi = MIN(pv, ps);
+		if (pi >= pi_ubound)
+			break;
+
+		if (pi == pv) {
+			/*
+			 * If the backing object page is busy, a grandparent or
+			 * older page may still be undergoing CoW. It is not
+			 * safe to collapse the backing object until it is
+			 * quiesced.
+			 */
+			if (vm_page_tryxbusy(p) == 0)
+				return (false);
+
+			/*
+			 * We raced with the fault handler that left a newly
+			 * allocated invalid page on the object queue and
+			 * retried.
+			 */
+			if (!vm_page_all_valid(p))
+				break;
+
+			/*
+			 * The busy state of p prevents the fault handler from
+			 * validating the parent page (pp, below).
+			 */
+		}
+
+		/*
+		 * See if the parent has the page or if the parent's object
+		 * pager has the page. If the parent has the page but the page
+		 * is not valid, the parent's object pager must have the page.
+		 *
+		 * If this fails, the parent does not completely shadow the
+		 * object and we might as well give up now.
+		 */
+		new_pindex = pi - backing_offset_index;
+		pp = vm_page_iter_lookup(&pages, new_pindex);
+
+		/*
+		 * The valid check here is stable due to object lock being
+		 * required to clear valid and initiate paging.
+		 */
+		if ((pp == NULL || vm_page_none_valid(pp)) &&
+		    !swap_pager_haspage(object, new_pindex, NULL, NULL))
+			break;
+		if (pi == pv)
+			vm_page_xunbusy(p);
+	}
+	if (pi < pi_ubound) {
+		if (pi == pv)
+			vm_page_xunbusy(p);
+		return (false);
+	}
+	return (true);
+}
+
 /*
  * System call swapon(name) enables swapping on device name,
  * which must be in the swdevsw. Return EBUSY
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1686,93 +1686,6 @@
 	return (TAILQ_FIRST(&backing_object->memq));
 }
 
-static bool
-vm_object_scan_all_shadowed(vm_object_t object)
-{
-	vm_object_t backing_object;
-	vm_page_t p, pp;
-	vm_pindex_t backing_offset_index, new_pindex, pi, ps;
-
-	VM_OBJECT_ASSERT_WLOCKED(object);
-	VM_OBJECT_ASSERT_WLOCKED(object->backing_object);
-
-	backing_object = object->backing_object;
-
-	if ((backing_object->flags & OBJ_ANON) == 0)
-		return (false);
-
-	pi = backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
-	p = vm_page_find_least(backing_object, pi);
-	ps = swap_pager_find_least(backing_object, pi);
-
-	/*
-	 * Only check pages inside the parent object's range and
-	 * inside the parent object's mapping of the backing object.
-	 */
-	for (;; pi++) {
-		if (p != NULL && p->pindex < pi)
-			p = TAILQ_NEXT(p, listq);
-		if (ps < pi)
-			ps = swap_pager_find_least(backing_object, pi);
-		if (p == NULL && ps >= backing_object->size)
-			break;
-		else if (p == NULL)
-			pi = ps;
-		else
-			pi = MIN(p->pindex, ps);
-
-		new_pindex = pi - backing_offset_index;
-		if (new_pindex >= object->size)
-			break;
-
-		if (p != NULL) {
-			/*
-			 * If the backing object page is busy a
-			 * grandparent or older page may still be
-			 * undergoing CoW. It is not safe to collapse
-			 * the backing object until it is quiesced.
-			 */
-			if (vm_page_tryxbusy(p) == 0)
-				return (false);
-
-			/*
-			 * We raced with the fault handler that left
-			 * newly allocated invalid page on the object
-			 * queue and retried.
-			 */
-			if (!vm_page_all_valid(p))
-				goto unbusy_ret;
-		}
-
-		/*
-		 * See if the parent has the page or if the parent's object
-		 * pager has the page. If the parent has the page but the page
-		 * is not valid, the parent's object pager must have the page.
-		 *
-		 * If this fails, the parent does not completely shadow the
-		 * object and we might as well give up now.
-		 */
-		pp = vm_page_lookup(object, new_pindex);
-
-		/*
-		 * The valid check here is stable due to object lock
-		 * being required to clear valid and initiate paging.
-		 * Busy of p disallows fault handler to validate pp.
-		 */
-		if ((pp == NULL || vm_page_none_valid(pp)) &&
-		    !vm_pager_has_page(object, new_pindex, NULL, NULL))
-			goto unbusy_ret;
-		if (p != NULL)
-			vm_page_xunbusy(p);
-	}
-	return (true);
-
-unbusy_ret:
-	if (p != NULL)
-		vm_page_xunbusy(p);
-	return (false);
-}
-
 static void
 vm_object_collapse_scan(vm_object_t object)
 {
@@ -2001,7 +1914,7 @@
 		 * The object lock and backing_object lock must not
 		 * be dropped during this sequence.
		 */
-		if (!vm_object_scan_all_shadowed(object)) {
+		if (!swap_pager_scan_all_shadowed(object)) {
			VM_OBJECT_WUNLOCK(backing_object);
			break;
		}
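
Note on the iteration scheme: the old vm_object_scan_all_shadowed() walked
resident pages with TAILQ_NEXT() but re-ran swap_pager_find_least() from the
trie root whenever the swap cursor fell behind. The new function instead keeps
two ascending cursors over the backing object, pv for resident pages and ps
for swap blocks, advances only the cursor(s) that produced the last visited
index, and visits pi = MIN(pv, ps). The following standalone userspace sketch
shows that two-cursor merge in isolation; the sorted arrays, the UBOUND
sentinel (standing in for pi_ubound and backing_object->size), and the main()
harness are illustrative assumptions, not kernel code or part of the patch.

#include <stdio.h>

#define UBOUND 100UL	/* stand-in for pi_ubound */

/*
 * Sorted stand-ins for the backing object's resident page indices and swap
 * block indices. The trailing UBOUND sentinel plays the role that
 * backing_object->size and OBJ_MAX_SIZE play in the patch: it keeps a
 * drained cursor parked at a value that ends the loop.
 */
static const unsigned long resident[] = { 3, 4, 9, 42, UBOUND };
static const unsigned long swblocks[] = { 4, 7, 42, 50, UBOUND };

int
main(void)
{
	size_t iv, is;
	unsigned long pi, ps, pv;

	iv = is = 0;
	/*
	 * Start one step "before" the range, as the patch does with
	 * backing_offset_index - 1; pi is recomputed before it is checked.
	 * Popping the next array element stands in for the kernel's
	 * "least index greater than the last visited one" queries.
	 */
	pv = ps = pi = (unsigned long)-1;
	for (;;) {
		/* Advance only the cursor(s) whose value was just visited. */
		if (pi == pv)
			pv = resident[iv++];
		if (pi == ps)
			ps = swblocks[is++];
		pi = pv < ps ? pv : ps;		/* pi = MIN(pv, ps) */
		if (pi >= UBOUND)
			break;
		printf("visit %lu (%s%s)\n", pi,
		    pi == pv ? "resident" : "",
		    pi == ps ? (pi == pv ? "+swap" : "swap") : "");
	}
	return (0);
}

Every index present in either source is visited exactly once, in ascending
order, and a cursor is re-queried only after its current value has been
consumed. In the patch this is what lets vm_page_iter_lookup_ge() and
swap_pager_iter_find_least() resume from the iterators' current positions
rather than restarting a lookup for every index.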