Index: sys/vm/swap_pager.c =================================================================== --- sys/vm/swap_pager.c +++ sys/vm/swap_pager.c @@ -529,6 +529,13 @@ uma_zfree(swpctrie_zone, node); } +static int +swblk_start(struct swblk *sb, vm_pindex_t pindex) +{ + return (sb == NULL || sb->p >= pindex ? + 0 : pindex % SWAP_META_PAGES); +} + PCTRIE_DEFINE(SWAP, swblk, p, swblk_trie_alloc, swblk_trie_free); static struct swblk * @@ -538,50 +545,56 @@ rounddown(pindex, SWAP_META_PAGES))); } -static struct swblk * -swblk_start(vm_object_t object, vm_pindex_t pindex) +static void +swblk_lookup_remove(vm_object_t object, struct swblk *sb) { - return (SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, - rounddown(pindex, SWAP_META_PAGES))); + SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); } -static struct swblk * -swblk_next(vm_object_t object, struct swblk *sb) +static int +swblk_lookup_insert(vm_object_t object, struct swblk *sb) { - return (swblk_start(object, sb->p + SWAP_META_PAGES)); + return (SWAP_PCTRIE_INSERT(&object->un_pager.swp.swp_blks, sb)); } -static struct swblk * -swblk_start_limit(vm_object_t object, vm_pindex_t pindex, vm_pindex_t limit) +static bool +swblk_is_empty(vm_object_t object) { - struct swblk *sb = swblk_start(object, pindex); - if (sb != NULL && sb->p < limit) - return (sb); - return (NULL); + return (pctrie_is_empty(&object->un_pager.swp.swp_blks)); } static struct swblk * -swblk_next_limit(vm_object_t object, struct swblk *sb, vm_pindex_t limit) +swblk_iter(struct pctrie_iter *blks, vm_object_t object, + vm_pindex_t pindex) { - return (swblk_start_limit(object, sb->p + SWAP_META_PAGES, limit)); + VM_OBJECT_ASSERT_LOCKED(object); + MPASS((object->flags & OBJ_SWAP) != 0); + pctrie_iter_init(blks, &object->un_pager.swp.swp_blks); + return (SWAP_PCTRIE_ITER_LOOKUP_GE(blks, + rounddown(pindex, SWAP_META_PAGES))); } -static void -swblk_lookup_remove(vm_object_t object, struct swblk *sb) +static struct swblk * +swblk_iter_limit(struct pctrie_iter *blks, vm_object_t object, + vm_pindex_t pindex, vm_pindex_t limit) { - SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); + VM_OBJECT_ASSERT_LOCKED(object); + MPASS((object->flags & OBJ_SWAP) != 0); + pctrie_iter_limit_init(blks, &object->un_pager.swp.swp_blks, limit); + return (SWAP_PCTRIE_ITER_LOOKUP_GE(blks, + rounddown(pindex, SWAP_META_PAGES))); } -static int -swblk_lookup_insert(vm_object_t object, struct swblk *sb) +static struct swblk * +swblk_iter_next(struct pctrie_iter *blks) { - return (SWAP_PCTRIE_INSERT(&object->un_pager.swp.swp_blks, sb)); + return (SWAP_PCTRIE_ITER_JUMP_GE(blks, SWAP_META_PAGES)); } -static bool -swblk_is_empty(vm_object_t object) +static void +swblk_iter_remove(struct pctrie_iter *blks) { - return (pctrie_is_empty(&object->un_pager.swp.swp_blks)); + SWAP_PCTRIE_ITER_REMOVE(blks); } /* @@ -1827,6 +1840,7 @@ u_long swap_pager_swapped_pages(vm_object_t object) { + struct pctrie_iter blks; struct swblk *sb; u_long res; int i; @@ -1837,8 +1851,8 @@ return (0); res = 0; - for (sb = swblk_start(object, 0); sb != NULL; - sb = swblk_next(object, sb)) { + for (sb = swblk_iter(&blks, object, 0); sb != NULL; + sb = swblk_iter_next(&blks)) { for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) res++; @@ -1856,25 +1870,24 @@ static void swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object) { - struct pctrie_iter pages; + struct pctrie_iter blks, pages; struct page_range range; struct swblk *sb; vm_page_t m; - vm_pindex_t pi; - daddr_t blk; int i, rahead, rv; bool sb_empty; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_SWAP) != 0, ("%s: Object not swappable", __func__)); + KASSERT((sp->sw_flags & SW_CLOSING) != 0, + ("%s: Device not blocking further allocations", __func__)); - pi = 0; - i = 0; vm_page_iter_init(&pages, object); swp_pager_init_freerange(&range); - for (;;) { - if (i == 0 && (object->flags & OBJ_DEAD) != 0) { + sb = swblk_iter(&blks, object, 0); + while (sb != NULL) { + if ((object->flags & OBJ_DEAD) != 0) { /* * Make sure that pending writes finish before * returning. @@ -1883,78 +1896,82 @@ swp_pager_meta_free_all(object); break; } + sb_empty = true; + for (i = 0; i < SWAP_META_PAGES; i++) { + /* Skip an invalid block. */ + if (sb->d[i] == SWAPBLK_NONE) + continue; + /* Skip a block not of this device. */ + if (!swp_pager_isondev(sb->d[i], sp)) { + sb_empty = false; + continue; + } - if (i == SWAP_META_PAGES) { - pi = sb->p + SWAP_META_PAGES; - if (sb_empty) { - swblk_lookup_remove(object, sb); - uma_zfree(swblk_zone, sb); + /* + * Look for a page corresponding to this block. If the + * found page has pending operations, sleep and restart + * the scan. + */ + m = vm_page_iter_lookup(&pages, blks.index + i); + if (m != NULL && (m->oflags & VPO_SWAPINPROG) != 0) { + m->oflags |= VPO_SWAPSLEEP; + VM_OBJECT_SLEEP(object, &object->handle, PSWP, + "swpoff", 0); + break; } - i = 0; - } - if (i == 0) { - sb = swblk_start(object, pi); - if (sb == NULL) + /* + * If the found page is valid, mark it dirty and free + * the swap block. + */ + if (m != NULL && vm_page_all_valid(m)) { + swp_pager_force_dirty(&range, m, &sb->d[i]); + continue; + } + /* Is there a page we can acquire or allocate? */ + if (m != NULL) { + if (!vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL)) + break; + } else if ((m = vm_page_alloc(object, blks.index + i, + VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL)) == NULL) break; - sb_empty = true; - } - /* Skip an invalid block. */ - blk = sb->d[i]; - if (blk == SWAPBLK_NONE || !swp_pager_isondev(blk, sp)) { - if (blk != SWAPBLK_NONE) - sb_empty = false; - i++; - continue; + /* Get the page from swap, and restart the scan. */ + vm_object_pip_add(object, 1); + rahead = SWAP_META_PAGES; + rv = swap_pager_getpages_locked(object, &m, 1, + NULL, &rahead); + if (rv != VM_PAGER_OK) + panic("%s: read from swap failed: %d", + __func__, rv); + VM_OBJECT_WLOCK(object); + vm_object_pip_wakeupn(object, 1); + KASSERT(vm_page_all_valid(m), + ("%s: Page %p not all valid", __func__, m)); + vm_page_xunbusy(m); + break; } - - /* - * Look for a page corresponding to this block, If the found - * page has pending operations, sleep and restart the scan. - */ - m = vm_page_iter_lookup(&pages, sb->p + i); - if (m != NULL && (m->oflags & VPO_SWAPINPROG) != 0) { - m->oflags |= VPO_SWAPSLEEP; - VM_OBJECT_SLEEP(object, &object->handle, PSWP, "swpoff", - 0); - i = 0; /* Restart scan after object lock dropped. */ + if (i < SWAP_META_PAGES) { + /* + * With the object lock released and regained, the + * swapblk could have been freed, so search again for + * the first swblk at or after blks.index. + */ + pctrie_iter_reset(&pages); + sb = swblk_iter(&blks, object, blks.index); continue; } + if (sb_empty) { + swblk_iter_remove(&blks); + uma_zfree(swblk_zone, sb); + } /* - * If the found page is valid, mark it dirty and free the swap - * block. + * It is safe to advance to the next block. No allocations + * before blk.index have happened, even with the lock released, + * because allocations on this device are blocked. */ - if (m != NULL && vm_page_all_valid(m)) { - swp_pager_force_dirty(&range, m, &sb->d[i]); - i++; - continue; - } - - /* Is there a page we can acquire or allocate? */ - if (m == NULL) { - m = vm_page_alloc(object, sb->p + i, - VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL); - } else if (!vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL)) - m = NULL; - - /* If no page available, repeat this iteration. */ - if (m == NULL) - continue; - - /* Get the page from swap, mark it dirty, restart the scan. */ - vm_object_pip_add(object, 1); - rahead = SWAP_META_PAGES; - rv = swap_pager_getpages_locked(object, &m, 1, NULL, &rahead); - if (rv != VM_PAGER_OK) - panic("%s: read from swap failed: %d", __func__, rv); - VM_OBJECT_WLOCK(object); - vm_object_pip_wakeupn(object, 1); - KASSERT(vm_page_all_valid(m), - ("%s: Page %p not all valid", __func__, m)); - vm_page_xunbusy(m); - i = 0; /* Restart scan after object lock dropped. */ + sb = swblk_iter_next(&blks); } swp_pager_freeswapspace(&range); } @@ -2195,10 +2212,11 @@ swp_pager_meta_transfer(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t pindex, vm_pindex_t count) { + struct pctrie_iter blks; struct page_range range; struct swblk *sb; daddr_t blk, d[SWAP_META_PAGES]; - vm_pindex_t offset, last; + vm_pindex_t last; int d_mask, i, limit, start; _Static_assert(8 * sizeof(d_mask) >= SWAP_META_PAGES, "d_mask not big enough"); @@ -2211,20 +2229,16 @@ swp_pager_init_freerange(&range); d_mask = 0; - offset = pindex; last = pindex + count; - sb = swblk_start_limit(srcobject, pindex, last); - start = (sb != NULL && sb->p < pindex) ? pindex - sb->p : 0; - for (; sb != NULL; - sb = swblk_start_limit(srcobject, pindex, last), start = 0) { - pindex = sb->p; - MPASS(d_mask == 0); - limit = MIN(last - pindex, SWAP_META_PAGES); + for (sb = swblk_iter_limit(&blks, srcobject, pindex, last), + start = swblk_start(sb, pindex); + sb != NULL; sb = swblk_iter_next(&blks), start = 0) { + limit = MIN(last - blks.index, SWAP_META_PAGES); for (i = start; i < limit; i++) { if (sb->d[i] == SWAPBLK_NONE) continue; blk = swp_pager_meta_build(dstobject, - pindex + i - offset, sb->d[i], true); + blks.index + i - pindex, sb->d[i], true); if (blk == sb->d[i]) { /* * Failed memory allocation stopped transfer; @@ -2241,7 +2255,7 @@ } if (swp_pager_swblk_empty(sb, 0, start) && swp_pager_swblk_empty(sb, limit, SWAP_META_PAGES)) { - swblk_lookup_remove(srcobject, sb); + swblk_iter_remove(&blks); uma_zfree(swblk_zone, sb); } if (d_mask != 0) { @@ -2250,12 +2264,17 @@ do { i = ffs(d_mask) - 1; swp_pager_meta_build(dstobject, - pindex + i - offset, d[i], false); + blks.index + i - pindex, d[i], false); d_mask &= ~(1 << i); } while (d_mask != 0); VM_OBJECT_WLOCK(srcobject); + + /* + * While the lock was not held, the iterator path could + * have been corrupted, so discard it. + */ + pctrie_iter_reset(&blks); } - pindex += SWAP_META_PAGES; } swp_pager_freeswapspace(&range); } @@ -2271,7 +2290,7 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count, vm_size_t *freed) { - struct pctrie_iter pages; + struct pctrie_iter blks, pages; struct page_range range; struct swblk *sb; vm_page_t m; @@ -2288,26 +2307,24 @@ swp_pager_init_freerange(&range); vm_page_iter_init(&pages, object); last = pindex + count; - sb = swblk_start_limit(object, pindex, last); - start = (sb != NULL && sb->p < pindex) ? pindex - sb->p : 0; - for (; sb != NULL; - sb = swblk_start_limit(object, pindex, last), start = 0) { - limit = MIN(last - sb->p, SWAP_META_PAGES); + for (sb = swblk_iter_limit(&blks, object, pindex, last), + start = swblk_start(sb, pindex); + sb != NULL; sb = swblk_iter_next(&blks), start = 0) { + limit = MIN(last - blks.index, SWAP_META_PAGES); for (i = start; i < limit; i++) { if (sb->d[i] == SWAPBLK_NONE) continue; swp_pager_update_freerange(&range, sb->d[i]); if (freed != NULL) { - m = vm_page_iter_lookup(&pages, sb->p + i); + m = vm_page_iter_lookup(&pages, blks.index + i); if (m == NULL || vm_page_none_valid(m)) fc++; } sb->d[i] = SWAPBLK_NONE; } - pindex = sb->p + SWAP_META_PAGES; if (swp_pager_swblk_empty(sb, 0, start) && swp_pager_swblk_empty(sb, limit, SWAP_META_PAGES)) { - swblk_lookup_remove(object, sb); + swblk_iter_remove(&blks); uma_zfree(swblk_zone, sb); } } @@ -2387,22 +2404,23 @@ vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex) { + struct pctrie_iter blks; struct swblk *sb; int i; - if ((sb = swblk_start(object, pindex)) == NULL) + if ((sb = swblk_iter(&blks, object, pindex)) == NULL) return (OBJ_MAX_SIZE); - if (sb->p < pindex) { + if (blks.index < pindex) { for (i = pindex % SWAP_META_PAGES; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) - return (sb->p + i); + return (blks.index + i); } - if ((sb = swblk_next(object, sb)) == NULL) + if ((sb = swblk_iter_next(&blks)) == NULL) return (OBJ_MAX_SIZE); } for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) - return (sb->p + i); + return (blks.index + i); } /* @@ -2840,13 +2858,14 @@ long vmspace_swap_count(struct vmspace *vmspace) { + struct pctrie_iter blks; vm_map_t map; vm_map_entry_t cur; vm_object_t object; struct swblk *sb; vm_pindex_t e, pi; long count; - int i; + int i, limit, start; map = &vmspace->vm_map; count = 0; @@ -2862,11 +2881,12 @@ goto unlock; pi = OFF_TO_IDX(cur->offset); e = pi + OFF_TO_IDX(cur->end - cur->start); - for (sb = swblk_start_limit(object, pi, e); - sb != NULL; sb = swblk_next_limit(object, sb, e)) { - for (i = 0; i < SWAP_META_PAGES; i++) { - if (sb->p + i < e && - sb->d[i] != SWAPBLK_NONE) + for (sb = swblk_iter_limit(&blks, object, pi, e), + start = swblk_start(sb, pi); + sb != NULL; sb = swblk_iter_next(&blks), start = 0) { + limit = MIN(e - blks.index, SWAP_META_PAGES); + for (i = start; i < limit; i++) { + if (sb->d[i] != SWAPBLK_NONE) count++; } }