Changeset View
Standalone View
sys/vm/swap_pager.c
Show First 20 Lines • Show All 432 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* Metadata functions | * Metadata functions | ||||
*/ | */ | ||||
static daddr_t swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); | static daddr_t swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); | ||||
static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t); | static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t); | ||||
static void swp_pager_meta_transfer(vm_object_t src, vm_object_t dst, | static void swp_pager_meta_transfer(vm_object_t src, vm_object_t dst, | ||||
vm_pindex_t pindex, vm_pindex_t count); | vm_pindex_t pindex, vm_pindex_t count); | ||||
static void swp_pager_meta_free_all(vm_object_t); | static void swp_pager_meta_free_all(vm_object_t); | ||||
static daddr_t swp_pager_meta_lookup(vm_object_t, vm_pindex_t); | static daddr_t swp_pager_meta_lookup(vm_object_t object, vm_pindex_t pindex0, | ||||
int *before, int *after); | |||||
static void | static void | ||||
swp_pager_init_freerange(daddr_t *start, daddr_t *num) | swp_pager_init_freerange(daddr_t *start, daddr_t *num) | ||||
{ | { | ||||
*start = SWAPBLK_NONE; | *start = SWAPBLK_NONE; | ||||
*num = 0; | *num = 0; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 503 Lines • ▼ Show 20 Lines | |||||
swp_pager_xfer_source(vm_object_t srcobject, vm_object_t dstobject, | swp_pager_xfer_source(vm_object_t srcobject, vm_object_t dstobject, | ||||
vm_pindex_t pindex, daddr_t addr) | vm_pindex_t pindex, daddr_t addr) | ||||
{ | { | ||||
daddr_t dstaddr; | daddr_t dstaddr; | ||||
KASSERT(srcobject->type == OBJT_SWAP, | KASSERT(srcobject->type == OBJT_SWAP, | ||||
("%s: Srcobject not swappable", __func__)); | ("%s: Srcobject not swappable", __func__)); | ||||
if (dstobject->type == OBJT_SWAP && | if (dstobject->type == OBJT_SWAP && | ||||
swp_pager_meta_lookup(dstobject, pindex) != SWAPBLK_NONE) { | swp_pager_meta_lookup(dstobject, pindex, NULL, NULL) != | ||||
SWAPBLK_NONE) { | |||||
/* Caller should destroy the source block. */ | /* Caller should destroy the source block. */ | ||||
return (false); | return (false); | ||||
} | } | ||||
/* | /* | ||||
* Destination has no swapblk and is not resident, transfer source. | * Destination has no swapblk and is not resident, transfer source. | ||||
* swp_pager_meta_build() can sleep. | * swp_pager_meta_build() can sleep. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines | |||||
* | * | ||||
* If TRUE, we also try to determine how much valid, contiguous backing | * If TRUE, we also try to determine how much valid, contiguous backing | ||||
* store exists before and after the requested page. | * store exists before and after the requested page. | ||||
*/ | */ | ||||
static boolean_t | static boolean_t | ||||
swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, | swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, | ||||
int *after) | int *after) | ||||
{ | { | ||||
daddr_t blk, blk0; | daddr_t blk0; | ||||
int i; | |||||
if (before != NULL) | |||||
*before = SWB_NPAGES - 1; | |||||
if (after != NULL) | |||||
*after = SWB_NPAGES - 1; | |||||
markj: This is changing the values of maxahead and maxbehind passed in swap_pager_getpages(), so the caller's hints may not be respected. | |||||
Done Inline ActionsThe existing code has a limit that looks like this: ota_j.email.ne.jp: The existing code has a limit that looks like this:
for (i = 1; i < SWB_NPAGES; i++) | |||||
Done Inline ActionsI'm interested in removing SWB_NPAGES ceiling and see if it improves performance. However, for now, I want to keep the same behavior. ota_j.email.ne.jp: I'm interested in removing SWB_NPAGES ceiling and see if it improves performance. However… | |||||
VM_OBJECT_ASSERT_LOCKED(object); | VM_OBJECT_ASSERT_LOCKED(object); | ||||
KASSERT(object->type == OBJT_SWAP, | KASSERT(object->type == OBJT_SWAP, | ||||
("%s: object not swappable", __func__)); | ("%s: object not swappable", __func__)); | ||||
/* | blk0 = swp_pager_meta_lookup(object, pindex, before, after); | ||||
Done Inline ActionsI think these initializations should be done in swp_pager_meta_lookup() now. markj: I think these initializations should be done in swp_pager_meta_lookup() now. | |||||
* do we have good backing store at the requested index ? | return (blk0 != SWAPBLK_NONE); | ||||
*/ | |||||
blk0 = swp_pager_meta_lookup(object, pindex); | |||||
if (blk0 == SWAPBLK_NONE) { | |||||
if (before) | |||||
*before = 0; | |||||
if (after) | |||||
*after = 0; | |||||
return (FALSE); | |||||
} | } | ||||
/* | /* | ||||
* find backwards-looking contiguous good backing store | |||||
*/ | |||||
if (before != NULL) { | |||||
for (i = 1; i < SWB_NPAGES; i++) { | |||||
if (i > pindex) | |||||
break; | |||||
blk = swp_pager_meta_lookup(object, pindex - i); | |||||
Done Inline ActionsThe swp_pager_meta_lookup calls here and below are merged into a single swp_pager_meta_lookup call. Within one swblk (up to SWAP_META_PAGES entries), swp_pager_meta_lookup can reach adjacent elements by array access, avoiding most PCTRIE lookups. ota_j.email.ne.jp: The swp_pager_meta_lookup calls here and below are merged into a single… | |||||
if (blk != blk0 - i) | |||||
break; | |||||
} | |||||
*before = i - 1; | |||||
} | |||||
/* | |||||
* find forward-looking contiguous good backing store | |||||
*/ | |||||
if (after != NULL) { | |||||
for (i = 1; i < SWB_NPAGES; i++) { | |||||
blk = swp_pager_meta_lookup(object, pindex + i); | |||||
if (blk != blk0 + i) | |||||
break; | |||||
} | |||||
*after = i - 1; | |||||
} | |||||
return (TRUE); | |||||
} | |||||
/* | |||||
* SWAP_PAGER_PAGE_UNSWAPPED() - remove swap backing store related to page | * SWAP_PAGER_PAGE_UNSWAPPED() - remove swap backing store related to page | ||||
* | * | ||||
* This removes any associated swap backing store, whether valid or | * This removes any associated swap backing store, whether valid or | ||||
* not, from the page. | * not, from the page. | ||||
* | * | ||||
* This routine is typically called when a page is made dirty, at | * This routine is typically called when a page is made dirty, at | ||||
* which point any associated swap can be freed. MADV_FREE also | * which point any associated swap can be freed. MADV_FREE also | ||||
* calls us in a special-case situation | * calls us in a special-case situation | ||||
▲ Show 20 Lines • Show All 149 Lines • ▼ Show 20 Lines | swap_pager_getpages_locked(vm_object_t object, vm_page_t *ma, int count, | ||||
if (rbehind != NULL) | if (rbehind != NULL) | ||||
count += *rbehind; | count += *rbehind; | ||||
if (rahead != NULL) | if (rahead != NULL) | ||||
count += *rahead; | count += *rahead; | ||||
vm_object_pip_add(object, count); | vm_object_pip_add(object, count); | ||||
pindex = bm->pindex; | pindex = bm->pindex; | ||||
blk = swp_pager_meta_lookup(object, pindex); | blk = swp_pager_meta_lookup(object, pindex, NULL, NULL); | ||||
KASSERT(blk != SWAPBLK_NONE, | KASSERT(blk != SWAPBLK_NONE, | ||||
("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex)); | ("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex)); | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
bp = uma_zalloc(swrbuf_zone, M_WAITOK); | bp = uma_zalloc(swrbuf_zone, M_WAITOK); | ||||
/* Pages cannot leave the object while busy. */ | /* Pages cannot leave the object while busy. */ | ||||
for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) { | for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) { | ||||
MPASS(p->pindex == bm->pindex + i); | MPASS(p->pindex == bm->pindex + i); | ||||
▲ Show 20 Lines • Show All 635 Lines • ▼ Show 20 Lines | swp_pager_swblk_empty(struct swblk *sb, int start, int limit) | ||||
for (i = start; i < limit; i++) { | for (i = start; i < limit; i++) { | ||||
if (sb->d[i] != SWAPBLK_NONE) | if (sb->d[i] != SWAPBLK_NONE) | ||||
return (false); | return (false); | ||||
} | } | ||||
return (true); | return (true); | ||||
} | } | ||||
/* | /* | ||||
* SWP_SWBLK_FORWARD_SEARCH() - count contiguous swap blocks going forward | |||||
* | |||||
* Starting at slot "index" of swblk "sb", count how many consecutive | |||||
* slots hold consecutive swap addresses beginning with "expected_addr". | |||||
* The search continues into following swblks and stops at "max_seek". | |||||
*/ | |||||
static int | |||||
swp_swblk_forward_search(vm_object_t object, struct swblk *sb, | |||||
int index, daddr_t expected_addr, int max_seek) | |||||
{ | |||||
int seek = 0; | |||||
VM_OBJECT_ASSERT_LOCKED(object); | |||||
do { | |||||
for (; index < SWAP_META_PAGES; ++index) { | |||||
if (expected_addr++ != sb->d[index]) | |||||
return (seek); | |||||
++seek; | |||||
if (seek >= max_seek) | |||||
return (seek); | |||||
} | |||||
index = 0; | |||||
} while ((sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, | |||||
sb->p + SWAP_META_PAGES)) != NULL); | |||||
return (seek); | |||||
} | |||||
/* | |||||
* SWP_SWBLK_BACKWARD_SEARCH() - count contiguous swap blocks going backward | |||||
* | |||||
* Starting at slot "index" of swblk "sb", count how many preceding | |||||
* slots hold descending swap addresses beginning with "expected_addr". | |||||
* The search continues into preceding swblks and stops at "max_seek". | |||||
*/ | |||||
static int | |||||
swp_swblk_backward_search(vm_object_t object, struct swblk *sb, | |||||
int index, daddr_t expected_addr, int max_seek) | |||||
{ | |||||
int seek = 0; | |||||
VM_OBJECT_ASSERT_LOCKED(object); | |||||
do { | |||||
for (; index >= 0; --index) { | |||||
if (expected_addr-- != sb->d[index]) | |||||
return (seek); | |||||
++seek; | |||||
if (seek >= max_seek) | |||||
return (seek); | |||||
} | |||||
index = SWAP_META_PAGES - 1; | |||||
} while ((sb = SWAP_PCTRIE_LOOKUP( &object->un_pager.swp.swp_blks, | |||||
sb->p - SWAP_META_PAGES)) != NULL); | |||||
return (seek); | |||||
} | |||||
/* | |||||
* SWP_PAGER_FREE_EMPTY_SWBLK() - frees if a block is free | * SWP_PAGER_FREE_EMPTY_SWBLK() - frees if a block is free | ||||
* | * | ||||
* Nothing is done if the block is still in use. | * Nothing is done if the block is still in use. | ||||
*/ | */ | ||||
static void | static void | ||||
swp_pager_free_empty_swblk(vm_object_t object, struct swblk *sb) | swp_pager_free_empty_swblk(vm_object_t object, struct swblk *sb) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 226 Lines • ▼ Show 20 Lines | for (pindex = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( | ||||
} | } | ||||
SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); | SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); | ||||
uma_zfree(swblk_zone, sb); | uma_zfree(swblk_zone, sb); | ||||
} | } | ||||
swp_pager_freeswapspace(s_free, n_free); | swp_pager_freeswapspace(s_free, n_free); | ||||
} | } | ||||
/* | /* | ||||
* SWP_PAGER_METACTL() - misc control of swap meta data. | * SWP_PAGER_META_LOOKUP() - lookup good backing store for | ||||
* the requested page and optionally the number of continuous blocks. | |||||
* | * | ||||
* This routine is capable of looking up, or removing swapblk | * This routine looks up swapblk assignment in the swap meta data. | ||||
* assignments in the swap meta data. It returns the swapblk being | * In addition, if before and/or after are provided with a limit value, | ||||
* looked-up, popped, or SWAPBLK_NONE if the block was invalid. | * it looks for the number of continuous blocks backward and forward. | ||||
* | * | ||||
* When acting on a busy resident page and paging is in progress, we | * This returns SWAPBLK_NONE if swblk at pindex isn't associated to | ||||
* have to wait until paging is complete but otherwise can act on the | * swap meta data; *before and *after are set to 0 in this case. | ||||
* busy page. | * Otherwise, return the swap address of pindex. If *before and/or | ||||
* *after are provided with positive numbers, the number of continuous | |||||
* blocks before and/or after up to provided limits are searched and | |||||
* returned. | |||||
* | |||||
* Input - "object" and "pindex0" | |||||
* Input/Output - "before" and "after" - the number of continuous blocks | |||||
* to look for as input and its actual size as output. | |||||
*/ | */ | ||||
static daddr_t | static daddr_t | ||||
swp_pager_meta_lookup(vm_object_t object, vm_pindex_t pindex) | swp_pager_meta_lookup(vm_object_t object, vm_pindex_t pindex, int *before, | ||||
int *after) | |||||
{ | { | ||||
struct swblk *sb; | struct swblk *sb; | ||||
int i, end; | |||||
VM_OBJECT_ASSERT_LOCKED(object); | VM_OBJECT_ASSERT_LOCKED(object); | ||||
/* | /* | ||||
* The meta data only exists if the object is OBJT_SWAP | * The meta data only exists if the object is OBJT_SWAP | ||||
* and even then might not be allocated yet. | * and even then might not be allocated yet. | ||||
*/ | */ | ||||
KASSERT(object->type == OBJT_SWAP, | KASSERT(object->type == OBJT_SWAP, | ||||
("Lookup object not swappable")); | ("Lookup object not swappable")); | ||||
Done Inline ActionsI need to restore this KASSERT. ota_j.email.ne.jp: I need to restore this KASSERT. | |||||
i = pindex % SWAP_META_PAGES; | |||||
sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, | sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, | ||||
rounddown(pindex, SWAP_META_PAGES)); | rounddown(pindex, SWAP_META_PAGES)); | ||||
if (sb == NULL) | if (sb == NULL || sb->d[i] == SWAPBLK_NONE) { | ||||
if (before != NULL) | |||||
*before = 0; | |||||
if (after != NULL) | |||||
*after = 0; | |||||
return (SWAPBLK_NONE); | return (SWAPBLK_NONE); | ||||
return (sb->d[pindex % SWAP_META_PAGES]); | } | ||||
if (before != NULL) { | |||||
Done Inline Actionsbefore != NULL, there and in all other places kib: before != NULL, there and in all other places | |||||
if (*before <= 0 || pindex == 0) { | |||||
Done Inline ActionsHow could this condition (left part before ||) be true. And even if it is, why do we care ? kib: How could this condition (left part before ||) be true. And even if it is, why do we care ? | |||||
Done Inline ActionsThe argument is an integer and vm_pindex_t is unsigned long long; I wanted to check negative values earlier to avoid comparing these 2 different types. ota_j.email.ne.jp: The argument is an integer and vm_pindex_t is unsigned long long; I wanted to check negative… | |||||
*before = 0; | |||||
} else { | |||||
Done Inline ActionsStyle, } else { should be one line. markj: Style, `} else {` should be one line. | |||||
if (pindex < *before) | |||||
end = pindex; | |||||
else | |||||
end = *before; | |||||
*before = swp_swblk_backward_search(object, sb, i - 1, | |||||
Done Inline ActionsThis comment seems a little misleading? vm_pindex_t is unsigned. markj: This comment seems a little misleading? vm_pindex_t is unsigned. | |||||
Done Inline ActionsI will change this to something like /* be sure to avoid under-flow */ ota_j.email.ne.jp: I will change this to something like /* be sure to avoid under-flow */ | |||||
Done Inline ActionsThe problem cases were when pindex was very small, like 1, and *before was larger, like 31: the subtraction resulted in a huge number. I think the comment is now accurate. ota_j.email.ne.jp: The problem cases were when pindex was very small, like 1, and *before was larger, like 31: the… | |||||
Done Inline ActionsI'm removing, actually. ota_j.email.ne.jp: I'm removing, actually. | |||||
sb->d[i] - 1, end); | |||||
} | |||||
} | |||||
if (after != NULL) { | |||||
if (*after <= 0) | |||||
*after = 0; | |||||
else { | |||||
if (pindex + *after < pindex) | |||||
end = UINT64_MAX - pindex; | |||||
else | |||||
end = *after; | |||||
Done Inline ActionsI think it is clearer to write pindex + *after < pindex. markj: I think it is clearer to write `pindex + *after < pindex`. | |||||
*after = swp_swblk_forward_search(object, sb, i + 1, | |||||
sb->d[i] + 1, end); | |||||
} | |||||
} | |||||
return (sb->d[i]); | |||||
} | } | ||||
/* | /* | ||||
* Returns the least page index which is greater than or equal to the | * Returns the least page index which is greater than or equal to the | ||||
* parameter pindex and for which there is a swap block allocated. | * parameter pindex and for which there is a swap block allocated. | ||||
* Returns object's size if the object's type is not swap or if there | * Returns object's size if the object's type is not swap or if there | ||||
* are no allocated swap blocks for the object after the requested | * are no allocated swap blocks for the object after the requested | ||||
* pindex. | * pindex. | ||||
Show All 15 Lines | swap_pager_find_least(vm_object_t object, vm_pindex_t pindex) | ||||
if (sb->p < pindex) { | if (sb->p < pindex) { | ||||
for (i = pindex % SWAP_META_PAGES; i < SWAP_META_PAGES; i++) { | for (i = pindex % SWAP_META_PAGES; i < SWAP_META_PAGES; i++) { | ||||
if (sb->d[i] != SWAPBLK_NONE) | if (sb->d[i] != SWAPBLK_NONE) | ||||
return (sb->p + i); | return (sb->p + i); | ||||
} | } | ||||
sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, | sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, | ||||
roundup(pindex, SWAP_META_PAGES)); | roundup(pindex, SWAP_META_PAGES)); | ||||
if (sb == NULL) | if (sb == NULL) | ||||
return (object->size); | return (object->size); | ||||
Done Inline ActionsI find this function quite hard to read: there are many local variables and adjustments by one. Is it possible to simplify this at all? markj: I find this function quite hard to read: there are many local variables and adjustments by one. | |||||
Done Inline ActionsI will see how I can simplify. ota_j.email.ne.jp: I will see how I can simplify. | |||||
} | } | ||||
for (i = 0; i < SWAP_META_PAGES; i++) { | for (i = 0; i < SWAP_META_PAGES; i++) { | ||||
if (sb->d[i] != SWAPBLK_NONE) | if (sb->d[i] != SWAPBLK_NONE) | ||||
return (sb->p + i); | return (sb->p + i); | ||||
} | } | ||||
/* | /* | ||||
* We get here if a swblk is present in the trie but it | * We get here if a swblk is present in the trie but it | ||||
▲ Show 20 Lines • Show All 848 Lines • Show Last 20 Lines |
This is changing the values of maxahead and maxbehind passed in swap_pager_getpages(), so the caller's hints may not be respected.