Changeset View
Standalone View
sys/vm/swap_pager.c
Show First 20 Lines • Show All 696 Lines • ▼ Show 20 Lines | swap_pager_dealloc(vm_object_t object) | ||||
* associated with vm_page_t's for this object. We do not care | * associated with vm_page_t's for this object. We do not care | ||||
* if paging is still in progress on some objects. | * if paging is still in progress on some objects. | ||||
*/ | */ | ||||
swp_pager_meta_free_all(object); | swp_pager_meta_free_all(object); | ||||
object->handle = NULL; | object->handle = NULL; | ||||
object->type = OBJT_DEAD; | object->type = OBJT_DEAD; | ||||
} | } | ||||
/*
 * Completion handler for an asynchronous BIO_DELETE (trim) request issued
 * by swapdev_trim().  Installed as b_iodone, so it runs from the I/O
 * completion path and must not sleep.
 */
static void
swp_pager_async_trimdone(struct buf *bp)
{
	struct swdevt *sp;
	daddr_t blk;
	int npages;

	/* Capture everything we need from the buf before freeing it. */
	sp = (struct swdevt *)bp->b_fsprivate1;	/* stashed by swapdev_trim() */
	blk = bp->b_blkno;
	npages = bp->b_npages;
	uma_zfree(swwbuf_zone, bp);

	/*
	 * Return the async-buffer credit.  A waiter in swapdev_trim() only
	 * sleeps when the count is zero, so a wakeup is needed only on the
	 * 0 -> 1 transition.
	 */
	mtx_lock(&swbuf_mtx);
	if (++nsw_wcount_async == 1)
		wakeup(&nsw_wcount_async);
	mtx_unlock(&swbuf_mtx);

	/*
	 * The trimmed range may be reused now.  b_blkno was made
	 * device-absolute in swapdev_trim(); convert it back to a
	 * blist-relative offset before freeing the blocks.
	 */
	mtx_lock(&sw_dev_mtx);
	blk -= sp->sw_first;
	blist_free(sp->sw_blist, blk, npages);
	mtx_unlock(&sw_dev_mtx);
}
static struct buf * | |||||
swapdev_trim(struct swdevt *sp) | |||||
{ | |||||
struct buf *bp; | |||||
daddr_t blk; | |||||
u_long nblks; | |||||
int npages, mpages; | |||||
/* Quit if the cursor is too far behind the trimmer. */ | |||||
mtx_lock(&sw_dev_mtx); | |||||
nblks = sp->sw_nblks; | |||||
blk = sp->sw_trimmer; | |||||
alc: This is problematic because the cursors are 64-bit variables while npages is a 32-bit variable. | |||||
if ((blk + nblks - sp->sw_cursor) % nblks > nblks / 5) { | |||||
mtx_unlock(&sw_dev_mtx); | |||||
return (NULL); | |||||
} | |||||
/* Grab a lot of free space allocated long ago. */ | |||||
npages = 1; | |||||
mpages = 10000; | |||||
Not Done Inline ActionsTake a look at ffs_blkfree_sendtrim() in ufs/ffs/ffs_alloc.c alc: Take a look at ffs_blkfree_sendtrim() in ufs/ffs/ffs_alloc.c | |||||
blk = blist_alloc(sp->sw_blist, &sp->sw_trimmer, &npages, mpages); | |||||
/* Quit if there's nothing free between the trimmer and cursor. */ | |||||
Not Done Inline ActionsI think that you are familiar with a pthread_cond_wait(). This is similar, in particular, the mutex must be held by the caller. And that mutex is being used to synchronize access to nsw_wcount_async. alc: I think that you are familiar with a pthread_cond_wait(). This is similar, in particular, the… | |||||
if (blk == SWAPBLK_NONE) { | |||||
mtx_unlock(&sw_dev_mtx); | |||||
return (NULL); | |||||
} | |||||
if ((blk + nblks - sp->sw_cursor) % nblks > nblks / 5) { | |||||
/* The trim allocation jumped the cursor. Bail out. */ | |||||
blist_free(sp->sw_blist, blk, npages); | |||||
mtx_unlock(&sw_dev_mtx); | |||||
return (NULL); | |||||
} | |||||
blk += sp->sw_first; | |||||
mtx_unlock(&sw_dev_mtx); | |||||
mtx_lock(&swbuf_mtx); | |||||
while (nsw_wcount_async == 0) | |||||
msleep(&nsw_wcount_async, &swbuf_mtx, PVM, | |||||
"swbufa", 0); | |||||
nsw_wcount_async--; | |||||
mtx_unlock(&swbuf_mtx); | |||||
bp = uma_zalloc(swwbuf_zone, M_WAITOK); | |||||
Not Done Inline ActionsYou can't perform an M_WAITOK allocation when a mutex is held. A mutex is held when this function is called by swap_pager_reserve(). alc: You can't perform an M_WAITOK allocation when a mutex is held. A mutex is held when this… | |||||
bp->b_flags = B_ASYNC; | |||||
bp->b_iocmd = BIO_DELETE; | |||||
bp->b_rcred = crhold(thread0.td_ucred); | |||||
bp->b_wcred = crhold(thread0.td_ucred); | |||||
bp->b_bcount = PAGE_SIZE * npages; | |||||
bp->b_bufsize = PAGE_SIZE * npages; | |||||
bp->b_blkno = blk; | |||||
bp->b_npages = npages; | |||||
bp->b_iodone = swp_pager_async_trimdone; | |||||
bp->b_fsprivate1 = sp; | |||||
BUF_KERNPROC(bp); | |||||
return (bp); | |||||
} | |||||
/************************************************************************ | /************************************************************************ | ||||
* SWAP PAGER BITMAP ROUTINES * | * SWAP PAGER BITMAP ROUTINES * | ||||
************************************************************************/ | ************************************************************************/ | ||||
/* | /* | ||||
* SWP_PAGER_GETSWAPSPACE() - allocate raw swap space | * SWP_PAGER_GETSWAPSPACE() - allocate raw swap space | ||||
* | * | ||||
* Allocate swap for up to the requested number of pages, and at | * Allocate swap for up to the requested number of pages, and at | ||||
* least a minimum number of pages. The starting swap block number | * least a minimum number of pages. The starting swap block number | ||||
* (a page index) is returned or SWAPBLK_NONE if the allocation | * (a page index) is returned or SWAPBLK_NONE if the allocation | ||||
* failed. | * failed. | ||||
* | * | ||||
* Also has the side effect of advising that somebody made a mistake | * Also has the side effect of advising that somebody made a mistake | ||||
* when they configured swap and didn't configure enough. | * when they configured swap and didn't configure enough. | ||||
* | * | ||||
* This routine may not sleep. | * This routine may not sleep. | ||||
* | * | ||||
* We allocate in round-robin fashion from the configured devices. | * We allocate in round-robin fashion from the configured devices. | ||||
*/ | */ | ||||
static daddr_t | static daddr_t | ||||
swp_pager_getswapspace(int *io_npages, int limit) | swp_pager_getswapspace(int *io_npages, int limit) | ||||
{ | { | ||||
daddr_t blk; | daddr_t blk; | ||||
struct swdevt *sp; | struct swdevt *sp; | ||||
struct buf *bp; | |||||
int mpages, npages; | int mpages, npages; | ||||
blk = SWAPBLK_NONE; | blk = SWAPBLK_NONE; | ||||
mpages = *io_npages; | mpages = *io_npages; | ||||
npages = imin(BLIST_MAX_ALLOC, mpages); | npages = imin(BLIST_MAX_ALLOC, mpages); | ||||
mtx_lock(&sw_dev_mtx); | mtx_lock(&sw_dev_mtx); | ||||
sp = swdevhd; | sp = swdevhd; | ||||
while (!TAILQ_EMPTY(&swtailq)) { | while (!TAILQ_EMPTY(&swtailq)) { | ||||
if (sp == NULL) | if (sp == NULL) | ||||
sp = TAILQ_FIRST(&swtailq); | sp = TAILQ_FIRST(&swtailq); | ||||
if ((sp->sw_flags & SW_CLOSING) == 0) | if ((sp->sw_flags & SW_CLOSING) == 0) | ||||
blk = blist_alloc(sp->sw_blist, &npages, mpages); | blk = blist_alloc(sp->sw_blist, &sp->sw_cursor, | ||||
&npages, mpages); | |||||
if (blk != SWAPBLK_NONE) | if (blk != SWAPBLK_NONE) | ||||
break; | break; | ||||
sp = TAILQ_NEXT(sp, sw_list); | sp = TAILQ_NEXT(sp, sw_list); | ||||
if (swdevhd == sp) { | if (swdevhd == sp) { | ||||
if (npages <= limit) | if (npages <= limit) | ||||
break; | break; | ||||
mpages = npages - 1; | mpages = npages - 1; | ||||
npages >>= 1; | npages >>= 1; | ||||
Show All 11 Lines | if (swap_pager_full != 2) { | ||||
printf("swp_pager_getswapspace(%d): failed\n", | printf("swp_pager_getswapspace(%d): failed\n", | ||||
*io_npages); | *io_npages); | ||||
swap_pager_full = 2; | swap_pager_full = 2; | ||||
swap_pager_almost_full = 1; | swap_pager_almost_full = 1; | ||||
} | } | ||||
swdevhd = NULL; | swdevhd = NULL; | ||||
} | } | ||||
mtx_unlock(&sw_dev_mtx); | mtx_unlock(&sw_dev_mtx); | ||||
if (blk != SWAPBLK_NONE) { | |||||
bp = swapdev_trim(sp); | |||||
if (bp != NULL) | |||||
swp_pager_strategy(bp); | |||||
} | |||||
return (blk); | return (blk); | ||||
} | } | ||||
static bool | static bool | ||||
swp_pager_isondev(daddr_t blk, struct swdevt *sp) | swp_pager_isondev(daddr_t blk, struct swdevt *sp) | ||||
{ | { | ||||
return (blk >= sp->sw_first && blk < sp->sw_end); | return (blk >= sp->sw_first && blk < sp->sw_end); | ||||
▲ Show 20 Lines • Show All 105 Lines • ▼ Show 20 Lines | |||||
* The object must be locked. | * The object must be locked. | ||||
*/ | */ | ||||
void | void | ||||
swap_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size) | swap_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size) | ||||
{ | { | ||||
swp_pager_meta_free(object, start, size); | swp_pager_meta_free(object, start, size); | ||||
} | } | ||||
Not Done Inline Actions — alc: sw_dev_mtx isn't being released.
Done Inline Actions — dougm: swapdev_trim releases it if it returns a non-NULL bp.
sw_strategy -> swapgeom_strategy acquires and releases it.
/* | /* | ||||
* SWAP_PAGER_RESERVE() - reserve swap blocks in object | * SWAP_PAGER_RESERVE() - reserve swap blocks in object | ||||
* | * | ||||
* Assigns swap blocks to the specified range within the object. The | * Assigns swap blocks to the specified range within the object. The | ||||
* swap blocks are not zeroed. Any previous swap assignment is destroyed. | * swap blocks are not zeroed. Any previous swap assignment is destroyed. | ||||
* | * | ||||
* Returns 0 on success, -1 on failure. | * Returns 0 on success, -1 on failure. | ||||
*/ | */ | ||||
int | int | ||||
swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size) | swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size) | ||||
{ | { | ||||
daddr_t addr, blk, n_free, s_free; | daddr_t addr, blk, n_free, s_free; | ||||
int i, j, n; | int i, j, k, n; | ||||
struct { | |||||
daddr_t blk; | |||||
int nblks; | |||||
} mem[size]; | |||||
Unsubmitted, Not Done Inline Actions — alc: If swap space is fragmented, a call that allocates a moderate amount of swap space will overflow the stack and crash the kernel.
swp_pager_init_freerange(&s_free, &n_free); | for (i = k = 0; i < size; i += n, k++) { | ||||
VM_OBJECT_WLOCK(object); | |||||
for (i = 0; i < size; i += n) { | |||||
n = size - i; | n = size - i; | ||||
blk = swp_pager_getswapspace(&n, 1); | blk = swp_pager_getswapspace(&n, 1); | ||||
if (blk == SWAPBLK_NONE) { | if (blk == SWAPBLK_NONE) | ||||
swp_pager_meta_free(object, start, i); | break; | ||||
VM_OBJECT_WUNLOCK(object); | mem[k].blk = blk; | ||||
mem[k].nblks = n; | |||||
} | |||||
if (i < size) { | |||||
while (k-- > 0) | |||||
swp_pager_freeswapspace(mem[k].blk, mem[k].nblks); | |||||
return (-1); | return (-1); | ||||
} | } | ||||
swp_pager_init_freerange(&s_free, &n_free); | |||||
VM_OBJECT_WLOCK(object); | |||||
for (i = k = 0; i < size; i += n, k++) { | |||||
Not Done Inline Actions — alc: This actually looks like a bug fix. Specifically, it addresses an unlikely, but hypothetically possible, swap space leak. Please create a separate change for this fix.
blk = mem[k].blk; | |||||
n = mem[k].blks; | |||||
for (j = 0; j < n; ++j) { | for (j = 0; j < n; ++j) { | ||||
addr = swp_pager_meta_build(object, | addr = swp_pager_meta_build(object, | ||||
start + i + j, blk + j); | start + i + j, blk + j); | ||||
if (addr != SWAPBLK_NONE) | if (addr != SWAPBLK_NONE) | ||||
swp_pager_update_freerange(&s_free, &n_free, | swp_pager_update_freerange(&s_free, &n_free, | ||||
addr); | addr); | ||||
} | } | ||||
} | } | ||||
▲ Show 20 Lines • Show All 1,383 Lines • ▼ Show 20 Lines | swaponsomething(struct vnode *vp, void *id, u_long nblks, | ||||
sp->sw_dev = dev; | sp->sw_dev = dev; | ||||
sp->sw_nblks = nblks; | sp->sw_nblks = nblks; | ||||
sp->sw_used = 0; | sp->sw_used = 0; | ||||
sp->sw_strategy = strategy; | sp->sw_strategy = strategy; | ||||
sp->sw_close = close; | sp->sw_close = close; | ||||
sp->sw_flags = flags; | sp->sw_flags = flags; | ||||
sp->sw_blist = blist_create(nblks, M_WAITOK); | sp->sw_blist = blist_create(nblks, M_WAITOK); | ||||
sp->sw_cursor = 0; | |||||
sp->sw_trimmer = 0; | |||||
/* | /* | ||||
* Do not free the first two block in order to avoid overwriting | * Do not free the first two block in order to avoid overwriting | ||||
* any bsd label at the front of the partition | * any bsd label at the front of the partition | ||||
*/ | */ | ||||
blist_free(sp->sw_blist, 2, nblks - 2); | blist_free(sp->sw_blist, 2, nblks - 2); | ||||
dvbase = 0; | dvbase = 0; | ||||
mtx_lock(&sw_dev_mtx); | mtx_lock(&sw_dev_mtx); | ||||
▲ Show 20 Lines • Show All 684 Lines • Show Last 20 Lines |
alc: This is problematic because the cursors are 64-bit variables while npages is a 32-bit variable.