Changeset View
Standalone View
sys/vm/swap_pager.c
Show First 20 Lines • Show All 730 Lines • ▼ Show 20 Lines | swp_pager_getswapspace(int *io_npages, int limit) | ||||
blk = SWAPBLK_NONE; | blk = SWAPBLK_NONE; | ||||
mpages = *io_npages; | mpages = *io_npages; | ||||
npages = imin(BLIST_MAX_ALLOC, mpages); | npages = imin(BLIST_MAX_ALLOC, mpages); | ||||
mtx_lock(&sw_dev_mtx); | mtx_lock(&sw_dev_mtx); | ||||
sp = swdevhd; | sp = swdevhd; | ||||
while (!TAILQ_EMPTY(&swtailq)) { | while (!TAILQ_EMPTY(&swtailq)) { | ||||
if (sp == NULL) | if (sp == NULL) | ||||
sp = TAILQ_FIRST(&swtailq); | sp = TAILQ_FIRST(&swtailq); | ||||
if ((sp->sw_flags & SW_CLOSING) == 0) | if ((sp->sw_flags & SW_CLOSING) == 0) | ||||
alc: This is problematic because the cursors are 64-bit variables while npages is a 32-bit variable. | |||||
blk = blist_alloc(sp->sw_blist, &npages, mpages); | blk = blist_alloc(sp->sw_blist, &sp->sw_cursor, | ||||
&npages, mpages); | |||||
if (blk != SWAPBLK_NONE) | if (blk != SWAPBLK_NONE) | ||||
break; | break; | ||||
sp = TAILQ_NEXT(sp, sw_list); | sp = TAILQ_NEXT(sp, sw_list); | ||||
if (swdevhd == sp) { | if (swdevhd == sp) { | ||||
if (npages <= limit) | if (npages <= limit) | ||||
break; | break; | ||||
Inline comment (not done) — alc: Take a look at ffs_blkfree_sendtrim() in ufs/ffs/ffs_alloc.c. | |||||
mpages = npages - 1; | mpages = npages - 1; | ||||
npages >>= 1; | npages >>= 1; | ||||
} | } | ||||
Inline comment (not done) — alc: I think that you are familiar with pthread_cond_wait(). This is similar; in particular, the mutex must be held by the caller, and that mutex is being used to synchronize access to nsw_wcount_async. | |||||
} | } | ||||
if (blk != SWAPBLK_NONE) { | if (blk != SWAPBLK_NONE) { | ||||
*io_npages = npages; | *io_npages = npages; | ||||
blk += sp->sw_first; | blk += sp->sw_first; | ||||
sp->sw_used += npages; | sp->sw_used += npages; | ||||
swap_pager_avail -= npages; | swap_pager_avail -= npages; | ||||
swp_sizecheck(); | swp_sizecheck(); | ||||
swdevhd = TAILQ_NEXT(sp, sw_list); | swdevhd = TAILQ_NEXT(sp, sw_list); | ||||
} else { | } else { | ||||
if (swap_pager_full != 2) { | if (swap_pager_full != 2) { | ||||
printf("swp_pager_getswapspace(%d): failed\n", | printf("swp_pager_getswapspace(%d): failed\n", | ||||
*io_npages); | *io_npages); | ||||
swap_pager_full = 2; | swap_pager_full = 2; | ||||
swap_pager_almost_full = 1; | swap_pager_almost_full = 1; | ||||
} | } | ||||
swdevhd = NULL; | swdevhd = NULL; | ||||
} | } | ||||
mtx_unlock(&sw_dev_mtx); | mtx_unlock(&sw_dev_mtx); | ||||
return (blk); | return (blk); | ||||
Inline comment (not done) — alc: You can't perform an M_WAITOK allocation when a mutex is held. A mutex is held when this function is called by swap_pager_reserve(). | |||||
} | } | ||||
static bool | static bool | ||||
swp_pager_isondev(daddr_t blk, struct swdevt *sp) | swp_pager_isondev(daddr_t blk, struct swdevt *sp) | ||||
{ | { | ||||
return (blk >= sp->sw_first && blk < sp->sw_end); | return (blk >= sp->sw_first && blk < sp->sw_end); | ||||
} | } | ||||
Show All 18 Lines | if (swp_pager_isondev(bp->b_blkno, sp)) { | ||||
sp->sw_strategy(bp, sp); | sp->sw_strategy(bp, sp); | ||||
return; | return; | ||||
} | } | ||||
} | } | ||||
panic("Swapdev not found"); | panic("Swapdev not found"); | ||||
} | } | ||||
static void | |||||
swp_pager_async_trimdone(struct buf *bp) | |||||
{ | |||||
struct swdevt *sp; | |||||
daddr_t blk; | |||||
int npages; | |||||
sp = (struct swdevt *)bp->b_fsprivate1; | |||||
blk = bp->b_blkno; | |||||
npages = bp->b_npages; | |||||
uma_zfree(swwbuf_zone, bp); | |||||
mtx_lock(&swbuf_mtx); | |||||
if (++nsw_wcount_async == 1) | |||||
wakeup(&nsw_wcount_async); | |||||
mtx_unlock(&swbuf_mtx); | |||||
mtx_lock(&sw_dev_mtx); | |||||
blk -= sp->sw_first; | |||||
blist_free(sp->sw_blist, blk, npages); | |||||
mtx_unlock(&sw_dev_mtx); | |||||
} | |||||
static int trimzone_numer = 1; | |||||
static int trimzone_denom = 8; | |||||
static int min_trim_alloc = 1; | |||||
static int max_trim_alloc = 32768; | |||||
/* | /* | ||||
* The 'trim zone' is the small range of addresses just ahead of the cursor | |||||
* likely to be allocated soon. When the trimmer falls into the trim zone, | |||||
* we allocate blocks with the trimmer. | |||||
*/ | |||||
static bool | |||||
swapdev_in_trim_zone(daddr_t start, daddr_t end, daddr_t nblks) | |||||
{ | |||||
return ((end + nblks - start) % nblks / trimzone_numer < | |||||
nblks / trimzone_denom); | |||||
} | |||||
/* find a swap device that needs trimming, and start trimming */ | |||||
static void | |||||
swp_pager_trimswapspace(void) | |||||
{ | |||||
struct swdevt *sp; | |||||
struct buf *bp; | |||||
daddr_t blk, cursor; | |||||
u_long nblks; | |||||
int npages; | |||||
mtx_lock(&sw_dev_mtx); | |||||
TAILQ_FOREACH(sp, &swtailq, sw_list) { | |||||
/* Quit if the cursor is too far behind the trimmer. */ | |||||
nblks = sp->sw_nblks; | |||||
cursor = sp->sw_cursor; | |||||
while (swapdev_in_trim_zone(cursor, sp->sw_trimmer, nblks)) { | |||||
/* Grab free space allocated long ago. */ | |||||
npages = min_trim_alloc; | |||||
blk = blist_alloc(sp->sw_blist, &sp->sw_trimmer, | |||||
&npages, max_trim_alloc); | |||||
/* Quit if there's nothing free. */ | |||||
if (blk == SWAPBLK_NONE) | |||||
break; | |||||
mtx_unlock(&sw_dev_mtx); | |||||
CTR5(KTR_SPARE5, "%s: cursor %p blk %p " | |||||
"size %5d trimmer %p", __func__, | |||||
(void*)cursor, (void*)blk, npages, | |||||
(void*)sp->sw_trimmer); | |||||
blk += sp->sw_first; | |||||
mtx_lock(&swbuf_mtx); | |||||
if (nsw_wcount_async == 0) { | |||||
mtx_unlock(&swbuf_mtx); | |||||
mtx_lock(&sw_dev_mtx); | |||||
break; | |||||
} | |||||
nsw_wcount_async--; | |||||
mtx_unlock(&swbuf_mtx); | |||||
bp = uma_zalloc(swwbuf_zone, M_NOWAIT); | |||||
if (bp == NULL) { | |||||
mtx_lock(&sw_dev_mtx); | |||||
break; | |||||
} | |||||
bp->b_flags = B_ASYNC; | |||||
bp->b_data = NULL; | |||||
bp->b_iocmd = BIO_DELETE; | |||||
bp->b_rcred = crhold(thread0.td_ucred); | |||||
bp->b_wcred = crhold(thread0.td_ucred); | |||||
bp->b_bcount = PAGE_SIZE * npages; | |||||
bp->b_bufsize = PAGE_SIZE * npages; | |||||
bp->b_blkno = blk; | |||||
bp->b_npages = npages; | |||||
bp->b_iodone = swp_pager_async_trimdone; | |||||
bp->b_fsprivate1 = sp; | |||||
BUF_KERNPROC(bp); | |||||
sp->sw_strategy(bp,sp); | |||||
mtx_lock(&swbuf_mtx); | |||||
} | |||||
} | |||||
mtx_unlock(&sw_dev_mtx); | |||||
} | |||||
/* | |||||
* SWP_PAGER_FREESWAPSPACE() - free raw swap space | * SWP_PAGER_FREESWAPSPACE() - free raw swap space | ||||
* | * | ||||
* This routine returns the specified swap blocks back to the bitmap. | * This routine returns the specified swap blocks back to the bitmap. | ||||
* | * | ||||
* This routine may not sleep. | * This routine may not sleep. | ||||
*/ | */ | ||||
static void | static void | ||||
swp_pager_freeswapspace(daddr_t blk, daddr_t npages) | swp_pager_freeswapspace(daddr_t blk, daddr_t npages) | ||||
{ | { | ||||
struct swdevt *sp; | struct swdevt *sp; | ||||
if (npages == 0) | if (npages == 0) | ||||
Inline comment (not done) — alc: sw_dev_mtx isn't being released. | |||||
Inline comment (done) — dougm: swapdev_trim releases it if it returns a non-NULL bp. sw_strategy -> swapgeom_strategy acquires and releases it. | |||||
return; | return; | ||||
mtx_lock(&sw_dev_mtx); | mtx_lock(&sw_dev_mtx); | ||||
TAILQ_FOREACH(sp, &swtailq, sw_list) { | TAILQ_FOREACH(sp, &swtailq, sw_list) { | ||||
if (swp_pager_isondev(blk, sp)) { | if (swp_pager_isondev(blk, sp)) { | ||||
sp->sw_used -= npages; | sp->sw_used -= npages; | ||||
/* | /* | ||||
* If we are attempting to stop swapping on | * If we are attempting to stop swapping on | ||||
* this device, we don't want to mark any | * this device, we don't want to mark any | ||||
▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines | |||||
int | int | ||||
swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size) | swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size) | ||||
{ | { | ||||
daddr_t addr, blk, n_free, s_free; | daddr_t addr, blk, n_free, s_free; | ||||
int i, j, n; | int i, j, n; | ||||
swp_pager_init_freerange(&s_free, &n_free); | swp_pager_init_freerange(&s_free, &n_free); | ||||
VM_OBJECT_WLOCK(object); | VM_OBJECT_WLOCK(object); | ||||
for (i = 0; i < size; i += n) { | for (i = 0; i < size; i += n) { | ||||
Inline comment (not done) — alc: If swap space is fragmented, a call that allocates a moderate amount of swap space will overflow the stack and crash the kernel. | |||||
n = size - i; | n = size - i; | ||||
blk = swp_pager_getswapspace(&n, 1); | blk = swp_pager_getswapspace(&n, 1); | ||||
if (blk == SWAPBLK_NONE) { | if (blk == SWAPBLK_NONE) { | ||||
swp_pager_meta_free(object, start, i); | swp_pager_meta_free(object, start, i); | ||||
VM_OBJECT_WUNLOCK(object); | break; | ||||
Inline comment (not done) — alc: This actually looks like a bug fix. Specifically, it addresses an unlikely, but hypothetically possible, swap space leak. Please create a separate change for this fix. | |||||
return (-1); | |||||
} | } | ||||
for (j = 0; j < n; ++j) { | for (j = 0; j < n; ++j) { | ||||
addr = swp_pager_meta_build(object, | addr = swp_pager_meta_build(object, | ||||
start + i + j, blk + j); | start + i + j, blk + j); | ||||
if (addr != SWAPBLK_NONE) | if (addr != SWAPBLK_NONE) | ||||
swp_pager_update_freerange(&s_free, &n_free, | swp_pager_update_freerange(&s_free, &n_free, | ||||
addr); | addr); | ||||
} | } | ||||
} | } | ||||
swp_pager_freeswapspace(s_free, n_free); | |||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
return (0); | swp_pager_freeswapspace(s_free, n_free); | ||||
swp_pager_trimswapspace(); | |||||
return (i == size ? 0 : -1); | |||||
} | } | ||||
/* | /* | ||||
* SWAP_PAGER_COPY() - copy blocks from source pager to destination pager | * SWAP_PAGER_COPY() - copy blocks from source pager to destination pager | ||||
* and destroy the source. | * and destroy the source. | ||||
* | * | ||||
* Copy any valid swapblks from the source to the destination. In | * Copy any valid swapblks from the source to the destination. In | ||||
* cases where both the source and destination have a valid swapblk, | * cases where both the source and destination have a valid swapblk, | ||||
▲ Show 20 Lines • Show All 405 Lines • ▼ Show 20 Lines | |||||
static void | static void | ||||
swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count, | swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count, | ||||
int flags, int *rtvals) | int flags, int *rtvals) | ||||
{ | { | ||||
int i, n; | int i, n; | ||||
boolean_t sync; | boolean_t sync; | ||||
daddr_t addr, n_free, s_free; | daddr_t addr, n_free, s_free; | ||||
swp_pager_init_freerange(&s_free, &n_free); | |||||
if (count && ma[0]->object != object) { | if (count && ma[0]->object != object) { | ||||
panic("swap_pager_putpages: object mismatch %p/%p", | panic("swap_pager_putpages: object mismatch %p/%p", | ||||
object, | object, | ||||
ma[0]->object | ma[0]->object | ||||
); | ); | ||||
} | } | ||||
/* | /* | ||||
* Step 1 | * Step 1 | ||||
* | * | ||||
* Turn object into OBJT_SWAP | * Turn object into OBJT_SWAP | ||||
* check for bogus sysops | * check for bogus sysops | ||||
* force sync if not pageout process | * force sync if not pageout process | ||||
*/ | */ | ||||
if (object->type != OBJT_SWAP) { | if (object->type != OBJT_SWAP) { | ||||
addr = swp_pager_meta_build(object, 0, SWAPBLK_NONE); | addr = swp_pager_meta_build(object, 0, SWAPBLK_NONE); | ||||
KASSERT(addr == SWAPBLK_NONE, | KASSERT(addr == SWAPBLK_NONE, | ||||
("unexpected object swap block")); | ("unexpected object swap block")); | ||||
} | } | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
n = 0; | n = 0; | ||||
swp_pager_init_freerange(&s_free, &n_free); | |||||
if (curproc != pageproc) | if (curproc != pageproc) | ||||
sync = TRUE; | sync = TRUE; | ||||
else | else | ||||
sync = (flags & VM_PAGER_PUT_SYNC) != 0; | sync = (flags & VM_PAGER_PUT_SYNC) != 0; | ||||
/* | /* | ||||
* Step 2 | * Step 2 | ||||
* | * | ||||
▲ Show 20 Lines • Show All 101 Lines • ▼ Show 20 Lines | for (i = 0; i < count; i += n) { | ||||
bwait(bp, PVM, "swwrt"); | bwait(bp, PVM, "swwrt"); | ||||
/* | /* | ||||
* Now that we are through with the bp, we can call the | * Now that we are through with the bp, we can call the | ||||
* normal async completion, which frees everything up. | * normal async completion, which frees everything up. | ||||
*/ | */ | ||||
swp_pager_async_iodone(bp); | swp_pager_async_iodone(bp); | ||||
} | } | ||||
VM_OBJECT_WLOCK(object); | |||||
swp_pager_freeswapspace(s_free, n_free); | swp_pager_freeswapspace(s_free, n_free); | ||||
swp_pager_trimswapspace(); | |||||
VM_OBJECT_WLOCK(object); | |||||
} | } | ||||
/* | /* | ||||
* swp_pager_async_iodone: | * swp_pager_async_iodone: | ||||
* | * | ||||
* Completion routine for asynchronous reads and writes from/to swap. | * Completion routine for asynchronous reads and writes from/to swap. | ||||
* Also called manually by synchronous code to finish up a bp. | * Also called manually by synchronous code to finish up a bp. | ||||
* | * | ||||
▲ Show 20 Lines • Show All 809 Lines • ▼ Show 20 Lines | swaponsomething(struct vnode *vp, void *id, u_long nblks, | ||||
sp->sw_dev = dev; | sp->sw_dev = dev; | ||||
sp->sw_nblks = nblks; | sp->sw_nblks = nblks; | ||||
sp->sw_used = 0; | sp->sw_used = 0; | ||||
sp->sw_strategy = strategy; | sp->sw_strategy = strategy; | ||||
sp->sw_close = close; | sp->sw_close = close; | ||||
sp->sw_flags = flags; | sp->sw_flags = flags; | ||||
sp->sw_blist = blist_create(nblks, M_WAITOK); | sp->sw_blist = blist_create(nblks, M_WAITOK); | ||||
sp->sw_cursor = 0; | |||||
sp->sw_trimmer = 0; | |||||
/* | /* | ||||
* Do not free the first two block in order to avoid overwriting | * Do not free the first two block in order to avoid overwriting | ||||
* any bsd label at the front of the partition | * any bsd label at the front of the partition | ||||
*/ | */ | ||||
blist_free(sp->sw_blist, 2, nblks - 2); | blist_free(sp->sw_blist, 2, nblks - 2); | ||||
dvbase = 0; | dvbase = 0; | ||||
mtx_lock(&sw_dev_mtx); | mtx_lock(&sw_dev_mtx); | ||||
▲ Show 20 Lines • Show All 423 Lines • ▼ Show 20 Lines | if (cp == NULL) { | ||||
mtx_unlock(&sw_dev_mtx); | mtx_unlock(&sw_dev_mtx); | ||||
bp->b_error = ENXIO; | bp->b_error = ENXIO; | ||||
bp->b_ioflags |= BIO_ERROR; | bp->b_ioflags |= BIO_ERROR; | ||||
bufdone(bp); | bufdone(bp); | ||||
return; | return; | ||||
} | } | ||||
swapgeom_acquire(cp); | swapgeom_acquire(cp); | ||||
mtx_unlock(&sw_dev_mtx); | mtx_unlock(&sw_dev_mtx); | ||||
if (bp->b_iocmd == BIO_WRITE) | if (bp->b_iocmd == BIO_WRITE || bp->b_iocmd == BIO_DELETE) | ||||
bio = g_new_bio(); | bio = g_new_bio(); | ||||
else | else | ||||
bio = g_alloc_bio(); | bio = g_alloc_bio(); | ||||
if (bio == NULL) { | if (bio == NULL) { | ||||
mtx_lock(&sw_dev_mtx); | mtx_lock(&sw_dev_mtx); | ||||
swapgeom_release(cp, sp); | swapgeom_release(cp, sp); | ||||
mtx_unlock(&sw_dev_mtx); | mtx_unlock(&sw_dev_mtx); | ||||
bp->b_error = ENOMEM; | bp->b_error = ENOMEM; | ||||
▲ Show 20 Lines • Show All 244 Lines • Show Last 20 Lines |
This is problematic because the cursors are 64-bit variables while npages is a 32-bit variable.