Changeset View
Standalone View
sys/vm/swap_pager.c
Show First 20 Lines • Show All 730 Lines • ▼ Show 20 Lines | swp_pager_getswapspace(int *io_npages, int limit) | ||||
blk = SWAPBLK_NONE; | blk = SWAPBLK_NONE; | ||||
mpages = *io_npages; | mpages = *io_npages; | ||||
npages = imin(BLIST_MAX_ALLOC, mpages); | npages = imin(BLIST_MAX_ALLOC, mpages); | ||||
mtx_lock(&sw_dev_mtx); | mtx_lock(&sw_dev_mtx); | ||||
sp = swdevhd; | sp = swdevhd; | ||||
while (!TAILQ_EMPTY(&swtailq)) { | while (!TAILQ_EMPTY(&swtailq)) { | ||||
if (sp == NULL) | if (sp == NULL) | ||||
sp = TAILQ_FIRST(&swtailq); | sp = TAILQ_FIRST(&swtailq); | ||||
if ((sp->sw_flags & SW_CLOSING) == 0) | if ((sp->sw_flags & SW_CLOSING) == 0) | ||||
alc: This is problematic because the cursors are 64-bit variables while npages is a 32-bit variable. | |||||
blk = blist_alloc(sp->sw_blist, &npages, mpages); | blk = blist_alloc(sp->sw_blist, &sp->sw_cursor, | ||||
&npages, mpages); | |||||
if (blk != SWAPBLK_NONE) | if (blk != SWAPBLK_NONE) | ||||
break; | break; | ||||
sp = TAILQ_NEXT(sp, sw_list); | sp = TAILQ_NEXT(sp, sw_list); | ||||
if (swdevhd == sp) { | if (swdevhd == sp) { | ||||
if (npages <= limit) | if (npages <= limit) | ||||
break; | break; | ||||
Not Done Inline Actions — alc: Take a look at ffs_blkfree_sendtrim() in ufs/ffs/ffs_alloc.c. | |||||
mpages = npages - 1; | mpages = npages - 1; | ||||
npages >>= 1; | npages >>= 1; | ||||
} | } | ||||
Not Done Inline ActionsI think that you are familiar with a pthread_cond_wait(). This is similar, in particular, the mutex must be held by the caller. And that mutex is being used to synchronize access to nsw_wcount_async. alc: I think that you are familiar with a pthread_cond_wait(). This is similar, in particular, the… | |||||
} | } | ||||
if (blk != SWAPBLK_NONE) { | if (blk != SWAPBLK_NONE) { | ||||
*io_npages = npages; | *io_npages = npages; | ||||
blk += sp->sw_first; | blk += sp->sw_first; | ||||
sp->sw_used += npages; | sp->sw_used += npages; | ||||
swap_pager_avail -= npages; | swap_pager_avail -= npages; | ||||
swp_sizecheck(); | swp_sizecheck(); | ||||
swdevhd = TAILQ_NEXT(sp, sw_list); | swdevhd = TAILQ_NEXT(sp, sw_list); | ||||
} else { | } else { | ||||
if (swap_pager_full != 2) { | if (swap_pager_full != 2) { | ||||
printf("swp_pager_getswapspace(%d): failed\n", | printf("swp_pager_getswapspace(%d): failed\n", | ||||
*io_npages); | *io_npages); | ||||
swap_pager_full = 2; | swap_pager_full = 2; | ||||
swap_pager_almost_full = 1; | swap_pager_almost_full = 1; | ||||
} | } | ||||
swdevhd = NULL; | swdevhd = NULL; | ||||
} | } | ||||
mtx_unlock(&sw_dev_mtx); | mtx_unlock(&sw_dev_mtx); | ||||
return (blk); | return (blk); | ||||
Not Done Inline Actions — alc: You can't perform an M_WAITOK allocation while a mutex is held, and a mutex is held when this function is called by swap_pager_reserve(). | |||||
} | } | ||||
static bool | static bool | ||||
swp_pager_isondev(daddr_t blk, struct swdevt *sp) | swp_pager_isondev(daddr_t blk, struct swdevt *sp) | ||||
{ | { | ||||
return (blk >= sp->sw_first && blk < sp->sw_end); | return (blk >= sp->sw_first && blk < sp->sw_end); | ||||
} | } | ||||
Show All 18 Lines | if (swp_pager_isondev(bp->b_blkno, sp)) { | ||||
sp->sw_strategy(bp, sp); | sp->sw_strategy(bp, sp); | ||||
return; | return; | ||||
} | } | ||||
} | } | ||||
panic("Swapdev not found"); | panic("Swapdev not found"); | ||||
} | } | ||||
/*
 * Completion handler for the asynchronous BIO_DELETE (trim) requests issued
 * by swapdev_trim().  Frees the buf, releases the async-write slot that was
 * reserved when the request was built, and returns the trimmed swap blocks
 * to the owning device's free-block list.
 */
static void
swp_pager_async_trimdone(struct buf *bp)
{
	struct swdevt *sp;
	daddr_t blk;
	int npages;

	/* Copy out everything needed from the buf before it is freed. */
	sp = (struct swdevt *)bp->b_fsprivate1;
	blk = bp->b_blkno;
	npages = bp->b_npages;
	uma_zfree(swwbuf_zone, bp);
	mtx_lock(&swbuf_mtx);
	/* Wake a slot waiter only on the 0 -> 1 transition. */
	if (++nsw_wcount_async == 1)
		wakeup(&nsw_wcount_async);
	mtx_unlock(&swbuf_mtx);
	mtx_lock(&sw_dev_mtx);
	/* b_blkno is device-absolute; convert back to a blist-relative block. */
	blk -= sp->sw_first;
	blist_free(sp->sw_blist, blk, npages);
	mtx_unlock(&sw_dev_mtx);
}
/*
 * Trim tuning knobs.  trimzone_numer/trimzone_denom scale the size of the
 * "trim zone" relative to the device size (see swapdev_in_trim_zone()).
 * min_trim_alloc and max_trim_alloc bound the number of pages claimed by a
 * single trim allocation in swapdev_trim().
 */
static int trimzone_numer = 1;
static int trimzone_denom = 8;
static int min_trim_alloc = 1;
static int max_trim_alloc = 32768;
/* | /* | ||||
* The 'trim zone' is the small range of addresses just ahead of the cursor | |||||
* likely to be allocated soon. When the trimmer falls into the trim zone, | |||||
* we allocate blocks with the trimmer. | |||||
*/ | |||||
static bool | |||||
swapdev_in_trim_zone(daddr_t start, daddr_t end, daddr_t nblks) | |||||
{ | |||||
return ((end + nblks - start) % nblks / trimzone_numer < | |||||
nblks / trimzone_denom); | |||||
} | |||||
/*
 * Try to start one asynchronous trim (BIO_DELETE) on the given swap device.
 *
 * Called with sw_dev_mtx held.  On success the lock has been DROPPED and a
 * buf ready to be handed to sp->sw_strategy() is returned; on failure NULL
 * is returned and the lock is still held.
 */
static struct buf *
swapdev_trim(struct swdevt *sp)
{
	struct buf *bp;
	daddr_t blk;
	u_long nblks;
	int npages;

	/* Quit if the cursor is too far behind the trimmer. */
	nblks = sp->sw_nblks;
	if (!swapdev_in_trim_zone(sp->sw_cursor, sp->sw_trimmer, nblks))
		return (NULL);
	/* Grab a (hopefully) lot of free space allocated long ago. */
	npages = min_trim_alloc;
	blk = blist_alloc(sp->sw_blist, &sp->sw_trimmer,
	    &npages, max_trim_alloc);
	/* Quit if there's nothing free. */
	if (blk == SWAPBLK_NONE)
		return (NULL);
	if (swapdev_in_trim_zone(sp->sw_cursor, sp->sw_trimmer, nblks)) {
		/*
		 * Trim allocation is too close to the cursor.  Either we've
		 * wrapped around and jumped the cursor, or we've found too
		 * little free space close to the cursor to move the trimmer
		 * out of the trim zone.  Give the space back and bail out.
		 */
		blist_free(sp->sw_blist, blk, npages);
		return (NULL);
	}
	/* Convert to a device-absolute block number. */
	blk += sp->sw_first;
	/*
	 * Drop sw_dev_mtx before the operations below that may sleep:
	 * msleep() on an async-write slot and the M_WAITOK zone allocation.
	 */
	mtx_unlock(&sw_dev_mtx);
	CTR3(KTR_SPARE5, "swapdev_trim: cursor %p trimmer %p trimsize %d",
	    (void*)sp->sw_cursor, (void*)sp->sw_trimmer, npages);
	/* Reserve one async-write slot, sleeping until one is available. */
	mtx_lock(&swbuf_mtx);
	while (nsw_wcount_async == 0)
		msleep(&nsw_wcount_async, &swbuf_mtx, PVM,
		    "swbufa", 0);
	nsw_wcount_async--;
	mtx_unlock(&swbuf_mtx);
	bp = uma_zalloc(swwbuf_zone, M_WAITOK);
	bp->b_flags = B_ASYNC;
	bp->b_data = NULL;		/* BIO_DELETE carries no data. */
	bp->b_iocmd = BIO_DELETE;
	bp->b_rcred = crhold(thread0.td_ucred);
	bp->b_wcred = crhold(thread0.td_ucred);
	bp->b_bcount = PAGE_SIZE * npages;
	bp->b_bufsize = PAGE_SIZE * npages;
	bp->b_blkno = blk;
	bp->b_npages = npages;
	bp->b_iodone = swp_pager_async_trimdone;
	bp->b_fsprivate1 = sp;		/* For the completion handler. */
	BUF_KERNPROC(bp);
	return (bp);
}
/* find a swap device that needs trimming, and start trimming */ | |||||
static void | |||||
swp_pager_trimswapspace(void) | |||||
{ | |||||
struct swdevt *sp; | |||||
struct buf *bp; | |||||
mtx_lock(&sw_dev_mtx); | |||||
TAILQ_FOREACH(sp, &swtailq, sw_list) { | |||||
bp = swapdev_trim(sp); | |||||
if (bp == NULL) | |||||
continue; | |||||
/* sw_dev_mtx released in swapdev_trim */ | |||||
sp->sw_strategy(bp,sp); | |||||
Not Done Inline Actionssw_dev_mtx isn't being released. alc: sw_dev_mtx isn't being released. | |||||
Done Inline Actionsswapdev_trim releases it if it returns a non-NULL bp. sw_strategy -> swapgeom_strategy acquires and releases it. dougm: swapdev_trim releases it if it returns a non-NULL bp.
sw_strategy -> swapgeom_strategy… | |||||
return; | |||||
} | |||||
mtx_unlock(&sw_dev_mtx); | |||||
} | |||||
/* | |||||
* SWP_PAGER_FREESWAPSPACE() - free raw swap space | * SWP_PAGER_FREESWAPSPACE() - free raw swap space | ||||
* | * | ||||
* This routine returns the specified swap blocks back to the bitmap. | * This routine returns the specified swap blocks back to the bitmap. | ||||
* | * | ||||
* This routine may not sleep. | * This routine may not sleep. | ||||
*/ | */ | ||||
static void | static void | ||||
swp_pager_freeswapspace(daddr_t blk, daddr_t npages) | swp_pager_freeswapspace(daddr_t blk, daddr_t npages) | ||||
▲ Show 20 Lines • Show All 86 Lines • ▼ Show 20 Lines | |||||
/*
 * Assign swap blocks to the page range [start, start + size) of the
 * object, allocating space in chunks as large as the allocator will
 * provide.  Returns 0 on success.  On allocation failure the metadata
 * created so far is released and -1 is returned.
 */
int
swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size)
{
	daddr_t addr, blk, n_free, s_free;
	int i, j, n;

	/*
	 * NOTE(review): 'i' is an int compared against the vm_size_t
	 * 'size'; a very large 'size' would overflow it -- confirm that
	 * callers bound the request.
	 */
	swp_pager_init_freerange(&s_free, &n_free);
	VM_OBJECT_WLOCK(object);
	for (i = 0; i < size; i += n) {
		/* Ask for the whole remainder; the allocator may shrink n. */
		n = size - i;
		blk = swp_pager_getswapspace(&n, 1);
		if (blk == SWAPBLK_NONE) {
			/* Undo the assignments made so far and fail. */
			swp_pager_meta_free(object, start, i);
			VM_OBJECT_WUNLOCK(object);
			return (-1);
		}
		for (j = 0; j < n; ++j) {
			/*
			 * Record the block, and queue for freeing any
			 * block it replaces.
			 */
			addr = swp_pager_meta_build(object,
			    start + i + j, blk + j);
			if (addr != SWAPBLK_NONE)
				swp_pager_update_freerange(&s_free, &n_free,
				    addr);
		}
	}
	swp_pager_freeswapspace(s_free, n_free);
	VM_OBJECT_WUNLOCK(object);
	/* Opportunistically issue a trim now that space has been shuffled. */
	swp_pager_trimswapspace();
	return (0);
}
/* | /* | ||||
* SWAP_PAGER_COPY() - copy blocks from source pager to destination pager | * SWAP_PAGER_COPY() - copy blocks from source pager to destination pager | ||||
* and destroy the source. | * and destroy the source. | ||||
* | * | ||||
* Copy any valid swapblks from the source to the destination. In | * Copy any valid swapblks from the source to the destination. In | ||||
▲ Show 20 Lines • Show All 451 Lines • ▼ Show 20 Lines | for (i = 0; i < count; i += n) { | ||||
struct buf *bp; | struct buf *bp; | ||||
daddr_t blk; | daddr_t blk; | ||||
/* Maximum I/O size is limited by maximum swap block size. */ | /* Maximum I/O size is limited by maximum swap block size. */ | ||||
n = min(count - i, nsw_cluster_max); | n = min(count - i, nsw_cluster_max); | ||||
/* Get a block of swap of size up to size n. */ | /* Get a block of swap of size up to size n. */ | ||||
blk = swp_pager_getswapspace(&n, 4); | blk = swp_pager_getswapspace(&n, 4); | ||||
swp_pager_trimswapspace(); | |||||
if (blk == SWAPBLK_NONE) { | if (blk == SWAPBLK_NONE) { | ||||
for (j = 0; j < n; ++j) | for (j = 0; j < n; ++j) | ||||
rtvals[i+j] = VM_PAGER_FAIL; | rtvals[i+j] = VM_PAGER_FAIL; | ||||
continue; | continue; | ||||
} | } | ||||
/* | /* | ||||
* All I/O parameters have been satisfied, build the I/O | * All I/O parameters have been satisfied, build the I/O | ||||
▲ Show 20 Lines • Show All 906 Lines • ▼ Show 20 Lines | swaponsomething(struct vnode *vp, void *id, u_long nblks, | ||||
sp->sw_dev = dev; | sp->sw_dev = dev; | ||||
sp->sw_nblks = nblks; | sp->sw_nblks = nblks; | ||||
sp->sw_used = 0; | sp->sw_used = 0; | ||||
sp->sw_strategy = strategy; | sp->sw_strategy = strategy; | ||||
sp->sw_close = close; | sp->sw_close = close; | ||||
sp->sw_flags = flags; | sp->sw_flags = flags; | ||||
sp->sw_blist = blist_create(nblks, M_WAITOK); | sp->sw_blist = blist_create(nblks, M_WAITOK); | ||||
sp->sw_cursor = 0; | |||||
sp->sw_trimmer = 0; | |||||
/* | /* | ||||
* Do not free the first two block in order to avoid overwriting | * Do not free the first two block in order to avoid overwriting | ||||
* any bsd label at the front of the partition | * any bsd label at the front of the partition | ||||
*/ | */ | ||||
blist_free(sp->sw_blist, 2, nblks - 2); | blist_free(sp->sw_blist, 2, nblks - 2); | ||||
dvbase = 0; | dvbase = 0; | ||||
mtx_lock(&sw_dev_mtx); | mtx_lock(&sw_dev_mtx); | ||||
▲ Show 20 Lines • Show All 423 Lines • ▼ Show 20 Lines | if (cp == NULL) { | ||||
mtx_unlock(&sw_dev_mtx); | mtx_unlock(&sw_dev_mtx); | ||||
bp->b_error = ENXIO; | bp->b_error = ENXIO; | ||||
bp->b_ioflags |= BIO_ERROR; | bp->b_ioflags |= BIO_ERROR; | ||||
bufdone(bp); | bufdone(bp); | ||||
return; | return; | ||||
} | } | ||||
swapgeom_acquire(cp); | swapgeom_acquire(cp); | ||||
mtx_unlock(&sw_dev_mtx); | mtx_unlock(&sw_dev_mtx); | ||||
if (bp->b_iocmd == BIO_WRITE) | if (bp->b_iocmd == BIO_WRITE || bp->b_iocmd == BIO_DELETE) | ||||
bio = g_new_bio(); | bio = g_new_bio(); | ||||
else | else | ||||
bio = g_alloc_bio(); | bio = g_alloc_bio(); | ||||
if (bio == NULL) { | if (bio == NULL) { | ||||
mtx_lock(&sw_dev_mtx); | mtx_lock(&sw_dev_mtx); | ||||
swapgeom_release(cp, sp); | swapgeom_release(cp, sp); | ||||
mtx_unlock(&sw_dev_mtx); | mtx_unlock(&sw_dev_mtx); | ||||
bp->b_error = ENOMEM; | bp->b_error = ENOMEM; | ||||
▲ Show 20 Lines • Show All 244 Lines • Show Last 20 Lines |
This is problematic because the cursors are 64-bit variables while npages is a 32-bit variable.