Index: kern/subr_blist.c =================================================================== --- kern/subr_blist.c +++ kern/subr_blist.c @@ -74,12 +74,11 @@ * allocated. * * NOTE: the allocator cannot currently allocate more than - * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too - * large' if you try. This is an area that could use improvement. The - * radix is large enough that this restriction does not effect the swap - * system, though. Currently only the allocation code is affected by - * this algorithmic unfeature. The freeing code can handle arbitrary - * ranges. + * BLIST_BMAP_RADIX blocks per call. This is an area that could use + * improvement. The radix is large enough that this restriction does + * not effect the swap system, though. Currently only the allocation + * code is affected by this algorithmic unfeature. The freeing code + * can handle arbitrary ranges. * * This code can be compiled stand-alone for debugging. */ @@ -120,19 +119,19 @@ #define malloc(a,b,c) calloc(a, 1) #define free(a,b) free(a) #define ummin(a,b) ((a) < (b) ? (a) : (b)) +#define KASSERT(a,b) #include -void panic(const char *ctl, ...); - #endif /* * static support functions */ -static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count); -static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, - u_daddr_t radix); +static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, + int *count, int maxcount); +static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count, + int maxcount, u_daddr_t radix); static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count); static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix); @@ -192,8 +191,7 @@ /* - * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t. - * Assumes that the argument has only one bit set. + * Find the first bit set in a u_daddr_t. */ static inline int bitpos(u_daddr_t mask) @@ -205,15 +203,19 @@ case sizeof(long long): return (ffsll(mask) - 1); #endif +#ifdef HAVE_INLINE_FFS + case sizeof(int): + return (ffs(mask) - 1); +#endif default: lo = 0; hi = BLIST_BMAP_RADIX; while (lo + 1 < hi) { mid = (lo + hi) >> 1; - if ((mask >> mid) != 0) + if (mask & bitrange(0, mid)) + hi = mid; + else lo = mid; - else - hi = mid; } return (lo); } @@ -235,8 +237,7 @@ blist_t bl; u_daddr_t nodes, radix; - if (blocks == 0) - panic("invalid block count"); + KASSERT(blocks >= 0, ("invalid block count")); /* * Calculate the radix and node count used for scanning. @@ -284,12 +285,14 @@ * not be allocated. */ daddr_t -blist_alloc(blist_t bl, daddr_t count) +blist_alloc(blist_t bl, int *count, int maxcount) { daddr_t blk; - if (count > BLIST_MAX_ALLOC) - panic("allocation too large"); + KASSERT(*count <= maxcount, + ("invalid parameters %d > %d", *count, maxcount)); + KASSERT(maxcount <= BLIST_MAX_ALLOC, + ("allocation too large: %d", maxcount)); /* * This loop iterates at most twice. An allocation failure in the @@ -299,10 +302,10 @@ */ for (;;) { blk = blst_meta_alloc(bl->bl_root, bl->bl_cursor, count, - bl->bl_radix); + maxcount, bl->bl_radix); if (blk != SWAPBLK_NONE) { - bl->bl_avail -= count; - bl->bl_cursor = blk + count; + bl->bl_avail -= *count; + bl->bl_cursor = blk + *count; if (bl->bl_cursor == bl->bl_blocks) bl->bl_cursor = 0; return (blk); @@ -324,15 +327,15 @@ /* * blist_free() - free up space in the block bitmap. Return the base - * of a contiguous region. Panic if an inconsistancy is - * found. + * of a contiguous region. */ void blist_free(blist_t bl, daddr_t blkno, daddr_t count) { - if (blkno < 0 || blkno + count > bl->bl_blocks) - panic("freeing invalid range"); + KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks, + ("freeing invalid range: blkno %llx, count %d, blocks %lld", + (long long)blkno, (int)count, (long long)bl->bl_blocks)); blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix); bl->bl_avail += count; } @@ -348,8 +351,9 @@ { daddr_t filled; - if (blkno < 0 || blkno + count > bl->bl_blocks) - panic("filling invalid range"); + KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks, + ("filling invalid range: blkno %llx, count %d, blocks %lld", + (long long)blkno, (int)count, (long long)bl->bl_blocks)); filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix); bl->bl_avail -= filled; return (filled); @@ -603,42 +607,48 @@ * common ancestor to mark any subtrees made completely empty. */ static int -blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) +blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, int maxcount) { blmeta_t *next; - daddr_t skip; u_daddr_t radix; - int digit; + int avail, digit; next = scan + 1; blk += BLIST_BMAP_RADIX; radix = BLIST_BMAP_RADIX; - while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0 && - (next->bm_bitmap & 1) == 1) { + while ((next->bm_bitmap & 1) == 1 && + (digit = ((blk / radix) & BLIST_META_MASK)) == 0) { next++; radix *= BLIST_META_RADIX; } - if (((next->bm_bitmap + 1) & ~((u_daddr_t)-1 << count)) != 0) { - /* - * The next leaf doesn't have enough free blocks at the - * beginning to complete the spanning allocation. - */ - return (ENOMEM); + if ((next->bm_bitmap & 1) != 1) + return (0); + avail = (~next->bm_bitmap != 0) ? + bitpos(~next->bm_bitmap) : BLIST_BMAP_RADIX; + if (avail < count) { + /* + * The next leaf doesn't have enough free blocks at the + * beginning to complete the spanning allocation. + */ + return (0); } + count = ummin(avail, maxcount); /* Clear the first 'count' bits in the next leaf to allocate. */ - next->bm_bitmap &= (u_daddr_t)-1 << count; + next->bm_bitmap &= ~bitrange(0, count); /* * Update bitmaps of next-ancestors, up to least common ancestor. */ - skip = radix_to_skip(radix); - while (radix != BLIST_BMAP_RADIX && next->bm_bitmap == 0) { - (--next)->bm_bitmap ^= 1; - radix /= BLIST_META_RADIX; - } - if (next->bm_bitmap == 0) - scan[-digit * skip].bm_bitmap ^= (u_daddr_t)1 << digit; - return (0); + while (next->bm_bitmap == 0) { + if (--next == scan) { + scan[-digit * radix_to_skip(radix)].bm_bitmap ^= + (u_daddr_t)1 << digit; + break; + } + next->bm_bitmap ^= 1; + } + + return (count); } /* @@ -649,13 +659,13 @@ * crosses a leaf boundary. */ static daddr_t -blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) +blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int *count, int maxcount) { u_daddr_t cursor_mask, mask; int count1, hi, lo, num_shifts, range1, range_ext; range1 = 0; - count1 = count - 1; + count1 = *count - 1; num_shifts = fls(count1); mask = scan->bm_bitmap; while ((-mask & ~mask) != 0 && num_shifts > 0) { @@ -710,31 +720,44 @@ /* * The least significant set bit in mask marks the start of the first - * available range of sufficient size. Clear all the bits but that one, - * and then find its position. + * available range of sufficient size. Find its position. */ - mask &= -mask; lo = bitpos(mask); - hi = lo + count; - if (hi > BLIST_BMAP_RADIX) { - /* - * An allocation within this leaf is impossible, so a successful - * allocation depends on the next leaf providing some of the blocks. - */ - if (blst_next_leaf_alloc(scan, blk, hi - BLIST_BMAP_RADIX) != 0) + /* + * Find how much space is available starting at that position. + */ + if ((-mask & ~mask) != 0) { + /* Count the 1 bits starting at position lo. */ + hi = bitpos(-mask & ~mask) + count1; + if (maxcount < hi - lo) + hi = lo + maxcount; + *count = hi - lo; + mask = bitrange(lo, *count); + } else if (maxcount <= BLIST_BMAP_RADIX - lo) { + /* All the blocks we can use are available here. */ + hi = lo + maxcount; + *count = maxcount; + mask = bitrange(lo, *count); + } else { + /* Check next leaf for some of the blocks we want or need. */ + count1 = *count - (BLIST_BMAP_RADIX - lo); + maxcount -= BLIST_BMAP_RADIX - lo; + hi = blst_next_leaf_alloc(scan, blk, count1, maxcount); + if (hi < count1) /* - * The hint cannot be updated, because the same - * allocation request could be satisfied later, by this - * leaf, if the state of the next leaf changes, and - * without any changes to this leaf. + * The next leaf cannot supply enough blocks to reach + * the minimum required allocation. The hint cannot be + * updated, because the same allocation request could + * be satisfied later, by this leaf, if the state of + * the next leaf changes, and without any changes to + * this leaf. */ return (SWAPBLK_NONE); + *count = BLIST_BMAP_RADIX - lo + hi; hi = BLIST_BMAP_RADIX; } - /* Set the bits of mask at position 'lo' and higher. */ - mask = -mask; if (hi == BLIST_BMAP_RADIX) { /* * Update bighint. There is no allocation bigger than range1 @@ -741,9 +764,6 @@ * available in this leaf after this allocation completes. */ scan->bm_bighint = range1; - } else { - /* Clear the bits of mask at position 'hi' and higher. */ - mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi); } /* Clear the allocated bits from this leaf. */ scan->bm_bitmap &= ~mask; @@ -759,7 +779,8 @@ * and we have a few optimizations strewn in as well. */ static daddr_t -blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix) +blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count, + int maxcount, u_daddr_t radix) { daddr_t blk, i, r, skip; u_daddr_t bit, mask; @@ -767,7 +788,7 @@ int digit; if (radix == BLIST_BMAP_RADIX) - return (blst_leaf_alloc(scan, cursor, count)); + return (blst_leaf_alloc(scan, cursor, count, maxcount)); blk = cursor & -radix; scan_from_start = (cursor == blk); radix /= BLIST_META_RADIX; @@ -797,12 +818,12 @@ bit = mask & -mask; digit = bitpos(bit); i = 1 + digit * skip; - if (count <= scan[i].bm_bighint) { + if (*count <= scan[i].bm_bighint) { /* * The allocation might fit beginning in the i'th subtree. */ r = blst_meta_alloc(&scan[i], cursor + digit * radix, - count, radix); + count, maxcount, radix); if (r != SWAPBLK_NONE) { if (scan[i].bm_bitmap == 0) scan->bm_bitmap ^= bit; @@ -818,7 +839,7 @@ */ if (scan_from_start && !(digit == BLIST_META_RADIX - 1 && scan[i].bm_bighint == BLIST_MAX_ALLOC)) - scan->bm_bighint = count - 1; + scan->bm_bighint = *count - 1; return (SWAPBLK_NONE); } @@ -839,8 +860,9 @@ * count n */ mask = bitrange(blk & BLIST_BMAP_MASK, count); - if (scan->bm_bitmap & mask) - panic("freeing free block"); + KASSERT((scan->bm_bitmap & mask) != 0, + ("freeing free block: %llx, size %d, mask %llx", + (long long)blk, count, (long long)scan->bm_bitmap & mask)); scan->bm_bitmap |= mask; } @@ -1077,7 +1099,7 @@ for (;;) { char buf[1024]; long long da = 0; - long long count = 0; + int count = 0, maxcount = 0; printf("%lld/%lld/%lld> ", (long long)blist_avail(bl), (long long)size, (long long)bl->bl_radix); @@ -1086,7 +1108,7 @@ break; switch(buf[0]) { case 'r': - if (sscanf(buf + 1, "%lld", &count) == 1) { + if (sscanf(buf + 1, "%d", &count) == 1) { blist_resize(&bl, count, 1, M_WAITOK); } else { printf("?\n"); @@ -1102,15 +1124,16 @@ sbuf_delete(s); break; case 'a': - if (sscanf(buf + 1, "%lld", &count) == 1) { - daddr_t blk = blist_alloc(bl, count); - printf(" R=%08llx\n", (long long)blk); + if (sscanf(buf + 1, "%d%d", &count, &maxcount) == 2) { + daddr_t blk = blist_alloc(bl, &count, maxcount); + printf(" R=%08llx, c=%08d\n", + (long long)blk, count); } else { printf("?\n"); } break; case 'f': - if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { + if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) { blist_free(bl, da, count); } else { printf("?\n"); @@ -1117,7 +1140,7 @@ } break; case 'l': - if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { + if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) { printf(" n=%jd\n", (intmax_t)blist_fill(bl, da, count)); } else { @@ -1129,31 +1152,24 @@ puts( "p -print\n" "s -stats\n" - "a %d -allocate\n" + "a %d %d -allocate\n" "f %x %d -free\n" "l %x %d -fill\n" "r %d -resize\n" - "h/? -help" + "h/? -help\n" + "q -quit" ); break; + case 'q': + break; default: printf("?\n"); break; } + if (buf[0] == 'q') + break; } - return(0); + return (0); } -void -panic(const char *ctl, ...) -{ - va_list va; - - va_start(va, ctl); - vfprintf(stderr, ctl, va); - fprintf(stderr, "\n"); - va_end(va); - exit(1); -} - #endif Index: sys/blist.h =================================================================== --- sys/blist.h +++ sys/blist.h @@ -33,7 +33,7 @@ * Usage: * blist = blist_create(blocks, flags) * (void) blist_destroy(blist) - * blkno = blist_alloc(blist, count) + * blkno = blist_alloc(blist, &count, maxcount) * (void) blist_free(blist, blkno, count) * nblks = blist_fill(blist, blkno, count) * (void) blist_resize(&blist, count, freeextra, flags) @@ -69,11 +69,11 @@ #define SWAPBLK_NONE ((daddr_t)((u_daddr_t)SWAPBLK_MASK + 1))/* flag */ /* - * Both blmeta and bmu_bitmap MUST be a power of 2 in size. + * Both blmeta and bm_bitmap MUST be a power of 2 in size. */ typedef struct blmeta { - u_daddr_t bm_bitmap; /* bitmap if we are a leaf */ + u_daddr_t bm_bitmap; /* bitmap marking unfilled block sets*/ daddr_t bm_bighint; /* biggest contiguous block hint*/ } blmeta_t; @@ -92,7 +92,7 @@ struct sbuf; -daddr_t blist_alloc(blist_t blist, daddr_t count); +daddr_t blist_alloc(blist_t blist, int *count, int maxcount); daddr_t blist_avail(blist_t blist); blist_t blist_create(daddr_t blocks, int flags); void blist_destroy(blist_t blist); Index: vm/swap_pager.c =================================================================== --- vm/swap_pager.c +++ vm/swap_pager.c @@ -407,7 +407,7 @@ * Swap bitmap functions */ static void swp_pager_freeswapspace(daddr_t blk, daddr_t npages); -static daddr_t swp_pager_getswapspace(int npages); +static daddr_t swp_pager_getswapspace(int *npages, int limit); /* * Metadata functions @@ -708,9 +708,10 @@ /* * SWP_PAGER_GETSWAPSPACE() - allocate raw swap space * - * Allocate swap for the requested number of pages. The starting - * swap block number (a page index) is returned or SWAPBLK_NONE - * if the allocation failed. + * Allocate swap for up to the requested number of pages, and at + * least a minimum number of pages. The starting swap block number + * (a page index) is returned or SWAPBLK_NONE if the allocation + * failed. * * Also has the side effect of advising that somebody made a mistake * when they configured swap and didn't configure enough. @@ -720,38 +721,47 @@ * We allocate in round-robin fashion from the configured devices. */ static daddr_t -swp_pager_getswapspace(int npages) +swp_pager_getswapspace(int *io_npages, int limit) { daddr_t blk; struct swdevt *sp; - int i; + int mpages, npages; blk = SWAPBLK_NONE; + npages = mpages = *io_npages; mtx_lock(&sw_dev_mtx); sp = swdevhd; - for (i = 0; i < nswapdev; i++) { + while (!TAILQ_EMPTY(&swtailq)) { if (sp == NULL) sp = TAILQ_FIRST(&swtailq); - if (!(sp->sw_flags & SW_CLOSING)) { - blk = blist_alloc(sp->sw_blist, npages); - if (blk != SWAPBLK_NONE) { - blk += sp->sw_first; - sp->sw_used += npages; - swap_pager_avail -= npages; - swp_sizecheck(); - swdevhd = TAILQ_NEXT(sp, sw_list); - goto done; - } + if ((sp->sw_flags & SW_CLOSING) == 0) + blk = blist_alloc(sp->sw_blist, &npages, mpages); + if (blk != SWAPBLK_NONE) + break; + sp = TAILQ_NEXT(sp, sw_list); + if (swdevhd == sp) { + if (npages <= limit) + break; + mpages = npages - 1; + npages >>= 1; } - sp = TAILQ_NEXT(sp, sw_list); } - if (swap_pager_full != 2) { - printf("swap_pager_getswapspace(%d): failed\n", npages); - swap_pager_full = 2; - swap_pager_almost_full = 1; + if (blk != SWAPBLK_NONE) { + *io_npages = npages; + blk += sp->sw_first; + sp->sw_used += npages; + swap_pager_avail -= npages; + swp_sizecheck(); + swdevhd = TAILQ_NEXT(sp, sw_list); + } else { + if (swap_pager_full != 2) { + printf("swp_pager_getswapspace(%d): failed\n", + *io_npages); + swap_pager_full = 2; + swap_pager_almost_full = 1; + } + swdevhd = NULL; } - swdevhd = NULL; -done: mtx_unlock(&sw_dev_mtx); return (blk); } @@ -886,35 +896,28 @@ int swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size) { - int n = 0; - daddr_t blk = SWAPBLK_NONE; - vm_pindex_t beg = start; /* save start index */ - daddr_t addr, n_free, s_free; + daddr_t addr, blk, n_free, s_free; + int i, j, n; swp_pager_init_freerange(&s_free, &n_free); VM_OBJECT_WLOCK(object); - while (size) { - if (n == 0) { - n = BLIST_MAX_ALLOC; - while ((blk = swp_pager_getswapspace(n)) == SWAPBLK_NONE) { - n >>= 1; - if (n == 0) { - swp_pager_meta_free(object, beg, start - beg); - VM_OBJECT_WUNLOCK(object); - return (-1); - } - } + for (i = 0; i < size; i += n) { + n = min(BLIST_MAX_ALLOC, size - i); + blk = swp_pager_getswapspace(&n, 1); + if (blk == SWAPBLK_NONE) { + swp_pager_meta_free(object, start, i); + VM_OBJECT_WUNLOCK(object); + return (-1); } - addr = swp_pager_meta_build(object, start, blk); - if (addr != SWAPBLK_NONE) - swp_pager_update_freerange(&s_free, &n_free, addr); - --size; - ++start; - ++blk; - --n; + for (j = 0; j < n; ++j) { + addr = swp_pager_meta_build(object, + start + i + j, blk + j); + if (addr != SWAPBLK_NONE) + swp_pager_update_freerange(&s_free, &n_free, + addr); + } } swp_pager_freeswapspace(s_free, n_free); - swp_pager_meta_free(object, start, n); VM_OBJECT_WUNLOCK(object); return (0); } @@ -1384,18 +1387,8 @@ n = min(BLIST_MAX_ALLOC, count - i); n = min(n, nsw_cluster_max); - /* - * Get biggest block of swap we can. If we fail, fall - * back and try to allocate a smaller block. Don't go - * overboard trying to allocate space if it would overly - * fragment swap. - */ - while ( - (blk = swp_pager_getswapspace(n)) == SWAPBLK_NONE && - n > 4 - ) { - n >>= 1; - } + /* Get a block of swap of size up to size n. */ + blk = swp_pager_getswapspace(&n, 4); if (blk == SWAPBLK_NONE) { for (j = 0; j < n; ++j) rtvals[i+j] = VM_PAGER_FAIL;