Index: sys/kern/subr_blist.c =================================================================== --- sys/kern/subr_blist.c +++ sys/kern/subr_blist.c @@ -36,15 +36,14 @@ * * A radix tree controls access to pieces of the bitmap, and includes * auxiliary information at each interior node about the availabilty of - * contiguous free blocks in the subtree rooted at that node. Two radix - * constants are involved: one for the size of the bitmaps contained in the - * leaf nodes (BLIST_BMAP_RADIX), and one for the number of descendents of - * each of the meta (interior) nodes (BLIST_META_RADIX). Each subtree is - * associated with a range of blocks. The root of any subtree stores a - * hint field that defines an upper bound on the size of the largest - * allocation that can begin in the associated block range. A hint is an - * upper bound on a potential allocation, but not necessarily a tight upper - * bound. + * contiguous free blocks in the subtree rooted at that node. A radix + * constant defines the size of the bitmaps contained in the leaf nodes + * and the number of descendents of each of the meta (interior) nodes. + * Each subtree is associated with a range of blocks. The root of any + * subtree stores a hint field that defines an upper bound on the size + * of the largest allocation that can begin in the associated block + * range. A hint is an upper bound on a potential allocation, but not + * necessarily a tight upper bound. * * The bitmap field in each node directs the search for available blocks. * For a leaf node, a bit is set if the corresponding block is free. For a @@ -64,13 +63,13 @@ * * LAYOUT: The radix tree is laid out recursively using a linear array. * Each meta node is immediately followed (laid out sequentially in - * memory) by BLIST_META_RADIX lower level nodes. This is a recursive + * memory) by BLIST_RADIX lower level nodes. This is a recursive * structure but one that can be easily scanned through a very simple * 'skip' calculation. The memory allocation is only large enough to * cover the number of blocks requested at creation time. Nodes that * represent blocks beyond that limit, nodes that would never be read * or written, are not allocated, so that the last of the - * BLIST_META_RADIX lower level nodes of a some nodes may not be + * BLIST_RADIX lower level nodes of a some nodes may not be * allocated. * * NOTE: the allocator cannot currently allocate more than @@ -152,24 +151,19 @@ static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); #endif -_Static_assert(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0, - "radix divisibility error"); -#define BLIST_BMAP_MASK (BLIST_BMAP_RADIX - 1) -#define BLIST_META_MASK (BLIST_META_RADIX - 1) +#define BLIST_MASK (BLIST_RADIX - 1) /* * For a subtree that can represent the state of up to 'radix' blocks, the - * number of leaf nodes of the subtree is L=radix/BLIST_BMAP_RADIX. If 'm' - * is short for BLIST_META_RADIX, then for a tree of height h with L=m**h + * number of leaf nodes of the subtree is L=radix/BLIST_RADIX. If 'm' + * is short for BLIST_RADIX, then for a tree of height h with L=m**h * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h, * or, equivalently, (m**(h+1)-1)/(m-1). This quantity is called 'skip' * in the 'meta' functions that process subtrees. Since integer division * discards remainders, we can express this computation as * skip = (m * m**h) / (m - 1) - * skip = (m * (radix / BLIST_BMAP_RADIX)) / (m - 1) - * and since m divides BLIST_BMAP_RADIX, we can simplify further to - * skip = (radix / (BLIST_BMAP_RADIX / m)) / (m - 1) - * skip = radix / ((BLIST_BMAP_RADIX / m) * (m - 1)) + * skip = (m * (radix / m)) / (m - 1) + * skip = radix / (m - 1) * so that simple integer division by a constant can safely be used for the * calculation. */ @@ -177,8 +171,7 @@ radix_to_skip(daddr_t radix) { - return (radix / - ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK)); + return (radix / BLIST_MASK); } /* @@ -189,7 +182,7 @@ { return (((u_daddr_t)-1 << n) & - ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count)))); + ((u_daddr_t)-1 >> (BLIST_RADIX - (n + count)))); } /* @@ -201,7 +194,7 @@ int hi, lo, mid; lo = 0; - hi = BLIST_BMAP_RADIX; + hi = BLIST_RADIX; while (lo + 1 < hi) { mid = (lo + hi) >> 1; if (mask & bitrange(0, mid)) @@ -238,7 +231,7 @@ * flags - malloc flags * * The smallest blist consists of a single leaf node capable of - * managing BLIST_BMAP_RADIX blocks. + * managing BLIST_RADIX blocks. */ blist_t blist_create(daddr_t blocks, int flags) @@ -252,10 +245,10 @@ * Calculate the radix and node count used for scanning. */ nodes = 1; - radix = BLIST_BMAP_RADIX; - while (radix <= blocks) { - nodes += 1 + (blocks - 1) / radix; - radix *= BLIST_META_RADIX; + radix = 1; + while (radix <= blocks / BLIST_RADIX) { + nodes += 1 + (blocks - 1) / radix / BLIST_RADIX; + radix *= BLIST_RADIX; } bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags | @@ -549,19 +542,12 @@ init_gap_stats(stats); nodes = 0; - i = bl->bl_radix; - while (i < bl->bl_radix + bl->bl_blocks) { + radix = bl->bl_radix; + for (i = 0; i < bl->bl_blocks; ) { /* - * Find max size subtree starting at i. - */ - radix = BLIST_BMAP_RADIX; - while (((i / radix) & BLIST_META_MASK) == 0) - radix *= BLIST_META_RADIX; - - /* * Check for skippable subtrees starting at i. */ - while (radix > BLIST_BMAP_RADIX) { + while (radix != 1) { if (bl->bl_root[nodes].bm_bitmap == 0) { if (gap_stats_counting(stats)) update_gap_stats(stats, i); @@ -572,9 +558,9 @@ * Skip subtree root. */ nodes++; - radix /= BLIST_META_RADIX; + radix /= BLIST_RADIX; } - if (radix == BLIST_BMAP_RADIX) { + if (radix == 1) { /* * Scan leaf. */ @@ -588,8 +574,15 @@ diff ^= bitrange(digit, 1); } } - nodes += radix_to_skip(radix); - i += radix; + nodes += radix_to_skip(radix * BLIST_RADIX); + i += radix * BLIST_RADIX; + + /* + * Find max size subtree starting at i. + */ + radix = 1; + while (((i / BLIST_RADIX / radix) & BLIST_MASK) == 0) + radix *= BLIST_RADIX; } update_gap_stats(stats, i); dump_gap_stats(stats, s); @@ -623,13 +616,13 @@ daddr_t blk; int avail, digit; - start += BLIST_BMAP_RADIX; - for (blk = start; blk - start < maxcount; blk += BLIST_BMAP_RADIX) { + start += BLIST_RADIX; + for (blk = start; blk - start < maxcount; blk += BLIST_RADIX) { /* Skip meta-nodes, as long as they promise more free blocks. */ - radix = BLIST_BMAP_RADIX; + radix = BLIST_RADIX; while (((++scan)->bm_bitmap & 1) == 1 && - ((blk / radix) & BLIST_META_MASK) == 0) - radix *= BLIST_META_RADIX; + ((blk / radix) & BLIST_MASK) == 0) + radix *= BLIST_RADIX; if (~scan->bm_bitmap != 0) { /* * Either there is no next leaf with any free blocks, @@ -647,39 +640,39 @@ return (avail); } maxcount = imin(avail, maxcount); - if (maxcount % BLIST_BMAP_RADIX == 0) { + if (maxcount % BLIST_RADIX == 0) { /* * There was no next leaf. Back scan up to * last leaf. */ --scan; - while (radix != BLIST_BMAP_RADIX) { - radix /= BLIST_META_RADIX; + while (radix != BLIST_RADIX) { + radix /= BLIST_RADIX; --scan; } - blk -= BLIST_BMAP_RADIX; + blk -= BLIST_RADIX; } } } /* * 'scan' is the last leaf that provides blocks. Clear from 1 to - * BLIST_BMAP_RADIX bits to represent the allocation of those last + * BLIST_RADIX bits to represent the allocation of those last * blocks. */ - if (maxcount % BLIST_BMAP_RADIX != 0) - scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_BMAP_RADIX); + if (maxcount % BLIST_RADIX != 0) + scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_RADIX); else scan->bm_bitmap = 0; for (;;) { /* Back up over meta-nodes, clearing bits if necessary. */ - blk -= BLIST_BMAP_RADIX; - radix = BLIST_BMAP_RADIX; - while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0) { + blk -= BLIST_RADIX; + radix = BLIST_RADIX; + while ((digit = ((blk / radix) & BLIST_MASK)) == 0) { if ((scan--)->bm_bitmap == 0) scan->bm_bitmap ^= 1; - radix *= BLIST_META_RADIX; + radix *= BLIST_RADIX; } if ((scan--)->bm_bitmap == 0) scan[-digit * radix_to_skip(radix)].bm_bitmap ^= @@ -734,17 +727,17 @@ } /* Discard any candidates that appear before blk. */ - if ((blk & BLIST_BMAP_MASK) != 0) { - if ((~mask & bitrange(0, blk & BLIST_BMAP_MASK)) != 0) { + if ((blk & BLIST_MASK) != 0) { + if ((~mask & bitrange(0, blk & BLIST_MASK)) != 0) { /* Grow bighint in case all discarded bits are set. */ - bighint += blk & BLIST_BMAP_MASK; - mask |= bitrange(0, blk & BLIST_BMAP_MASK); + bighint += blk & BLIST_MASK; + mask |= bitrange(0, blk & BLIST_MASK); if (~mask == 0) { scan->bm_bighint = bighint; return (SWAPBLK_NONE); } } - blk -= blk & BLIST_BMAP_MASK; + blk -= blk & BLIST_MASK; } /* @@ -763,17 +756,17 @@ hi = lo + maxcount; *count = hi - lo; mask = ~bitrange(lo, *count); - } else if (maxcount <= BLIST_BMAP_RADIX - lo) { + } else if (maxcount <= BLIST_RADIX - lo) { /* All the blocks we can use are available here. */ hi = lo + maxcount; *count = maxcount; mask = ~bitrange(lo, *count); - if (hi == BLIST_BMAP_RADIX) + if (hi == BLIST_RADIX) scan->bm_bighint = bighint; } else { /* Check next leaf for some of the blocks we want or need. */ - count1 = *count - (BLIST_BMAP_RADIX - lo); - maxcount -= BLIST_BMAP_RADIX - lo; + count1 = *count - (BLIST_RADIX - lo); + maxcount -= BLIST_RADIX - lo; hi = blst_next_leaf_alloc(scan, blk, count1, maxcount); if (hi < count1) /* @@ -785,7 +778,7 @@ * this leaf. */ return (SWAPBLK_NONE); - *count = BLIST_BMAP_RADIX - lo + hi; + *count = BLIST_RADIX - lo + hi; scan->bm_bighint = bighint; } @@ -811,16 +804,15 @@ bool scan_from_start; int digit; - if (radix == BLIST_BMAP_RADIX) + if (radix == 1) return (blst_leaf_alloc(scan, cursor, count, maxcount)); - blk = cursor & -radix; + blk = cursor & -(radix * BLIST_RADIX); scan_from_start = (cursor == blk); - radix /= BLIST_META_RADIX; skip = radix_to_skip(radix); mask = scan->bm_bitmap; /* Discard any candidates that appear before cursor. */ - digit = (cursor / radix) & BLIST_META_MASK; + digit = (cursor / radix) & BLIST_MASK; mask &= (u_daddr_t)-1 << digit; if (mask == 0) return (SWAPBLK_NONE); @@ -846,7 +838,7 @@ * The allocation might fit beginning in the i'th subtree. */ r = blst_meta_alloc(&scan[i], cursor + digit * radix, - count, maxcount, radix); + count, maxcount, radix / BLIST_RADIX); if (r != SWAPBLK_NONE) { if (scan[i].bm_bitmap == 0) scan->bm_bitmap ^= bitrange(digit, 1); @@ -860,7 +852,7 @@ * We couldn't allocate count in this subtree. If the whole tree was * scanned, and the last tree node is allocated, update bighint. */ - if (scan_from_start && !(digit == BLIST_META_RADIX - 1 && + if (scan_from_start && !(digit == BLIST_RADIX - 1 && scan[i].bm_bighint == BLIST_MAX_ALLOC)) scan->bm_bighint = *count - 1; @@ -882,7 +874,7 @@ * \_________/\__/ * count n */ - mask = bitrange(blk & BLIST_BMAP_MASK, count); + mask = bitrange(blk & BLIST_MASK, count); KASSERT((scan->bm_bitmap & mask) == 0, ("freeing free block: %jx, size %d, mask %jx", (uintmax_t)blk, count, (uintmax_t)scan->bm_bitmap & mask)); @@ -913,20 +905,22 @@ */ scan->bm_bighint = BLIST_MAX_ALLOC; - if (radix == BLIST_BMAP_RADIX) + if (radix == 1) return (blst_leaf_free(scan, freeBlk, count)); - endBlk = ummin(freeBlk + count, (freeBlk + radix) & -radix); - radix /= BLIST_META_RADIX; + endBlk = freeBlk + count; + blk = (freeBlk + radix * BLIST_RADIX) & -(radix * BLIST_RADIX); + if (blk != 0) + endBlk = ummin(endBlk, blk); skip = radix_to_skip(radix); blk = freeBlk & -radix; - digit = (blk / radix) & BLIST_META_MASK; - endDigit = 1 + (((endBlk - 1) / radix) & BLIST_META_MASK); + digit = (blk / radix) & BLIST_MASK; + endDigit = 1 + (((endBlk - 1) / radix) & BLIST_MASK); scan->bm_bitmap |= bitrange(digit, endDigit - digit); for (i = 1 + digit * skip; blk < endBlk; i += skip) { blk += radix; count = ummin(blk, endBlk) - freeBlk; - blst_meta_free(&scan[i], freeBlk, count, radix); + blst_meta_free(&scan[i], freeBlk, count, radix / BLIST_RADIX); freeBlk = blk; } } @@ -947,7 +941,7 @@ * Leaf node */ - if (radix == BLIST_BMAP_RADIX) { + if (radix == 1) { u_daddr_t v = scan->bm_bitmap; if (v == (u_daddr_t)-1) { @@ -975,14 +969,14 @@ } endBlk = blk + count; - radix /= BLIST_META_RADIX; skip = radix_to_skip(radix); for (i = 1; blk < endBlk; i += skip) { blk += radix; count = radix; if (blk >= endBlk) count -= blk - endBlk; - blst_copy(&scan[i], blk - radix, radix, dest, count); + blst_copy(&scan[i], blk - radix, + radix / BLIST_RADIX, dest, count); } } @@ -999,7 +993,7 @@ daddr_t nblks; u_daddr_t mask; - mask = bitrange(blk & BLIST_BMAP_MASK, count); + mask = bitrange(blk & BLIST_MASK, count); /* Count the number of blocks that we are allocating. */ nblks = bitcount64(scan->bm_bitmap & mask); @@ -1022,20 +1016,23 @@ daddr_t blk, endBlk, i, nblks, skip; int digit; - if (radix == BLIST_BMAP_RADIX) + if (radix == 1) return (blst_leaf_fill(scan, allocBlk, count)); - endBlk = ummin(allocBlk + count, (allocBlk + radix) & -radix); - radix /= BLIST_META_RADIX; + endBlk = allocBlk + count; + blk = (allocBlk + radix * BLIST_RADIX) & -(radix * BLIST_RADIX); + if (blk != 0) + endBlk = ummin(endBlk, blk); skip = radix_to_skip(radix); blk = allocBlk & -radix; nblks = 0; while (blk < endBlk) { - digit = (blk / radix) & BLIST_META_MASK; + digit = (blk / radix) & BLIST_MASK; i = 1 + digit * skip; blk += radix; count = ummin(blk, endBlk) - allocBlk; - nblks += blst_meta_fill(&scan[i], allocBlk, count, radix); + nblks += blst_meta_fill(&scan[i], allocBlk, count, + radix / BLIST_RADIX); if (scan[i].bm_bitmap == 0) scan->bm_bitmap &= ~((u_daddr_t)1 << digit); allocBlk = blk; @@ -1052,12 +1049,12 @@ u_daddr_t mask; int digit; - if (radix == BLIST_BMAP_RADIX) { + if (radix == 1) { printf( "%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n", tab, tab, "", - (long long)blk, (long long)radix, - 1 + (BLIST_BMAP_RADIX - 1) / 4, + (long long)blk, (long long)BLIST_RADIX, + (int)(1 + (BLIST_RADIX - 1) / 4), (long long)scan->bm_bitmap, (long long)scan->bm_bighint ); @@ -1067,14 +1064,13 @@ printf( "%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n", tab, tab, "", - (long long)blk, (long long)radix, - (long long)radix, - 1 + (BLIST_META_RADIX - 1) / 4, + (long long)blk, (long long)radix * BLIST_RADIX, + (long long)radix * BLIST_RADIX, + (int)(1 + (BLIST_RADIX - 1) / 4), (long long)scan->bm_bitmap, (long long)scan->bm_bighint ); - radix /= BLIST_META_RADIX; skip = radix_to_skip(radix); tab += 4; @@ -1083,7 +1079,7 @@ do { digit = bitpos(mask); blst_radix_print(&scan[1 + digit * skip], blk + digit * radix, - radix, tab); + radix / BLIST_RADIX, tab); } while ((mask ^= bitrange(digit, 1)) != 0); tab -= 4; @@ -1100,7 +1096,7 @@ int main(int ac, char **av) { - int size = BLIST_META_RADIX * BLIST_BMAP_RADIX; + daddr_t size = BLIST_RADIX * BLIST_RADIX; int i; blist_t bl; struct sbuf *s; @@ -1108,7 +1104,7 @@ for (i = 1; i < ac; ++i) { const char *ptr = av[i]; if (*ptr != '-') { - size = strtol(ptr, NULL, 0); + size = strtoll(ptr, NULL, 0); continue; } ptr += 2; @@ -1116,6 +1112,10 @@ exit(1); } bl = blist_create(size, M_WAITOK); + if (bl == NULL) { + fprintf(stderr, "blist_create failed\n"); + exit(1); + } blist_free(bl, 0, size); for (;;) { @@ -1124,7 +1124,7 @@ int count = 0, maxcount = 0; printf("%lld/%lld/%lld> ", (long long)blist_avail(bl), - (long long)size, (long long)bl->bl_radix); + (long long)size, (long long)bl->bl_radix * BLIST_RADIX); fflush(stdout); if (fgets(buf, sizeof(buf), stdin) == NULL) break; Index: sys/sys/blist.h =================================================================== --- sys/sys/blist.h +++ sys/sys/blist.h @@ -85,10 +85,9 @@ blmeta_t bl_root[1]; /* root of radix tree */ } *blist_t; -#define BLIST_BMAP_RADIX (sizeof(u_daddr_t)*8) -#define BLIST_META_RADIX BLIST_BMAP_RADIX +#define BLIST_RADIX (sizeof(u_daddr_t)*8) -#define BLIST_MAX_ALLOC BLIST_BMAP_RADIX +#define BLIST_MAX_ALLOC BLIST_RADIX struct sbuf; Index: sys/vm/swap_pager.c =================================================================== --- sys/vm/swap_pager.c +++ sys/vm/swap_pager.c @@ -2328,7 +2328,6 @@ { struct swdevt *sp, *tsp; daddr_t dvbase; - u_long mblocks; /* * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks. @@ -2339,19 +2338,8 @@ nblks &= ~(ctodb(1) - 1); nblks = dbtoc(nblks); - /* - * If we go beyond this, we get overflows in the radix - * tree bitmap code. - */ - mblocks = 0x40000000 / BLIST_META_RADIX; - if (nblks > mblocks) { - printf( - "WARNING: reducing swap size to maximum of %luMB per unit\n", - mblocks / 1024 / 1024 * PAGE_SIZE); - nblks = mblocks; - } - sp = malloc(sizeof *sp, M_VMPGDATA, M_WAITOK | M_ZERO); + sp->sw_blist = blist_create(nblks, M_WAITOK); sp->sw_vp = vp; sp->sw_id = id; sp->sw_dev = dev; @@ -2361,7 +2349,6 @@ sp->sw_close = close; sp->sw_flags = flags; - sp->sw_blist = blist_create(nblks, M_WAITOK); /* * Do not free the first blocks in order to avoid overwriting * any bsd label at the front of the partition