Changeset View
Changeset View
Standalone View
Standalone View
head/sys/kern/subr_blist.c
Show All 30 Lines | |||||
* | * | ||||
* This module implements a general bitmap allocator/deallocator. The | * This module implements a general bitmap allocator/deallocator. The | ||||
* allocator eats around 2 bits per 'block'. The module does not | * allocator eats around 2 bits per 'block'. The module does not | ||||
* try to interpret the meaning of a 'block' other than to return | * try to interpret the meaning of a 'block' other than to return | ||||
* SWAPBLK_NONE on an allocation failure. | * SWAPBLK_NONE on an allocation failure. | ||||
* | * | ||||
* A radix tree controls access to pieces of the bitmap, and includes | * A radix tree controls access to pieces of the bitmap, and includes | ||||
* auxiliary information at each interior node about the availabilty of | * auxiliary information at each interior node about the availabilty of | ||||
* contiguous free blocks in the subtree rooted at that node. Two radix | * contiguous free blocks in the subtree rooted at that node. A radix | ||||
* constants are involved: one for the size of the bitmaps contained in the | * constant defines the size of the bitmaps contained in a leaf node | ||||
* leaf nodes (BLIST_BMAP_RADIX), and one for the number of descendents of | * and the number of descendents of each of the meta (interior) nodes. | ||||
* each of the meta (interior) nodes (BLIST_META_RADIX). Each subtree is | * Each subtree is associated with a range of blocks. The root of any | ||||
* associated with a range of blocks. The root of any subtree stores a | * subtree stores a hint field that defines an upper bound on the size | ||||
* hint field that defines an upper bound on the size of the largest | * of the largest allocation that can begin in the associated block | ||||
* allocation that can begin in the associated block range. A hint is an | * range. A hint is an upper bound on a potential allocation, but not | ||||
* upper bound on a potential allocation, but not necessarily a tight upper | * necessarily a tight upper bound. | ||||
* bound. | |||||
* | * | ||||
* The bitmap field in each node directs the search for available blocks. | * The bitmap field in each node directs the search for available blocks. | ||||
* For a leaf node, a bit is set if the corresponding block is free. For a | * For a leaf node, a bit is set if the corresponding block is free. For a | ||||
* meta node, a bit is set if the corresponding subtree contains a free | * meta node, a bit is set if the corresponding subtree contains a free | ||||
* block somewhere within it. The search at a meta node considers only | * block somewhere within it. The search at a meta node considers only | ||||
* children of that node that represent a range that includes a free block. | * children of that node that represent a range that includes a free block. | ||||
* | * | ||||
* The hinting greatly increases code efficiency for allocations while | * The hinting greatly increases code efficiency for allocations while | ||||
* the general radix structure optimizes both allocations and frees. The | * the general radix structure optimizes both allocations and frees. The | ||||
* radix tree should be able to operate well no matter how much | * radix tree should be able to operate well no matter how much | ||||
* fragmentation there is and no matter how large a bitmap is used. | * fragmentation there is and no matter how large a bitmap is used. | ||||
* | * | ||||
* The blist code wires all necessary memory at creation time. Neither | * The blist code wires all necessary memory at creation time. Neither | ||||
* allocations nor frees require interaction with the memory subsystem. | * allocations nor frees require interaction with the memory subsystem. | ||||
* The non-blocking nature of allocations and frees is required by swap | * The non-blocking nature of allocations and frees is required by swap | ||||
* code (vm/swap_pager.c). | * code (vm/swap_pager.c). | ||||
* | * | ||||
* LAYOUT: The radix tree is laid out recursively using a linear array. | * LAYOUT: The radix tree is laid out recursively using a linear array. | ||||
* Each meta node is immediately followed (laid out sequentially in | * Each meta node is immediately followed (laid out sequentially in | ||||
* memory) by BLIST_META_RADIX lower level nodes. This is a recursive | * memory) by BLIST_RADIX lower-level nodes. This is a recursive | ||||
* structure but one that can be easily scanned through a very simple | * structure but one that can be easily scanned through a very simple | ||||
* 'skip' calculation. The memory allocation is only large enough to | * 'skip' calculation. The memory allocation is only large enough to | ||||
* cover the number of blocks requested at creation time. Nodes that | * cover the number of blocks requested at creation time. Nodes that | ||||
* represent blocks beyond that limit, nodes that would never be read | * represent blocks beyond that limit, nodes that would never be read | ||||
* or written, are not allocated, so that the last of the | * or written, are not allocated, so that the last of the | ||||
* BLIST_META_RADIX lower level nodes of a some nodes may not be | * BLIST_RADIX lower-level nodes of a some nodes may not be allocated. | ||||
* allocated. | |||||
* | * | ||||
* NOTE: the allocator cannot currently allocate more than | * NOTE: the allocator cannot currently allocate more than | ||||
* BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too | * BLIST_RADIX blocks per call. It will panic with 'allocation too | ||||
* large' if you try. This is an area that could use improvement. The | * large' if you try. This is an area that could use improvement. The | ||||
* radix is large enough that this restriction does not effect the swap | * radix is large enough that this restriction does not effect the swap | ||||
* system, though. Currently only the allocation code is affected by | * system, though. Currently only the allocation code is affected by | ||||
* this algorithmic unfeature. The freeing code can handle arbitrary | * this algorithmic unfeature. The freeing code can handle arbitrary | ||||
* ranges. | * ranges. | ||||
* | * | ||||
* This code can be compiled stand-alone for debugging. | * This code can be compiled stand-alone for debugging. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | |||||
static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, | static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, | ||||
int tab); | int tab); | ||||
#endif | #endif | ||||
#ifdef _KERNEL | #ifdef _KERNEL | ||||
static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); | static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); | ||||
#endif | #endif | ||||
_Static_assert(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0, | #define BLIST_MASK (BLIST_RADIX - 1) | ||||
"radix divisibility error"); | |||||
#define BLIST_BMAP_MASK (BLIST_BMAP_RADIX - 1) | |||||
#define BLIST_META_MASK (BLIST_META_RADIX - 1) | |||||
/* | /* | ||||
* For a subtree that can represent the state of up to 'radix' blocks, the | * For a subtree that can represent the state of up to 'radix' blocks, the | ||||
* number of leaf nodes of the subtree is L=radix/BLIST_BMAP_RADIX. If 'm' | * number of leaf nodes of the subtree is L=radix/BLIST_RADIX. If 'm' | ||||
* is short for BLIST_META_RADIX, then for a tree of height h with L=m**h | * is short for BLIST_RADIX, then for a tree of height h with L=m**h | ||||
* leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h, | * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h, | ||||
* or, equivalently, (m**(h+1)-1)/(m-1). This quantity is called 'skip' | * or, equivalently, (m**(h+1)-1)/(m-1). This quantity is called 'skip' | ||||
* in the 'meta' functions that process subtrees. Since integer division | * in the 'meta' functions that process subtrees. Since integer division | ||||
* discards remainders, we can express this computation as | * discards remainders, we can express this computation as | ||||
* skip = (m * m**h) / (m - 1) | * skip = (m * m**h) / (m - 1) | ||||
* skip = (m * (radix / BLIST_BMAP_RADIX)) / (m - 1) | * skip = (m * (radix / m)) / (m - 1) | ||||
* and since m divides BLIST_BMAP_RADIX, we can simplify further to | * skip = radix / (m - 1) | ||||
* skip = (radix / (BLIST_BMAP_RADIX / m)) / (m - 1) | |||||
* skip = radix / ((BLIST_BMAP_RADIX / m) * (m - 1)) | |||||
* so that simple integer division by a constant can safely be used for the | * so that simple integer division by a constant can safely be used for the | ||||
* calculation. | * calculation. | ||||
*/ | */ | ||||
static inline daddr_t | static inline daddr_t | ||||
radix_to_skip(daddr_t radix) | radix_to_skip(daddr_t radix) | ||||
{ | { | ||||
return (radix / | return (radix / BLIST_MASK); | ||||
((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK)); | |||||
} | } | ||||
/* | /* | ||||
* Provide a mask with count bits set, starting as position n. | * Provide a mask with count bits set, starting as position n. | ||||
*/ | */ | ||||
static inline u_daddr_t | static inline u_daddr_t | ||||
bitrange(int n, int count) | bitrange(int n, int count) | ||||
{ | { | ||||
return (((u_daddr_t)-1 << n) & | return (((u_daddr_t)-1 << n) & | ||||
((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count)))); | ((u_daddr_t)-1 >> (BLIST_RADIX - (n + count)))); | ||||
} | } | ||||
/* | /* | ||||
* Find the first bit set in a u_daddr_t. | * Find the first bit set in a u_daddr_t. | ||||
*/ | */ | ||||
static inline int | static inline int | ||||
generic_bitpos(u_daddr_t mask) | generic_bitpos(u_daddr_t mask) | ||||
{ | { | ||||
int hi, lo, mid; | int hi, lo, mid; | ||||
lo = 0; | lo = 0; | ||||
hi = BLIST_BMAP_RADIX; | hi = BLIST_RADIX; | ||||
while (lo + 1 < hi) { | while (lo + 1 < hi) { | ||||
mid = (lo + hi) >> 1; | mid = (lo + hi) >> 1; | ||||
if (mask & bitrange(0, mid)) | if (mask & bitrange(0, mid)) | ||||
hi = mid; | hi = mid; | ||||
else | else | ||||
lo = mid; | lo = mid; | ||||
} | } | ||||
return (lo); | return (lo); | ||||
Show All 20 Lines | |||||
/* | /* | ||||
* blist_create() - create a blist capable of handling up to the specified | * blist_create() - create a blist capable of handling up to the specified | ||||
* number of blocks | * number of blocks | ||||
* | * | ||||
* blocks - must be greater than 0 | * blocks - must be greater than 0 | ||||
* flags - malloc flags | * flags - malloc flags | ||||
* | * | ||||
* The smallest blist consists of a single leaf node capable of | * The smallest blist consists of a single leaf node capable of | ||||
* managing BLIST_BMAP_RADIX blocks. | * managing BLIST_RADIX blocks. | ||||
*/ | */ | ||||
blist_t | blist_t | ||||
blist_create(daddr_t blocks, int flags) | blist_create(daddr_t blocks, int flags) | ||||
{ | { | ||||
blist_t bl; | blist_t bl; | ||||
u_daddr_t nodes, radix; | u_daddr_t nodes, radix; | ||||
KASSERT(blocks > 0, ("invalid block count")); | KASSERT(blocks > 0, ("invalid block count")); | ||||
/* | /* | ||||
* Calculate the radix and node count used for scanning. | * Calculate the radix and node count used for scanning. | ||||
*/ | */ | ||||
nodes = 1; | nodes = 1; | ||||
radix = BLIST_BMAP_RADIX; | for (radix = 1; radix <= blocks / BLIST_RADIX; radix *= BLIST_RADIX) | ||||
while (radix <= blocks) { | nodes += 1 + (blocks - 1) / radix / BLIST_RADIX; | ||||
nodes += 1 + (blocks - 1) / radix; | |||||
radix *= BLIST_META_RADIX; | |||||
} | |||||
bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags | | bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags | | ||||
M_ZERO); | M_ZERO); | ||||
if (bl == NULL) | if (bl == NULL) | ||||
return (NULL); | return (NULL); | ||||
bl->bl_blocks = blocks; | bl->bl_blocks = blocks; | ||||
bl->bl_radix = radix; | bl->bl_radix = radix; | ||||
▲ Show 20 Lines • Show All 276 Lines • ▼ Show 20 Lines | blist_stats(blist_t bl, struct sbuf *s) | ||||
struct gap_stats gstats; | struct gap_stats gstats; | ||||
struct gap_stats *stats = &gstats; | struct gap_stats *stats = &gstats; | ||||
daddr_t i, nodes, radix; | daddr_t i, nodes, radix; | ||||
u_daddr_t diff, mask; | u_daddr_t diff, mask; | ||||
int digit; | int digit; | ||||
init_gap_stats(stats); | init_gap_stats(stats); | ||||
nodes = 0; | nodes = 0; | ||||
i = bl->bl_radix; | radix = bl->bl_radix; | ||||
while (i < bl->bl_radix + bl->bl_blocks) { | for (i = 0; i < bl->bl_blocks; ) { | ||||
/* | /* | ||||
* Find max size subtree starting at i. | |||||
*/ | |||||
radix = BLIST_BMAP_RADIX; | |||||
while (((i / radix) & BLIST_META_MASK) == 0) | |||||
radix *= BLIST_META_RADIX; | |||||
/* | |||||
* Check for skippable subtrees starting at i. | * Check for skippable subtrees starting at i. | ||||
*/ | */ | ||||
while (radix > BLIST_BMAP_RADIX) { | while (radix != 1) { | ||||
if (bl->bl_root[nodes].bm_bitmap == 0) { | if (bl->bl_root[nodes].bm_bitmap == 0) { | ||||
if (gap_stats_counting(stats)) | if (gap_stats_counting(stats)) | ||||
update_gap_stats(stats, i); | update_gap_stats(stats, i); | ||||
break; | break; | ||||
} | } | ||||
/* | /* | ||||
* Skip subtree root. | * Skip subtree root. | ||||
*/ | */ | ||||
nodes++; | nodes++; | ||||
radix /= BLIST_META_RADIX; | radix /= BLIST_RADIX; | ||||
} | } | ||||
if (radix == BLIST_BMAP_RADIX) { | if (radix == 1) { | ||||
/* | /* | ||||
* Scan leaf. | * Scan leaf. | ||||
*/ | */ | ||||
mask = bl->bl_root[nodes].bm_bitmap; | mask = bl->bl_root[nodes].bm_bitmap; | ||||
diff = mask ^ (mask << 1); | diff = mask ^ (mask << 1); | ||||
if (gap_stats_counting(stats)) | if (gap_stats_counting(stats)) | ||||
diff ^= 1; | diff ^= 1; | ||||
while (diff != 0) { | while (diff != 0) { | ||||
digit = bitpos(diff); | digit = bitpos(diff); | ||||
update_gap_stats(stats, i + digit); | update_gap_stats(stats, i + digit); | ||||
diff ^= bitrange(digit, 1); | diff ^= bitrange(digit, 1); | ||||
} | } | ||||
} | } | ||||
nodes += radix_to_skip(radix); | nodes += radix_to_skip(radix * BLIST_RADIX); | ||||
i += radix; | i += radix * BLIST_RADIX; | ||||
/* | |||||
* Find max size subtree starting at i. | |||||
*/ | |||||
for (radix = 1; | |||||
((i / BLIST_RADIX / radix) & BLIST_MASK) == 0; | |||||
radix *= BLIST_RADIX) | |||||
; | |||||
} | } | ||||
update_gap_stats(stats, i); | update_gap_stats(stats, i); | ||||
dump_gap_stats(stats, s); | dump_gap_stats(stats, s); | ||||
} | } | ||||
/************************************************************************ | /************************************************************************ | ||||
* ALLOCATION SUPPORT FUNCTIONS * | * ALLOCATION SUPPORT FUNCTIONS * | ||||
************************************************************************ | ************************************************************************ | ||||
Show All 17 Lines | |||||
*/ | */ | ||||
static int | static int | ||||
blst_next_leaf_alloc(blmeta_t *scan, daddr_t start, int count, int maxcount) | blst_next_leaf_alloc(blmeta_t *scan, daddr_t start, int count, int maxcount) | ||||
{ | { | ||||
u_daddr_t radix; | u_daddr_t radix; | ||||
daddr_t blk; | daddr_t blk; | ||||
int avail, digit; | int avail, digit; | ||||
start += BLIST_BMAP_RADIX; | start += BLIST_RADIX; | ||||
for (blk = start; blk - start < maxcount; blk += BLIST_BMAP_RADIX) { | for (blk = start; blk - start < maxcount; blk += BLIST_RADIX) { | ||||
/* Skip meta-nodes, as long as they promise more free blocks. */ | /* Skip meta-nodes, as long as they promise more free blocks. */ | ||||
radix = BLIST_BMAP_RADIX; | radix = BLIST_RADIX; | ||||
while (((++scan)->bm_bitmap & 1) == 1 && | while (((++scan)->bm_bitmap & 1) == 1 && | ||||
((blk / radix) & BLIST_META_MASK) == 0) | ((blk / radix) & BLIST_MASK) == 0) | ||||
radix *= BLIST_META_RADIX; | radix *= BLIST_RADIX; | ||||
if (~scan->bm_bitmap != 0) { | if (~scan->bm_bitmap != 0) { | ||||
/* | /* | ||||
* Either there is no next leaf with any free blocks, | * Either there is no next leaf with any free blocks, | ||||
* or we've reached the next leaf and found that some | * or we've reached the next leaf and found that some | ||||
* of its blocks are not free. In the first case, | * of its blocks are not free. In the first case, | ||||
* bitpos() returns zero here. | * bitpos() returns zero here. | ||||
*/ | */ | ||||
avail = blk - start + bitpos(~scan->bm_bitmap); | avail = blk - start + bitpos(~scan->bm_bitmap); | ||||
if (avail < count || avail == 0) { | if (avail < count || avail == 0) { | ||||
/* | /* | ||||
* There isn't a next leaf with enough free | * There isn't a next leaf with enough free | ||||
* blocks at its beginning to bother | * blocks at its beginning to bother | ||||
* allocating. | * allocating. | ||||
*/ | */ | ||||
return (avail); | return (avail); | ||||
} | } | ||||
maxcount = imin(avail, maxcount); | maxcount = imin(avail, maxcount); | ||||
if (maxcount % BLIST_BMAP_RADIX == 0) { | if (maxcount % BLIST_RADIX == 0) { | ||||
/* | /* | ||||
* There was no next leaf. Back scan up to | * There was no next leaf. Back scan up to | ||||
* last leaf. | * last leaf. | ||||
*/ | */ | ||||
do { | |||||
radix /= BLIST_RADIX; | |||||
--scan; | --scan; | ||||
while (radix != BLIST_BMAP_RADIX) { | } while (radix != 1); | ||||
radix /= BLIST_META_RADIX; | blk -= BLIST_RADIX; | ||||
--scan; | |||||
} | } | ||||
blk -= BLIST_BMAP_RADIX; | |||||
} | } | ||||
} | } | ||||
} | |||||
/* | /* | ||||
* 'scan' is the last leaf that provides blocks. Clear from 1 to | * 'scan' is the last leaf that provides blocks. Clear from 1 to | ||||
* BLIST_BMAP_RADIX bits to represent the allocation of those last | * BLIST_RADIX bits to represent the allocation of those last blocks. | ||||
* blocks. | |||||
*/ | */ | ||||
if (maxcount % BLIST_BMAP_RADIX != 0) | if (maxcount % BLIST_RADIX != 0) | ||||
scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_BMAP_RADIX); | scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_RADIX); | ||||
else | else | ||||
scan->bm_bitmap = 0; | scan->bm_bitmap = 0; | ||||
for (;;) { | for (;;) { | ||||
/* Back up over meta-nodes, clearing bits if necessary. */ | /* Back up over meta-nodes, clearing bits if necessary. */ | ||||
blk -= BLIST_BMAP_RADIX; | blk -= BLIST_RADIX; | ||||
radix = BLIST_BMAP_RADIX; | for (radix = BLIST_RADIX; | ||||
while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0) { | (digit = ((blk / radix) & BLIST_MASK)) == 0; | ||||
radix *= BLIST_RADIX) { | |||||
if ((scan--)->bm_bitmap == 0) | if ((scan--)->bm_bitmap == 0) | ||||
scan->bm_bitmap ^= 1; | scan->bm_bitmap ^= 1; | ||||
radix *= BLIST_META_RADIX; | |||||
} | } | ||||
if ((scan--)->bm_bitmap == 0) | if ((scan--)->bm_bitmap == 0) | ||||
scan[-digit * radix_to_skip(radix)].bm_bitmap ^= | scan[-digit * radix_to_skip(radix)].bm_bitmap ^= | ||||
(u_daddr_t)1 << digit; | (u_daddr_t)1 << digit; | ||||
if (blk == start) | if (blk == start) | ||||
break; | break; | ||||
/* Clear all the bits of this leaf. */ | /* Clear all the bits of this leaf. */ | ||||
Show All 38 Lines | if (~mask == 0) { | ||||
* Update bighint. There is no allocation bigger than | * Update bighint. There is no allocation bigger than | ||||
* count1 >> num_shifts starting in this leaf. | * count1 >> num_shifts starting in this leaf. | ||||
*/ | */ | ||||
scan->bm_bighint = bighint; | scan->bm_bighint = bighint; | ||||
return (SWAPBLK_NONE); | return (SWAPBLK_NONE); | ||||
} | } | ||||
/* Discard any candidates that appear before blk. */ | /* Discard any candidates that appear before blk. */ | ||||
if ((blk & BLIST_BMAP_MASK) != 0) { | if ((blk & BLIST_MASK) != 0) { | ||||
if ((~mask & bitrange(0, blk & BLIST_BMAP_MASK)) != 0) { | if ((~mask & bitrange(0, blk & BLIST_MASK)) != 0) { | ||||
/* Grow bighint in case all discarded bits are set. */ | /* Grow bighint in case all discarded bits are set. */ | ||||
bighint += blk & BLIST_BMAP_MASK; | bighint += blk & BLIST_MASK; | ||||
mask |= bitrange(0, blk & BLIST_BMAP_MASK); | mask |= bitrange(0, blk & BLIST_MASK); | ||||
if (~mask == 0) { | if (~mask == 0) { | ||||
scan->bm_bighint = bighint; | scan->bm_bighint = bighint; | ||||
return (SWAPBLK_NONE); | return (SWAPBLK_NONE); | ||||
} | } | ||||
} | } | ||||
blk -= blk & BLIST_BMAP_MASK; | blk -= blk & BLIST_MASK; | ||||
} | } | ||||
/* | /* | ||||
* The least significant set bit in mask marks the start of the first | * The least significant set bit in mask marks the start of the first | ||||
* available range of sufficient size. Find its position. | * available range of sufficient size. Find its position. | ||||
*/ | */ | ||||
lo = bitpos(~mask); | lo = bitpos(~mask); | ||||
/* | /* | ||||
* Find how much space is available starting at that position. | * Find how much space is available starting at that position. | ||||
*/ | */ | ||||
if ((mask & (mask + 1)) != 0) { | if ((mask & (mask + 1)) != 0) { | ||||
/* Count the 1 bits starting at position lo. */ | /* Count the 1 bits starting at position lo. */ | ||||
hi = bitpos(mask & (mask + 1)) + count1; | hi = bitpos(mask & (mask + 1)) + count1; | ||||
if (maxcount < hi - lo) | if (maxcount < hi - lo) | ||||
hi = lo + maxcount; | hi = lo + maxcount; | ||||
*count = hi - lo; | *count = hi - lo; | ||||
mask = ~bitrange(lo, *count); | mask = ~bitrange(lo, *count); | ||||
} else if (maxcount <= BLIST_BMAP_RADIX - lo) { | } else if (maxcount <= BLIST_RADIX - lo) { | ||||
/* All the blocks we can use are available here. */ | /* All the blocks we can use are available here. */ | ||||
hi = lo + maxcount; | hi = lo + maxcount; | ||||
*count = maxcount; | *count = maxcount; | ||||
mask = ~bitrange(lo, *count); | mask = ~bitrange(lo, *count); | ||||
if (hi == BLIST_BMAP_RADIX) | if (hi == BLIST_RADIX) | ||||
scan->bm_bighint = bighint; | scan->bm_bighint = bighint; | ||||
} else { | } else { | ||||
/* Check next leaf for some of the blocks we want or need. */ | /* Check next leaf for some of the blocks we want or need. */ | ||||
count1 = *count - (BLIST_BMAP_RADIX - lo); | count1 = *count - (BLIST_RADIX - lo); | ||||
maxcount -= BLIST_BMAP_RADIX - lo; | maxcount -= BLIST_RADIX - lo; | ||||
hi = blst_next_leaf_alloc(scan, blk, count1, maxcount); | hi = blst_next_leaf_alloc(scan, blk, count1, maxcount); | ||||
if (hi < count1) | if (hi < count1) | ||||
/* | /* | ||||
* The next leaf cannot supply enough blocks to reach | * The next leaf cannot supply enough blocks to reach | ||||
* the minimum required allocation. The hint cannot be | * the minimum required allocation. The hint cannot be | ||||
* updated, because the same allocation request could | * updated, because the same allocation request could | ||||
* be satisfied later, by this leaf, if the state of | * be satisfied later, by this leaf, if the state of | ||||
* the next leaf changes, and without any changes to | * the next leaf changes, and without any changes to | ||||
* this leaf. | * this leaf. | ||||
*/ | */ | ||||
return (SWAPBLK_NONE); | return (SWAPBLK_NONE); | ||||
*count = BLIST_BMAP_RADIX - lo + hi; | *count = BLIST_RADIX - lo + hi; | ||||
scan->bm_bighint = bighint; | scan->bm_bighint = bighint; | ||||
} | } | ||||
/* Clear the allocated bits from this leaf. */ | /* Clear the allocated bits from this leaf. */ | ||||
scan->bm_bitmap &= mask; | scan->bm_bitmap &= mask; | ||||
return (blk + lo); | return (blk + lo); | ||||
} | } | ||||
Show All 9 Lines | |||||
blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count, | blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count, | ||||
int maxcount, u_daddr_t radix) | int maxcount, u_daddr_t radix) | ||||
{ | { | ||||
daddr_t blk, i, r, skip; | daddr_t blk, i, r, skip; | ||||
u_daddr_t mask; | u_daddr_t mask; | ||||
bool scan_from_start; | bool scan_from_start; | ||||
int digit; | int digit; | ||||
if (radix == BLIST_BMAP_RADIX) | if (radix == 1) | ||||
return (blst_leaf_alloc(scan, cursor, count, maxcount)); | return (blst_leaf_alloc(scan, cursor, count, maxcount)); | ||||
blk = cursor & -radix; | blk = cursor & -(radix * BLIST_RADIX); | ||||
scan_from_start = (cursor == blk); | scan_from_start = (cursor == blk); | ||||
radix /= BLIST_META_RADIX; | |||||
skip = radix_to_skip(radix); | skip = radix_to_skip(radix); | ||||
mask = scan->bm_bitmap; | mask = scan->bm_bitmap; | ||||
/* Discard any candidates that appear before cursor. */ | /* Discard any candidates that appear before cursor. */ | ||||
digit = (cursor / radix) & BLIST_META_MASK; | digit = (cursor / radix) & BLIST_MASK; | ||||
mask &= (u_daddr_t)-1 << digit; | mask &= (u_daddr_t)-1 << digit; | ||||
if (mask == 0) | if (mask == 0) | ||||
return (SWAPBLK_NONE); | return (SWAPBLK_NONE); | ||||
/* | /* | ||||
* If the first try is for a block that includes the cursor, pre-undo | * If the first try is for a block that includes the cursor, pre-undo | ||||
* the digit * radix offset in the first call; otherwise, ignore the | * the digit * radix offset in the first call; otherwise, ignore the | ||||
* cursor entirely. | * cursor entirely. | ||||
Show All 9 Lines | blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count, | ||||
do { | do { | ||||
digit = bitpos(mask); | digit = bitpos(mask); | ||||
i = 1 + digit * skip; | i = 1 + digit * skip; | ||||
if (*count <= scan[i].bm_bighint) { | if (*count <= scan[i].bm_bighint) { | ||||
/* | /* | ||||
* The allocation might fit beginning in the i'th subtree. | * The allocation might fit beginning in the i'th subtree. | ||||
*/ | */ | ||||
r = blst_meta_alloc(&scan[i], cursor + digit * radix, | r = blst_meta_alloc(&scan[i], cursor + digit * radix, | ||||
count, maxcount, radix); | count, maxcount, radix / BLIST_RADIX); | ||||
if (r != SWAPBLK_NONE) { | if (r != SWAPBLK_NONE) { | ||||
if (scan[i].bm_bitmap == 0) | if (scan[i].bm_bitmap == 0) | ||||
scan->bm_bitmap ^= bitrange(digit, 1); | scan->bm_bitmap ^= bitrange(digit, 1); | ||||
return (r); | return (r); | ||||
} | } | ||||
} | } | ||||
cursor = blk; | cursor = blk; | ||||
} while ((mask ^= bitrange(digit, 1)) != 0); | } while ((mask ^= bitrange(digit, 1)) != 0); | ||||
/* | /* | ||||
* We couldn't allocate count in this subtree. If the whole tree was | * We couldn't allocate count in this subtree. If the whole tree was | ||||
* scanned, and the last tree node is allocated, update bighint. | * scanned, and the last tree node is allocated, update bighint. | ||||
*/ | */ | ||||
if (scan_from_start && !(digit == BLIST_META_RADIX - 1 && | if (scan_from_start && !(digit == BLIST_RADIX - 1 && | ||||
scan[i].bm_bighint == BLIST_MAX_ALLOC)) | scan[i].bm_bighint == BLIST_MAX_ALLOC)) | ||||
scan->bm_bighint = *count - 1; | scan->bm_bighint = *count - 1; | ||||
return (SWAPBLK_NONE); | return (SWAPBLK_NONE); | ||||
} | } | ||||
/* | /* | ||||
* BLST_LEAF_FREE() - free allocated block from leaf bitmap | * BLST_LEAF_FREE() - free allocated block from leaf bitmap | ||||
* | * | ||||
*/ | */ | ||||
static void | static void | ||||
blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) | blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) | ||||
{ | { | ||||
u_daddr_t mask; | u_daddr_t mask; | ||||
/* | /* | ||||
* free some data in this bitmap | * free some data in this bitmap | ||||
* mask=0000111111111110000 | * mask=0000111111111110000 | ||||
* \_________/\__/ | * \_________/\__/ | ||||
* count n | * count n | ||||
*/ | */ | ||||
mask = bitrange(blk & BLIST_BMAP_MASK, count); | mask = bitrange(blk & BLIST_MASK, count); | ||||
KASSERT((scan->bm_bitmap & mask) == 0, | KASSERT((scan->bm_bitmap & mask) == 0, | ||||
("freeing free block: %jx, size %d, mask %jx", | ("freeing free block: %jx, size %d, mask %jx", | ||||
(uintmax_t)blk, count, (uintmax_t)scan->bm_bitmap & mask)); | (uintmax_t)blk, count, (uintmax_t)scan->bm_bitmap & mask)); | ||||
scan->bm_bitmap |= mask; | scan->bm_bitmap |= mask; | ||||
} | } | ||||
/* | /* | ||||
* BLST_META_FREE() - free allocated blocks from radix tree meta info | * BLST_META_FREE() - free allocated blocks from radix tree meta info | ||||
Show All 14 Lines | blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix) | ||||
/* | /* | ||||
* We could probably do a better job here. We are required to make | * We could probably do a better job here. We are required to make | ||||
* bighint at least as large as the biggest allocable block of data. | * bighint at least as large as the biggest allocable block of data. | ||||
* If we just shoehorn it, a little extra overhead will be incurred | * If we just shoehorn it, a little extra overhead will be incurred | ||||
* on the next allocation (but only that one typically). | * on the next allocation (but only that one typically). | ||||
*/ | */ | ||||
scan->bm_bighint = BLIST_MAX_ALLOC; | scan->bm_bighint = BLIST_MAX_ALLOC; | ||||
if (radix == BLIST_BMAP_RADIX) | if (radix == 1) | ||||
return (blst_leaf_free(scan, freeBlk, count)); | return (blst_leaf_free(scan, freeBlk, count)); | ||||
endBlk = ummin(freeBlk + count, (freeBlk + radix) & -radix); | endBlk = freeBlk + count; | ||||
radix /= BLIST_META_RADIX; | blk = (freeBlk + radix * BLIST_RADIX) & -(radix * BLIST_RADIX); | ||||
/* | |||||
* blk is first block past the end of the range of this meta node, | |||||
* or 0 in case of overflow. | |||||
*/ | |||||
if (blk != 0) | |||||
endBlk = ummin(endBlk, blk); | |||||
skip = radix_to_skip(radix); | skip = radix_to_skip(radix); | ||||
blk = freeBlk & -radix; | blk = freeBlk & -radix; | ||||
digit = (blk / radix) & BLIST_META_MASK; | digit = (blk / radix) & BLIST_MASK; | ||||
endDigit = 1 + (((endBlk - 1) / radix) & BLIST_META_MASK); | endDigit = 1 + (((endBlk - 1) / radix) & BLIST_MASK); | ||||
scan->bm_bitmap |= bitrange(digit, endDigit - digit); | scan->bm_bitmap |= bitrange(digit, endDigit - digit); | ||||
for (i = 1 + digit * skip; blk < endBlk; i += skip) { | for (i = 1 + digit * skip; blk < endBlk; i += skip) { | ||||
blk += radix; | blk += radix; | ||||
count = ummin(blk, endBlk) - freeBlk; | count = ummin(blk, endBlk) - freeBlk; | ||||
blst_meta_free(&scan[i], freeBlk, count, radix); | blst_meta_free(&scan[i], freeBlk, count, radix / BLIST_RADIX); | ||||
freeBlk = blk; | freeBlk = blk; | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* BLST_COPY() - copy one radix tree to another | * BLST_COPY() - copy one radix tree to another | ||||
* | * | ||||
* Locates free space in the source tree and frees it in the destination | * Locates free space in the source tree and frees it in the destination | ||||
* tree. The space may not already be free in the destination. | * tree. The space may not already be free in the destination. | ||||
*/ | */ | ||||
static void | static void | ||||
blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest, | blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest, | ||||
daddr_t count) | daddr_t count) | ||||
{ | { | ||||
daddr_t endBlk, i, skip; | daddr_t endBlk, i, skip; | ||||
/* | /* | ||||
* Leaf node | * Leaf node | ||||
*/ | */ | ||||
if (radix == BLIST_BMAP_RADIX) { | if (radix == 1) { | ||||
u_daddr_t v = scan->bm_bitmap; | u_daddr_t v = scan->bm_bitmap; | ||||
if (v == (u_daddr_t)-1) { | if (v == (u_daddr_t)-1) { | ||||
blist_free(dest, blk, count); | blist_free(dest, blk, count); | ||||
} else if (v != 0) { | } else if (v != 0) { | ||||
int i; | int i; | ||||
for (i = 0; i < count; ++i) { | for (i = 0; i < count; ++i) { | ||||
Show All 11 Lines | blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest, | ||||
if (scan->bm_bitmap == 0) { | if (scan->bm_bitmap == 0) { | ||||
/* | /* | ||||
* Source all allocated, leave dest allocated | * Source all allocated, leave dest allocated | ||||
*/ | */ | ||||
return; | return; | ||||
} | } | ||||
endBlk = blk + count; | endBlk = blk + count; | ||||
radix /= BLIST_META_RADIX; | |||||
skip = radix_to_skip(radix); | skip = radix_to_skip(radix); | ||||
for (i = 1; blk < endBlk; i += skip) { | for (i = 1; blk < endBlk; i += skip) { | ||||
blk += radix; | blk += radix; | ||||
count = radix; | count = radix; | ||||
if (blk >= endBlk) | if (blk >= endBlk) | ||||
count -= blk - endBlk; | count -= blk - endBlk; | ||||
blst_copy(&scan[i], blk - radix, radix, dest, count); | blst_copy(&scan[i], blk - radix, | ||||
radix / BLIST_RADIX, dest, count); | |||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap | * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap | ||||
* | * | ||||
* This routine allocates all blocks in the specified range | * This routine allocates all blocks in the specified range | ||||
* regardless of any existing allocations in that range. Returns | * regardless of any existing allocations in that range. Returns | ||||
* the number of blocks allocated by the call. | * the number of blocks allocated by the call. | ||||
*/ | */ | ||||
static daddr_t | static daddr_t | ||||
blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) | blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) | ||||
{ | { | ||||
daddr_t nblks; | daddr_t nblks; | ||||
u_daddr_t mask; | u_daddr_t mask; | ||||
mask = bitrange(blk & BLIST_BMAP_MASK, count); | mask = bitrange(blk & BLIST_MASK, count); | ||||
/* Count the number of blocks that we are allocating. */ | /* Count the number of blocks that we are allocating. */ | ||||
nblks = bitcount64(scan->bm_bitmap & mask); | nblks = bitcount64(scan->bm_bitmap & mask); | ||||
scan->bm_bitmap &= ~mask; | scan->bm_bitmap &= ~mask; | ||||
return (nblks); | return (nblks); | ||||
} | } | ||||
/* | /* | ||||
* BLIST_META_FILL() - allocate specific blocks at a meta node | * BLIST_META_FILL() - allocate specific blocks at a meta node | ||||
* | * | ||||
* This routine allocates the specified range of blocks, | * This routine allocates the specified range of blocks, | ||||
* regardless of any existing allocations in the range. The | * regardless of any existing allocations in the range. The | ||||
* range must be within the extent of this node. Returns the | * range must be within the extent of this node. Returns the | ||||
* number of blocks allocated by the call. | * number of blocks allocated by the call. | ||||
*/ | */ | ||||
static daddr_t | static daddr_t | ||||
blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix) | blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix) | ||||
{ | { | ||||
daddr_t blk, endBlk, i, nblks, skip; | daddr_t blk, endBlk, i, nblks, skip; | ||||
int digit; | int digit; | ||||
if (radix == BLIST_BMAP_RADIX) | if (radix == 1) | ||||
return (blst_leaf_fill(scan, allocBlk, count)); | return (blst_leaf_fill(scan, allocBlk, count)); | ||||
endBlk = ummin(allocBlk + count, (allocBlk + radix) & -radix); | endBlk = allocBlk + count; | ||||
radix /= BLIST_META_RADIX; | blk = (allocBlk + radix * BLIST_RADIX) & -(radix * BLIST_RADIX); | ||||
/* | |||||
* blk is first block past the end of the range of this meta node, | |||||
* or 0 in case of overflow. | |||||
*/ | |||||
if (blk != 0) | |||||
endBlk = ummin(endBlk, blk); | |||||
skip = radix_to_skip(radix); | skip = radix_to_skip(radix); | ||||
blk = allocBlk & -radix; | blk = allocBlk & -radix; | ||||
nblks = 0; | nblks = 0; | ||||
while (blk < endBlk) { | while (blk < endBlk) { | ||||
digit = (blk / radix) & BLIST_META_MASK; | digit = (blk / radix) & BLIST_MASK; | ||||
i = 1 + digit * skip; | i = 1 + digit * skip; | ||||
blk += radix; | blk += radix; | ||||
count = ummin(blk, endBlk) - allocBlk; | count = ummin(blk, endBlk) - allocBlk; | ||||
nblks += blst_meta_fill(&scan[i], allocBlk, count, radix); | nblks += blst_meta_fill(&scan[i], allocBlk, count, | ||||
radix / BLIST_RADIX); | |||||
if (scan[i].bm_bitmap == 0) | if (scan[i].bm_bitmap == 0) | ||||
scan->bm_bitmap &= ~((u_daddr_t)1 << digit); | scan->bm_bitmap &= ~((u_daddr_t)1 << digit); | ||||
allocBlk = blk; | allocBlk = blk; | ||||
} | } | ||||
return (nblks); | return (nblks); | ||||
} | } | ||||
#ifdef BLIST_DEBUG | #ifdef BLIST_DEBUG | ||||
static void | static void | ||||
blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab) | blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab) | ||||
{ | { | ||||
daddr_t skip; | daddr_t skip; | ||||
u_daddr_t mask; | u_daddr_t mask; | ||||
int digit; | int digit; | ||||
if (radix == BLIST_BMAP_RADIX) { | if (radix == 1) { | ||||
printf( | printf( | ||||
"%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n", | "%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n", | ||||
tab, tab, "", | tab, tab, "", | ||||
(long long)blk, (long long)radix, | (long long)blk, (long long)BLIST_RADIX, | ||||
1 + (BLIST_BMAP_RADIX - 1) / 4, | (int)(1 + (BLIST_RADIX - 1) / 4), | ||||
(long long)scan->bm_bitmap, | (long long)scan->bm_bitmap, | ||||
(long long)scan->bm_bighint | (long long)scan->bm_bighint | ||||
); | ); | ||||
return; | return; | ||||
} | } | ||||
printf( | printf( | ||||
"%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n", | "%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n", | ||||
tab, tab, "", | tab, tab, "", | ||||
(long long)blk, (long long)radix, | (long long)blk, (long long)radix * BLIST_RADIX, | ||||
(long long)radix, | (long long)radix * BLIST_RADIX, | ||||
1 + (BLIST_META_RADIX - 1) / 4, | (int)(1 + (BLIST_RADIX - 1) / 4), | ||||
(long long)scan->bm_bitmap, | (long long)scan->bm_bitmap, | ||||
(long long)scan->bm_bighint | (long long)scan->bm_bighint | ||||
); | ); | ||||
radix /= BLIST_META_RADIX; | |||||
skip = radix_to_skip(radix); | skip = radix_to_skip(radix); | ||||
tab += 4; | tab += 4; | ||||
mask = scan->bm_bitmap; | mask = scan->bm_bitmap; | ||||
/* Examine the nonempty subtree associated with each bit set in mask */ | /* Examine the nonempty subtree associated with each bit set in mask */ | ||||
do { | do { | ||||
digit = bitpos(mask); | digit = bitpos(mask); | ||||
blst_radix_print(&scan[1 + digit * skip], blk + digit * radix, | blst_radix_print(&scan[1 + digit * skip], blk + digit * radix, | ||||
radix, tab); | radix / BLIST_RADIX, tab); | ||||
} while ((mask ^= bitrange(digit, 1)) != 0); | } while ((mask ^= bitrange(digit, 1)) != 0); | ||||
tab -= 4; | tab -= 4; | ||||
printf( | printf( | ||||
"%*.*s}\n", | "%*.*s}\n", | ||||
tab, tab, "" | tab, tab, "" | ||||
); | ); | ||||
} | } | ||||
#endif | #endif | ||||
#ifdef BLIST_DEBUG | #ifdef BLIST_DEBUG | ||||
int | int | ||||
main(int ac, char **av) | main(int ac, char **av) | ||||
{ | { | ||||
int size = BLIST_META_RADIX * BLIST_BMAP_RADIX; | daddr_t size = BLIST_RADIX * BLIST_RADIX; | ||||
int i; | int i; | ||||
blist_t bl; | blist_t bl; | ||||
struct sbuf *s; | struct sbuf *s; | ||||
for (i = 1; i < ac; ++i) { | for (i = 1; i < ac; ++i) { | ||||
const char *ptr = av[i]; | const char *ptr = av[i]; | ||||
if (*ptr != '-') { | if (*ptr != '-') { | ||||
size = strtol(ptr, NULL, 0); | size = strtoll(ptr, NULL, 0); | ||||
continue; | continue; | ||||
} | } | ||||
ptr += 2; | ptr += 2; | ||||
fprintf(stderr, "Bad option: %s\n", ptr - 2); | fprintf(stderr, "Bad option: %s\n", ptr - 2); | ||||
exit(1); | exit(1); | ||||
} | } | ||||
bl = blist_create(size, M_WAITOK); | bl = blist_create(size, M_WAITOK); | ||||
if (bl == NULL) { | |||||
fprintf(stderr, "blist_create failed\n"); | |||||
exit(1); | |||||
} | |||||
blist_free(bl, 0, size); | blist_free(bl, 0, size); | ||||
for (;;) { | for (;;) { | ||||
char buf[1024]; | char buf[1024]; | ||||
long long da = 0; | long long da = 0; | ||||
int count = 0, maxcount = 0; | int count = 0, maxcount = 0; | ||||
printf("%lld/%lld/%lld> ", (long long)blist_avail(bl), | printf("%lld/%lld/%lld> ", (long long)blist_avail(bl), | ||||
(long long)size, (long long)bl->bl_radix); | (long long)size, (long long)bl->bl_radix * BLIST_RADIX); | ||||
fflush(stdout); | fflush(stdout); | ||||
if (fgets(buf, sizeof(buf), stdin) == NULL) | if (fgets(buf, sizeof(buf), stdin) == NULL) | ||||
break; | break; | ||||
switch(buf[0]) { | switch(buf[0]) { | ||||
case 'r': | case 'r': | ||||
if (sscanf(buf + 1, "%d", &count) == 1) { | if (sscanf(buf + 1, "%d", &count) == 1) { | ||||
blist_resize(&bl, count, 1, M_WAITOK); | blist_resize(&bl, count, 1, M_WAITOK); | ||||
} else { | } else { | ||||
▲ Show 20 Lines • Show All 62 Lines • Show Last 20 Lines |