Changeset View
Standalone View
sys/kern/subr_blist.c
Context not available. | |||||
* upper bound on a potential allocation, but not necessarily a tight upper | * upper bound on a potential allocation, but not necessarily a tight upper | ||||
* bound. | * bound. | ||||
* | * | ||||
* The radix tree also implements two collapsed states for meta nodes: | * The bitmap field in each node directs the search for available blocks. | ||||
* the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is | * For a leaf node, a bit is set if the corresponding block is free. For a | ||||
* in either of these two states, all information contained underneath | * meta node, a bit is set if the corresponding subtree contains a free | ||||
* the node is considered stale. These states are used to optimize | * block somewhere within it. The search at a meta node considers only | ||||
* allocation and freeing operations. | * children of that node that represent a range that includes a free block. | ||||
* | * | ||||
* The hinting greatly increases code efficiency for allocations while | * The hinting greatly increases code efficiency for allocations while | ||||
* the general radix structure optimizes both allocations and frees. The | * the general radix structure optimizes both allocations and frees. The | ||||
Context not available. | |||||
* The non-blocking features of the blist code are used in the swap code | * The non-blocking features of the blist code are used in the swap code | ||||
alc: It would be clearer for this sentence to say, "The non-blocking nature of allocations and frees… | |||||
* (vm/swap_pager.c). | * (vm/swap_pager.c). | ||||
* | * | ||||
* LAYOUT: The radix tree is laid out recursively using a | * LAYOUT: The radix tree is laid out recursively using a linear array. | ||||
* linear array. Each meta node is immediately followed (laid out | * Each meta node is immediately followed (laid out sequentially in memory) | ||||
* sequentially in memory) by BLIST_META_RADIX lower level nodes. This | * by BLIST_META_RADIX lower level nodes. This is a recursive structure | ||||
* is a recursive structure but one that can be easily scanned through | * but one that can be easily scanned through a very simple 'skip' | ||||
* a very simple 'skip' calculation. In order to support large radixes, | * calculation. The memory allocation is only large enough to cover the | ||||
* portions of the tree may reside outside our memory allocation. We | * number of blocks requested at creation time even if it must be | ||||
* handle this with an early-termination optimization (when bighint is | * encompassed in larger root-node radix. | ||||
* set to -1) on the scan. The memory allocation is only large enough | |||||
* to cover the number of blocks requested at creation time even if it | |||||
* must be encompassed in larger root-node radix. | |||||
* | * | ||||
Done Inline ActionsI'm not sure that this line is clearly worded. At the very least, it seems like there is a missing article ("a") in the middle of it. alc: I'm not sure that this line is clearly worded. At the very least, it seems like there is a… | |||||
* NOTE: the allocator cannot currently allocate more than | * NOTE: the allocator cannot currently allocate more than | ||||
* BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too | * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too | ||||
Context not available. | |||||
#define BLIST_DEBUG | #define BLIST_DEBUG | ||||
#endif | #endif | ||||
#include <sys/errno.h> | |||||
#include <sys/types.h> | #include <sys/types.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/sbuf.h> | #include <sys/sbuf.h> | ||||
Context not available. | |||||
#define bitcount64(x) __bitcount64((uint64_t)(x)) | #define bitcount64(x) __bitcount64((uint64_t)(x)) | ||||
#define malloc(a,b,c) calloc(a, 1) | #define malloc(a,b,c) calloc(a, 1) | ||||
#define free(a,b) free(a) | #define free(a,b) free(a) | ||||
static __inline int imax(int a, int b) { return (a > b ? a : b); } | #define ummin(a,b) ((a) < (b) ? (a) : (b)) | ||||
#include <sys/blist.h> | #include <sys/blist.h> | ||||
Context not available. | |||||
} | } | ||||
/* | /* | ||||
* Provide a mask with count bits set, starting as position n. | |||||
*/ | |||||
static inline u_daddr_t | |||||
bitrange(int n, int count) | |||||
{ | |||||
Done Inline ActionsInsert a blank line here. alc: Insert a blank line here. | |||||
return (((u_daddr_t)-1 << n) & | |||||
Done Inline ActionsThis continuation line should be indented by 12 character positions. alc: This continuation line should be indented by 12 character positions. | |||||
((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count)))); | |||||
} | |||||
/* | |||||
* Use binary search, or a faster method, to find the 1 bit in a u_daddr_t. | * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t. | ||||
* Assumes that the argument has only one bit set. | * Assumes that the argument has only one bit set. | ||||
*/ | */ | ||||
Context not available. | |||||
blist_create(daddr_t blocks, int flags) | blist_create(daddr_t blocks, int flags) | ||||
{ | { | ||||
blist_t bl; | blist_t bl; | ||||
daddr_t i, last_block; | u_daddr_t nodes, radix; | ||||
u_daddr_t nodes, radix, skip; | |||||
int digit; | |||||
/* | /* | ||||
* Calculate the radix and node count used for scanning. Find the last | * Calculate the radix and node count used for scanning. Include space | ||||
* block that is followed by a terminator. | * for an always-zero leaf at the end, if the last leaf can have a free | ||||
* last bit. | |||||
*/ | */ | ||||
last_block = blocks - 1; | nodes = 1; | ||||
radix = BLIST_BMAP_RADIX; | radix = BLIST_BMAP_RADIX; | ||||
while (radix < blocks) { | while (radix <= blocks) { | ||||
if (((last_block / radix + 1) & BLIST_META_MASK) != 0) | nodes += 1 + (blocks - 1) / radix; | ||||
/* | |||||
* A terminator will be added. Update last_block to the | |||||
* position just before that terminator. | |||||
*/ | |||||
last_block |= radix - 1; | |||||
radix *= BLIST_META_RADIX; | radix *= BLIST_META_RADIX; | ||||
} | } | ||||
/* | |||||
* Count the meta-nodes in the expanded tree, including the final | |||||
* terminator, from the bottom level up to the root. | |||||
*/ | |||||
nodes = (last_block >= blocks) ? 2 : 1; | |||||
last_block /= BLIST_BMAP_RADIX; | |||||
while (last_block > 0) { | |||||
nodes += last_block + 1; | |||||
last_block /= BLIST_META_RADIX; | |||||
} | |||||
bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags | | bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags | | ||||
M_ZERO); | M_ZERO); | ||||
if (bl == NULL) | if (bl == NULL) | ||||
Context not available. | |||||
bl->bl_blocks = blocks; | bl->bl_blocks = blocks; | ||||
bl->bl_radix = radix; | bl->bl_radix = radix; | ||||
bl->bl_cursor = 0; | |||||
/* | |||||
* Initialize the empty tree by filling in root values, then initialize | |||||
* just the terminators in the rest of the tree. | |||||
*/ | |||||
bl->bl_root[0].bm_bighint = 0; | |||||
if (radix == BLIST_BMAP_RADIX) | |||||
bl->bl_root[0].u.bmu_bitmap = 0; | |||||
else | |||||
bl->bl_root[0].u.bmu_avail = 0; | |||||
last_block = blocks - 1; | |||||
i = 0; | |||||
while (radix > BLIST_BMAP_RADIX) { | |||||
radix /= BLIST_META_RADIX; | |||||
skip = radix_to_skip(radix); | |||||
digit = last_block / radix; | |||||
i += 1 + digit * skip; | |||||
if (digit != BLIST_META_MASK) { | |||||
/* | |||||
* Add a terminator. | |||||
*/ | |||||
bl->bl_root[i + skip].bm_bighint = (daddr_t)-1; | |||||
bl->bl_root[i + skip].u.bmu_bitmap = 0; | |||||
} | |||||
last_block %= radix; | |||||
} | |||||
#if defined(BLIST_DEBUG) | #if defined(BLIST_DEBUG) | ||||
printf( | printf( | ||||
Context not available. | |||||
{ | { | ||||
daddr_t blk; | daddr_t blk; | ||||
if (count > BLIST_MAX_ALLOC) | |||||
panic("allocation too large"); | |||||
/* | /* | ||||
* This loop iterates at most twice. An allocation failure in the | * This loop iterates at most twice. An allocation failure in the | ||||
* first iteration leads to a second iteration only if the cursor was | * first iteration leads to a second iteration only if the cursor was | ||||
Context not available. | |||||
blk = blst_meta_alloc(bl->bl_root, bl->bl_cursor, count, | blk = blst_meta_alloc(bl->bl_root, bl->bl_cursor, count, | ||||
bl->bl_radix); | bl->bl_radix); | ||||
if (blk != SWAPBLK_NONE) { | if (blk != SWAPBLK_NONE) { | ||||
bl->bl_avail -= count; | |||||
bl->bl_cursor = blk + count; | bl->bl_cursor = blk + count; | ||||
if (bl->bl_cursor == bl->bl_blocks) | if (bl->bl_cursor == bl->bl_blocks) | ||||
bl->bl_cursor = 0; | bl->bl_cursor = 0; | ||||
return (blk); | return (blk); | ||||
} else if (bl->bl_cursor != 0) | } | ||||
bl->bl_cursor = 0; | bl->bl_cursor = 0; | ||||
} | } | ||||
return (SWAPBLK_NONE); | return (SWAPBLK_NONE); | ||||
} | } | ||||
Context not available. | |||||
blist_avail(blist_t bl) | blist_avail(blist_t bl) | ||||
{ | { | ||||
if (bl->bl_radix == BLIST_BMAP_RADIX) | return (bl->bl_avail); | ||||
return (bitcount64(bl->bl_root->u.bmu_bitmap)); | |||||
else | |||||
return (bl->bl_root->u.bmu_avail); | |||||
} | } | ||||
/* | /* | ||||
Context not available. | |||||
blist_free(blist_t bl, daddr_t blkno, daddr_t count) | blist_free(blist_t bl, daddr_t blkno, daddr_t count) | ||||
{ | { | ||||
if (blkno < 0 || blkno + count > bl->bl_blocks) | |||||
panic("freeing invalid range"); | |||||
blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix); | blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix); | ||||
bl->bl_avail += count; | |||||
} | } | ||||
/* | /* | ||||
Context not available. | |||||
daddr_t | daddr_t | ||||
blist_fill(blist_t bl, daddr_t blkno, daddr_t count) | blist_fill(blist_t bl, daddr_t blkno, daddr_t count) | ||||
{ | { | ||||
daddr_t filled; | |||||
return (blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix)); | if (blkno < 0 || blkno + count > bl->bl_blocks) | ||||
panic("filling invalid range"); | |||||
filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix); | |||||
bl->bl_avail -= filled; | |||||
return (filled); | |||||
} | } | ||||
/* | /* | ||||
Context not available. | |||||
void | void | ||||
blist_print(blist_t bl) | blist_print(blist_t bl) | ||||
{ | { | ||||
printf("BLIST cursor = %08jx {\n", (uintmax_t)bl->bl_cursor); | printf("BLIST avail = %jd, cursor = %08jx {\n", | ||||
blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4); | (uintmax_t)bl->bl_avail, (uintmax_t)bl->bl_cursor); | ||||
if (bl->bl_root->bm_bitmap != 0) | |||||
blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4); | |||||
printf("}\n"); | printf("}\n"); | ||||
} | } | ||||
Context not available. | |||||
* Check for skippable subtrees starting at i. | * Check for skippable subtrees starting at i. | ||||
*/ | */ | ||||
while (radix > BLIST_BMAP_RADIX) { | while (radix > BLIST_BMAP_RADIX) { | ||||
if (bl->bl_root[nodes].u.bmu_avail == 0) { | if (bl->bl_root[nodes].bm_bitmap == 0) { | ||||
if (gap_stats_counting(stats)) | if (gap_stats_counting(stats)) | ||||
update_gap_stats(stats, i); | update_gap_stats(stats, i); | ||||
break; | break; | ||||
} | } | ||||
if (bl->bl_root[nodes].u.bmu_avail == radix) { | |||||
if (!gap_stats_counting(stats)) | |||||
update_gap_stats(stats, i); | |||||
break; | |||||
} | |||||
/* | /* | ||||
* Skip subtree root. | * Skip subtree root. | ||||
Context not available. | |||||
/* | /* | ||||
* Scan leaf. | * Scan leaf. | ||||
*/ | */ | ||||
mask = bl->bl_root[nodes].u.bmu_bitmap; | mask = bl->bl_root[nodes].bm_bitmap; | ||||
diff = mask ^ (mask << 1); | diff = mask ^ (mask << 1); | ||||
if (gap_stats_counting(stats)) | if (gap_stats_counting(stats)) | ||||
diff ^= 1; | diff ^= 1; | ||||
Context not available. | |||||
*/ | */ | ||||
/* | /* | ||||
* blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). | * BLST_NEXT_LEAF_ALLOC() - allocate the first few blocks in the next leaf. | ||||
* | |||||
* 'scan' is a leaf node, associated with a block containing 'blk'. The | |||||
* next leaf node could be adjacent, or several nodes away if the least | |||||
* common ancestor of 'scan' and its neighbor is several levels up. Use | |||||
* 'blk' to determine how many meta-nodes lie between the leaves. If the | |||||
* next leaf has enough initial bits set, clear them and clear the bits in | |||||
* the meta nodes on the path up to the least common ancestor to mark any | |||||
* subtrees made completely empty. | |||||
*/ | |||||
static int | |||||
blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) | |||||
{ | |||||
blmeta_t *next; | |||||
daddr_t skip; | |||||
u_daddr_t radix; | |||||
int digit; | |||||
next = scan + 1; | |||||
blk += BLIST_BMAP_RADIX; | |||||
radix = BLIST_BMAP_RADIX; | |||||
while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0 && | |||||
Done Inline ActionsThis continuation line should be indented by 12 character positions. alc: This continuation line should be indented by 12 character positions. | |||||
(next->bm_bitmap & 1) == 1) { | |||||
next++; | |||||
radix *= BLIST_META_RADIX; | |||||
} | |||||
if (((next->bm_bitmap + 1) & ~((u_daddr_t)-1 << count)) != 0) { | |||||
/* | |||||
* The next leaf doesn't have enough free blocks at the | |||||
* beginning to complete the spanning allocation. | |||||
*/ | |||||
return (ENOMEM); | |||||
} | |||||
/* Clear the first 'count' bits in the next leaf to allocate. */ | |||||
next->bm_bitmap &= (u_daddr_t)-1 << count; | |||||
Done Inline ActionsNeeds a period at the end of the sentence. alc: Needs a period at the end of the sentence. | |||||
/* Update bitmaps of next-ancestors, up to least common ancestor. */ | |||||
skip = radix_to_skip(radix); | |||||
while (radix != BLIST_BMAP_RADIX && next->bm_bitmap == 0) { | |||||
(--next)->bm_bitmap ^= 1; | |||||
radix /= BLIST_META_RADIX; | |||||
} | |||||
if (next->bm_bitmap == 0) | |||||
scan[-digit * skip].bm_bitmap ^= (u_daddr_t)1 << digit; | |||||
return (0); | |||||
} | |||||
/* | |||||
* BLST_LEAF_ALLOC() - allocate at a leaf in the radix tree (a bitmap). | |||||
* | * | ||||
* This is the core of the allocator and is optimized for the | * This is the core of the allocator and is optimized for the | ||||
* BLIST_BMAP_RADIX block allocation case. Otherwise, execution | * BLIST_BMAP_RADIX block allocation case. Otherwise, execution | ||||
Context not available. | |||||
range1 = 0; | range1 = 0; | ||||
count1 = count - 1; | count1 = count - 1; | ||||
num_shifts = fls(count1); | num_shifts = fls(count1); | ||||
mask = scan->u.bmu_bitmap; | mask = scan->bm_bitmap; | ||||
while ((-mask & ~mask) != 0 && num_shifts > 0) { | while ((-mask & ~mask) != 0 && num_shifts > 0) { | ||||
/* | /* | ||||
* If bit i is set in mask, then bits in [i, i+range1] are set | * If bit i is set in mask, then bits in [i, i+range1] are set | ||||
* in scan->u.bmu_bitmap. The value of range1 is equal to | * in scan->bm_bitmap. The value of range1 is equal to count1 | ||||
* count1 >> num_shifts. Grow range and reduce num_shifts to 0, | * >> num_shifts. Grow range and reduce num_shifts to 0, while | ||||
* while preserving these invariants. The updates to mask leave | * preserving these invariants. The updates to mask leave fewer | ||||
* fewer bits set, but each bit that remains set represents a | * bits set, but each bit that remains set represents a longer | ||||
* longer string of consecutive bits set in scan->u.bmu_bitmap. | * string of consecutive bits set in scan->bm_bitmap. If more | ||||
* If more updates to mask cannot clear more bits, because mask | * updates to mask cannot clear more bits, because mask is | ||||
* is partitioned with all 0 bits preceding all 1 bits, the loop | * partitioned with all 0 bits preceding all 1 bits, the loop | ||||
* terminates immediately. | * terminates immediately. | ||||
*/ | */ | ||||
num_shifts--; | num_shifts--; | ||||
Context not available. | |||||
if (hi > BLIST_BMAP_RADIX) { | if (hi > BLIST_BMAP_RADIX) { | ||||
/* | /* | ||||
* An allocation within this leaf is impossible, so a successful | * An allocation within this leaf is impossible, so a successful | ||||
* allocation depends on the next leaf providing some of the blocks. | * allocation depends on the next leaf providing some of the | ||||
* blocks. | |||||
*/ | */ | ||||
Done Inline ActionsThe "/*" should not be on the same line as the first sentence. alc: The "/*" should not be on the same line as the first sentence. | |||||
if (((blk / BLIST_BMAP_RADIX + 1) & BLIST_META_MASK) == 0) { | if (blst_next_leaf_alloc(scan, blk, hi - BLIST_BMAP_RADIX) != 0) | ||||
/* | /* | ||||
* The next leaf has a different meta-node parent, so it | * The hint cannot be updated, because the same | ||||
* is not necessarily initialized. Update bighint, | * allocation request could be satisfied later, by this | ||||
* comparing the range found at the end of mask to the | * leaf, if the state of the next leaf changes, and | ||||
* largest earlier range that could have been made to | * without any changes to this leaf. | ||||
* vanish in the initial processing of mask. | |||||
*/ | */ | ||||
scan->bm_bighint = imax(BLIST_BMAP_RADIX - lo, range1); | |||||
return (SWAPBLK_NONE); | return (SWAPBLK_NONE); | ||||
} | |||||
hi -= BLIST_BMAP_RADIX; | |||||
if (((scan[1].u.bmu_bitmap + 1) & ~((u_daddr_t)-1 << hi)) != 0) { | |||||
/* | |||||
* The next leaf doesn't have enough free blocks at the | |||||
* beginning to complete the spanning allocation. The | |||||
* hint cannot be updated, because the same allocation | |||||
* request could be satisfied later, by this leaf, if | |||||
* the state of the next leaf changes, and without any | |||||
* changes to this leaf. | |||||
*/ | |||||
return (SWAPBLK_NONE); | |||||
} | |||||
/* Clear the first 'hi' bits in the next leaf, allocating them. */ | |||||
scan[1].u.bmu_bitmap &= (u_daddr_t)-1 << hi; | |||||
hi = BLIST_BMAP_RADIX; | hi = BLIST_BMAP_RADIX; | ||||
} | } | ||||
Context not available. | |||||
} else { | } else { | ||||
/* Clear the bits of mask at position 'hi' and higher. */ | /* Clear the bits of mask at position 'hi' and higher. */ | ||||
mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi); | mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi); | ||||
/* If this allocation uses all the bits, clear the hint. */ | |||||
if (mask == scan->u.bmu_bitmap) | |||||
scan->bm_bighint = 0; | |||||
} | } | ||||
/* Clear the allocated bits from this leaf. */ | /* Clear the allocated bits from this leaf. */ | ||||
scan->u.bmu_bitmap &= ~mask; | scan->bm_bitmap &= ~mask; | ||||
return ((blk & ~BLIST_BMAP_MASK) + lo); | return ((blk & ~BLIST_BMAP_MASK) + lo); | ||||
} | } | ||||
Context not available. | |||||
static daddr_t | static daddr_t | ||||
blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix) | blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix) | ||||
{ | { | ||||
daddr_t blk, i, next_skip, r, skip; | daddr_t blk, i, r, skip; | ||||
int child; | u_daddr_t bit, mask; | ||||
bool scan_from_start; | bool scan_from_start; | ||||
int digit; | |||||
if (radix == BLIST_BMAP_RADIX) | if (radix == BLIST_BMAP_RADIX) | ||||
return (blst_leaf_alloc(scan, cursor, count)); | return (blst_leaf_alloc(scan, cursor, count)); | ||||
if (scan->u.bmu_avail < count) { | |||||
/* | |||||
* The meta node's hint must be too large if the allocation | |||||
* exceeds the number of free blocks. Reduce the hint, and | |||||
* return failure. | |||||
*/ | |||||
scan->bm_bighint = scan->u.bmu_avail; | |||||
return (SWAPBLK_NONE); | |||||
} | |||||
blk = cursor & -radix; | blk = cursor & -radix; | ||||
scan_from_start = (cursor == blk); | |||||
radix /= BLIST_META_RADIX; | |||||
skip = radix_to_skip(radix); | skip = radix_to_skip(radix); | ||||
next_skip = skip / BLIST_META_RADIX; | mask = scan->bm_bitmap; | ||||
/* Discard any candidates that appear before cursor. */ | |||||
digit = (cursor / radix) & BLIST_META_MASK; | |||||
mask &= (u_daddr_t)-1 << digit; | |||||
/* | /* | ||||
* An ALL-FREE meta node requires special handling before allocating | * If the first try is for a block that includes the cursor, pre-undo | ||||
* any of its blocks. | * the digit * radix offset in the first call; otherwise, ignore the | ||||
* cursor entirely. | |||||
*/ | */ | ||||
if (scan->u.bmu_avail == radix) { | if (((mask >> digit) & 1) == 1) | ||||
radix /= BLIST_META_RADIX; | cursor -= digit * radix; | ||||
else | |||||
Done Inline ActionsNeeds a period at the end of the sentence. alc: Needs a period at the end of the sentence. | |||||
/* | cursor = blk; | ||||
* Reinitialize each of the meta node's children. An ALL-FREE | |||||
* meta node cannot have a terminator in any subtree. | |||||
*/ | |||||
for (i = 1; i < skip; i += next_skip) { | |||||
if (next_skip == 1) | |||||
scan[i].u.bmu_bitmap = (u_daddr_t)-1; | |||||
else | |||||
scan[i].u.bmu_avail = radix; | |||||
scan[i].bm_bighint = radix; | |||||
} | |||||
} else { | |||||
radix /= BLIST_META_RADIX; | |||||
} | |||||
if (count > radix) { | /* Examine the nonempty subtree associated with each bit set in mask. */ | ||||
/* | do { | ||||
* The allocation exceeds the number of blocks that are | bit = mask & -mask; | ||||
* managed by a subtree of this meta node. | digit = bitpos(bit); | ||||
*/ | i = 1 + digit * skip; | ||||
panic("allocation too large"); | |||||
} | |||||
scan_from_start = cursor == blk; | |||||
child = (cursor - blk) / radix; | |||||
blk += child * radix; | |||||
for (i = 1 + child * next_skip; i < skip; i += next_skip) { | |||||
if (count <= scan[i].bm_bighint) { | if (count <= scan[i].bm_bighint) { | ||||
/* | /* | ||||
* The allocation might fit beginning in the i'th subtree. | * The allocation might fit beginning in the i'th | ||||
* subtree. | |||||
*/ | */ | ||||
r = blst_meta_alloc(&scan[i], | r = blst_meta_alloc(&scan[i], cursor + digit * radix, | ||||
cursor > blk ? cursor : blk, count, radix); | count, radix); | ||||
if (r != SWAPBLK_NONE) { | if (r != SWAPBLK_NONE) { | ||||
scan->u.bmu_avail -= count; | if (scan[i].bm_bitmap == 0) | ||||
scan->bm_bitmap ^= bit; | |||||
return (r); | return (r); | ||||
} | } | ||||
} else if (scan[i].bm_bighint == (daddr_t)-1) { | |||||
/* | |||||
* Terminator | |||||
*/ | |||||
break; | |||||
} | } | ||||
blk += radix; | cursor = blk; | ||||
} | } while ((mask ^= bit) != 0); | ||||
/* | /* | ||||
* We couldn't allocate count in this subtree, update bighint. | * We couldn't allocate count in this subtree, update bighint, unless | ||||
* the last block of the subtree remains free. | |||||
*/ | */ | ||||
if (scan_from_start && scan->bm_bighint >= count) | if (scan_from_start && !(digit == BLIST_META_RADIX - 1 && | ||||
scan[i].bm_bighint == BLIST_BMAP_RADIX)) | |||||
alcUnsubmitted Done Inline ActionsElsewhere, you use the spelling BLIST_MAX_ALLOC for this constant. Please pick one spelling. alc: Elsewhere, you use the spelling BLIST_MAX_ALLOC for this constant. Please pick one spelling. | |||||
dougmAuthorUnsubmitted Not Done Inline ActionsBLIST_MAX_ALLOC and BLIST_META_RADIX have the same value after this change, but that does not mean they will always have the same value. A future change could seek to raise BLIST_MAX_ALLOC to 256, perhaps, while leaving BLIST_META_RADIX unchanged. So I disagree with this suggestion. Should I also eliminate the alternate spelling BLIST_BMAP_RADIX for this constant? dougm: BLIST_MAX_ALLOC and BLIST_META_RADIX have the same value after this change, but that does not… | |||||
alcUnsubmitted Done Inline ActionsYou never assign the value BLIST_BMAP_RADIX to bm_bighint, so testing for it is confusing. alc: You never assign the value BLIST_BMAP_RADIX to bm_bighint, so testing for it is confusing. | |||||
scan->bm_bighint = count - 1; | scan->bm_bighint = count - 1; | ||||
return (SWAPBLK_NONE); | return (SWAPBLK_NONE); | ||||
Context not available. | |||||
blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) | blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) | ||||
{ | { | ||||
u_daddr_t mask; | u_daddr_t mask; | ||||
int n; | |||||
/* | /* | ||||
* free some data in this bitmap | * free some data in this bitmap | ||||
Context not available. | |||||
* \_________/\__/ | * \_________/\__/ | ||||
* count n | * count n | ||||
*/ | */ | ||||
n = blk & BLIST_BMAP_MASK; | mask = bitrange(blk & BLIST_BMAP_MASK, count); | ||||
mask = ((u_daddr_t)-1 << n) & | if (scan->bm_bitmap & mask) | ||||
((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); | |||||
if (scan->u.bmu_bitmap & mask) | |||||
panic("freeing free block"); | panic("freeing free block"); | ||||
scan->u.bmu_bitmap |= mask; | scan->bm_bitmap |= mask; | ||||
/* | |||||
* We could probably do a better job here. We are required to make | |||||
* bighint at least as large as the biggest contiguous block of | |||||
* data. If we just shoehorn it, a little extra overhead will | |||||
* be incured on the next allocation (but only that one typically). | |||||
*/ | |||||
scan->bm_bighint = BLIST_BMAP_RADIX; | |||||
} | } | ||||
/* | /* | ||||
Context not available. | |||||
static void | static void | ||||
blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix) | blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix) | ||||
{ | { | ||||
daddr_t blk, i, next_skip, skip, v; | daddr_t blk, endBlk, i, skip; | ||||
int child; | int digit, endDigit; | ||||
if (scan->bm_bighint == (daddr_t)-1) | |||||
panic("freeing invalid range"); | |||||
if (radix == BLIST_BMAP_RADIX) | |||||
return (blst_leaf_free(scan, freeBlk, count)); | |||||
skip = radix_to_skip(radix); | |||||
next_skip = skip / BLIST_META_RADIX; | |||||
if (scan->u.bmu_avail == 0) { | |||||
/* | |||||
* ALL-ALLOCATED special case, with possible | |||||
* shortcut to ALL-FREE special case. | |||||
*/ | |||||
scan->u.bmu_avail = count; | |||||
scan->bm_bighint = count; | |||||
if (count != radix) { | |||||
for (i = 1; i < skip; i += next_skip) { | |||||
if (scan[i].bm_bighint == (daddr_t)-1) | |||||
break; | |||||
scan[i].bm_bighint = 0; | |||||
if (next_skip == 1) { | |||||
scan[i].u.bmu_bitmap = 0; | |||||
} else { | |||||
scan[i].u.bmu_avail = 0; | |||||
} | |||||
} | |||||
/* fall through */ | |||||
} | |||||
} else { | |||||
scan->u.bmu_avail += count; | |||||
/* scan->bm_bighint = radix; */ | |||||
} | |||||
/* | /* | ||||
* ALL-FREE special case. | * We could probably do a better job here. We are required to make | ||||
* bighint at least as large as the biggest allocable block of data. If | |||||
* we just shoehorn it, a little extra overhead will be incurred on the | |||||
* next allocation (but only that one typically). | |||||
*/ | */ | ||||
Done Inline ActionsBecause imin() is an inline function and not a polymorphic macro, it is going to truncate the 64-bit daddr_t values to 32 bits. Please check for similar typing errors elsewhere. This one just immediately jumped out at me. alc: Because imin() is an inline function and not a polymorphic macro, it is going to truncate the… | |||||
Done Inline ActionsI addressed this by changing to ummin. dougm: I addressed this by changing to ummin. | |||||
scan->bm_bighint = BLIST_MAX_ALLOC; | |||||
if (scan->u.bmu_avail == radix) | if (radix == BLIST_BMAP_RADIX) | ||||
return; | return (blst_leaf_free(scan, freeBlk, count)); | ||||
if (scan->u.bmu_avail > radix) | |||||
panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld", | |||||
(long long)count, (long long)scan->u.bmu_avail, | |||||
(long long)radix); | |||||
/* | |||||
* Break the free down into its components | |||||
*/ | |||||
blk = freeBlk & -radix; | endBlk = ummin(freeBlk + count, (freeBlk + radix) & -radix); | ||||
radix /= BLIST_META_RADIX; | radix /= BLIST_META_RADIX; | ||||
skip = radix_to_skip(radix); | |||||
child = (freeBlk - blk) / radix; | blk = freeBlk & -radix; | ||||
blk += child * radix; | digit = (blk / radix) & BLIST_META_MASK; | ||||
i = 1 + child * next_skip; | endDigit = 1 + (((endBlk - 1) / radix) & BLIST_META_MASK); | ||||
while (i < skip && blk < freeBlk + count) { | scan->bm_bitmap |= bitrange(digit, endDigit - digit); | ||||
v = blk + radix - freeBlk; | for (i = 1 + digit * skip; blk < endBlk; i += skip) { | ||||
if (v > count) | |||||
v = count; | |||||
blst_meta_free(&scan[i], freeBlk, v, radix); | |||||
if (scan->bm_bighint < scan[i].bm_bighint) | |||||
scan->bm_bighint = scan[i].bm_bighint; | |||||
count -= v; | |||||
freeBlk += v; | |||||
blk += radix; | blk += radix; | ||||
i += next_skip; | count = ((blk < endBlk) ? blk : endBlk) - freeBlk; | ||||
blst_meta_free(&scan[i], freeBlk, count, radix); | |||||
freeBlk = blk; | |||||
} | } | ||||
} | } | ||||
Done Inline ActionsThere is an extra, unnecessary space after the ','. alc: There is an extra, unnecessary space after the ','. | |||||
/* | /* | ||||
* BLIST_RADIX_COPY() - copy one radix tree to another | * BLST_COPY() - copy one radix tree to another | ||||
* | * | ||||
Done Inline ActionsAren't you using ummin() elsewhere for similar calculations? alc: Aren't you using ummin() elsewhere for similar calculations? | |||||
* Locates free space in the source tree and frees it in the destination | * Locates free space in the source tree and frees it in the destination | ||||
* tree. The space may not already be free in the destination. | * tree. The space may not already be free in the destination. | ||||
Context not available. | |||||
blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest, | blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest, | ||||
daddr_t count) | daddr_t count) | ||||
{ | { | ||||
daddr_t i, next_skip, skip; | daddr_t endBlk, i, skip; | ||||
/* | /* | ||||
* Leaf node | * Leaf node | ||||
*/ | */ | ||||
if (radix == BLIST_BMAP_RADIX) { | if (radix == BLIST_BMAP_RADIX) { | ||||
u_daddr_t v = scan->u.bmu_bitmap; | u_daddr_t v = scan->bm_bitmap; | ||||
if (v == (u_daddr_t)-1) { | if (v == (u_daddr_t)-1) { | ||||
blist_free(dest, blk, count); | blist_free(dest, blk, count); | ||||
} else if (v != 0) { | } else if (v != 0) { | ||||
int i; | int i; | ||||
for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) { | for (i = 0; i < count; ++i) { | ||||
if (v & ((u_daddr_t)1 << i)) | if (v & ((u_daddr_t)1 << i)) | ||||
blist_free(dest, blk + i, 1); | blist_free(dest, blk + i, 1); | ||||
} | } | ||||
Context not available. | |||||
* Meta node | * Meta node | ||||
*/ | */ | ||||
if (scan->u.bmu_avail == 0) { | if (scan->bm_bitmap == 0) { | ||||
/* | /* | ||||
* Source all allocated, leave dest allocated | * Source all allocated, leave dest allocated | ||||
*/ | */ | ||||
return; | return; | ||||
} | } | ||||
if (scan->u.bmu_avail == radix) { | |||||
/* | |||||
* Source all free, free entire dest | |||||
*/ | |||||
if (count < radix) | |||||
blist_free(dest, blk, count); | |||||
else | |||||
blist_free(dest, blk, radix); | |||||
return; | |||||
} | |||||
endBlk = blk + count; | |||||
skip = radix_to_skip(radix); | |||||
next_skip = skip / BLIST_META_RADIX; | |||||
radix /= BLIST_META_RADIX; | radix /= BLIST_META_RADIX; | ||||
skip = radix_to_skip(radix); | |||||
for (i = 1; count && i < skip; i += next_skip) { | for (i = 1; blk < endBlk; i += skip) { | ||||
if (scan[i].bm_bighint == (daddr_t)-1) | |||||
break; | |||||
if (count >= radix) { | |||||
blst_copy(&scan[i], blk, radix, dest, radix); | |||||
count -= radix; | |||||
} else { | |||||
if (count) { | |||||
blst_copy(&scan[i], blk, radix, dest, count); | |||||
} | |||||
count = 0; | |||||
} | |||||
blk += radix; | blk += radix; | ||||
count = radix; | |||||
if (blk >= endBlk) | |||||
count -= blk - endBlk; | |||||
blst_copy(&scan[i], blk - radix, radix, dest, count); | |||||
} | } | ||||
} | } | ||||
Context not available. | |||||
{ | { | ||||
daddr_t nblks; | daddr_t nblks; | ||||
u_daddr_t mask; | u_daddr_t mask; | ||||
int n; | |||||
n = blk & BLIST_BMAP_MASK; | mask = bitrange(blk & BLIST_BMAP_MASK, count); | ||||
mask = ((u_daddr_t)-1 << n) & | |||||
((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); | |||||
/* Count the number of blocks that we are allocating. */ | /* Count the number of blocks that we are allocating. */ | ||||
nblks = bitcount64(scan->u.bmu_bitmap & mask); | nblks = bitcount64(scan->bm_bitmap & mask); | ||||
scan->u.bmu_bitmap &= ~mask; | scan->bm_bitmap &= ~mask; | ||||
return (nblks); | return (nblks); | ||||
} | } | ||||
Context not available. | |||||
static daddr_t | static daddr_t | ||||
blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix) | blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix) | ||||
{ | { | ||||
daddr_t blk, i, nblks, next_skip, skip, v; | daddr_t blk, endBlk, i, nblks, skip; | ||||
int child; | int digit; | ||||
if (scan->bm_bighint == (daddr_t)-1) | |||||
panic("filling invalid range"); | |||||
if (count > radix) { | |||||
/* | |||||
* The allocation exceeds the number of blocks that are | |||||
* managed by this node. | |||||
*/ | |||||
panic("fill too large"); | |||||
} | |||||
if (radix == BLIST_BMAP_RADIX) | if (radix == BLIST_BMAP_RADIX) | ||||
return (blst_leaf_fill(scan, allocBlk, count)); | return (blst_leaf_fill(scan, allocBlk, count)); | ||||
if (count == radix || scan->u.bmu_avail == 0) { | |||||
/* | endBlk = ummin(allocBlk + count, (allocBlk + radix) & -radix); | ||||
* ALL-ALLOCATED special case | radix /= BLIST_META_RADIX; | ||||
*/ | |||||
nblks = scan->u.bmu_avail; | |||||
scan->u.bmu_avail = 0; | |||||
scan->bm_bighint = 0; | |||||
return (nblks); | |||||
} | |||||
skip = radix_to_skip(radix); | skip = radix_to_skip(radix); | ||||
next_skip = skip / BLIST_META_RADIX; | |||||
blk = allocBlk & -radix; | blk = allocBlk & -radix; | ||||
/* | |||||
* An ALL-FREE meta node requires special handling before allocating | |||||
* any of its blocks. | |||||
*/ | |||||
if (scan->u.bmu_avail == radix) { | |||||
radix /= BLIST_META_RADIX; | |||||
/* | |||||
* Reinitialize each of the meta node's children. An ALL-FREE | |||||
* meta node cannot have a terminator in any subtree. | |||||
*/ | |||||
for (i = 1; i < skip; i += next_skip) { | |||||
if (next_skip == 1) | |||||
scan[i].u.bmu_bitmap = (u_daddr_t)-1; | |||||
else | |||||
scan[i].u.bmu_avail = radix; | |||||
scan[i].bm_bighint = radix; | |||||
} | |||||
} else { | |||||
radix /= BLIST_META_RADIX; | |||||
} | |||||
nblks = 0; | nblks = 0; | ||||
child = (allocBlk - blk) / radix; | while (blk < endBlk) { | ||||
blk += child * radix; | digit = (blk / radix) & BLIST_META_MASK; | ||||
i = 1 + child * next_skip; | i = 1 + digit * skip; | ||||
while (i < skip && blk < allocBlk + count) { | |||||
v = blk + radix - allocBlk; | |||||
if (v > count) | |||||
v = count; | |||||
nblks += blst_meta_fill(&scan[i], allocBlk, v, radix); | |||||
count -= v; | |||||
allocBlk += v; | |||||
blk += radix; | blk += radix; | ||||
i += next_skip; | count = ((blk < endBlk) ? blk : endBlk) - allocBlk; | ||||
nblks += blst_meta_fill(&scan[i], allocBlk, count, radix); | |||||
if (scan[i].bm_bitmap == 0) | |||||
scan->bm_bitmap &= ~((u_daddr_t)1 << digit); | |||||
allocBlk = blk; | |||||
} | } | ||||
scan->u.bmu_avail -= nblks; | |||||
return (nblks); | return (nblks); | ||||
} | } | ||||
Done Inline ActionsDitto. alc: Ditto. | |||||
Context not available. | |||||
static void | static void | ||||
blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab) | blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab) | ||||
{ | { | ||||
daddr_t i, next_skip, skip; | daddr_t skip; | ||||
u_daddr_t bit, mask; | |||||
int digit; | |||||
if (radix == BLIST_BMAP_RADIX) { | if (radix == BLIST_BMAP_RADIX) { | ||||
printf( | printf( | ||||
"%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n", | "%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n", | ||||
tab, tab, "", | tab, tab, "", | ||||
(long long)blk, (long long)radix, | (long long)blk, (long long)radix, | ||||
(long long)scan->u.bmu_bitmap, | 1 + (BLIST_BMAP_RADIX - 1) / 4, | ||||
(long long)scan->bm_bitmap, | |||||
(long long)scan->bm_bighint | (long long)scan->bm_bighint | ||||
); | ); | ||||
return; | return; | ||||
} | } | ||||
if (scan->u.bmu_avail == 0) { | |||||
printf( | |||||
"%*.*s(%08llx,%lld) ALL ALLOCATED\n", | |||||
tab, tab, "", | |||||
(long long)blk, | |||||
(long long)radix | |||||
); | |||||
return; | |||||
} | |||||
if (scan->u.bmu_avail == radix) { | |||||
printf( | |||||
"%*.*s(%08llx,%lld) ALL FREE\n", | |||||
tab, tab, "", | |||||
(long long)blk, | |||||
(long long)radix | |||||
); | |||||
return; | |||||
} | |||||
printf( | printf( | ||||
"%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n", | "%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n", | ||||
tab, tab, "", | tab, tab, "", | ||||
(long long)blk, (long long)radix, | (long long)blk, (long long)radix, | ||||
(long long)scan->u.bmu_avail, | |||||
(long long)radix, | (long long)radix, | ||||
1 + (BLIST_META_RADIX - 1) / 4, | |||||
(long long)scan->bm_bitmap, | |||||
(long long)scan->bm_bighint | (long long)scan->bm_bighint | ||||
); | ); | ||||
skip = radix_to_skip(radix); | |||||
next_skip = skip / BLIST_META_RADIX; | |||||
radix /= BLIST_META_RADIX; | radix /= BLIST_META_RADIX; | ||||
skip = radix_to_skip(radix); | |||||
tab += 4; | tab += 4; | ||||
for (i = 1; i < skip; i += next_skip) { | mask = scan->bm_bitmap; | ||||
if (scan[i].bm_bighint == (daddr_t)-1) { | /* Examine the nonempty subtree associated with each bit set in mask */ | ||||
printf( | do { | ||||
"%*.*s(%08llx,%lld): Terminator\n", | bit = mask & -mask; | ||||
tab, tab, "", | digit = bitpos(bit); | ||||
(long long)blk, (long long)radix | blst_radix_print(&scan[1 + digit * skip], blk + digit * radix, | ||||
); | radix, tab); | ||||
break; | } while ((mask ^= bit) != 0); | ||||
} | |||||
blst_radix_print(&scan[i], blk, radix, tab); | |||||
blk += radix; | |||||
} | |||||
tab -= 4; | tab -= 4; | ||||
printf( | printf( | ||||
Context not available. | |||||
int | int | ||||
main(int ac, char **av) | main(int ac, char **av) | ||||
{ | { | ||||
int size = 1024; | int size = BLIST_META_RADIX * BLIST_BMAP_RADIX; | ||||
int i; | int i; | ||||
blist_t bl; | blist_t bl; | ||||
struct sbuf *s; | struct sbuf *s; | ||||
Context not available. |
It would be clearer for this sentence to say, "The non-blocking nature of allocations and frees is required by the swap code."