Index: sys/kern/subr_blist.c =================================================================== --- sys/kern/subr_blist.c +++ sys/kern/subr_blist.c @@ -33,13 +33,13 @@ * SWAPBLK_NONE on an allocation failure. * * A radix tree is used to maintain the bitmap. Two radix constants are - * involved: One for the bitmaps contained in the leaf nodes (typically - * 64), and one for the meta nodes (typically 16). Both meta and leaf - * nodes have a hint field. This field gives us a hint as to the largest - * free contiguous range of blocks under the node. It may contain a - * value that is too high, but will never contain a value that is too - * low. When the radix tree is searched, allocation failures in subtrees - * update the hint. + * involved: One for the bitmaps contained in the leaf nodes (typically + * 64), and one for the meta nodes (typically 16). Each subtree is + * associated with a range of blocks. The root of any subtree stores a + * hint field that defines an upper bound on the size of the largest + * allocation that can begin in the associated block range. A hint is an + * upper bound on a potential allocation, but not necessarily a tight upper + * bound. * * The radix tree also implements two collapsed states for meta nodes: * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is @@ -110,6 +110,7 @@ #define bitcount64(x) __bitcount64((uint64_t)(x)) #define malloc(a,b,c) calloc(a, 1) #define free(a,b) free(a) +#define imax(a,b) (((a)>(b))?(a):(b)) #define CTASSERT(expr) #include @@ -121,8 +122,7 @@ /* * static support functions */ -static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, - daddr_t cursor); +static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count); static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix); static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count); @@ -142,6 +142,8 @@ #ifdef _KERNEL static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); #endif +#define BLIST_BMAP_MASK (BLIST_BMAP_RADIX - 1) +#define BLIST_META_MASK (BLIST_META_RADIX - 1) CTASSERT(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0); @@ -166,7 +168,7 @@ { return (radix / - ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * (BLIST_META_RADIX - 1))); + ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK)); } /* @@ -356,33 +358,17 @@ * time is proportional to log2(count) + log2(BLIST_BMAP_RADIX). */ static daddr_t -blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor) +blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) { u_daddr_t mask; int count1, hi, lo, mid, num_shifts, range1, range_ext; - if (count == BLIST_BMAP_RADIX) { - /* - * Optimize allocation of BLIST_BMAP_RADIX bits. If this wasn't - * a special case, then forming the final value of 'mask' below - * would require special handling to avoid an invalid left shift - * when count equals the number of bits in mask. - */ - if (~scan->u.bmu_bitmap != 0) { - scan->bm_bighint = BLIST_BMAP_RADIX - 1; - return (SWAPBLK_NONE); - } - if (cursor != blk) - return (SWAPBLK_NONE); - scan->u.bmu_bitmap = 0; - scan->bm_bighint = 0; - return (blk); - } + lo = blk & BLIST_BMAP_MASK; range1 = 0; count1 = count - 1; num_shifts = fls(count1); mask = scan->u.bmu_bitmap; - while (mask != 0 && num_shifts > 0) { + while ((-mask & ~mask) != 0 && num_shifts > 0) { /* * If bit i is set in mask, then bits in [i, i+range1] are set * in scan->u.bmu_bitmap. The value of range1 is equal to @@ -390,26 +376,34 @@ * while preserving these invariants. The updates to mask leave * fewer bits set, but each bit that remains set represents a * longer string of consecutive bits set in scan->u.bmu_bitmap. + * If more updates to mask cannot clear more bits, because mask + * is partitioned with all 0 bits preceding all 1 bits, the loop + * terminates immediately. */ num_shifts--; range_ext = range1 + ((count1 >> num_shifts) & 1); - mask &= mask >> range_ext; + /* + * mask is a signed quantity for the shift because when it is + * shifted right, the sign bit should copied; when the last + * block of the leaf is free, pretend, for a while, that all the + * blocks that follow it are also free. + */ + mask &= (daddr_t)mask >> range_ext; range1 += range_ext; } if (mask == 0) { /* * Update bighint. There is no allocation bigger than range1 - * available in this leaf. + * starting in this leaf. */ scan->bm_bighint = range1; return (SWAPBLK_NONE); } /* - * Discard any candidates that appear before the cursor. + * Discard any candidates that appear before blk. */ - lo = cursor - blk; - mask &= ~(u_daddr_t)0 << lo; + mask &= (daddr_t)-1 << lo; if (mask == 0) return (SWAPBLK_NONE); @@ -420,7 +414,8 @@ * and then perform a binary search to find its position. */ mask &= -mask; - hi = BLIST_BMAP_RADIX - count1; + blk -= lo; + hi = BLIST_BMAP_RADIX; while (lo + 1 < hi) { mid = (lo + hi) >> 1; if ((mask >> mid) != 0) @@ -429,11 +424,64 @@ hi = mid; } + hi = lo + count; + if (hi > BLIST_BMAP_RADIX) { + /* + * An allocation within this leaf is impossible, so a successful + * allocation depends on the next leaf providing some of the blocks. + */ + if (((blk / BLIST_BMAP_RADIX + 1) & BLIST_META_MASK) == 0) { + /* + * The next leaf has a different meta-node parent, so it + * is not necessarily initialized. Update bighint, + * comparing the range found at the end of mask to the + * largest earlier range that could have been made to + * vanish in the initial processing of mask. + */ + scan->bm_bighint = imax(BLIST_BMAP_RADIX - lo, range1); + return (SWAPBLK_NONE); + } + hi -= BLIST_BMAP_RADIX; + if (((scan[1].u.bmu_bitmap + 1) & ~((daddr_t)-1 << hi)) != 0) { + /* + * The next leaf doesn't have enough free blocks at the + * beginning to complete the spanning allocation. The + * hint cannot be updated, because same allocation + * request could be satisfied later, by this leaf, if + * the state of the next leaf changes, and without any + * changes to this leaf. + */ + return (SWAPBLK_NONE); + } + /* + * Clear the first 'hi' bits in the next leaf, allocating them. + */ + scan[1].u.bmu_bitmap &= (daddr_t)-1 << hi; + + hi = BLIST_BMAP_RADIX; + } + /* - * Set in mask exactly the bits being allocated, and clear them from - * the set of available bits. + * Set the bits of mask at position 'lo' and higher. + */ + mask = -mask; + + if (hi == BLIST_BMAP_RADIX) { + /* + * Update bighint. There is no allocation bigger than range1 + * available in this leaf after this allocation completes. + */ + scan->bm_bighint = range1; + } else { + /* + * Clear the bits of mask at position 'hi' and higher. + */ + mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi); + } + + /* + * Clear the allocated bits from this leaf. */ - mask = (mask << count) - mask; scan->u.bmu_bitmap &= ~mask; return (blk + lo); } @@ -453,9 +501,8 @@ int child; bool scan_from_start; - blk = cursor & -radix; if (radix == BLIST_BMAP_RADIX) - return (blst_leaf_alloc(scan, blk, count, cursor)); + return (blst_leaf_alloc(scan, cursor, count)); if (scan->u.bmu_avail < count) { /* * The meta node's hint must be too large if the allocation @@ -465,6 +512,7 @@ scan->bm_bighint = scan->u.bmu_avail; return (SWAPBLK_NONE); } + blk = cursor & -radix; skip = radix_to_skip(radix); next_skip = skip / BLIST_META_RADIX; @@ -544,7 +592,7 @@ * \_________/\__/ * v n */ - int n = blk & (BLIST_BMAP_RADIX - 1); + int n = blk & BLIST_BMAP_MASK; u_daddr_t mask; mask = ((u_daddr_t)-1 << n) & @@ -732,7 +780,7 @@ static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) { - int n = blk & (BLIST_BMAP_RADIX - 1); + int n = blk & BLIST_BMAP_MASK; daddr_t nblks; u_daddr_t mask;