Index: sys/kern/subr_blist.c
===================================================================
--- sys/kern/subr_blist.c
+++ sys/kern/subr_blist.c
@@ -90,6 +90,7 @@
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/blist.h>
 #include <sys/malloc.h>
+#include <sys/sbuf.h>
 #include <sys/proc.h>
 #include <sys/mutex.h>
@@ -101,6 +102,7 @@
 #include <sys/types.h>
 #include <sys/malloc.h>
+#include <sys/sbuf.h>
 #include <stdio.h>
 #include <string.h>
 #include <stddef.h>
 #include <stdlib.h>
@@ -144,6 +146,7 @@
 #endif
 
 CTASSERT(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0);
+#define BLIST_META_MASK (BLIST_META_RADIX - 1)
 
 /*
  * For a subtree that can represent the state of up to 'radix' blocks, the
@@ -166,7 +169,35 @@
 {
 
 	return (radix /
-	    ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * (BLIST_META_RADIX - 1)));
+	    ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
+}
+
+/*
+ * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
+ * Assumes that the argument has only one bit set.
+ */
+static inline int
+bitpos(u_daddr_t mask)
+{
+	int hi, lo, mid;
+
+	switch (sizeof(u_daddr_t)) {
+#ifdef HAVE_INLINE_FFSLL
+	case sizeof(long long):
+		return (ffsll(mask) - 1);
+#endif
+	default:
+		lo = 0;
+		hi = BLIST_BMAP_RADIX;
+		while (lo + 1 < hi) {
+			mid = (lo + hi) >> 1;
+			if ((mask >> mid) != 0)
+				lo = mid;
+			else
+				hi = mid;
+		}
+		return (lo);
+	}
 }
 
 /*
@@ -340,6 +371,189 @@
 }
 #endif
 
+static const u_daddr_t fib[] =
+{1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597,
+    2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418,
+    317811, 514229, 832040, 1346269, 2178309, 3524578, 5702887, 9227465,
+    14930352, 24157817, 39088169, 63245986, 102334155, 165580141, 267914296,
+    433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976,
+    7778742049, 12586269025, 20365011074, 32951280099, 53316291173,
+    86267571272, 139583862445, 225851433717, 365435296162, 591286729879,
+    956722026041, 1548008755920, 2504730781961, 4052739537881, 6557470319842,
+    10610209857723, 17167680177565, 27777890035288, 44945570212853,
+    72723460248141, 117669030460994, 190392490709135, 308061521170129,
+    498454011879264, 806515533049393, 1304969544928657, 2111485077978050,
+    3416454622906707, 5527939700884757, 8944394323791464, 14472334024676221,
+    23416728348467685, 37889062373143906, 61305790721611591,
+    99194853094755497, 160500643816367088, 259695496911122585,
+    420196140727489673, 679891637638612258, 1100087778366101931,
+    1779979416004714189, 2880067194370816120, 4660046610375530309,
+    7540113804746346429,
+};
+
+struct gap_stats {
+	daddr_t	start;		/* current gap start, or SWAPBLK_NONE */
+	daddr_t	num;		/* number of gaps observed */
+	daddr_t	max;		/* largest gap size */
+	daddr_t	avg;		/* average gap size */
+	daddr_t	err;		/* sum - num * avg */
+	daddr_t	histo[nitems(fib)]; /* # gaps in each size range */
+	int	max_bucket;	/* last histo elt with nonzero val */
+};
+
+/*
+ * gap_stats_counting()    - is the state 'counting 1 bits'?
+ *                           or 'skipping 0 bits'?
+ */
+static inline int
+gap_stats_counting(const struct gap_stats *stats)
+{
+
+	return (stats->start != SWAPBLK_NONE);
+}
+
+/*
+ * init_gap_stats()    - initialize stats on gap sizes
+ */
+static inline void
+init_gap_stats(struct gap_stats *stats)
+{
+
+	bzero(stats, sizeof(*stats));
+	stats->start = SWAPBLK_NONE;
+}
+
+/*
+ * update_gap_stats()    - update stats on gap sizes
+ */
+static void
+update_gap_stats(struct gap_stats *stats, u_daddr_t posn)
+{
+	u_daddr_t size;
+	int hi, lo, mid;
+
+	if (!gap_stats_counting(stats)) {
+		stats->start = posn;
+		return;
+	}
+	size = posn - stats->start;
+	stats->start = SWAPBLK_NONE;
+	if (size > stats->max)
+		stats->max = size;
+
+	/*
+	 * Find the fibonacci range that contains size,
+	 * expecting to find it in an early range.
+	 */
+	lo = 0;
+	hi = 1;
+	while (hi < nitems(fib) && fib[hi] <= size) {
+		lo = hi;
+		hi *= 2;
+	}
+	if (hi >= nitems(fib))
+		hi = nitems(fib);
+	while (lo + 1 != hi) {
+		mid = (lo + hi) >> 1;
+		if (fib[mid] <= size)
+			lo = mid;
+		else
+			hi = mid;
+	}
+	stats->histo[lo]++;
+	if (lo > stats->max_bucket)
+		stats->max_bucket = lo;
+	stats->err += (daddr_t)size - stats->avg;
+	stats->num++;
+	stats->avg += stats->err / stats->num;
+	stats->err %= stats->num;
+}
+
+/*
+ * dump_gap_stats()    - print stats on gap sizes
+ */
+static inline void
+dump_gap_stats(struct sbuf *s, const struct gap_stats *stats)
+{
+	int i;
+
+	sbuf_printf(s, "number of gaps: %jd\n", (intmax_t)stats->num);
+	sbuf_printf(s, "largest gap: %jd\n", (intmax_t)stats->max);
+	sbuf_printf(s, "average gap size: %jd\n", (intmax_t)stats->avg);
+	sbuf_printf(s, "number of gaps of different size ranges:\n");
+	sbuf_printf(s, "               count | size range\n");
+	for (i = 0; i < stats->max_bucket; i++) {
+		if (stats->histo[i] != 0)
+			sbuf_printf(s, "%20jd | %jd to %jd\n",
+			    (intmax_t)stats->histo[i],
+			    (intmax_t)fib[i], (intmax_t)fib[i + 1] - 1);
+	}
+	sbuf_printf(s, "%20jd | %jd to %jd\n", (intmax_t)stats->histo[i],
+	    (intmax_t)fib[i], (intmax_t)stats->max);
+}
+
+/*
+ * blist_stats_compute()    - dump radix tree stats
+ */
+void
+blist_stats_compute(struct sbuf *s, blist_t bl)
+{
+	daddr_t i, nodes, radix;
+	u_daddr_t bit, diff, mask;
+	struct gap_stats gstats;
+	struct gap_stats *stats = &gstats;
+
+	init_gap_stats(stats);
+	nodes = 0;
+	i = bl->bl_radix;
+	while (i < bl->bl_radix + bl->bl_blocks) {
+		/*
+		 * Find max size subtree starting at i
+		 */
+		radix = BLIST_BMAP_RADIX;
+		while (((i / radix) & BLIST_META_MASK) == 0)
+			radix *= BLIST_META_RADIX;
+
+		/*
+		 * Check for skippable subtrees starting at i
+		 */
+		while (radix > BLIST_BMAP_RADIX) {
+			if (bl->bl_root[nodes].u.bmu_avail == 0) {
+				if (gap_stats_counting(stats))
+					update_gap_stats(stats, i);
+				break;
+			}
+			if (bl->bl_root[nodes].u.bmu_avail == radix) {
+				if (!gap_stats_counting(stats))
+					update_gap_stats(stats, i);
+				break;
+			}
+
+			/*
+			 * Skip subtree root.
+			 */
+			nodes++;
+			radix /= BLIST_META_RADIX;
+		}
+		if (radix == BLIST_BMAP_RADIX) {
+			/*
+			 * Scan leaf.
+			 */
+			mask = bl->bl_root[nodes].u.bmu_bitmap;
+			diff = mask ^ (mask << 1);
+			diff ^= gap_stats_counting(stats);
+			while (diff != 0) {
+				bit = diff & -diff;
+				update_gap_stats(stats, i + bitpos(bit));
+				diff ^= bit;
+			}
+		}
+		nodes += radix_to_skip(radix);
+		i += radix;
+	}
+	update_gap_stats(stats, i);
+	dump_gap_stats(s, stats);
+}
+
 /************************************************************************
  *			ALLOCATION SUPPORT FUNCTIONS			*
  ************************************************************************
@@ -355,13 +569,13 @@
  *
  *	This is the core of the allocator and is optimized for the
  *	BLIST_BMAP_RADIX block allocation case.  Otherwise, execution
- *	time is proportional to log2(count) + log2(BLIST_BMAP_RADIX).
+ *	time is proportional to log2(count) + bitpos time.
  */
 static daddr_t
 blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor)
 {
 	u_daddr_t mask;
-	int count1, hi, lo, mid, num_shifts, range1, range_ext;
+	int count1, lo, num_shifts, range1, range_ext;
 
 	if (count == BLIST_BMAP_RADIX) {
 		/*
@@ -419,17 +633,10 @@
 	/*
 	 * The least significant set bit in mask marks the start of the first
	 * available range of sufficient size.  Clear all the bits but that one,
-	 * and then perform a binary search to find its position.
+	 * and then find its position.
 	 */
 	mask &= -mask;
-	hi = BLIST_BMAP_RADIX - count1;
-	while (lo + 1 < hi) {
-		mid = (lo + hi) >> 1;
-		if ((mask >> mid) != 0)
-			lo = mid;
-		else
-			hi = mid;
-	}
+	lo = bitpos(mask);
 
 	/*
 	 * Set in mask exactly the bits being allocated, and clear them from
@@ -980,6 +1187,7 @@
 	int size = 1024;
 	int i;
 	blist_t bl;
+	struct sbuf *s;
 
 	for (i = 1; i < ac; ++i) {
 		const char *ptr = av[i];
@@ -1014,6 +1222,13 @@
 		case 'p':
 			blist_print(bl);
 			break;
+		case 's':
+			s = sbuf_new_auto();
+			blist_stats_compute(s, bl);
+			sbuf_finish(s);
+			printf("%s", sbuf_data(s));
+			sbuf_delete(s);
+			break;
 		case 'a':
 			if (sscanf(buf + 1, "%lld", &count) == 1) {
 				daddr_t blk = blist_alloc(bl, count);
@@ -1041,6 +1256,7 @@
 		case 'h':
 			puts(
 			    "p    -print\n"
+			    "s    -stats\n"
 			    "a %d -allocate\n"
 			    "f %x %d -free\n"
 			    "l %x %d -fill\n"
Index: sys/sys/blist.h
===================================================================
--- sys/sys/blist.h
+++ sys/sys/blist.h
@@ -90,6 +90,7 @@
 
 #define BLIST_MAX_ALLOC		BLIST_BMAP_RADIX
 
+struct sbuf;
 daddr_t	blist_alloc(blist_t blist, daddr_t count);
 daddr_t	blist_avail(blist_t blist);
 blist_t	blist_create(daddr_t blocks, int flags);
@@ -98,6 +99,7 @@
 void	blist_free(blist_t blist, daddr_t blkno, daddr_t count);
 void	blist_print(blist_t blist);
 void	blist_resize(blist_t *pblist, daddr_t count, int freenew, int flags);
+void	blist_stats_compute(struct sbuf *s, blist_t bl);
 
 #endif	/* _SYS_BLIST_H_ */
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -92,6 +92,7 @@
 #include <sys/racct.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
+#include <sys/sbuf.h>
 #include <sys/rwlock.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
@@ -323,6 +324,9 @@
 SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW |
     CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I",
     "Maximum running async swap ops");
+static int sysctl_swap_pager_dumpstats(SYSCTL_HANDLER_ARGS);
+SYSCTL_OID(_vm, OID_AUTO, swap_pager_dumpstats, CTLTYPE_STRING | CTLFLAG_RD,
+    NULL, 0, sysctl_swap_pager_dumpstats, "A", "Swap Pager Info");
 
 static struct sx sw_alloc_sx;
 
@@ -780,6 +784,37 @@
 }
 
 /*
+ * SYSCTL_SWAP_PAGER_DUMPSTATS() -	produce raw swap space stats
+ */
+static int
+sysctl_swap_pager_dumpstats(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf sbuf;
+	const char *devname;
+	struct swdevt *sp;
+	int error;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
+
+	mtx_lock(&sw_dev_mtx);
+	TAILQ_FOREACH(sp, &swtailq, sw_list) {
+		if (vn_isdisk(sp->sw_vp, NULL))
+			devname = devtoname(sp->sw_vp->v_rdev);
+		else
+			devname = "[file]";
+		sbuf_printf(&sbuf, "\nFree gaps for device %s:\n", devname);
+		blist_stats_compute(&sbuf, sp->sw_blist);
+	}
+	mtx_unlock(&sw_dev_mtx);
+	error = sbuf_finish(&sbuf);
+	sbuf_delete(&sbuf);
+	return (error);
+}
+
+/*
  * SWAP_PAGER_FREESPACE() -	frees swap blocks associated with a page
  *				range within an object.
  *