Page MenuHomeFreeBSD

D11906.id32888.diff
No OneTemporary

D11906.id32888.diff

Index: sys/kern/subr_blist.c
===================================================================
--- sys/kern/subr_blist.c
+++ sys/kern/subr_blist.c
@@ -90,6 +90,7 @@
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
+#include <sys/sbuf.h>
#include <sys/proc.h>
#include <sys/mutex.h>
@@ -101,6 +102,7 @@
#include <sys/types.h>
#include <sys/malloc.h>
+#include <sys/sbuf.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -144,6 +146,7 @@
#endif
CTASSERT(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0);
+#define BLIST_META_MASK (BLIST_META_RADIX - 1)
/*
* For a subtree that can represent the state of up to 'radix' blocks, the
@@ -166,7 +169,35 @@
{
return (radix /
- ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * (BLIST_META_RADIX - 1)));
+ ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
+}
+
+/*
+ * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
+ * Assumes that the argument has only one bit set.
+ */
+static inline int
+bitpos(u_daddr_t mask)
+{
+ int hi, lo, mid;
+
+ switch (sizeof(mask)) {
+#ifdef HAVE_INLINE_FFSLL
+ case sizeof(long long):
+ return (ffsll(mask) - 1);
+#endif
+ default:
+ lo = 0;
+ hi = BLIST_BMAP_RADIX;
+ while (lo + 1 < hi) {
+ mid = (lo + hi) >> 1;
+ if ((mask >> mid) != 0)
+ lo = mid;
+ else
+ hi = mid;
+ }
+ return (lo);
+ }
}
/*
@@ -340,6 +371,192 @@
#endif
+static const u_daddr_t fib[] = {
+ 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
+ 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
+ 514229, 832040, 1346269, 2178309, 3524578,
+};
+
+/*
+ * Use 'gap' to describe a maximal range of unallocated blocks/bits.
+ */
+struct gap_stats {
+ daddr_t start; /* current gap start, or SWAPBLK_NONE */
+ daddr_t num; /* number of gaps observed */
+ daddr_t max; /* largest gap size */
+ daddr_t avg; /* average gap size */
+ daddr_t err; /* sum - num * avg */
+ daddr_t histo[nitems(fib)]; /* # gaps in each size range */
+ int max_bucket; /* last histo elt with nonzero val */
+};
+
+/*
+ * gap_stats_counting() - is the state 'counting 1 bits'?
+ * or 'skipping 0 bits'?
+ */
+static inline bool
+gap_stats_counting(const struct gap_stats *stats)
+{
+
+ return (stats->start != SWAPBLK_NONE);
+}
+
+/*
+ * init_gap_stats() - initialize stats on gap sizes
+ */
+static inline void
+init_gap_stats(struct gap_stats *stats)
+{
+
+ bzero(stats, sizeof(*stats));
+ stats->start = SWAPBLK_NONE;
+}
+
+/*
+ * update_gap_stats() - update stats on gap sizes
+ */
+static void
+update_gap_stats(struct gap_stats *stats, daddr_t posn)
+{
+ daddr_t size;
+ int hi, lo, mid;
+
+ if (!gap_stats_counting(stats)) {
+ stats->start = posn;
+ return;
+ }
+ size = posn - stats->start;
+ stats->start = SWAPBLK_NONE;
+ if (size > stats->max)
+ stats->max = size;
+
+ /*
+ * Find the fibonacci range that contains size,
+ * expecting to find it in an early range.
+ */
+ lo = 0;
+ hi = 1;
+ while (hi < nitems(fib) && fib[hi] <= size) {
+ lo = hi;
+ hi *= 2;
+ }
+ if (hi >= nitems(fib))
+ hi = nitems(fib);
+ while (lo + 1 != hi) {
+ mid = (lo + hi) >> 1;
+ if (fib[mid] <= size)
+ lo = mid;
+ else
+ hi = mid;
+ }
+ stats->histo[lo]++;
+ if (lo > stats->max_bucket)
+ stats->max_bucket = lo;
+ stats->err += size - stats->avg;
+ stats->num++;
+ stats->avg += stats->err / stats->num;
+ stats->err %= stats->num;
+}
+
+/*
+ * dump_gap_stats() - print stats on gap sizes
+ */
+static inline void
+dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
+{
+ int i;
+
+ sbuf_printf(s, "number of maximal free ranges: %jd\n",
+ (intmax_t)stats->num);
+ sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
+ sbuf_printf(s, "average maximal free range size: %jd\n",
+ (intmax_t)stats->avg);
+ sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
+ sbuf_printf(s, " count | size range\n");
+ sbuf_printf(s, " ----- | ----------\n");
+ for (i = 0; i < stats->max_bucket; i++) {
+ if (stats->histo[i] != 0) {
+ sbuf_printf(s, "%20jd | ",
+ (intmax_t)stats->histo[i]);
+ if (fib[i] != fib[i + 1] - 1)
+ sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
+ (intmax_t)fib[i + 1] - 1);
+ else
+ sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
+ }
+ }
+ sbuf_printf(s, "%20jd | ", (intmax_t)stats->histo[i]);
+ if (stats->histo[i] > 1)
+ sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
+ (intmax_t)stats->max);
+ else
+ sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
+}
+
+/*
+ * blist_stats() - dump radix tree stats
+ */
+void
+blist_stats(blist_t bl, struct sbuf *s)
+{
+ struct gap_stats gstats;
+ struct gap_stats *stats = &gstats;
+ daddr_t i, nodes, radix;
+ u_daddr_t bit, diff, mask;
+
+ init_gap_stats(stats);
+ nodes = 0;
+ i = bl->bl_radix;
+ while (i < bl->bl_radix + bl->bl_blocks) {
+ /*
+ * Find max size subtree starting at i.
+ */
+ radix = BLIST_BMAP_RADIX;
+ while (((i / radix) & BLIST_META_MASK) == 0)
+ radix *= BLIST_META_RADIX;
+
+ /*
+ * Check for skippable subtrees starting at i.
+ */
+ while (radix > BLIST_BMAP_RADIX) {
+ if (bl->bl_root[nodes].u.bmu_avail == 0) {
+ if (gap_stats_counting(stats))
+ update_gap_stats(stats, i);
+ break;
+ }
+ if (bl->bl_root[nodes].u.bmu_avail == radix) {
+ if (!gap_stats_counting(stats))
+ update_gap_stats(stats, i);
+ break;
+ }
+
+ /*
+ * Skip subtree root.
+ */
+ nodes++;
+ radix /= BLIST_META_RADIX;
+ }
+ if (radix == BLIST_BMAP_RADIX) {
+ /*
+ * Scan leaf.
+ */
+ mask = bl->bl_root[nodes].u.bmu_bitmap;
+ diff = mask ^ (mask << 1);
+ if (gap_stats_counting(stats))
+ diff ^= 1;
+ while (diff != 0) {
+ bit = diff & -diff;
+ update_gap_stats(stats, i + bitpos(bit));
+ diff ^= bit;
+ }
+ }
+ nodes += radix_to_skip(radix);
+ i += radix;
+ }
+ update_gap_stats(stats, i);
+ dump_gap_stats(stats, s);
+}
+
/************************************************************************
* ALLOCATION SUPPORT FUNCTIONS *
************************************************************************
@@ -355,13 +572,13 @@
*
* This is the core of the allocator and is optimized for the
* BLIST_BMAP_RADIX block allocation case. Otherwise, execution
- * time is proportional to log2(count) + log2(BLIST_BMAP_RADIX).
+ * time is proportional to log2(count) + bitpos time.
*/
static daddr_t
blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor)
{
u_daddr_t mask;
- int count1, hi, lo, mid, num_shifts, range1, range_ext;
+ int count1, lo, num_shifts, range1, range_ext;
if (count == BLIST_BMAP_RADIX) {
/*
@@ -419,17 +636,10 @@
/*
* The least significant set bit in mask marks the start of the first
* available range of sufficient size. Clear all the bits but that one,
- * and then perform a binary search to find its position.
+ * and then find its position.
*/
mask &= -mask;
- hi = BLIST_BMAP_RADIX - count1;
- while (lo + 1 < hi) {
- mid = (lo + hi) >> 1;
- if ((mask >> mid) != 0)
- lo = mid;
- else
- hi = mid;
- }
+ lo = bitpos(mask);
/*
* Set in mask exactly the bits being allocated, and clear them from
@@ -980,6 +1190,7 @@
int size = 1024;
int i;
blist_t bl;
+ struct sbuf *s;
for (i = 1; i < ac; ++i) {
const char *ptr = av[i];
@@ -1014,6 +1225,13 @@
case 'p':
blist_print(bl);
break;
+ case 's':
+ s = sbuf_new_auto();
+ blist_stats(bl, s);
+ sbuf_finish(s);
+ printf("%s", sbuf_data(s));
+ sbuf_delete(s);
+ break;
case 'a':
if (sscanf(buf + 1, "%lld", &count) == 1) {
daddr_t blk = blist_alloc(bl, count);
@@ -1041,6 +1259,7 @@
case 'h':
puts(
"p -print\n"
+ "s -stats\n"
"a %d -allocate\n"
"f %x %d -free\n"
"l %x %d -fill\n"
Index: sys/sys/blist.h
===================================================================
--- sys/sys/blist.h
+++ sys/sys/blist.h
@@ -90,6 +90,8 @@
#define BLIST_MAX_ALLOC BLIST_BMAP_RADIX
+struct sbuf;
+
daddr_t blist_alloc(blist_t blist, daddr_t count);
daddr_t blist_avail(blist_t blist);
blist_t blist_create(daddr_t blocks, int flags);
@@ -98,6 +100,7 @@
void blist_free(blist_t blist, daddr_t blkno, daddr_t count);
void blist_print(blist_t blist);
void blist_resize(blist_t *pblist, daddr_t count, int freenew, int flags);
+void blist_stats(blist_t blist, struct sbuf *s);
#endif /* _SYS_BLIST_H_ */
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -92,6 +92,7 @@
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
+#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/blist.h>
@@ -323,6 +324,10 @@
SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW |
CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I",
"Maximum running async swap ops");
+static int sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vm, OID_AUTO, swap_fragmentation, CTLTYPE_STRING | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_fragmentation, "A",
+ "Swap Fragmentation Info");
static struct sx sw_alloc_sx;
@@ -780,6 +785,36 @@
}
/*
+ * SYSCTL_SWAP_FRAGMENTATION() - produce raw swap space stats
+ */
+static int
+sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS)
+{
+ struct sbuf sbuf;
+ struct swdevt *sp;
+ const char *devname;
+ int error;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+ sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
+ mtx_lock(&sw_dev_mtx);
+ TAILQ_FOREACH(sp, &swtailq, sw_list) {
+ if (vn_isdisk(sp->sw_vp, NULL))
+ devname = devtoname(sp->sw_vp->v_rdev);
+ else
+ devname = "[file]";
+ sbuf_printf(&sbuf, "\nFree space on device %s:\n", devname);
+ blist_stats(sp->sw_blist, &sbuf);
+ }
+ mtx_unlock(&sw_dev_mtx);
+ error = sbuf_finish(&sbuf);
+ sbuf_delete(&sbuf);
+ return (error);
+}
+
+/*
* SWAP_PAGER_FREESPACE() - frees swap blocks associated with a page
* range within an object.
*

File Metadata

Mime Type
text/plain
Expires
Thu, Nov 6, 10:41 PM (11 h, 13 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
24962208
Default Alt Text
D11906.id32888.diff (9 KB)

Event Timeline