diff --git a/sys/compat/linuxkpi/common/include/linux/rbtree.h b/sys/compat/linuxkpi/common/include/linux/rbtree.h
index 1db9b1d4fa21..1f337d59545c 100644
--- a/sys/compat/linuxkpi/common/include/linux/rbtree.h
+++ b/sys/compat/linuxkpi/common/include/linux/rbtree.h
@@ -1,179 +1,179 @@
/*-
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2010 iX Systems, Inc.
 * Copyright (c) 2010 Panasas, Inc.
 * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _LINUXKPI_LINUX_RBTREE_H_
#define _LINUXKPI_LINUX_RBTREE_H_

#ifndef _STANDALONE
#include
#endif

#include
#include

struct rb_node {
    RB_ENTRY(rb_node)   __entry;
};
-#define rb_left     __entry.rbe_left
-#define rb_right    __entry.rbe_right
+#define rb_left     __entry.rbe_link[_RB_L]
+#define rb_right    __entry.rbe_link[_RB_R]

/*
 * We provide a false structure that has the same bit pattern as tree.h
 * presents so it matches the member names expected by linux.
 */
struct rb_root {
    struct rb_node *rb_node;
};

struct rb_root_cached {
    struct rb_root rb_root;
    struct rb_node *rb_leftmost;
};

/*
 * In linux all of the comparisons are done by the caller.
 */
int panic_cmp(struct rb_node *one, struct rb_node *two);

RB_HEAD(linux_root, rb_node);
RB_PROTOTYPE(linux_root, rb_node, __entry, panic_cmp);

#define rb_parent(r)    RB_PARENT(r, __entry)
#define rb_entry(ptr, type, member) container_of(ptr, type, member)
#define rb_entry_safe(ptr, type, member) \
    ((ptr) != NULL ? rb_entry(ptr, type, member) : NULL)

#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
#define RB_EMPTY_NODE(node) (RB_PARENT(node, __entry) == node)
#define RB_CLEAR_NODE(node) RB_SET_PARENT(node, node, __entry)

#define rb_insert_color(node, root) \
    linux_root_RB_INSERT_COLOR((struct linux_root *)(root), (node))
#define rb_erase(node, root) \
    linux_root_RB_REMOVE((struct linux_root *)(root), (node))
#define rb_next(node)   RB_NEXT(linux_root, NULL, (node))
#define rb_prev(node)   RB_PREV(linux_root, NULL, (node))
#define rb_first(root)  RB_MIN(linux_root, (struct linux_root *)(root))
#define rb_last(root)   RB_MAX(linux_root, (struct linux_root *)(root))
#define rb_first_cached(root)   (root)->rb_leftmost

static inline struct rb_node *
__rb_deepest_left(struct rb_node *node)
{
    struct rb_node *parent = NULL;

    while (node != NULL) {
        parent = node;
        if (RB_LEFT(node, __entry))
            node = RB_LEFT(node, __entry);
        else
            node = RB_RIGHT(node, __entry);
    }
    return (parent);
}

static inline struct rb_node *
rb_next_postorder(const struct rb_node *node)
{
    struct rb_node *parent =
        RB_PARENT(__DECONST(struct rb_node *, node), __entry);

    /* left -> right, right -> root */
    if (parent != NULL && (node == RB_LEFT(parent, __entry)) &&
        (RB_RIGHT(parent, __entry)))
        return (__rb_deepest_left(RB_RIGHT(parent, __entry)));
    else
        return (parent);
}

#define rbtree_postorder_for_each_entry_safe(x, y, head, member) \
    for ((x) = rb_entry_safe(__rb_deepest_left((head)->rb_node), \
        __typeof(*x), member); \
        ((x) != NULL) && ((y) = \
        rb_entry_safe(rb_next_postorder(&x->member), typeof(*x), member), 1); \
        (x) = (y))

static inline void
rb_link_node(struct rb_node *node, struct rb_node *parent,
    struct rb_node **rb_link)
{
    RB_SET(node, parent, __entry);
    *rb_link = node;
}

static inline void
rb_replace_node(struct rb_node *victim, struct rb_node *new,
    struct rb_root *root)
{
    RB_SWAP_CHILD((struct linux_root *)root, rb_parent(victim),
        victim, new, __entry);
-   if (victim->rb_left)
-       RB_SET_PARENT(victim->rb_left, new, __entry);
-   if (victim->rb_right)
-       RB_SET_PARENT(victim->rb_right, new, __entry);
+   if (RB_LEFT(victim, __entry))
+       RB_SET_PARENT(RB_LEFT(victim, __entry), new, __entry);
+   if (RB_RIGHT(victim, __entry))
+       RB_SET_PARENT(RB_RIGHT(victim, __entry), new, __entry);
    *new = *victim;
}

static inline void
rb_insert_color_cached(struct rb_node *node, struct rb_root_cached *root,
    bool leftmost)
{
    linux_root_RB_INSERT_COLOR((struct linux_root *)&root->rb_root, node);
    if (leftmost)
        root->rb_leftmost = node;
}

static inline struct rb_node *
rb_erase_cached(struct rb_node *node, struct rb_root_cached *root)
{
    struct rb_node *retval;

    if (node == root->rb_leftmost)
        retval = root->rb_leftmost = linux_root_RB_NEXT(node);
    else
        retval = NULL;
    linux_root_RB_REMOVE((struct linux_root *)&root->rb_root, node);
    return (retval);
}

static inline void
rb_replace_node_cached(struct rb_node *old, struct rb_node *new,
    struct rb_root_cached *root)
{
    rb_replace_node(old, new, &root->rb_root);
    if (root->rb_leftmost == old)
        root->rb_leftmost = new;
}

#undef RB_ROOT
#define RB_ROOT (struct rb_root) { NULL }
#define RB_ROOT_CACHED (struct rb_root_cached) { RB_ROOT, NULL }

#endif /* _LINUXKPI_LINUX_RBTREE_H_ */
diff --git a/sys/kern/subr_stats.c b/sys/kern/subr_stats.c
index 0984d2a21014..ff8ddbac4d81 100644
--- a/sys/kern/subr_stats.c
+++ b/sys/kern/subr_stats.c
@@ -1,3958 +1,3958 @@
/*-
 * Copyright (c) 2014-2018 Netflix, Inc.
 * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Author: Lawrence Stewart */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #if defined(DIAGNOSTIC) #include #endif #include /* Must come after qmath.h and arb.h */ #include #include #include #ifdef _KERNEL #include #include #include #include #include #else /* ! _KERNEL */ #include #include #include #include #include #endif /* _KERNEL */ struct voistatdata_voistate { /* Previous VOI value for diff calculation. */ struct voistatdata_numeric prev; }; #define VS_VSDVALID 0x0001 /* Stat's voistatdata updated at least once. */ struct voistat { int8_t stype; /* Type of stat e.g. VS_STYPE_SUM. */ enum vsd_dtype dtype : 8; /* Data type of this stat's data. */ uint16_t data_off; /* Blob offset for this stat's data. */ uint16_t dsz; /* Size of stat's data. */ #define VS_EBITS 8 uint16_t errs : VS_EBITS;/* Non-wrapping error count. */ uint16_t flags : 16 - VS_EBITS; }; /* The voistat error count is capped to avoid wrapping. */ #define VS_INCERRS(vs) do { \ if ((vs)->errs < (1U << VS_EBITS) - 1) \ (vs)->errs++; \ } while (0) /* * Ideas for flags: * - Global or entity specific (global would imply use of counter(9)?) * - Whether to reset stats on read or not * - Signal an overflow? * - Compressed voistat array */ #define VOI_REQSTATE 0x0001 /* VOI requires VS_STYPE_VOISTATE. */ struct voi { int16_t id; /* VOI id. */ enum vsd_dtype dtype : 8; /* Data type of the VOI itself. */ int8_t voistatmaxid; /* Largest allocated voistat index. */ uint16_t stats_off; /* Blob offset for this VOIs stats. */ uint16_t flags; }; /* * Memory for the entire blob is allocated as a slab and then offsets are * maintained to carve up the slab into sections holding different data types. * * Ideas for flags: * - Compressed voi array (trade off memory usage vs search time) * - Units of offsets (default bytes, flag for e.g. vm_page/KiB/Mib) */ struct statsblobv1 { uint8_t abi; uint8_t endian; uint16_t flags; uint16_t maxsz; uint16_t cursz; /* Fields from here down are opaque to consumers. */ uint32_t tplhash; /* Base template hash ID. */ uint16_t stats_off; /* voistat array blob offset. */ uint16_t statsdata_off; /* voistatdata array blob offset. */ sbintime_t created; /* Blob creation time. 
*/ sbintime_t lastrst; /* Time of last reset. */ struct voi vois[]; /* Array indexed by [voi_id]. */ } __aligned(sizeof(void *)); _Static_assert(offsetof(struct statsblobv1, cursz) + SIZEOF_MEMBER(struct statsblobv1, cursz) == offsetof(struct statsblob, opaque), "statsblobv1 ABI mismatch"); struct statsblobv1_tpl { struct metablob *mb; struct statsblobv1 *sb; }; /* Context passed to iterator callbacks. */ struct sb_iter_ctx { void *usrctx; /* Caller supplied context. */ uint32_t flags; /* Flags for current iteration. */ int16_t vslot; /* struct voi slot index. */ int8_t vsslot; /* struct voistat slot index. */ }; struct sb_tostrcb_ctx { struct sbuf *buf; struct statsblob_tpl *tpl; enum sb_str_fmt fmt; uint32_t flags; }; struct sb_visitcb_ctx { stats_blob_visitcb_t cb; void *usrctx; }; /* Stats blob iterator callback. */ typedef int (*stats_v1_blob_itercb_t)(struct statsblobv1 *sb, struct voi *v, struct voistat *vs, struct sb_iter_ctx *ctx); #ifdef _KERNEL static struct rwlock tpllistlock; RW_SYSINIT(stats_tpl_list, &tpllistlock, "Stat template list lock"); #define TPL_LIST_RLOCK() rw_rlock(&tpllistlock) #define TPL_LIST_RUNLOCK() rw_runlock(&tpllistlock) #define TPL_LIST_WLOCK() rw_wlock(&tpllistlock) #define TPL_LIST_WUNLOCK() rw_wunlock(&tpllistlock) #define TPL_LIST_LOCK_ASSERT() rw_assert(&tpllistlock, RA_LOCKED) #define TPL_LIST_RLOCK_ASSERT() rw_assert(&tpllistlock, RA_RLOCKED) #define TPL_LIST_WLOCK_ASSERT() rw_assert(&tpllistlock, RA_WLOCKED) MALLOC_DEFINE(M_STATS, "stats(9) related memory", "stats(9) related memory"); #define stats_free(ptr) free((ptr), M_STATS) #else /* ! _KERNEL */ static void stats_constructor(void); static void stats_destructor(void); static pthread_rwlock_t tpllistlock; #define TPL_LIST_UNLOCK() pthread_rwlock_unlock(&tpllistlock) #define TPL_LIST_RLOCK() pthread_rwlock_rdlock(&tpllistlock) #define TPL_LIST_RUNLOCK() TPL_LIST_UNLOCK() #define TPL_LIST_WLOCK() pthread_rwlock_wrlock(&tpllistlock) #define TPL_LIST_WUNLOCK() TPL_LIST_UNLOCK() #define TPL_LIST_LOCK_ASSERT() do { } while (0) #define TPL_LIST_RLOCK_ASSERT() do { } while (0) #define TPL_LIST_WLOCK_ASSERT() do { } while (0) #ifdef NDEBUG #define KASSERT(cond, msg) do {} while (0) #define stats_abort() do {} while (0) #else /* ! NDEBUG */ #define KASSERT(cond, msg) do { \ if (!(cond)) { \ panic msg; \ } \ } while (0) #define stats_abort() abort() #endif /* NDEBUG */ #define stats_free(ptr) free(ptr) #define panic(fmt, ...) do { \ fprintf(stderr, (fmt), ##__VA_ARGS__); \ stats_abort(); \ } while (0) #endif /* _KERNEL */ #define SB_V1_MAXSZ 65535 /* Obtain a blob offset pointer. */ #define BLOB_OFFSET(sb, off) ((void *)(((uint8_t *)(sb)) + (off))) /* * Number of VOIs in the blob's vois[] array. By virtue of struct voi being a * power of 2 size, we can shift instead of divide. The shift amount must be * updated if sizeof(struct voi) ever changes, which the assert should catch. */ #define NVOIS(sb) ((int32_t)((((struct statsblobv1 *)(sb))->stats_off - \ sizeof(struct statsblobv1)) >> 3)) _Static_assert(sizeof(struct voi) == 8, "statsblobv1 voi ABI mismatch"); /* Try restrict names to alphanumeric and underscore to simplify JSON compat. 
*/ const char *vs_stype2name[VS_NUM_STYPES] = { [VS_STYPE_VOISTATE] = "VOISTATE", [VS_STYPE_SUM] = "SUM", [VS_STYPE_MAX] = "MAX", [VS_STYPE_MIN] = "MIN", [VS_STYPE_HIST] = "HIST", [VS_STYPE_TDGST] = "TDGST", }; const char *vs_stype2desc[VS_NUM_STYPES] = { [VS_STYPE_VOISTATE] = "VOI related state data (not a real stat)", [VS_STYPE_SUM] = "Simple arithmetic accumulator", [VS_STYPE_MAX] = "Maximum observed VOI value", [VS_STYPE_MIN] = "Minimum observed VOI value", [VS_STYPE_HIST] = "Histogram of observed VOI values", [VS_STYPE_TDGST] = "t-digest of observed VOI values", }; const char *vsd_dtype2name[VSD_NUM_DTYPES] = { [VSD_DTYPE_VOISTATE] = "VOISTATE", [VSD_DTYPE_INT_S32] = "INT_S32", [VSD_DTYPE_INT_U32] = "INT_U32", [VSD_DTYPE_INT_S64] = "INT_S64", [VSD_DTYPE_INT_U64] = "INT_U64", [VSD_DTYPE_INT_SLONG] = "INT_SLONG", [VSD_DTYPE_INT_ULONG] = "INT_ULONG", [VSD_DTYPE_Q_S32] = "Q_S32", [VSD_DTYPE_Q_U32] = "Q_U32", [VSD_DTYPE_Q_S64] = "Q_S64", [VSD_DTYPE_Q_U64] = "Q_U64", [VSD_DTYPE_CRHIST32] = "CRHIST32", [VSD_DTYPE_DRHIST32] = "DRHIST32", [VSD_DTYPE_DVHIST32] = "DVHIST32", [VSD_DTYPE_CRHIST64] = "CRHIST64", [VSD_DTYPE_DRHIST64] = "DRHIST64", [VSD_DTYPE_DVHIST64] = "DVHIST64", [VSD_DTYPE_TDGSTCLUST32] = "TDGSTCLUST32", [VSD_DTYPE_TDGSTCLUST64] = "TDGSTCLUST64", }; const size_t vsd_dtype2size[VSD_NUM_DTYPES] = { [VSD_DTYPE_VOISTATE] = sizeof(struct voistatdata_voistate), [VSD_DTYPE_INT_S32] = sizeof(struct voistatdata_int32), [VSD_DTYPE_INT_U32] = sizeof(struct voistatdata_int32), [VSD_DTYPE_INT_S64] = sizeof(struct voistatdata_int64), [VSD_DTYPE_INT_U64] = sizeof(struct voistatdata_int64), [VSD_DTYPE_INT_SLONG] = sizeof(struct voistatdata_intlong), [VSD_DTYPE_INT_ULONG] = sizeof(struct voistatdata_intlong), [VSD_DTYPE_Q_S32] = sizeof(struct voistatdata_q32), [VSD_DTYPE_Q_U32] = sizeof(struct voistatdata_q32), [VSD_DTYPE_Q_S64] = sizeof(struct voistatdata_q64), [VSD_DTYPE_Q_U64] = sizeof(struct voistatdata_q64), [VSD_DTYPE_CRHIST32] = sizeof(struct voistatdata_crhist32), [VSD_DTYPE_DRHIST32] = sizeof(struct voistatdata_drhist32), [VSD_DTYPE_DVHIST32] = sizeof(struct voistatdata_dvhist32), [VSD_DTYPE_CRHIST64] = sizeof(struct voistatdata_crhist64), [VSD_DTYPE_DRHIST64] = sizeof(struct voistatdata_drhist64), [VSD_DTYPE_DVHIST64] = sizeof(struct voistatdata_dvhist64), [VSD_DTYPE_TDGSTCLUST32] = sizeof(struct voistatdata_tdgstclust32), [VSD_DTYPE_TDGSTCLUST64] = sizeof(struct voistatdata_tdgstclust64), }; static const bool vsd_compoundtype[VSD_NUM_DTYPES] = { [VSD_DTYPE_VOISTATE] = true, [VSD_DTYPE_INT_S32] = false, [VSD_DTYPE_INT_U32] = false, [VSD_DTYPE_INT_S64] = false, [VSD_DTYPE_INT_U64] = false, [VSD_DTYPE_INT_SLONG] = false, [VSD_DTYPE_INT_ULONG] = false, [VSD_DTYPE_Q_S32] = false, [VSD_DTYPE_Q_U32] = false, [VSD_DTYPE_Q_S64] = false, [VSD_DTYPE_Q_U64] = false, [VSD_DTYPE_CRHIST32] = true, [VSD_DTYPE_DRHIST32] = true, [VSD_DTYPE_DVHIST32] = true, [VSD_DTYPE_CRHIST64] = true, [VSD_DTYPE_DRHIST64] = true, [VSD_DTYPE_DVHIST64] = true, [VSD_DTYPE_TDGSTCLUST32] = true, [VSD_DTYPE_TDGSTCLUST64] = true, }; const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1] = { [LIM_MIN] = { [VSD_DTYPE_VOISTATE] = {0}, [VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MIN}}, [VSD_DTYPE_INT_U32] = {.int32 = {.u32 = 0}}, [VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MIN}}, [VSD_DTYPE_INT_U64] = {.int64 = {.u64 = 0}}, [VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MIN}}, [VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = 0}}, [VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMINVAL(INT32_MIN)}}, [VSD_DTYPE_Q_U32] 
= {.q32 = {.uq32 = 0}}, [VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMINVAL(INT64_MIN)}}, [VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = 0}}, }, [LIM_MAX] = { [VSD_DTYPE_VOISTATE] = {0}, [VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MAX}}, [VSD_DTYPE_INT_U32] = {.int32 = {.u32 = UINT32_MAX}}, [VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MAX}}, [VSD_DTYPE_INT_U64] = {.int64 = {.u64 = UINT64_MAX}}, [VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MAX}}, [VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = ULONG_MAX}}, [VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMAXVAL(INT32_MAX)}}, [VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = Q_IFMAXVAL(UINT32_MAX)}}, [VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMAXVAL(INT64_MAX)}}, [VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = Q_IFMAXVAL(UINT64_MAX)}}, } }; /* tpllistlock protects tpllist and ntpl */ static uint32_t ntpl; static struct statsblob_tpl **tpllist; static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags); //static void stats_v1_blob_finalise(struct statsblobv1 *sb); static int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags); static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes, int newvoistatbytes, int newvoistatdatabytes); static void stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags); static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt); static inline int ctd32cmp(const struct voistatdata_tdgstctd32 *c1, const struct voistatdata_tdgstctd32 *c2) { KASSERT(Q_PRECEQ(c1->mu, c2->mu), ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__, Q_RELPREC(c1->mu, c2->mu))); return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1); } ARB_GENERATE_STATIC(ctdth32, voistatdata_tdgstctd32, ctdlnk, ctd32cmp); static inline int ctd64cmp(const struct voistatdata_tdgstctd64 *c1, const struct voistatdata_tdgstctd64 *c2) { KASSERT(Q_PRECEQ(c1->mu, c2->mu), ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__, Q_RELPREC(c1->mu, c2->mu))); return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1); } ARB_GENERATE_STATIC(ctdth64, voistatdata_tdgstctd64, ctdlnk, ctd64cmp); #ifdef DIAGNOSTIC RB_GENERATE_STATIC(rbctdth32, voistatdata_tdgstctd32, rblnk, ctd32cmp); RB_GENERATE_STATIC(rbctdth64, voistatdata_tdgstctd64, rblnk, ctd64cmp); #endif static inline sbintime_t stats_sbinuptime(void) { sbintime_t sbt; #ifdef _KERNEL sbt = sbinuptime(); #else /* ! _KERNEL */ struct timespec tp; clock_gettime(CLOCK_MONOTONIC_FAST, &tp); sbt = tstosbt(tp); #endif /* _KERNEL */ return (sbt); } static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags) { #ifdef _KERNEL /* Default to M_NOWAIT if neither M_NOWAIT or M_WAITOK are set. */ if (!(flags & (M_WAITOK | M_NOWAIT))) flags |= M_NOWAIT; ptr = realloc(ptr, newsz, M_STATS, flags); #else /* ! 
_KERNEL */ ptr = realloc(ptr, newsz); if ((flags & M_ZERO) && ptr != NULL) { if (oldsz == 0) memset(ptr, '\0', newsz); else if (newsz > oldsz) memset(BLOB_OFFSET(ptr, oldsz), '\0', newsz - oldsz); } #endif /* _KERNEL */ return (ptr); } static inline char * stats_strdup(const char *s, #ifdef _KERNEL int flags) { char *copy; size_t len; if (!(flags & (M_WAITOK | M_NOWAIT))) flags |= M_NOWAIT; len = strlen(s) + 1; if ((copy = malloc(len, M_STATS, flags)) != NULL) bcopy(s, copy, len); return (copy); #else int flags __unused) { return (strdup(s)); #endif } static inline void stats_tpl_update_hash(struct statsblob_tpl *tpl) { TPL_LIST_WLOCK_ASSERT(); tpl->mb->tplhash = hash32_str(tpl->mb->tplname, 0); for (int voi_id = 0; voi_id < NVOIS(tpl->sb); voi_id++) { if (tpl->mb->voi_meta[voi_id].name != NULL) tpl->mb->tplhash = hash32_str( tpl->mb->voi_meta[voi_id].name, tpl->mb->tplhash); } tpl->mb->tplhash = hash32_buf(tpl->sb, tpl->sb->cursz, tpl->mb->tplhash); } static inline uint64_t stats_pow_u64(uint64_t base, uint64_t exp) { uint64_t result = 1; while (exp) { if (exp & 1) result *= base; exp >>= 1; base *= base; } return (result); } static inline int stats_vss_hist_bkt_hlpr(struct vss_hist_hlpr_info *info, uint32_t curbkt, struct voistatdata_numeric *bkt_lb, struct voistatdata_numeric *bkt_ub) { uint64_t step = 0; int error = 0; switch (info->scheme) { case BKT_LIN: step = info->lin.stepinc; break; case BKT_EXP: step = stats_pow_u64(info->exp.stepbase, info->exp.stepexp + curbkt); break; case BKT_LINEXP: { uint64_t curstepexp = 1; switch (info->voi_dtype) { case VSD_DTYPE_INT_S32: while ((int32_t)stats_pow_u64(info->linexp.stepbase, curstepexp) <= bkt_lb->int32.s32) curstepexp++; break; case VSD_DTYPE_INT_U32: while ((uint32_t)stats_pow_u64(info->linexp.stepbase, curstepexp) <= bkt_lb->int32.u32) curstepexp++; break; case VSD_DTYPE_INT_S64: while ((int64_t)stats_pow_u64(info->linexp.stepbase, curstepexp) <= bkt_lb->int64.s64) curstepexp++; break; case VSD_DTYPE_INT_U64: while ((uint64_t)stats_pow_u64(info->linexp.stepbase, curstepexp) <= bkt_lb->int64.u64) curstepexp++; break; case VSD_DTYPE_INT_SLONG: while ((long)stats_pow_u64(info->linexp.stepbase, curstepexp) <= bkt_lb->intlong.slong) curstepexp++; break; case VSD_DTYPE_INT_ULONG: while ((unsigned long)stats_pow_u64(info->linexp.stepbase, curstepexp) <= bkt_lb->intlong.ulong) curstepexp++; break; case VSD_DTYPE_Q_S32: while ((s32q_t)stats_pow_u64(info->linexp.stepbase, curstepexp) <= Q_GIVAL(bkt_lb->q32.sq32)) break; case VSD_DTYPE_Q_U32: while ((u32q_t)stats_pow_u64(info->linexp.stepbase, curstepexp) <= Q_GIVAL(bkt_lb->q32.uq32)) break; case VSD_DTYPE_Q_S64: while ((s64q_t)stats_pow_u64(info->linexp.stepbase, curstepexp) <= Q_GIVAL(bkt_lb->q64.sq64)) curstepexp++; break; case VSD_DTYPE_Q_U64: while ((u64q_t)stats_pow_u64(info->linexp.stepbase, curstepexp) <= Q_GIVAL(bkt_lb->q64.uq64)) curstepexp++; break; default: break; } step = stats_pow_u64(info->linexp.stepbase, curstepexp) / info->linexp.linstepdiv; if (step == 0) step = 1; break; } default: break; } if (info->scheme == BKT_USR) { *bkt_lb = info->usr.bkts[curbkt].lb; *bkt_ub = info->usr.bkts[curbkt].ub; } else if (step != 0) { switch (info->voi_dtype) { case VSD_DTYPE_INT_S32: bkt_ub->int32.s32 += (int32_t)step; break; case VSD_DTYPE_INT_U32: bkt_ub->int32.u32 += (uint32_t)step; break; case VSD_DTYPE_INT_S64: bkt_ub->int64.s64 += (int64_t)step; break; case VSD_DTYPE_INT_U64: bkt_ub->int64.u64 += (uint64_t)step; break; case VSD_DTYPE_INT_SLONG: bkt_ub->intlong.slong += (long)step; 
break; case VSD_DTYPE_INT_ULONG: bkt_ub->intlong.ulong += (unsigned long)step; break; case VSD_DTYPE_Q_S32: error = Q_QADDI(&bkt_ub->q32.sq32, step); break; case VSD_DTYPE_Q_U32: error = Q_QADDI(&bkt_ub->q32.uq32, step); break; case VSD_DTYPE_Q_S64: error = Q_QADDI(&bkt_ub->q64.sq64, step); break; case VSD_DTYPE_Q_U64: error = Q_QADDI(&bkt_ub->q64.uq64, step); break; default: break; } } else { /* info->scheme != BKT_USR && step == 0 */ return (EINVAL); } return (error); } static uint32_t stats_vss_hist_nbkts_hlpr(struct vss_hist_hlpr_info *info) { struct voistatdata_numeric bkt_lb, bkt_ub; uint32_t nbkts; int done; if (info->scheme == BKT_USR) { /* XXXLAS: Setting info->{lb,ub} from macro is tricky. */ info->lb = info->usr.bkts[0].lb; info->ub = info->usr.bkts[info->usr.nbkts - 1].lb; } nbkts = 0; done = 0; bkt_ub = info->lb; do { bkt_lb = bkt_ub; if (stats_vss_hist_bkt_hlpr(info, nbkts++, &bkt_lb, &bkt_ub)) return (0); if (info->scheme == BKT_USR) done = (nbkts == info->usr.nbkts); else { switch (info->voi_dtype) { case VSD_DTYPE_INT_S32: done = (bkt_ub.int32.s32 > info->ub.int32.s32); break; case VSD_DTYPE_INT_U32: done = (bkt_ub.int32.u32 > info->ub.int32.u32); break; case VSD_DTYPE_INT_S64: done = (bkt_ub.int64.s64 > info->ub.int64.s64); break; case VSD_DTYPE_INT_U64: done = (bkt_ub.int64.u64 > info->ub.int64.u64); break; case VSD_DTYPE_INT_SLONG: done = (bkt_ub.intlong.slong > info->ub.intlong.slong); break; case VSD_DTYPE_INT_ULONG: done = (bkt_ub.intlong.ulong > info->ub.intlong.ulong); break; case VSD_DTYPE_Q_S32: done = Q_QGTQ(bkt_ub.q32.sq32, info->ub.q32.sq32); break; case VSD_DTYPE_Q_U32: done = Q_QGTQ(bkt_ub.q32.uq32, info->ub.q32.uq32); break; case VSD_DTYPE_Q_S64: done = Q_QGTQ(bkt_ub.q64.sq64, info->ub.q64.sq64); break; case VSD_DTYPE_Q_U64: done = Q_QGTQ(bkt_ub.q64.uq64, info->ub.q64.uq64); break; default: return (0); } } } while (!done); if (info->flags & VSD_HIST_LBOUND_INF) nbkts++; if (info->flags & VSD_HIST_UBOUND_INF) nbkts++; return (nbkts); } int stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss, struct vss_hist_hlpr_info *info) { struct voistatdata_hist *hist; struct voistatdata_numeric bkt_lb, bkt_ub, *lbinfbktlb, *lbinfbktub, *ubinfbktlb, *ubinfbktub; uint32_t bkt, nbkts, nloop; if (vss == NULL || info == NULL || (info->flags & (VSD_HIST_LBOUND_INF|VSD_HIST_UBOUND_INF) && (info->hist_dtype == VSD_DTYPE_DVHIST32 || info->hist_dtype == VSD_DTYPE_DVHIST64))) return (EINVAL); info->voi_dtype = voi_dtype; if ((nbkts = stats_vss_hist_nbkts_hlpr(info)) == 0) return (EINVAL); switch (info->hist_dtype) { case VSD_DTYPE_CRHIST32: vss->vsdsz = HIST_NBKTS2VSDSZ(crhist32, nbkts); break; case VSD_DTYPE_DRHIST32: vss->vsdsz = HIST_NBKTS2VSDSZ(drhist32, nbkts); break; case VSD_DTYPE_DVHIST32: vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist32, nbkts); break; case VSD_DTYPE_CRHIST64: vss->vsdsz = HIST_NBKTS2VSDSZ(crhist64, nbkts); break; case VSD_DTYPE_DRHIST64: vss->vsdsz = HIST_NBKTS2VSDSZ(drhist64, nbkts); break; case VSD_DTYPE_DVHIST64: vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist64, nbkts); break; default: return (EINVAL); } vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO); if (vss->iv == NULL) return (ENOMEM); hist = (struct voistatdata_hist *)vss->iv; bkt_ub = info->lb; for (bkt = (info->flags & VSD_HIST_LBOUND_INF), nloop = 0; bkt < nbkts; bkt++, nloop++) { bkt_lb = bkt_ub; if (stats_vss_hist_bkt_hlpr(info, nloop, &bkt_lb, &bkt_ub)) return (EINVAL); switch (info->hist_dtype) { case VSD_DTYPE_CRHIST32: VSD(crhist32, hist)->bkts[bkt].lb = bkt_lb; break; case 
VSD_DTYPE_DRHIST32: VSD(drhist32, hist)->bkts[bkt].lb = bkt_lb; VSD(drhist32, hist)->bkts[bkt].ub = bkt_ub; break; case VSD_DTYPE_DVHIST32: VSD(dvhist32, hist)->bkts[bkt].val = bkt_lb; break; case VSD_DTYPE_CRHIST64: VSD(crhist64, hist)->bkts[bkt].lb = bkt_lb; break; case VSD_DTYPE_DRHIST64: VSD(drhist64, hist)->bkts[bkt].lb = bkt_lb; VSD(drhist64, hist)->bkts[bkt].ub = bkt_ub; break; case VSD_DTYPE_DVHIST64: VSD(dvhist64, hist)->bkts[bkt].val = bkt_lb; break; default: return (EINVAL); } } lbinfbktlb = lbinfbktub = ubinfbktlb = ubinfbktub = NULL; switch (info->hist_dtype) { case VSD_DTYPE_CRHIST32: lbinfbktlb = &VSD(crhist32, hist)->bkts[0].lb; ubinfbktlb = &VSD(crhist32, hist)->bkts[nbkts - 1].lb; break; case VSD_DTYPE_DRHIST32: lbinfbktlb = &VSD(drhist32, hist)->bkts[0].lb; lbinfbktub = &VSD(drhist32, hist)->bkts[0].ub; ubinfbktlb = &VSD(drhist32, hist)->bkts[nbkts - 1].lb; ubinfbktub = &VSD(drhist32, hist)->bkts[nbkts - 1].ub; break; case VSD_DTYPE_CRHIST64: lbinfbktlb = &VSD(crhist64, hist)->bkts[0].lb; ubinfbktlb = &VSD(crhist64, hist)->bkts[nbkts - 1].lb; break; case VSD_DTYPE_DRHIST64: lbinfbktlb = &VSD(drhist64, hist)->bkts[0].lb; lbinfbktub = &VSD(drhist64, hist)->bkts[0].ub; ubinfbktlb = &VSD(drhist64, hist)->bkts[nbkts - 1].lb; ubinfbktub = &VSD(drhist64, hist)->bkts[nbkts - 1].ub; break; case VSD_DTYPE_DVHIST32: case VSD_DTYPE_DVHIST64: break; default: return (EINVAL); } if ((info->flags & VSD_HIST_LBOUND_INF) && lbinfbktlb) { *lbinfbktlb = numeric_limits[LIM_MIN][info->voi_dtype]; /* * Assignment from numeric_limit array for Q types assigns max * possible integral/fractional value for underlying data type, * but we must set control bits for this specific histogram per * the user's choice of fractional bits, which we extract from * info->lb. */ if (info->voi_dtype == VSD_DTYPE_Q_S32 || info->voi_dtype == VSD_DTYPE_Q_U32) { /* Signedness doesn't matter for setting control bits. */ Q_SCVAL(lbinfbktlb->q32.sq32, Q_GCVAL(info->lb.q32.sq32)); } else if (info->voi_dtype == VSD_DTYPE_Q_S64 || info->voi_dtype == VSD_DTYPE_Q_U64) { /* Signedness doesn't matter for setting control bits. 
*/ Q_SCVAL(lbinfbktlb->q64.sq64, Q_GCVAL(info->lb.q64.sq64)); } if (lbinfbktub) *lbinfbktub = info->lb; } if ((info->flags & VSD_HIST_UBOUND_INF) && ubinfbktlb) { *ubinfbktlb = bkt_lb; if (ubinfbktub) { *ubinfbktub = numeric_limits[LIM_MAX][info->voi_dtype]; if (info->voi_dtype == VSD_DTYPE_Q_S32 || info->voi_dtype == VSD_DTYPE_Q_U32) { Q_SCVAL(ubinfbktub->q32.sq32, Q_GCVAL(info->lb.q32.sq32)); } else if (info->voi_dtype == VSD_DTYPE_Q_S64 || info->voi_dtype == VSD_DTYPE_Q_U64) { Q_SCVAL(ubinfbktub->q64.sq64, Q_GCVAL(info->lb.q64.sq64)); } } } return (0); } int stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss, struct vss_tdgst_hlpr_info *info) { struct voistatdata_tdgst *tdgst; struct ctdth32 *ctd32tree; struct ctdth64 *ctd64tree; struct voistatdata_tdgstctd32 *ctd32; struct voistatdata_tdgstctd64 *ctd64; info->voi_dtype = voi_dtype; switch (info->tdgst_dtype) { case VSD_DTYPE_TDGSTCLUST32: vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust32, info->nctds); break; case VSD_DTYPE_TDGSTCLUST64: vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust64, info->nctds); break; default: return (EINVAL); } vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO); if (vss->iv == NULL) return (ENOMEM); tdgst = (struct voistatdata_tdgst *)vss->iv; switch (info->tdgst_dtype) { case VSD_DTYPE_TDGSTCLUST32: ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree; ARB_INIT(ctd32, ctdlnk, ctd32tree, info->nctds) { Q_INI(&ctd32->mu, 0, 0, info->prec); } break; case VSD_DTYPE_TDGSTCLUST64: ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree; ARB_INIT(ctd64, ctdlnk, ctd64tree, info->nctds) { Q_INI(&ctd64->mu, 0, 0, info->prec); } break; default: return (EINVAL); } return (0); } int stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss, struct vss_numeric_hlpr_info *info) { struct voistatdata_numeric iv; switch (vss->stype) { case VS_STYPE_SUM: iv = stats_ctor_vsd_numeric(0); break; case VS_STYPE_MIN: iv = numeric_limits[LIM_MAX][voi_dtype]; break; case VS_STYPE_MAX: iv = numeric_limits[LIM_MIN][voi_dtype]; break; default: return (EINVAL); } vss->iv = stats_realloc(NULL, 0, vsd_dtype2size[voi_dtype], 0); if (vss->iv == NULL) return (ENOMEM); vss->vs_dtype = voi_dtype; vss->vsdsz = vsd_dtype2size[voi_dtype]; switch (voi_dtype) { case VSD_DTYPE_INT_S32: *((int32_t *)vss->iv) = iv.int32.s32; break; case VSD_DTYPE_INT_U32: *((uint32_t *)vss->iv) = iv.int32.u32; break; case VSD_DTYPE_INT_S64: *((int64_t *)vss->iv) = iv.int64.s64; break; case VSD_DTYPE_INT_U64: *((uint64_t *)vss->iv) = iv.int64.u64; break; case VSD_DTYPE_INT_SLONG: *((long *)vss->iv) = iv.intlong.slong; break; case VSD_DTYPE_INT_ULONG: *((unsigned long *)vss->iv) = iv.intlong.ulong; break; case VSD_DTYPE_Q_S32: *((s32q_t *)vss->iv) = Q_SCVAL(iv.q32.sq32, Q_CTRLINI(info->prec)); break; case VSD_DTYPE_Q_U32: *((u32q_t *)vss->iv) = Q_SCVAL(iv.q32.uq32, Q_CTRLINI(info->prec)); break; case VSD_DTYPE_Q_S64: *((s64q_t *)vss->iv) = Q_SCVAL(iv.q64.sq64, Q_CTRLINI(info->prec)); break; case VSD_DTYPE_Q_U64: *((u64q_t *)vss->iv) = Q_SCVAL(iv.q64.uq64, Q_CTRLINI(info->prec)); break; default: break; } return (0); } int stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss) { int i, ret; for (i = nvss - 1; i >= 0; i--) { if (vss[i].hlpr && (ret = vss[i].hlpr(voi_dtype, &vss[i], vss[i].hlprinfo)) != 0) return (ret); } return (0); } void stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss) { int i; for (i = nvss - 1; i >= 0; i--) { if (vss[i].hlpr) { stats_free((void *)vss[i].iv); vss[i].iv = NULL; } } } int 
stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl) { int error; error = 0; TPL_LIST_WLOCK(); if (tpl_id < 0 || tpl_id >= (int)ntpl) { error = ENOENT; } else { *tpl = tpllist[tpl_id]; /* XXXLAS: Acquire refcount on tpl. */ } TPL_LIST_WUNLOCK(); return (error); } int stats_tpl_fetch_allocid(const char *name, uint32_t hash) { int i, tpl_id; tpl_id = -ESRCH; TPL_LIST_RLOCK(); for (i = ntpl - 1; i >= 0; i--) { if (name != NULL) { if (strlen(name) == strlen(tpllist[i]->mb->tplname) && strncmp(name, tpllist[i]->mb->tplname, TPL_MAX_NAME_LEN) == 0 && (!hash || hash == tpllist[i]->mb->tplhash)) { tpl_id = i; break; } } else if (hash == tpllist[i]->mb->tplhash) { tpl_id = i; break; } } TPL_LIST_RUNLOCK(); return (tpl_id); } int stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len) { int error; error = 0; TPL_LIST_RLOCK(); if (tpl_id < ntpl) { if (buf != NULL && len > strlen(tpllist[tpl_id]->mb->tplname)) strlcpy(buf, tpllist[tpl_id]->mb->tplname, len); else error = EOVERFLOW; } else error = ENOENT; TPL_LIST_RUNLOCK(); return (error); } int stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates, int nrates, void *seed_bytes, size_t seed_len) { uint32_t cum_pct, rnd_pct; int i; cum_pct = 0; /* * Choose a pseudorandom or seeded number in range [0,100] and use * it to make a sampling decision and template selection where required. * If no seed is supplied, a PRNG is used to generate a pseudorandom * number so that every selection is independent. If a seed is supplied, * the caller desires random selection across different seeds, but * deterministic selection given the same seed. This is achieved by * hashing the seed and using the hash as the random number source. * * XXXLAS: Characterise hash function output distribution. */ if (seed_bytes == NULL) rnd_pct = random() / (INT32_MAX / 100); else rnd_pct = hash32_buf(seed_bytes, seed_len, 0) / (UINT32_MAX / 100U); /* * We map the randomly selected percentage on to the interval [0,100] * consisting of the cumulatively summed template sampling percentages. * The difference between the cumulative sum of all template sampling * percentages and 100 is treated as a NULL assignment i.e. no stats * template will be assigned, and -1 returned instead. */ for (i = 0; i < nrates; i++) { cum_pct += rates[i].tpl_sample_pct; KASSERT(cum_pct <= 100, ("%s cum_pct %u > 100", __func__, cum_pct)); if (rnd_pct > cum_pct || rates[i].tpl_sample_pct == 0) continue; return (rates[i].tpl_slot_id); } return (-1); } int stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz, struct statsblobv1 *src, uint32_t flags) { int error; error = 0; if (src == NULL || dst == NULL || src->cursz < sizeof(struct statsblob) || ((flags & SB_CLONE_ALLOCDST) && (flags & (SB_CLONE_USRDSTNOFAULT | SB_CLONE_USRDST)))) { error = EINVAL; } else if (flags & SB_CLONE_ALLOCDST) { *dst = stats_realloc(NULL, 0, src->cursz, 0); if (*dst) (*dst)->maxsz = dstmaxsz = src->cursz; else error = ENOMEM; } else if (*dst == NULL || dstmaxsz < sizeof(struct statsblob)) { error = EINVAL; } if (!error) { size_t postcurszlen; /* * Clone src into dst except for the maxsz field. If dst is too * small to hold all of src, only copy src's header and return * EOVERFLOW. 
*/ #ifdef _KERNEL if (flags & SB_CLONE_USRDSTNOFAULT) copyout_nofault(src, *dst, offsetof(struct statsblob, maxsz)); else if (flags & SB_CLONE_USRDST) copyout(src, *dst, offsetof(struct statsblob, maxsz)); else #endif memcpy(*dst, src, offsetof(struct statsblob, maxsz)); if (dstmaxsz >= src->cursz) { postcurszlen = src->cursz - offsetof(struct statsblob, cursz); } else { error = EOVERFLOW; postcurszlen = sizeof(struct statsblob) - offsetof(struct statsblob, cursz); } #ifdef _KERNEL if (flags & SB_CLONE_USRDSTNOFAULT) copyout_nofault(&(src->cursz), &((*dst)->cursz), postcurszlen); else if (flags & SB_CLONE_USRDST) copyout(&(src->cursz), &((*dst)->cursz), postcurszlen); else #endif memcpy(&((*dst)->cursz), &(src->cursz), postcurszlen); } return (error); } int stats_v1_tpl_alloc(const char *name, uint32_t flags __unused) { struct statsblobv1_tpl *tpl, **newtpllist; struct statsblobv1 *tpl_sb; struct metablob *tpl_mb; int tpl_id; if (name != NULL && strlen(name) > TPL_MAX_NAME_LEN) return (-EINVAL); if (name != NULL && stats_tpl_fetch_allocid(name, 0) >= 0) return (-EEXIST); tpl = stats_realloc(NULL, 0, sizeof(struct statsblobv1_tpl), M_ZERO); tpl_mb = stats_realloc(NULL, 0, sizeof(struct metablob), M_ZERO); tpl_sb = stats_realloc(NULL, 0, sizeof(struct statsblobv1), M_ZERO); if (tpl_mb != NULL && name != NULL) tpl_mb->tplname = stats_strdup(name, 0); if (tpl == NULL || tpl_sb == NULL || tpl_mb == NULL || tpl_mb->tplname == NULL) { stats_free(tpl); stats_free(tpl_sb); if (tpl_mb != NULL) { stats_free(tpl_mb->tplname); stats_free(tpl_mb); } return (-ENOMEM); } tpl->mb = tpl_mb; tpl->sb = tpl_sb; tpl_sb->abi = STATS_ABI_V1; tpl_sb->endian = #if BYTE_ORDER == LITTLE_ENDIAN SB_LE; #elif BYTE_ORDER == BIG_ENDIAN SB_BE; #else SB_UE; #endif tpl_sb->cursz = tpl_sb->maxsz = sizeof(struct statsblobv1); tpl_sb->stats_off = tpl_sb->statsdata_off = sizeof(struct statsblobv1); TPL_LIST_WLOCK(); newtpllist = stats_realloc(tpllist, ntpl * sizeof(void *), (ntpl + 1) * sizeof(void *), 0); if (newtpllist != NULL) { tpl_id = ntpl++; tpllist = (struct statsblob_tpl **)newtpllist; tpllist[tpl_id] = (struct statsblob_tpl *)tpl; stats_tpl_update_hash(tpllist[tpl_id]); } else { stats_free(tpl); stats_free(tpl_sb); if (tpl_mb != NULL) { stats_free(tpl_mb->tplname); stats_free(tpl_mb); } tpl_id = -ENOMEM; } TPL_LIST_WUNLOCK(); return (tpl_id); } int stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name, enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss, uint32_t flags) { struct voi *voi; struct voistat *tmpstat; struct statsblobv1 *tpl_sb; struct metablob *tpl_mb; int error, i, newstatdataidx, newvoibytes, newvoistatbytes, newvoistatdatabytes, newvoistatmaxid; uint32_t nbytes; if (voi_id < 0 || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES || nvss == 0 || vss == NULL) return (EINVAL); error = nbytes = newvoibytes = newvoistatbytes = newvoistatdatabytes = 0; newvoistatmaxid = -1; /* Calculate the number of bytes required for the new voistats. */ for (i = nvss - 1; i >= 0; i--) { if (vss[i].stype == 0 || vss[i].stype >= VS_NUM_STYPES || vss[i].vs_dtype == 0 || vss[i].vs_dtype >= VSD_NUM_DTYPES || vss[i].iv == NULL || vss[i].vsdsz == 0) return (EINVAL); if ((int)vss[i].stype > newvoistatmaxid) newvoistatmaxid = vss[i].stype; newvoistatdatabytes += vss[i].vsdsz; } if (flags & SB_VOI_RELUPDATE) { /* XXXLAS: VOI state bytes may need to vary based on stat types. 
*/ newvoistatdatabytes += sizeof(struct voistatdata_voistate); } nbytes += newvoistatdatabytes; TPL_LIST_WLOCK(); if (tpl_id < ntpl) { tpl_sb = (struct statsblobv1 *)tpllist[tpl_id]->sb; tpl_mb = tpllist[tpl_id]->mb; if (voi_id >= NVOIS(tpl_sb) || tpl_sb->vois[voi_id].id == -1) { /* Adding a new VOI and associated stats. */ if (voi_id >= NVOIS(tpl_sb)) { /* We need to grow the tpl_sb->vois array. */ newvoibytes = (voi_id - (NVOIS(tpl_sb) - 1)) * sizeof(struct voi); nbytes += newvoibytes; } newvoistatbytes = (newvoistatmaxid + 1) * sizeof(struct voistat); } else { /* Adding stats to an existing VOI. */ if (newvoistatmaxid > tpl_sb->vois[voi_id].voistatmaxid) { newvoistatbytes = (newvoistatmaxid - tpl_sb->vois[voi_id].voistatmaxid) * sizeof(struct voistat); } /* XXXLAS: KPI does not yet support expanding VOIs. */ error = EOPNOTSUPP; } nbytes += newvoistatbytes; if (!error && newvoibytes > 0) { struct voi_meta *voi_meta = tpl_mb->voi_meta; voi_meta = stats_realloc(voi_meta, voi_meta == NULL ? 0 : NVOIS(tpl_sb) * sizeof(struct voi_meta), (1 + voi_id) * sizeof(struct voi_meta), M_ZERO); if (voi_meta == NULL) error = ENOMEM; else tpl_mb->voi_meta = voi_meta; } if (!error) { /* NB: Resizing can change where tpl_sb points. */ error = stats_v1_blob_expand(&tpl_sb, newvoibytes, newvoistatbytes, newvoistatdatabytes); } if (!error) { tpl_mb->voi_meta[voi_id].name = stats_strdup(voi_name, 0); if (tpl_mb->voi_meta[voi_id].name == NULL) error = ENOMEM; } if (!error) { /* Update the template list with the resized pointer. */ tpllist[tpl_id]->sb = (struct statsblob *)tpl_sb; /* Update the template. */ voi = &tpl_sb->vois[voi_id]; if (voi->id < 0) { /* VOI is new and needs to be initialised. */ voi->id = voi_id; voi->dtype = voi_dtype; voi->stats_off = tpl_sb->stats_off; if (flags & SB_VOI_RELUPDATE) voi->flags |= VOI_REQSTATE; } else { /* * XXXLAS: When this else block is written, the * "KPI does not yet support expanding VOIs" * error earlier in this function can be * removed. What is required here is to shuffle * the voistat array such that the new stats for * the voi are contiguous, which will displace * stats for other vois that reside after the * voi being updated. The other vois then need * to have their stats_off adjusted post * shuffle. */ } voi->voistatmaxid = newvoistatmaxid; newstatdataidx = 0; if (voi->flags & VOI_REQSTATE) { /* Initialise the voistate stat in slot 0. */ tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off); tmpstat->stype = VS_STYPE_VOISTATE; tmpstat->flags = 0; tmpstat->dtype = VSD_DTYPE_VOISTATE; newstatdataidx = tmpstat->dsz = sizeof(struct voistatdata_numeric); tmpstat->data_off = tpl_sb->statsdata_off; } for (i = 0; (uint32_t)i < nvss; i++) { tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off + (vss[i].stype * sizeof(struct voistat))); KASSERT(tmpstat->stype < 0, ("voistat %p " "already initialised", tmpstat)); tmpstat->stype = vss[i].stype; tmpstat->flags = vss[i].flags; tmpstat->dtype = vss[i].vs_dtype; tmpstat->dsz = vss[i].vsdsz; tmpstat->data_off = tpl_sb->statsdata_off + newstatdataidx; memcpy(BLOB_OFFSET(tpl_sb, tmpstat->data_off), vss[i].iv, vss[i].vsdsz); newstatdataidx += vss[i].vsdsz; } /* Update the template version hash. */ stats_tpl_update_hash(tpllist[tpl_id]); /* XXXLAS: Confirm tpl name/hash pair remains unique. 
*/ } } else error = EINVAL; TPL_LIST_WUNLOCK(); return (error); } struct statsblobv1 * stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags __unused) { struct statsblobv1 *sb; int error; sb = NULL; TPL_LIST_RLOCK(); if (tpl_id < ntpl) { sb = stats_realloc(NULL, 0, tpllist[tpl_id]->sb->maxsz, 0); if (sb != NULL) { sb->maxsz = tpllist[tpl_id]->sb->maxsz; error = stats_v1_blob_init_locked(sb, tpl_id, 0); } else error = ENOMEM; if (error) { stats_free(sb); sb = NULL; } } TPL_LIST_RUNLOCK(); return (sb); } void stats_v1_blob_destroy(struct statsblobv1 *sb) { stats_free(sb); } int stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id, enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd, size_t *retvsdsz) { struct voi *v; struct voistat *vs; if (retvsd == NULL || sb == NULL || sb->abi != STATS_ABI_V1 || voi_id >= NVOIS(sb)) return (EINVAL); v = &sb->vois[voi_id]; if ((__typeof(v->voistatmaxid))stype > v->voistatmaxid) return (EINVAL); vs = BLOB_OFFSET(sb, v->stats_off + (stype * sizeof(struct voistat))); *retvsd = BLOB_OFFSET(sb, vs->data_off); if (retdtype != NULL) *retdtype = vs->dtype; if (retvsdsz != NULL) *retvsdsz = vs->dsz; return (0); } int stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags) { int error; error = 0; TPL_LIST_RLOCK(); if (sb == NULL || tpl_id >= ntpl) { error = EINVAL; } else { error = stats_v1_blob_init_locked(sb, tpl_id, flags); } TPL_LIST_RUNLOCK(); return (error); } static inline int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags __unused) { int error; TPL_LIST_RLOCK_ASSERT(); error = (sb->maxsz >= tpllist[tpl_id]->sb->cursz) ? 0 : EOVERFLOW; KASSERT(!error, ("sb %d instead of %d bytes", sb->maxsz, tpllist[tpl_id]->sb->cursz)); if (!error) { memcpy(sb, tpllist[tpl_id]->sb, tpllist[tpl_id]->sb->cursz); sb->created = sb->lastrst = stats_sbinuptime(); sb->tplhash = tpllist[tpl_id]->mb->tplhash; } return (error); } static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes, int newvoistatbytes, int newvoistatdatabytes) { struct statsblobv1 *sb; struct voi *tmpvoi; struct voistat *tmpvoistat, *voistat_array; int error, i, idxnewvois, idxnewvoistats, nbytes, nvoistats; KASSERT(newvoibytes % sizeof(struct voi) == 0, ("Bad newvoibytes %d", newvoibytes)); KASSERT(newvoistatbytes % sizeof(struct voistat) == 0, ("Bad newvoistatbytes %d", newvoistatbytes)); error = ((newvoibytes % sizeof(struct voi) == 0) && (newvoistatbytes % sizeof(struct voistat) == 0)) ? 0 : EINVAL; sb = *sbpp; nbytes = newvoibytes + newvoistatbytes + newvoistatdatabytes; /* * XXXLAS: Required until we gain support for flags which alter the * units of size/offset fields in key structs. */ if (!error && ((((int)sb->cursz) + nbytes) > SB_V1_MAXSZ)) error = EFBIG; if (!error && (sb->cursz + nbytes > sb->maxsz)) { /* Need to expand our blob. */ sb = stats_realloc(sb, sb->maxsz, sb->cursz + nbytes, M_ZERO); if (sb != NULL) { sb->maxsz = sb->cursz + nbytes; *sbpp = sb; } else error = ENOMEM; } if (!error) { /* * Shuffle memory within the expanded blob working from the end * backwards, leaving gaps for the new voistat and voistatdata * structs at the beginning of their respective blob regions, * and for the new voi structs at the end of their blob region. 
*/ memmove(BLOB_OFFSET(sb, sb->statsdata_off + nbytes), BLOB_OFFSET(sb, sb->statsdata_off), sb->cursz - sb->statsdata_off); memmove(BLOB_OFFSET(sb, sb->stats_off + newvoibytes + newvoistatbytes), BLOB_OFFSET(sb, sb->stats_off), sb->statsdata_off - sb->stats_off); /* First index of new voi/voistat structs to be initialised. */ idxnewvois = NVOIS(sb); idxnewvoistats = (newvoistatbytes / sizeof(struct voistat)) - 1; /* Update housekeeping variables and offsets. */ sb->cursz += nbytes; sb->stats_off += newvoibytes; sb->statsdata_off += newvoibytes + newvoistatbytes; /* XXXLAS: Zeroing not strictly needed but aids debugging. */ memset(&sb->vois[idxnewvois], '\0', newvoibytes); memset(BLOB_OFFSET(sb, sb->stats_off), '\0', newvoistatbytes); memset(BLOB_OFFSET(sb, sb->statsdata_off), '\0', newvoistatdatabytes); /* Initialise new voi array members and update offsets. */ for (i = 0; i < NVOIS(sb); i++) { tmpvoi = &sb->vois[i]; if (i >= idxnewvois) { tmpvoi->id = tmpvoi->voistatmaxid = -1; } else if (tmpvoi->id > -1) { tmpvoi->stats_off += newvoibytes + newvoistatbytes; } } /* Initialise new voistat array members and update offsets. */ nvoistats = (sb->statsdata_off - sb->stats_off) / sizeof(struct voistat); voistat_array = BLOB_OFFSET(sb, sb->stats_off); for (i = 0; i < nvoistats; i++) { tmpvoistat = &voistat_array[i]; if (i <= idxnewvoistats) { tmpvoistat->stype = -1; } else if (tmpvoistat->stype > -1) { tmpvoistat->data_off += nbytes; } } } return (error); } static void stats_v1_blob_finalise(struct statsblobv1 *sb __unused) { /* XXXLAS: Fill this in. */ } static void stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags) { struct voi *v; struct voistat *vs; struct sb_iter_ctx ctx; int i, j, firstvoi; ctx.usrctx = usrctx; ctx.flags = SB_IT_FIRST_CB; firstvoi = 1; for (i = 0; i < NVOIS(sb); i++) { v = &sb->vois[i]; ctx.vslot = i; ctx.vsslot = -1; ctx.flags |= SB_IT_FIRST_VOISTAT; if (firstvoi) ctx.flags |= SB_IT_FIRST_VOI; else if (i == (NVOIS(sb) - 1)) ctx.flags |= SB_IT_LAST_VOI | SB_IT_LAST_CB; if (v->id < 0 && (flags & SB_IT_NULLVOI)) { if (icb(sb, v, NULL, &ctx)) return; firstvoi = 0; ctx.flags &= ~SB_IT_FIRST_CB; } /* If NULL voi, v->voistatmaxid == -1 */ for (j = 0; j <= v->voistatmaxid; j++) { vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[j]; if (vs->stype < 0 && !(flags & SB_IT_NULLVOISTAT)) continue; if (j == v->voistatmaxid) { ctx.flags |= SB_IT_LAST_VOISTAT; if (i == (NVOIS(sb) - 1)) ctx.flags |= SB_IT_LAST_CB; } else ctx.flags &= ~SB_IT_LAST_CB; ctx.vsslot = j; if (icb(sb, v, vs, &ctx)) return; ctx.flags &= ~(SB_IT_FIRST_CB | SB_IT_FIRST_VOISTAT | SB_IT_LAST_VOISTAT); } ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI); } } static inline void stats_voistatdata_tdgst_tostr(enum vsd_dtype voi_dtype __unused, const struct voistatdata_tdgst *tdgst, enum vsd_dtype tdgst_dtype, size_t tdgst_dsz __unused, enum sb_str_fmt fmt, struct sbuf *buf, int objdump) { const struct ctdth32 *ctd32tree; const struct ctdth64 *ctd64tree; const struct voistatdata_tdgstctd32 *ctd32; const struct voistatdata_tdgstctd64 *ctd64; const char *fmtstr; uint64_t smplcnt, compcnt; int is32bit, qmaxstrlen; uint16_t maxctds, curctds; switch (tdgst_dtype) { case VSD_DTYPE_TDGSTCLUST32: smplcnt = CONSTVSD(tdgstclust32, tdgst)->smplcnt; compcnt = CONSTVSD(tdgstclust32, tdgst)->compcnt; maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree); curctds = ARB_CURNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree); ctd32tree = &CONSTVSD(tdgstclust32, tdgst)->ctdtree; 
ctd32 = (objdump ? ARB_CNODE(ctd32tree, 0) : ARB_CMIN(ctdth32, ctd32tree)); qmaxstrlen = (ctd32 == NULL) ? 1 : Q_MAXSTRLEN(ctd32->mu, 10); is32bit = 1; ctd64tree = NULL; ctd64 = NULL; break; case VSD_DTYPE_TDGSTCLUST64: smplcnt = CONSTVSD(tdgstclust64, tdgst)->smplcnt; compcnt = CONSTVSD(tdgstclust64, tdgst)->compcnt; maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree); curctds = ARB_CURNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree); ctd64tree = &CONSTVSD(tdgstclust64, tdgst)->ctdtree; ctd64 = (objdump ? ARB_CNODE(ctd64tree, 0) : ARB_CMIN(ctdth64, ctd64tree)); qmaxstrlen = (ctd64 == NULL) ? 1 : Q_MAXSTRLEN(ctd64->mu, 10); is32bit = 0; ctd32tree = NULL; ctd32 = NULL; break; default: return; } switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = "smplcnt=%ju, compcnt=%ju, maxctds=%hu, nctds=%hu"; break; case SB_STRFMT_JSON: default: fmtstr = "\"smplcnt\":%ju,\"compcnt\":%ju,\"maxctds\":%hu," "\"nctds\":%hu,\"ctds\":["; break; } sbuf_printf(buf, fmtstr, (uintmax_t)smplcnt, (uintmax_t)compcnt, maxctds, curctds); while ((is32bit ? NULL != ctd32 : NULL != ctd64)) { char qstr[qmaxstrlen]; switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = "\n\t\t\t\t"; break; case SB_STRFMT_JSON: default: fmtstr = "{"; break; } sbuf_cat(buf, fmtstr); if (objdump) { switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = "ctd[%hu]."; break; case SB_STRFMT_JSON: default: fmtstr = "\"ctd\":%hu,"; break; } sbuf_printf(buf, fmtstr, is32bit ? ARB_SELFIDX(ctd32tree, ctd32) : ARB_SELFIDX(ctd64tree, ctd64)); } switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = "{mu="; break; case SB_STRFMT_JSON: default: fmtstr = "\"mu\":"; break; } sbuf_cat(buf, fmtstr); Q_TOSTR((is32bit ? ctd32->mu : ctd64->mu), -1, 10, qstr, sizeof(qstr)); sbuf_cat(buf, qstr); switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}"; break; case SB_STRFMT_JSON: default: fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}"; break; } sbuf_printf(buf, fmtstr, is32bit ? ctd32->cnt : (uintmax_t)ctd64->cnt); if (is32bit) ctd32 = (objdump ? ARB_CNODE(ctd32tree, ARB_SELFIDX(ctd32tree, ctd32) + 1) : ARB_CNEXT(ctdth32, ctd32tree, ctd32)); else ctd64 = (objdump ? ARB_CNODE(ctd64tree, ARB_SELFIDX(ctd64tree, ctd64) + 1) : ARB_CNEXT(ctdth64, ctd64tree, ctd64)); if (fmt == SB_STRFMT_JSON && (is32bit ? NULL != ctd32 : NULL != ctd64)) sbuf_putc(buf, ','); } if (fmt == SB_STRFMT_JSON) sbuf_cat(buf, "]"); } static inline void stats_voistatdata_hist_tostr(enum vsd_dtype voi_dtype, const struct voistatdata_hist *hist, enum vsd_dtype hist_dtype, size_t hist_dsz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump) { const struct voistatdata_numeric *bkt_lb, *bkt_ub; const char *fmtstr; int is32bit; uint16_t i, nbkts; switch (hist_dtype) { case VSD_DTYPE_CRHIST32: nbkts = HIST_VSDSZ2NBKTS(crhist32, hist_dsz); is32bit = 1; break; case VSD_DTYPE_DRHIST32: nbkts = HIST_VSDSZ2NBKTS(drhist32, hist_dsz); is32bit = 1; break; case VSD_DTYPE_DVHIST32: nbkts = HIST_VSDSZ2NBKTS(dvhist32, hist_dsz); is32bit = 1; break; case VSD_DTYPE_CRHIST64: nbkts = HIST_VSDSZ2NBKTS(crhist64, hist_dsz); is32bit = 0; break; case VSD_DTYPE_DRHIST64: nbkts = HIST_VSDSZ2NBKTS(drhist64, hist_dsz); is32bit = 0; break; case VSD_DTYPE_DVHIST64: nbkts = HIST_VSDSZ2NBKTS(dvhist64, hist_dsz); is32bit = 0; break; default: return; } switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = "nbkts=%hu, "; break; case SB_STRFMT_JSON: default: fmtstr = "\"nbkts\":%hu,"; break; } sbuf_printf(buf, fmtstr, nbkts); switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = (is32bit ? 
"oob=%u" : "oob=%ju"); break; case SB_STRFMT_JSON: default: fmtstr = (is32bit ? "\"oob\":%u,\"bkts\":[" : "\"oob\":%ju,\"bkts\":["); break; } sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, oob) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, oob)); for (i = 0; i < nbkts; i++) { switch (hist_dtype) { case VSD_DTYPE_CRHIST32: case VSD_DTYPE_CRHIST64: bkt_lb = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype, bkts[i].lb); if (i < nbkts - 1) bkt_ub = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype, bkts[i + 1].lb); else bkt_ub = &numeric_limits[LIM_MAX][voi_dtype]; break; case VSD_DTYPE_DRHIST32: case VSD_DTYPE_DRHIST64: bkt_lb = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype, bkts[i].lb); bkt_ub = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype, bkts[i].ub); break; case VSD_DTYPE_DVHIST32: case VSD_DTYPE_DVHIST64: bkt_lb = bkt_ub = VSD_CONSTDVHIST_FIELDPTR(hist, hist_dtype, bkts[i].val); break; default: break; } switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = "\n\t\t\t\t"; break; case SB_STRFMT_JSON: default: fmtstr = "{"; break; } sbuf_cat(buf, fmtstr); if (objdump) { switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = "bkt[%hu]."; break; case SB_STRFMT_JSON: default: fmtstr = "\"bkt\":%hu,"; break; } sbuf_printf(buf, fmtstr, i); } switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = "{lb="; break; case SB_STRFMT_JSON: default: fmtstr = "\"lb\":"; break; } sbuf_cat(buf, fmtstr); stats_voistatdata_tostr((const struct voistatdata *)bkt_lb, voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric), fmt, buf, objdump); switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = ",ub="; break; case SB_STRFMT_JSON: default: fmtstr = ",\"ub\":"; break; } sbuf_cat(buf, fmtstr); stats_voistatdata_tostr((const struct voistatdata *)bkt_ub, voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric), fmt, buf, objdump); switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}"; break; case SB_STRFMT_JSON: default: fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}"; break; } sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt)); if (fmt == SB_STRFMT_JSON && i < nbkts - 1) sbuf_putc(buf, ','); } if (fmt == SB_STRFMT_JSON) sbuf_cat(buf, "]"); } int stats_voistatdata_tostr(const struct voistatdata *vsd, enum vsd_dtype voi_dtype, enum vsd_dtype vsd_dtype, size_t vsd_sz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump) { const char *fmtstr; if (vsd == NULL || buf == NULL || voi_dtype >= VSD_NUM_DTYPES || vsd_dtype >= VSD_NUM_DTYPES || fmt >= SB_STRFMT_NUM_FMTS) return (EINVAL); switch (vsd_dtype) { case VSD_DTYPE_VOISTATE: switch (fmt) { case SB_STRFMT_FREEFORM: fmtstr = "prev="; break; case SB_STRFMT_JSON: default: fmtstr = "\"prev\":"; break; } sbuf_cat(buf, fmtstr); /* * Render prev by passing it as *vsd and voi_dtype as vsd_dtype. 
*/ stats_voistatdata_tostr( (const struct voistatdata *)&CONSTVSD(voistate, vsd)->prev, voi_dtype, voi_dtype, vsd_sz, fmt, buf, objdump); break; case VSD_DTYPE_INT_S32: sbuf_printf(buf, "%d", vsd->int32.s32); break; case VSD_DTYPE_INT_U32: sbuf_printf(buf, "%u", vsd->int32.u32); break; case VSD_DTYPE_INT_S64: sbuf_printf(buf, "%jd", (intmax_t)vsd->int64.s64); break; case VSD_DTYPE_INT_U64: sbuf_printf(buf, "%ju", (uintmax_t)vsd->int64.u64); break; case VSD_DTYPE_INT_SLONG: sbuf_printf(buf, "%ld", vsd->intlong.slong); break; case VSD_DTYPE_INT_ULONG: sbuf_printf(buf, "%lu", vsd->intlong.ulong); break; case VSD_DTYPE_Q_S32: { char qstr[Q_MAXSTRLEN(vsd->q32.sq32, 10)]; Q_TOSTR((s32q_t)vsd->q32.sq32, -1, 10, qstr, sizeof(qstr)); sbuf_cat(buf, qstr); } break; case VSD_DTYPE_Q_U32: { char qstr[Q_MAXSTRLEN(vsd->q32.uq32, 10)]; Q_TOSTR((u32q_t)vsd->q32.uq32, -1, 10, qstr, sizeof(qstr)); sbuf_cat(buf, qstr); } break; case VSD_DTYPE_Q_S64: { char qstr[Q_MAXSTRLEN(vsd->q64.sq64, 10)]; Q_TOSTR((s64q_t)vsd->q64.sq64, -1, 10, qstr, sizeof(qstr)); sbuf_cat(buf, qstr); } break; case VSD_DTYPE_Q_U64: { char qstr[Q_MAXSTRLEN(vsd->q64.uq64, 10)]; Q_TOSTR((u64q_t)vsd->q64.uq64, -1, 10, qstr, sizeof(qstr)); sbuf_cat(buf, qstr); } break; case VSD_DTYPE_CRHIST32: case VSD_DTYPE_DRHIST32: case VSD_DTYPE_DVHIST32: case VSD_DTYPE_CRHIST64: case VSD_DTYPE_DRHIST64: case VSD_DTYPE_DVHIST64: stats_voistatdata_hist_tostr(voi_dtype, CONSTVSD(hist, vsd), vsd_dtype, vsd_sz, fmt, buf, objdump); break; case VSD_DTYPE_TDGSTCLUST32: case VSD_DTYPE_TDGSTCLUST64: stats_voistatdata_tdgst_tostr(voi_dtype, CONSTVSD(tdgst, vsd), vsd_dtype, vsd_sz, fmt, buf, objdump); break; default: break; } return (sbuf_error(buf)); } static void stats_v1_itercb_tostr_freeform(struct statsblobv1 *sb, struct voi *v, struct voistat *vs, struct sb_iter_ctx *ctx) { struct sb_tostrcb_ctx *sctx; struct metablob *tpl_mb; struct sbuf *buf; void *vsd; uint8_t dump; sctx = ctx->usrctx; buf = sctx->buf; tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL; dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0); if (ctx->flags & SB_IT_FIRST_CB) { sbuf_printf(buf, "struct statsblobv1@%p", sb); if (dump) { sbuf_printf(buf, ", abi=%hhu, endian=%hhu, maxsz=%hu, " "cursz=%hu, created=%jd, lastrst=%jd, flags=0x%04hx, " "stats_off=%hu, statsdata_off=%hu", sb->abi, sb->endian, sb->maxsz, sb->cursz, sb->created, sb->lastrst, sb->flags, sb->stats_off, sb->statsdata_off); } sbuf_printf(buf, ", tplhash=%u", sb->tplhash); } if (ctx->flags & SB_IT_FIRST_VOISTAT) { sbuf_printf(buf, "\n\tvois[%hd]: id=%hd", ctx->vslot, v->id); if (v->id < 0) return; sbuf_printf(buf, ", name=\"%s\"", (tpl_mb == NULL) ? 
"" : tpl_mb->voi_meta[v->id].name); if (dump) sbuf_printf(buf, ", flags=0x%04hx, dtype=%s, " "voistatmaxid=%hhd, stats_off=%hu", v->flags, vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off); } if (!dump && vs->stype <= 0) return; sbuf_printf(buf, "\n\t\tvois[%hd]stat[%hhd]: stype=", v->id, ctx->vsslot); if (vs->stype < 0) { sbuf_printf(buf, "%hhd", vs->stype); return; } else sbuf_printf(buf, "%s, errs=%hu", vs_stype2name[vs->stype], vs->errs); vsd = BLOB_OFFSET(sb, vs->data_off); if (dump) sbuf_printf(buf, ", flags=0x%04x, dtype=%s, dsz=%hu, " "data_off=%hu", vs->flags, vsd_dtype2name[vs->dtype], vs->dsz, vs->data_off); sbuf_printf(buf, "\n\t\t\tvoistatdata: "); stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz, sctx->fmt, buf, dump); } static void stats_v1_itercb_tostr_json(struct statsblobv1 *sb, struct voi *v, struct voistat *vs, struct sb_iter_ctx *ctx) { struct sb_tostrcb_ctx *sctx; struct metablob *tpl_mb; struct sbuf *buf; const char *fmtstr; void *vsd; uint8_t dump; sctx = ctx->usrctx; buf = sctx->buf; tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL; dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0); if (ctx->flags & SB_IT_FIRST_CB) { sbuf_putc(buf, '{'); if (dump) { sbuf_printf(buf, "\"abi\":%hhu,\"endian\":%hhu," "\"maxsz\":%hu,\"cursz\":%hu,\"created\":%jd," "\"lastrst\":%jd,\"flags\":%hu,\"stats_off\":%hu," "\"statsdata_off\":%hu,", sb->abi, sb->endian, sb->maxsz, sb->cursz, sb->created, sb->lastrst, sb->flags, sb->stats_off, sb->statsdata_off); } if (tpl_mb == NULL) fmtstr = "\"tplname\":%s,\"tplhash\":%u,\"vois\":{"; else fmtstr = "\"tplname\":\"%s\",\"tplhash\":%u,\"vois\":{"; sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->tplname : "null", sb->tplhash); } if (ctx->flags & SB_IT_FIRST_VOISTAT) { if (dump) { sbuf_printf(buf, "\"[%d]\":{\"id\":%d", ctx->vslot, v->id); if (v->id < 0) { sbuf_printf(buf, "},"); return; } if (tpl_mb == NULL) fmtstr = ",\"name\":%s,\"flags\":%hu," "\"dtype\":\"%s\",\"voistatmaxid\":%hhd," "\"stats_off\":%hu,"; else fmtstr = ",\"name\":\"%s\",\"flags\":%hu," "\"dtype\":\"%s\",\"voistatmaxid\":%hhd," "\"stats_off\":%hu,"; sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->voi_meta[v->id].name : "null", v->flags, vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off); } else { if (tpl_mb == NULL) { sbuf_printf(buf, "\"[%hd]\":{", v->id); } else { sbuf_printf(buf, "\"%s\":{", tpl_mb->voi_meta[v->id].name); } } sbuf_cat(buf, "\"stats\":{"); } vsd = BLOB_OFFSET(sb, vs->data_off); if (dump) { sbuf_printf(buf, "\"[%hhd]\":", ctx->vsslot); if (vs->stype < 0) { sbuf_printf(buf, "{\"stype\":-1},"); return; } sbuf_printf(buf, "{\"stype\":\"%s\",\"errs\":%hu,\"flags\":%hu," "\"dtype\":\"%s\",\"data_off\":%hu,\"voistatdata\":{", vs_stype2name[vs->stype], vs->errs, vs->flags, vsd_dtype2name[vs->dtype], vs->data_off); } else if (vs->stype > 0) { if (tpl_mb == NULL) sbuf_printf(buf, "\"[%hhd]\":", vs->stype); else sbuf_printf(buf, "\"%s\":", vs_stype2name[vs->stype]); } else return; if ((vs->flags & VS_VSDVALID) || dump) { if (!dump) sbuf_printf(buf, "{\"errs\":%hu,", vs->errs); /* Simple non-compound VSD types need a key. */ if (!vsd_compoundtype[vs->dtype]) sbuf_cat(buf, "\"val\":"); stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz, sctx->fmt, buf, dump); sbuf_cat(buf, dump ? "}}" : "}"); } else sbuf_cat(buf, dump ? 
"null}" : "null"); if (ctx->flags & SB_IT_LAST_VOISTAT) sbuf_cat(buf, "}}"); if (ctx->flags & SB_IT_LAST_CB) sbuf_cat(buf, "}}"); else sbuf_putc(buf, ','); } static int stats_v1_itercb_tostr(struct statsblobv1 *sb, struct voi *v, struct voistat *vs, struct sb_iter_ctx *ctx) { struct sb_tostrcb_ctx *sctx; sctx = ctx->usrctx; switch (sctx->fmt) { case SB_STRFMT_FREEFORM: stats_v1_itercb_tostr_freeform(sb, v, vs, ctx); break; case SB_STRFMT_JSON: stats_v1_itercb_tostr_json(sb, v, vs, ctx); break; default: break; } return (sbuf_error(sctx->buf)); } int stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf, enum sb_str_fmt fmt, uint32_t flags) { struct sb_tostrcb_ctx sctx; uint32_t iflags; if (sb == NULL || sb->abi != STATS_ABI_V1 || buf == NULL || fmt >= SB_STRFMT_NUM_FMTS) return (EINVAL); sctx.buf = buf; sctx.fmt = fmt; sctx.flags = flags; if (flags & SB_TOSTR_META) { if (stats_tpl_fetch(stats_tpl_fetch_allocid(NULL, sb->tplhash), &sctx.tpl)) return (EINVAL); } else sctx.tpl = NULL; iflags = 0; if (flags & SB_TOSTR_OBJDUMP) iflags |= (SB_IT_NULLVOI | SB_IT_NULLVOISTAT); stats_v1_blob_iter(sb, stats_v1_itercb_tostr, &sctx, iflags); return (sbuf_error(buf)); } static int stats_v1_itercb_visit(struct statsblobv1 *sb, struct voi *v, struct voistat *vs, struct sb_iter_ctx *ctx) { struct sb_visitcb_ctx *vctx; struct sb_visit sbv; vctx = ctx->usrctx; sbv.tplhash = sb->tplhash; sbv.voi_id = v->id; sbv.voi_dtype = v->dtype; sbv.vs_stype = vs->stype; sbv.vs_dtype = vs->dtype; sbv.vs_dsz = vs->dsz; sbv.vs_data = BLOB_OFFSET(sb, vs->data_off); sbv.vs_errs = vs->errs; sbv.flags = ctx->flags & (SB_IT_FIRST_CB | SB_IT_LAST_CB | SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT | SB_IT_LAST_VOISTAT); return (vctx->cb(&sbv, vctx->usrctx)); } int stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func, void *usrctx) { struct sb_visitcb_ctx vctx; if (sb == NULL || sb->abi != STATS_ABI_V1 || func == NULL) return (EINVAL); vctx.cb = func; vctx.usrctx = usrctx; stats_v1_blob_iter(sb, stats_v1_itercb_visit, &vctx, 0); return (0); } static int stats_v1_icb_reset_voistat(struct statsblobv1 *sb, struct voi *v __unused, struct voistat *vs, struct sb_iter_ctx *ctx __unused) { void *vsd; if (vs->stype == VS_STYPE_VOISTATE) return (0); vsd = BLOB_OFFSET(sb, vs->data_off); /* Perform the stat type's default reset action. 
*/ switch (vs->stype) { case VS_STYPE_SUM: switch (vs->dtype) { case VSD_DTYPE_Q_S32: Q_SIFVAL(VSD(q32, vsd)->sq32, 0); break; case VSD_DTYPE_Q_U32: Q_SIFVAL(VSD(q32, vsd)->uq32, 0); break; case VSD_DTYPE_Q_S64: Q_SIFVAL(VSD(q64, vsd)->sq64, 0); break; case VSD_DTYPE_Q_U64: Q_SIFVAL(VSD(q64, vsd)->uq64, 0); break; default: bzero(vsd, vs->dsz); break; } break; case VS_STYPE_MAX: switch (vs->dtype) { case VSD_DTYPE_Q_S32: Q_SIFVAL(VSD(q32, vsd)->sq32, Q_IFMINVAL(VSD(q32, vsd)->sq32)); break; case VSD_DTYPE_Q_U32: Q_SIFVAL(VSD(q32, vsd)->uq32, Q_IFMINVAL(VSD(q32, vsd)->uq32)); break; case VSD_DTYPE_Q_S64: Q_SIFVAL(VSD(q64, vsd)->sq64, Q_IFMINVAL(VSD(q64, vsd)->sq64)); break; case VSD_DTYPE_Q_U64: Q_SIFVAL(VSD(q64, vsd)->uq64, Q_IFMINVAL(VSD(q64, vsd)->uq64)); break; default: memcpy(vsd, &numeric_limits[LIM_MIN][vs->dtype], vs->dsz); break; } break; case VS_STYPE_MIN: switch (vs->dtype) { case VSD_DTYPE_Q_S32: Q_SIFVAL(VSD(q32, vsd)->sq32, Q_IFMAXVAL(VSD(q32, vsd)->sq32)); break; case VSD_DTYPE_Q_U32: Q_SIFVAL(VSD(q32, vsd)->uq32, Q_IFMAXVAL(VSD(q32, vsd)->uq32)); break; case VSD_DTYPE_Q_S64: Q_SIFVAL(VSD(q64, vsd)->sq64, Q_IFMAXVAL(VSD(q64, vsd)->sq64)); break; case VSD_DTYPE_Q_U64: Q_SIFVAL(VSD(q64, vsd)->uq64, Q_IFMAXVAL(VSD(q64, vsd)->uq64)); break; default: memcpy(vsd, &numeric_limits[LIM_MAX][vs->dtype], vs->dsz); break; } break; case VS_STYPE_HIST: { /* Reset bucket counts. */ struct voistatdata_hist *hist; int i, is32bit; uint16_t nbkts; hist = VSD(hist, vsd); switch (vs->dtype) { case VSD_DTYPE_CRHIST32: nbkts = HIST_VSDSZ2NBKTS(crhist32, vs->dsz); is32bit = 1; break; case VSD_DTYPE_DRHIST32: nbkts = HIST_VSDSZ2NBKTS(drhist32, vs->dsz); is32bit = 1; break; case VSD_DTYPE_DVHIST32: nbkts = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz); is32bit = 1; break; case VSD_DTYPE_CRHIST64: nbkts = HIST_VSDSZ2NBKTS(crhist64, vs->dsz); is32bit = 0; break; case VSD_DTYPE_DRHIST64: nbkts = HIST_VSDSZ2NBKTS(drhist64, vs->dsz); is32bit = 0; break; case VSD_DTYPE_DVHIST64: nbkts = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz); is32bit = 0; break; default: return (0); } bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, oob), is32bit ? sizeof(uint32_t) : sizeof(uint64_t)); for (i = nbkts - 1; i >= 0; i--) { bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, bkts[i].cnt), is32bit ? sizeof(uint32_t) : sizeof(uint64_t)); } break; } case VS_STYPE_TDGST: { /* Reset sample count centroids array/tree. 
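 * The sample and compression counts are zeroed and every centroid in the
 * ARB is re-initialised via ARB_INIT() with a zero count and mu.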
*/ struct voistatdata_tdgst *tdgst; struct ctdth32 *ctd32tree; struct ctdth64 *ctd64tree; struct voistatdata_tdgstctd32 *ctd32; struct voistatdata_tdgstctd64 *ctd64; tdgst = VSD(tdgst, vsd); switch (vs->dtype) { case VSD_DTYPE_TDGSTCLUST32: VSD(tdgstclust32, tdgst)->smplcnt = 0; VSD(tdgstclust32, tdgst)->compcnt = 0; ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree; ARB_INIT(ctd32, ctdlnk, ctd32tree, ARB_MAXNODES(ctd32tree)) { ctd32->cnt = 0; Q_SIFVAL(ctd32->mu, 0); } #ifdef DIAGNOSTIC RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree); #endif break; case VSD_DTYPE_TDGSTCLUST64: VSD(tdgstclust64, tdgst)->smplcnt = 0; VSD(tdgstclust64, tdgst)->compcnt = 0; ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree; ARB_INIT(ctd64, ctdlnk, ctd64tree, ARB_MAXNODES(ctd64tree)) { ctd64->cnt = 0; Q_SIFVAL(ctd64->mu, 0); } #ifdef DIAGNOSTIC RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree); #endif break; default: return (0); } break; } default: KASSERT(0, ("Unknown VOI stat type %d", vs->stype)); break; } vs->errs = 0; vs->flags &= ~VS_VSDVALID; return (0); } int stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz, struct statsblobv1 *src, uint32_t flags) { int error; if (src != NULL && src->abi == STATS_ABI_V1) { error = stats_v1_blob_clone(dst, dstmaxsz, src, flags); if (!error) { if (flags & SB_CLONE_RSTSRC) { stats_v1_blob_iter(src, stats_v1_icb_reset_voistat, NULL, 0); src->lastrst = stats_sbinuptime(); } stats_v1_blob_finalise(*dst); } } else error = EINVAL; return (error); } static inline int stats_v1_voi_update_max(enum vsd_dtype voi_dtype __unused, struct voistatdata *voival, struct voistat *vs, void *vsd) { int error; KASSERT(vs->dtype < VSD_NUM_DTYPES, ("Unknown VSD dtype %d", vs->dtype)); error = 0; switch (vs->dtype) { case VSD_DTYPE_INT_S32: if (VSD(int32, vsd)->s32 < voival->int32.s32) { VSD(int32, vsd)->s32 = voival->int32.s32; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_INT_U32: if (VSD(int32, vsd)->u32 < voival->int32.u32) { VSD(int32, vsd)->u32 = voival->int32.u32; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_INT_S64: if (VSD(int64, vsd)->s64 < voival->int64.s64) { VSD(int64, vsd)->s64 = voival->int64.s64; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_INT_U64: if (VSD(int64, vsd)->u64 < voival->int64.u64) { VSD(int64, vsd)->u64 = voival->int64.u64; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_INT_SLONG: if (VSD(intlong, vsd)->slong < voival->intlong.slong) { VSD(intlong, vsd)->slong = voival->intlong.slong; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_INT_ULONG: if (VSD(intlong, vsd)->ulong < voival->intlong.ulong) { VSD(intlong, vsd)->ulong = voival->intlong.ulong; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_Q_S32: if (Q_QLTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) && (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32, voival->q32.sq32)))) { vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_Q_U32: if (Q_QLTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) && (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32, voival->q32.uq32)))) { vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_Q_S64: if (Q_QLTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) && (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64, voival->q64.sq64)))) { vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_Q_U64: if (Q_QLTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) && (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64, voival->q64.uq64)))) { vs->flags |= VS_VSDVALID; } break; default: error = EINVAL; break; } return (error); } static inline int stats_v1_voi_update_min(enum vsd_dtype voi_dtype __unused, struct voistatdata 
*voival, struct voistat *vs, void *vsd) { int error; KASSERT(vs->dtype < VSD_NUM_DTYPES, ("Unknown VSD dtype %d", vs->dtype)); error = 0; switch (vs->dtype) { case VSD_DTYPE_INT_S32: if (VSD(int32, vsd)->s32 > voival->int32.s32) { VSD(int32, vsd)->s32 = voival->int32.s32; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_INT_U32: if (VSD(int32, vsd)->u32 > voival->int32.u32) { VSD(int32, vsd)->u32 = voival->int32.u32; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_INT_S64: if (VSD(int64, vsd)->s64 > voival->int64.s64) { VSD(int64, vsd)->s64 = voival->int64.s64; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_INT_U64: if (VSD(int64, vsd)->u64 > voival->int64.u64) { VSD(int64, vsd)->u64 = voival->int64.u64; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_INT_SLONG: if (VSD(intlong, vsd)->slong > voival->intlong.slong) { VSD(intlong, vsd)->slong = voival->intlong.slong; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_INT_ULONG: if (VSD(intlong, vsd)->ulong > voival->intlong.ulong) { VSD(intlong, vsd)->ulong = voival->intlong.ulong; vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_Q_S32: if (Q_QGTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) && (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32, voival->q32.sq32)))) { vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_Q_U32: if (Q_QGTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) && (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32, voival->q32.uq32)))) { vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_Q_S64: if (Q_QGTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) && (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64, voival->q64.sq64)))) { vs->flags |= VS_VSDVALID; } break; case VSD_DTYPE_Q_U64: if (Q_QGTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) && (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64, voival->q64.uq64)))) { vs->flags |= VS_VSDVALID; } break; default: error = EINVAL; break; } return (error); } static inline int stats_v1_voi_update_sum(enum vsd_dtype voi_dtype __unused, struct voistatdata *voival, struct voistat *vs, void *vsd) { int error; KASSERT(vs->dtype < VSD_NUM_DTYPES, ("Unknown VSD dtype %d", vs->dtype)); error = 0; switch (vs->dtype) { case VSD_DTYPE_INT_S32: VSD(int32, vsd)->s32 += voival->int32.s32; break; case VSD_DTYPE_INT_U32: VSD(int32, vsd)->u32 += voival->int32.u32; break; case VSD_DTYPE_INT_S64: VSD(int64, vsd)->s64 += voival->int64.s64; break; case VSD_DTYPE_INT_U64: VSD(int64, vsd)->u64 += voival->int64.u64; break; case VSD_DTYPE_INT_SLONG: VSD(intlong, vsd)->slong += voival->intlong.slong; break; case VSD_DTYPE_INT_ULONG: VSD(intlong, vsd)->ulong += voival->intlong.ulong; break; case VSD_DTYPE_Q_S32: error = Q_QADDQ(&VSD(q32, vsd)->sq32, voival->q32.sq32); break; case VSD_DTYPE_Q_U32: error = Q_QADDQ(&VSD(q32, vsd)->uq32, voival->q32.uq32); break; case VSD_DTYPE_Q_S64: error = Q_QADDQ(&VSD(q64, vsd)->sq64, voival->q64.sq64); break; case VSD_DTYPE_Q_U64: error = Q_QADDQ(&VSD(q64, vsd)->uq64, voival->q64.uq64); break; default: error = EINVAL; break; } if (!error) vs->flags |= VS_VSDVALID; return (error); } static inline int stats_v1_voi_update_hist(enum vsd_dtype voi_dtype, struct voistatdata *voival, struct voistat *vs, struct voistatdata_hist *hist) { struct voistatdata_numeric *bkt_lb, *bkt_ub; uint64_t *oob64, *cnt64; uint32_t *oob32, *cnt32; int error, i, found, is32bit, has_ub, eq_only; error = 0; switch (vs->dtype) { case VSD_DTYPE_CRHIST32: i = HIST_VSDSZ2NBKTS(crhist32, vs->dsz); is32bit = 1; has_ub = eq_only = 0; oob32 = &VSD(crhist32, hist)->oob; break; case VSD_DTYPE_DRHIST32: i = HIST_VSDSZ2NBKTS(drhist32, vs->dsz); 
is32bit = has_ub = 1; eq_only = 0; oob32 = &VSD(drhist32, hist)->oob; break; case VSD_DTYPE_DVHIST32: i = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz); is32bit = eq_only = 1; has_ub = 0; oob32 = &VSD(dvhist32, hist)->oob; break; case VSD_DTYPE_CRHIST64: i = HIST_VSDSZ2NBKTS(crhist64, vs->dsz); is32bit = has_ub = eq_only = 0; oob64 = &VSD(crhist64, hist)->oob; break; case VSD_DTYPE_DRHIST64: i = HIST_VSDSZ2NBKTS(drhist64, vs->dsz); is32bit = eq_only = 0; has_ub = 1; oob64 = &VSD(drhist64, hist)->oob; break; case VSD_DTYPE_DVHIST64: i = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz); is32bit = has_ub = 0; eq_only = 1; oob64 = &VSD(dvhist64, hist)->oob; break; default: return (EINVAL); } i--; /* Adjust for 0-based array index. */ /* XXXLAS: Should probably use a better bucket search algorithm. ARB? */ for (found = 0; i >= 0 && !found; i--) { switch (vs->dtype) { case VSD_DTYPE_CRHIST32: bkt_lb = &VSD(crhist32, hist)->bkts[i].lb; cnt32 = &VSD(crhist32, hist)->bkts[i].cnt; break; case VSD_DTYPE_DRHIST32: bkt_lb = &VSD(drhist32, hist)->bkts[i].lb; bkt_ub = &VSD(drhist32, hist)->bkts[i].ub; cnt32 = &VSD(drhist32, hist)->bkts[i].cnt; break; case VSD_DTYPE_DVHIST32: bkt_lb = &VSD(dvhist32, hist)->bkts[i].val; cnt32 = &VSD(dvhist32, hist)->bkts[i].cnt; break; case VSD_DTYPE_CRHIST64: bkt_lb = &VSD(crhist64, hist)->bkts[i].lb; cnt64 = &VSD(crhist64, hist)->bkts[i].cnt; break; case VSD_DTYPE_DRHIST64: bkt_lb = &VSD(drhist64, hist)->bkts[i].lb; bkt_ub = &VSD(drhist64, hist)->bkts[i].ub; cnt64 = &VSD(drhist64, hist)->bkts[i].cnt; break; case VSD_DTYPE_DVHIST64: bkt_lb = &VSD(dvhist64, hist)->bkts[i].val; cnt64 = &VSD(dvhist64, hist)->bkts[i].cnt; break; default: return (EINVAL); } switch (voi_dtype) { case VSD_DTYPE_INT_S32: if (voival->int32.s32 >= bkt_lb->int32.s32) { if ((eq_only && voival->int32.s32 == bkt_lb->int32.s32) || (!eq_only && (!has_ub || voival->int32.s32 < bkt_ub->int32.s32))) found = 1; } break; case VSD_DTYPE_INT_U32: if (voival->int32.u32 >= bkt_lb->int32.u32) { if ((eq_only && voival->int32.u32 == bkt_lb->int32.u32) || (!eq_only && (!has_ub || voival->int32.u32 < bkt_ub->int32.u32))) found = 1; } break; case VSD_DTYPE_INT_S64: if (voival->int64.s64 >= bkt_lb->int64.s64) if ((eq_only && voival->int64.s64 == bkt_lb->int64.s64) || (!eq_only && (!has_ub || voival->int64.s64 < bkt_ub->int64.s64))) found = 1; break; case VSD_DTYPE_INT_U64: if (voival->int64.u64 >= bkt_lb->int64.u64) if ((eq_only && voival->int64.u64 == bkt_lb->int64.u64) || (!eq_only && (!has_ub || voival->int64.u64 < bkt_ub->int64.u64))) found = 1; break; case VSD_DTYPE_INT_SLONG: if (voival->intlong.slong >= bkt_lb->intlong.slong) if ((eq_only && voival->intlong.slong == bkt_lb->intlong.slong) || (!eq_only && (!has_ub || voival->intlong.slong < bkt_ub->intlong.slong))) found = 1; break; case VSD_DTYPE_INT_ULONG: if (voival->intlong.ulong >= bkt_lb->intlong.ulong) if ((eq_only && voival->intlong.ulong == bkt_lb->intlong.ulong) || (!eq_only && (!has_ub || voival->intlong.ulong < bkt_ub->intlong.ulong))) found = 1; break; case VSD_DTYPE_Q_S32: if (Q_QGEQ(voival->q32.sq32, bkt_lb->q32.sq32)) if ((eq_only && Q_QEQ(voival->q32.sq32, bkt_lb->q32.sq32)) || (!eq_only && (!has_ub || Q_QLTQ(voival->q32.sq32, bkt_ub->q32.sq32)))) found = 1; break; case VSD_DTYPE_Q_U32: if (Q_QGEQ(voival->q32.uq32, bkt_lb->q32.uq32)) if ((eq_only && Q_QEQ(voival->q32.uq32, bkt_lb->q32.uq32)) || (!eq_only && (!has_ub || Q_QLTQ(voival->q32.uq32, bkt_ub->q32.uq32)))) found = 1; break; case VSD_DTYPE_Q_S64: if (Q_QGEQ(voival->q64.sq64, bkt_lb->q64.sq64)) if ((eq_only && 
Q_QEQ(voival->q64.sq64, bkt_lb->q64.sq64)) || (!eq_only && (!has_ub || Q_QLTQ(voival->q64.sq64, bkt_ub->q64.sq64)))) found = 1; break; case VSD_DTYPE_Q_U64: if (Q_QGEQ(voival->q64.uq64, bkt_lb->q64.uq64)) if ((eq_only && Q_QEQ(voival->q64.uq64, bkt_lb->q64.uq64)) || (!eq_only && (!has_ub || Q_QLTQ(voival->q64.uq64, bkt_ub->q64.uq64)))) found = 1; break; default: break; } } if (found) { if (is32bit) *cnt32 += 1; else *cnt64 += 1; } else { if (is32bit) *oob32 += 1; else *oob64 += 1; } vs->flags |= VS_VSDVALID; return (error); } static inline int stats_v1_vsd_tdgst_compress(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst, int attempt) { struct ctdth32 *ctd32tree; struct ctdth64 *ctd64tree; struct voistatdata_tdgstctd32 *ctd32; struct voistatdata_tdgstctd64 *ctd64; uint64_t ebits, idxmask; uint32_t bitsperidx, nebits; int error, idx, is32bit, maxctds, remctds, tmperr; error = 0; switch (vs_dtype) { case VSD_DTYPE_TDGSTCLUST32: ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree; if (!ARB_FULL(ctd32tree)) return (0); VSD(tdgstclust32, tdgst)->compcnt++; maxctds = remctds = ARB_MAXNODES(ctd32tree); ARB_RESET_TREE(ctd32tree, ctdth32, maxctds); VSD(tdgstclust32, tdgst)->smplcnt = 0; is32bit = 1; ctd64tree = NULL; ctd64 = NULL; #ifdef DIAGNOSTIC RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree); #endif break; case VSD_DTYPE_TDGSTCLUST64: ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree; if (!ARB_FULL(ctd64tree)) return (0); VSD(tdgstclust64, tdgst)->compcnt++; maxctds = remctds = ARB_MAXNODES(ctd64tree); ARB_RESET_TREE(ctd64tree, ctdth64, maxctds); VSD(tdgstclust64, tdgst)->smplcnt = 0; is32bit = 0; ctd32tree = NULL; ctd32 = NULL; #ifdef DIAGNOSTIC RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree); #endif break; default: return (EINVAL); } /* * Rebuild the t-digest ARB by pseudorandomly selecting centroids and * re-inserting the mu/cnt of each as a value and corresponding weight. */ /* * XXXCEM: random(9) is currently rand(3), not random(3). rand(3) * RAND_MAX happens to be approximately 31 bits (range [0, * 0x7ffffffd]), so the math kinda works out. When/if this portion of * the code is compiled in userspace, it gets the random(3) behavior, * which has expected range [0, 0x7fffffff]. */ #define bitsperrand 31 ebits = 0; nebits = 0; bitsperidx = fls(maxctds); KASSERT(bitsperidx <= sizeof(ebits) << 3, ("%s: bitsperidx=%d, ebits=%d", __func__, bitsperidx, (int)(sizeof(ebits) << 3))); idxmask = (UINT64_C(1) << bitsperidx) - 1; /* Initialise the free list with randomised centroid indices. */ for (; remctds > 0; remctds--) { while (nebits < bitsperidx) { ebits |= ((uint64_t)random()) << nebits; nebits += bitsperrand; if (nebits > (sizeof(ebits) << 3)) nebits = sizeof(ebits) << 3; } idx = ebits & idxmask; nebits -= bitsperidx; ebits >>= bitsperidx; /* * Select the next centroid to put on the ARB free list. We * start with the centroid at our randomly selected array index, * and work our way forwards until finding one (the latter * aspect reduces re-insertion randomness, but is good enough). */ do { if (idx >= maxctds) idx %= maxctds; if (is32bit) ctd32 = ARB_NODE(ctd32tree, idx); else ctd64 = ARB_NODE(ctd64tree, idx); } while ((is32bit ? ARB_ISFREE(ctd32, ctdlnk) : ARB_ISFREE(ctd64, ctdlnk)) && ++idx); /* Put the centroid on the ARB free list. */ if (is32bit) ARB_RETURNFREE(ctd32tree, ctd32, ctdlnk); else ARB_RETURNFREE(ctd64tree, ctd64, ctdlnk); } /* * The free list now contains the randomised indices of every centroid. * Walk the free list from start to end, re-inserting each centroid's * mu/cnt. 
The tdgst_add() call may or may not consume the free centroid * we re-insert values from during each loop iteration, so we must latch * the index of the next free list centroid before the re-insertion * call. The previous loop above should have left the centroid pointer * pointing to the element at the head of the free list. */ KASSERT((is32bit ? ARB_FREEIDX(ctd32tree) == ARB_SELFIDX(ctd32tree, ctd32) : ARB_FREEIDX(ctd64tree) == ARB_SELFIDX(ctd64tree, ctd64)), ("%s: t-digest ARB@%p free list bug", __func__, (is32bit ? (void *)ctd32tree : (void *)ctd64tree))); remctds = maxctds; while ((is32bit ? ctd32 != NULL : ctd64 != NULL)) { tmperr = 0; if (is32bit) { s64q_t x; idx = ARB_NEXTFREEIDX(ctd32, ctdlnk); /* Cloning a s32q_t into a s64q_t should never fail. */ tmperr = Q_QCLONEQ(&x, ctd32->mu); tmperr = tmperr ? tmperr : stats_v1_vsd_tdgst_add( vs_dtype, tdgst, x, ctd32->cnt, attempt); ctd32 = ARB_NODE(ctd32tree, idx); KASSERT(ctd32 == NULL || ARB_ISFREE(ctd32, ctdlnk), ("%s: t-digest ARB@%p free list bug", __func__, ctd32tree)); } else { idx = ARB_NEXTFREEIDX(ctd64, ctdlnk); tmperr = stats_v1_vsd_tdgst_add(vs_dtype, tdgst, ctd64->mu, ctd64->cnt, attempt); ctd64 = ARB_NODE(ctd64tree, idx); KASSERT(ctd64 == NULL || ARB_ISFREE(ctd64, ctdlnk), ("%s: t-digest ARB@%p free list bug", __func__, ctd64tree)); } /* * This process should not produce errors, bugs notwithstanding. * Just in case, latch any errors and attempt all re-insertions. */ error = tmperr ? tmperr : error; remctds--; } KASSERT(remctds == 0, ("%s: t-digest ARB@%p free list bug", __func__, (is32bit ? (void *)ctd32tree : (void *)ctd64tree))); return (error); } static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt) { #ifdef DIAGNOSTIC char qstr[Q_MAXSTRLEN(x, 10)]; #endif struct ctdth32 *ctd32tree; struct ctdth64 *ctd64tree; void *closest, *cur, *lb, *ub; struct voistatdata_tdgstctd32 *ctd32; struct voistatdata_tdgstctd64 *ctd64; uint64_t cnt, smplcnt, sum, tmpsum; s64q_t k, minz, q, z; int error, is32bit, n; error = 0; minz = Q_INI(&z, 0, 0, Q_NFBITS(x)); switch (vs_dtype) { case VSD_DTYPE_TDGSTCLUST32: if ((UINT32_MAX - weight) < VSD(tdgstclust32, tdgst)->smplcnt) error = EOVERFLOW; smplcnt = VSD(tdgstclust32, tdgst)->smplcnt; ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree; is32bit = 1; ctd64tree = NULL; ctd64 = NULL; break; case VSD_DTYPE_TDGSTCLUST64: if ((UINT64_MAX - weight) < VSD(tdgstclust64, tdgst)->smplcnt) error = EOVERFLOW; smplcnt = VSD(tdgstclust64, tdgst)->smplcnt; ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree; is32bit = 0; ctd32tree = NULL; ctd32 = NULL; break; default: error = EINVAL; break; } if (error) return (error); /* * Inspired by Ted Dunning's AVLTreeDigest.java */ do { #if defined(DIAGNOSTIC) KASSERT(attempt < 5, ("%s: Too many attempts", __func__)); #endif if (attempt >= 5) return (EAGAIN); Q_SIFVAL(minz, Q_IFMAXVAL(minz)); closest = ub = NULL; sum = tmpsum = 0; if (is32bit) lb = cur = (void *)(ctd32 = ARB_MIN(ctdth32, ctd32tree)); else lb = cur = (void *)(ctd64 = ARB_MIN(ctdth64, ctd64tree)); if (lb == NULL) /* Empty tree. */ lb = (is32bit ? (void *)ARB_ROOT(ctd32tree) : (void *)ARB_ROOT(ctd64tree)); /* * Find the set of centroids with minimum distance to x and * compute the sum of counts for all centroids with mean less * than the first centroid in the set. */ for (; cur != NULL; cur = (is32bit ? 
(void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) : (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) { if (is32bit) { cnt = ctd32->cnt; KASSERT(Q_PRECEQ(ctd32->mu, x), ("%s: Q_RELPREC(mu,x)=%d", __func__, Q_RELPREC(ctd32->mu, x))); /* Ok to assign as both have same precision. */ z = ctd32->mu; } else { cnt = ctd64->cnt; KASSERT(Q_PRECEQ(ctd64->mu, x), ("%s: Q_RELPREC(mu,x)=%d", __func__, Q_RELPREC(ctd64->mu, x))); /* Ok to assign as both have same precision. */ z = ctd64->mu; } error = Q_QSUBQ(&z, x); #if defined(DIAGNOSTIC) KASSERT(!error, ("%s: unexpected error %d", __func__, error)); #endif if (error) return (error); z = Q_QABS(z); if (Q_QLTQ(z, minz)) { minz = z; lb = cur; sum = tmpsum; tmpsum += cnt; } else if (Q_QGTQ(z, minz)) { ub = cur; break; } } cur = (is32bit ? (void *)(ctd32 = (struct voistatdata_tdgstctd32 *)lb) : (void *)(ctd64 = (struct voistatdata_tdgstctd64 *)lb)); for (n = 0; cur != ub; cur = (is32bit ? (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) : (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) { if (is32bit) cnt = ctd32->cnt; else cnt = ctd64->cnt; q = Q_CTRLINI(16); if (smplcnt == 1) error = Q_QFRACI(&q, 1, 2); else /* [ sum + ((cnt - 1) / 2) ] / (smplcnt - 1) */ error = Q_QFRACI(&q, (sum << 1) + cnt - 1, (smplcnt - 1) << 1); k = q; /* k = q x 4 x samplcnt x attempt */ error |= Q_QMULI(&k, 4 * smplcnt * attempt); /* k = k x (1 - q) */ error |= Q_QSUBI(&q, 1); q = Q_QABS(q); error |= Q_QMULQ(&k, q); #if defined(DIAGNOSTIC) #if !defined(_KERNEL) double q_dbl, k_dbl, q2d, k2d; q2d = Q_Q2D(q); k2d = Q_Q2D(k); q_dbl = smplcnt == 1 ? 0.5 : (sum + ((cnt - 1) / 2.0)) / (double)(smplcnt - 1); k_dbl = 4 * smplcnt * q_dbl * (1.0 - q_dbl) * attempt; /* * If the difference between q and q_dbl is greater than * the fractional precision of q, something is off. * NB: q is holding the value of 1 - q */ q_dbl = 1.0 - q_dbl; KASSERT((q_dbl > q2d ? q_dbl - q2d : q2d - q_dbl) < (1.05 * ((double)1 / (double)(1ULL << Q_NFBITS(q)))), ("Q-type q bad precision")); KASSERT((k_dbl > k2d ? k_dbl - k2d : k2d - k_dbl) < 1.0 + (0.01 * smplcnt), ("Q-type k bad precision")); #endif /* !_KERNEL */ KASSERT(!error, ("%s: unexpected error %d", __func__, error)); #endif /* DIAGNOSTIC */ if (error) return (error); if ((is32bit && ((ctd32->cnt + weight) <= (uint64_t)Q_GIVAL(k))) || (!is32bit && ((ctd64->cnt + weight) <= (uint64_t)Q_GIVAL(k)))) { n++; /* random() produces 31 bits. */ if (random() < (INT32_MAX / n)) closest = cur; } sum += cnt; } } while (closest == NULL && (is32bit ? ARB_FULL(ctd32tree) : ARB_FULL(ctd64tree)) && (error = stats_v1_vsd_tdgst_compress(vs_dtype, tdgst, attempt++)) == 0); if (error) return (error); if (closest != NULL) { /* Merge with an existing centroid. */ if (is32bit) { ctd32 = (struct voistatdata_tdgstctd32 *)closest; error = Q_QSUBQ(&x, ctd32->mu); /* * The following calculation "x / (cnt + weight)" * computes the amount by which to adjust the centroid's * mu value in order to merge in the VOI sample. * * It can underflow (Q_QDIVI() returns ERANGE) when the * user centroids' fractional precision (which is * inherited by 'x') is too low to represent the result. * * A sophisticated approach to dealing with this issue * would minimise accumulation of error by tracking * underflow per centroid and making an adjustment when * a LSB's worth of underflow has accumulated. * * A simpler approach is to let the result underflow * i.e. 
merge the VOI sample into the centroid without * adjusting the centroid's mu, and rely on the user to * specify their t-digest with sufficient centroid * fractional precision such that the accumulation of * error from multiple underflows is of no material * consequence to the centroid's final value of mu. * * For the moment, the latter approach is employed by * simply ignoring ERANGE here. * * XXXLAS: Per-centroid underflow tracking is likely too * onerous, but it probably makes sense to accumulate a * single underflow error variable across all centroids * and report it as part of the digest to provide * additional visibility into the digest's fidelity. */ error = error ? error : Q_QDIVI(&x, ctd32->cnt + weight); if ((error && error != ERANGE) || (error = Q_QADDQ(&ctd32->mu, x))) { #ifdef DIAGNOSTIC KASSERT(!error, ("%s: unexpected error %d", __func__, error)); #endif return (error); } ctd32->cnt += weight; error = ARB_REINSERT(ctdth32, ctd32tree, ctd32) == NULL ? 0 : EALREADY; #ifdef DIAGNOSTIC RB_REINSERT(rbctdth32, &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32); #endif } else { ctd64 = (struct voistatdata_tdgstctd64 *)closest; error = Q_QSUBQ(&x, ctd64->mu); error = error ? error : Q_QDIVI(&x, ctd64->cnt + weight); /* Refer to is32bit ERANGE discussion above. */ if ((error && error != ERANGE) || (error = Q_QADDQ(&ctd64->mu, x))) { KASSERT(!error, ("%s: unexpected error %d", __func__, error)); return (error); } ctd64->cnt += weight; error = ARB_REINSERT(ctdth64, ctd64tree, ctd64) == NULL ? 0 : EALREADY; #ifdef DIAGNOSTIC RB_REINSERT(rbctdth64, &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64); #endif } } else { /* * Add a new centroid. If digest compression is working * correctly, there should always be at least one free. */ if (is32bit) { ctd32 = ARB_GETFREE(ctd32tree, ctdlnk); #ifdef DIAGNOSTIC KASSERT(ctd32 != NULL, ("%s: t-digest@%p has no free centroids", __func__, tdgst)); #endif if (ctd32 == NULL) return (EAGAIN); if ((error = Q_QCPYVALQ(&ctd32->mu, x))) return (error); ctd32->cnt = weight; error = ARB_INSERT(ctdth32, ctd32tree, ctd32) == NULL ? 0 : EALREADY; #ifdef DIAGNOSTIC RB_INSERT(rbctdth32, &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32); #endif } else { ctd64 = ARB_GETFREE(ctd64tree, ctdlnk); #ifdef DIAGNOSTIC KASSERT(ctd64 != NULL, ("%s: t-digest@%p has no free centroids", __func__, tdgst)); #endif if (ctd64 == NULL) /* Should not happen. */ return (EAGAIN); /* Direct assignment ok as both have same type/prec. */ ctd64->mu = x; ctd64->cnt = weight; error = ARB_INSERT(ctdth64, ctd64tree, ctd64) == NULL ? 0 : EALREADY; #ifdef DIAGNOSTIC RB_INSERT(rbctdth64, &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64); #endif } } if (is32bit) VSD(tdgstclust32, tdgst)->smplcnt += weight; else { VSD(tdgstclust64, tdgst)->smplcnt += weight; #ifdef DIAGNOSTIC struct rbctdth64 *rbctdtree = &VSD(tdgstclust64, tdgst)->rbctdtree; struct voistatdata_tdgstctd64 *rbctd64; int i = 0; ARB_FOREACH(ctd64, ctdth64, ctd64tree) { rbctd64 = (i == 0 ? 
RB_MIN(rbctdth64, rbctdtree) : RB_NEXT(rbctdth64, rbctdtree, rbctd64)); if (i >= ARB_CURNODES(ctd64tree) || ctd64 != rbctd64 || ARB_MIN(ctdth64, ctd64tree) != RB_MIN(rbctdth64, rbctdtree) || ARB_MAX(ctdth64, ctd64tree) != RB_MAX(rbctdth64, rbctdtree) || ARB_LEFTIDX(ctd64, ctdlnk) != ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk)) || ARB_RIGHTIDX(ctd64, ctdlnk) != ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk)) || ARB_PARENTIDX(ctd64, ctdlnk) != ARB_SELFIDX(ctd64tree, RB_PARENT(rbctd64, rblnk))) { Q_TOSTR(ctd64->mu, -1, 10, qstr, sizeof(qstr)); printf("ARB ctd=%3d p=%3d l=%3d r=%3d c=%2d " "mu=%s\n", (int)ARB_SELFIDX(ctd64tree, ctd64), ARB_PARENTIDX(ctd64, ctdlnk), ARB_LEFTIDX(ctd64, ctdlnk), ARB_RIGHTIDX(ctd64, ctdlnk), ARB_COLOR(ctd64, ctdlnk), qstr); Q_TOSTR(rbctd64->mu, -1, 10, qstr, sizeof(qstr)); struct voistatdata_tdgstctd64 *parent; parent = RB_PARENT(rbctd64, rblnk); int rb_color = parent == NULL ? 0 : RB_LEFT(parent, rblnk) == rbctd64 ? - (RB_BITS(parent, rblnk) & RB_RED_L) != 0 : - (RB_BITS(parent, rblnk) & RB_RED_R) != 0; + (_RB_BITSUP(parent, rblnk) & _RB_L) != 0 : + (_RB_BITSUP(parent, rblnk) & _RB_R) != 0; printf(" RB ctd=%3d p=%3d l=%3d r=%3d c=%2d " "mu=%s\n", (int)ARB_SELFIDX(ctd64tree, rbctd64), (int)ARB_SELFIDX(ctd64tree, RB_PARENT(rbctd64, rblnk)), (int)ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk)), (int)ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk)), rb_color, qstr); panic("RB@%p and ARB@%p trees differ\n", rbctdtree, ctd64tree); } i++; } #endif /* DIAGNOSTIC */ } return (error); } static inline int stats_v1_voi_update_tdgst(enum vsd_dtype voi_dtype, struct voistatdata *voival, struct voistat *vs, struct voistatdata_tdgst *tdgst) { s64q_t x; int error; error = 0; switch (vs->dtype) { case VSD_DTYPE_TDGSTCLUST32: /* Use same precision as the user's centroids. */ Q_INI(&x, 0, 0, Q_NFBITS( ARB_CNODE(&VSD(tdgstclust32, tdgst)->ctdtree, 0)->mu)); break; case VSD_DTYPE_TDGSTCLUST64: /* Use same precision as the user's centroids. */ Q_INI(&x, 0, 0, Q_NFBITS( ARB_CNODE(&VSD(tdgstclust64, tdgst)->ctdtree, 0)->mu)); break; default: KASSERT(vs->dtype == VSD_DTYPE_TDGSTCLUST32 || vs->dtype == VSD_DTYPE_TDGSTCLUST64, ("%s: vs->dtype(%d) != VSD_DTYPE_TDGSTCLUST<32|64>", __func__, vs->dtype)); return (EINVAL); } /* * XXXLAS: Should have both a signed and unsigned 'x' variable to avoid * returning EOVERFLOW if the voival would have fit in a u64q_t. 
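 * e.g. a voival->int64.u64 value with its most significant bit set has no
 * representation in the signed 'x', whatever fractional precision the
 * user's centroids select.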
*/ switch (voi_dtype) { case VSD_DTYPE_INT_S32: error = Q_QCPYVALI(&x, voival->int32.s32); break; case VSD_DTYPE_INT_U32: error = Q_QCPYVALI(&x, voival->int32.u32); break; case VSD_DTYPE_INT_S64: error = Q_QCPYVALI(&x, voival->int64.s64); break; case VSD_DTYPE_INT_U64: error = Q_QCPYVALI(&x, voival->int64.u64); break; case VSD_DTYPE_INT_SLONG: error = Q_QCPYVALI(&x, voival->intlong.slong); break; case VSD_DTYPE_INT_ULONG: error = Q_QCPYVALI(&x, voival->intlong.ulong); break; case VSD_DTYPE_Q_S32: error = Q_QCPYVALQ(&x, voival->q32.sq32); break; case VSD_DTYPE_Q_U32: error = Q_QCPYVALQ(&x, voival->q32.uq32); break; case VSD_DTYPE_Q_S64: error = Q_QCPYVALQ(&x, voival->q64.sq64); break; case VSD_DTYPE_Q_U64: error = Q_QCPYVALQ(&x, voival->q64.uq64); break; default: error = EINVAL; break; } if (error || (error = stats_v1_vsd_tdgst_add(vs->dtype, tdgst, x, 1, 1))) return (error); vs->flags |= VS_VSDVALID; return (0); } int stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id, enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags) { struct voi *v; struct voistat *vs; void *statevsd, *vsd; int error, i, tmperr; error = 0; if (sb == NULL || sb->abi != STATS_ABI_V1 || voi_id >= NVOIS(sb) || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES || voival == NULL) return (EINVAL); v = &sb->vois[voi_id]; if (voi_dtype != v->dtype || v->id < 0 || ((flags & SB_VOI_RELUPDATE) && !(v->flags & VOI_REQSTATE))) return (EINVAL); vs = BLOB_OFFSET(sb, v->stats_off); if (v->flags & VOI_REQSTATE) statevsd = BLOB_OFFSET(sb, vs->data_off); else statevsd = NULL; if (flags & SB_VOI_RELUPDATE) { switch (voi_dtype) { case VSD_DTYPE_INT_S32: voival->int32.s32 += VSD(voistate, statevsd)->prev.int32.s32; break; case VSD_DTYPE_INT_U32: voival->int32.u32 += VSD(voistate, statevsd)->prev.int32.u32; break; case VSD_DTYPE_INT_S64: voival->int64.s64 += VSD(voistate, statevsd)->prev.int64.s64; break; case VSD_DTYPE_INT_U64: voival->int64.u64 += VSD(voistate, statevsd)->prev.int64.u64; break; case VSD_DTYPE_INT_SLONG: voival->intlong.slong += VSD(voistate, statevsd)->prev.intlong.slong; break; case VSD_DTYPE_INT_ULONG: voival->intlong.ulong += VSD(voistate, statevsd)->prev.intlong.ulong; break; case VSD_DTYPE_Q_S32: error = Q_QADDQ(&voival->q32.sq32, VSD(voistate, statevsd)->prev.q32.sq32); break; case VSD_DTYPE_Q_U32: error = Q_QADDQ(&voival->q32.uq32, VSD(voistate, statevsd)->prev.q32.uq32); break; case VSD_DTYPE_Q_S64: error = Q_QADDQ(&voival->q64.sq64, VSD(voistate, statevsd)->prev.q64.sq64); break; case VSD_DTYPE_Q_U64: error = Q_QADDQ(&voival->q64.uq64, VSD(voistate, statevsd)->prev.q64.uq64); break; default: KASSERT(0, ("Unknown VOI data type %d", voi_dtype)); break; } } if (error) return (error); for (i = v->voistatmaxid; i > 0; i--) { vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[i]; if (vs->stype < 0) continue; vsd = BLOB_OFFSET(sb, vs->data_off); switch (vs->stype) { case VS_STYPE_MAX: tmperr = stats_v1_voi_update_max(voi_dtype, voival, vs, vsd); break; case VS_STYPE_MIN: tmperr = stats_v1_voi_update_min(voi_dtype, voival, vs, vsd); break; case VS_STYPE_SUM: tmperr = stats_v1_voi_update_sum(voi_dtype, voival, vs, vsd); break; case VS_STYPE_HIST: tmperr = stats_v1_voi_update_hist(voi_dtype, voival, vs, vsd); break; case VS_STYPE_TDGST: tmperr = stats_v1_voi_update_tdgst(voi_dtype, voival, vs, vsd); break; default: KASSERT(0, ("Unknown VOI stat type %d", vs->stype)); break; } if (tmperr) { error = tmperr; VS_INCERRS(vs); } } if (statevsd) { switch (voi_dtype) { case VSD_DTYPE_INT_S32: VSD(voistate, 
statevsd)->prev.int32.s32 = voival->int32.s32; break; case VSD_DTYPE_INT_U32: VSD(voistate, statevsd)->prev.int32.u32 = voival->int32.u32; break; case VSD_DTYPE_INT_S64: VSD(voistate, statevsd)->prev.int64.s64 = voival->int64.s64; break; case VSD_DTYPE_INT_U64: VSD(voistate, statevsd)->prev.int64.u64 = voival->int64.u64; break; case VSD_DTYPE_INT_SLONG: VSD(voistate, statevsd)->prev.intlong.slong = voival->intlong.slong; break; case VSD_DTYPE_INT_ULONG: VSD(voistate, statevsd)->prev.intlong.ulong = voival->intlong.ulong; break; case VSD_DTYPE_Q_S32: error = Q_QCPYVALQ( &VSD(voistate, statevsd)->prev.q32.sq32, voival->q32.sq32); break; case VSD_DTYPE_Q_U32: error = Q_QCPYVALQ( &VSD(voistate, statevsd)->prev.q32.uq32, voival->q32.uq32); break; case VSD_DTYPE_Q_S64: error = Q_QCPYVALQ( &VSD(voistate, statevsd)->prev.q64.sq64, voival->q64.sq64); break; case VSD_DTYPE_Q_U64: error = Q_QCPYVALQ( &VSD(voistate, statevsd)->prev.q64.uq64, voival->q64.uq64); break; default: KASSERT(0, ("Unknown VOI data type %d", voi_dtype)); break; } } return (error); } #ifdef _KERNEL static void stats_init(void *arg) { } SYSINIT(stats, SI_SUB_KDTRACE, SI_ORDER_FIRST, stats_init, NULL); /* * Sysctl handler to display the list of available stats templates. */ static int stats_tpl_list_available(SYSCTL_HANDLER_ARGS) { struct sbuf *s; int err, i; err = 0; /* We can tolerate ntpl being stale, so do not take the lock. */ s = sbuf_new(NULL, NULL, /* +1 per tpl for , */ ntpl * (STATS_TPL_MAX_STR_SPEC_LEN + 1), SBUF_FIXEDLEN); if (s == NULL) return (ENOMEM); TPL_LIST_RLOCK(); for (i = 0; i < ntpl; i++) { err = sbuf_printf(s, "%s\"%s\":%u", i ? "," : "", tpllist[i]->mb->tplname, tpllist[i]->mb->tplhash); if (err) { /* Sbuf overflow condition. */ err = EOVERFLOW; break; } } TPL_LIST_RUNLOCK(); if (!err) { sbuf_finish(s); err = sysctl_handle_string(oidp, sbuf_data(s), 0, req); } sbuf_delete(s); return (err); } /* * Called by subsystem-specific sysctls to report and/or parse the list of * templates being sampled and their sampling rates. A stats_tpl_sr_cb_t * conformant function pointer must be passed in as arg1, which is used to * interact with the subsystem's stats template sample rates list. If arg2 > 0, * a zero-initialised allocation of arg2-sized contextual memory is * heap-allocated and passed in to all subsystem callbacks made during the * operation of stats_tpl_sample_rates(). * * XXXLAS: Assumes templates are never removed, which is currently true but may * need to be reworked in future if dynamic template management becomes a * requirement e.g. to support kernel module based templates. */ int stats_tpl_sample_rates(SYSCTL_HANDLER_ARGS) { char kvpair_fmt[16], tplspec_fmt[16]; char tpl_spec[STATS_TPL_MAX_STR_SPEC_LEN]; char tpl_name[TPL_MAX_NAME_LEN + 2]; /* +2 for "" */ stats_tpl_sr_cb_t subsys_cb; void *subsys_ctx; char *buf, *new_rates_usr_str, *tpl_name_p; struct stats_tpl_sample_rate *rates; struct sbuf *s, _s; uint32_t cum_pct, pct, tpl_hash; int err, i, off, len, newlen, nrates; buf = NULL; rates = NULL; err = nrates = 0; subsys_cb = (stats_tpl_sr_cb_t)arg1; KASSERT(subsys_cb != NULL, ("%s: subsys_cb == arg1 == NULL", __func__)); if (arg2 > 0) subsys_ctx = malloc(arg2, M_TEMP, M_WAITOK | M_ZERO); else subsys_ctx = NULL; /* Grab current count of subsystem rates. 
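 * The unlocked count is only needed to size the transfer buffer and to
 * short-circuit the "no rates" case; the TPL_SR_RLOCKED_GET call below
 * re-reads it together with the rates list under the subsystem's lock.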
*/ err = subsys_cb(TPL_SR_UNLOCKED_GET, NULL, &nrates, subsys_ctx); if (err) goto done; /* +1 to ensure we can append '\0' post copyin, +5 per rate for =nnn, */ len = max(req->newlen + 1, nrates * (STATS_TPL_MAX_STR_SPEC_LEN + 5)); if (req->oldptr != NULL || req->newptr != NULL) buf = malloc(len, M_TEMP, M_WAITOK); if (req->oldptr != NULL) { if (nrates == 0) { /* No rates, so return an empty string via oldptr. */ err = SYSCTL_OUT(req, "", 1); if (err) goto done; goto process_new; } s = sbuf_new(&_s, buf, len, SBUF_FIXEDLEN | SBUF_INCLUDENUL); /* Grab locked count of, and ptr to, subsystem rates. */ err = subsys_cb(TPL_SR_RLOCKED_GET, &rates, &nrates, subsys_ctx); if (err) goto done; TPL_LIST_RLOCK(); for (i = 0; i < nrates && !err; i++) { err = sbuf_printf(s, "%s\"%s\":%u=%u", i ? "," : "", tpllist[rates[i].tpl_slot_id]->mb->tplname, tpllist[rates[i].tpl_slot_id]->mb->tplhash, rates[i].tpl_sample_pct); } TPL_LIST_RUNLOCK(); /* Tell subsystem that we're done with its rates list. */ err = subsys_cb(TPL_SR_RUNLOCK, &rates, &nrates, subsys_ctx); if (err) goto done; err = sbuf_finish(s); if (err) goto done; /* We lost a race for buf to be too small. */ /* Return the rendered string data via oldptr. */ err = SYSCTL_OUT(req, sbuf_data(s), sbuf_len(s)); } else { /* Return the upper bound size for buffer sizing requests. */ err = SYSCTL_OUT(req, NULL, len); } process_new: if (err || req->newptr == NULL) goto done; newlen = req->newlen - req->newidx; err = SYSCTL_IN(req, buf, newlen); if (err) goto done; /* * Initialise format strings at run time. * * Write the max template spec string length into the * template_spec=percent key-value pair parsing format string as: * " %[^=]=%u %n" * * Write the max template name string length into the tplname:tplhash * parsing format string as: * "%[^:]:%u" * * Subtract 1 for \0 appended by sscanf(). */ sprintf(kvpair_fmt, " %%%zu[^=]=%%u %%n", sizeof(tpl_spec) - 1); sprintf(tplspec_fmt, "%%%zu[^:]:%%u", sizeof(tpl_name) - 1); /* * Parse each CSV key-value pair specifying a template and its sample * percentage. Whitespace either side of a key-value pair is ignored. * Templates can be specified by name, hash, or name and hash per the * following formats (chars in [] are optional): * ["]["]= * :hash=pct * ["]["]:hash= */ cum_pct = nrates = 0; rates = NULL; buf[newlen] = '\0'; /* buf is at least newlen+1 in size. */ new_rates_usr_str = buf; while (isspace(*new_rates_usr_str)) new_rates_usr_str++; /* Skip leading whitespace. */ while (*new_rates_usr_str != '\0') { tpl_name_p = tpl_name; tpl_name[0] = '\0'; tpl_hash = 0; off = 0; /* * Parse key-value pair which must perform 2 conversions, then * parse the template spec to extract either name, hash, or name * and hash depending on the three possible spec formats. The * tplspec_fmt format specifier parses name or name and hash * template specs, while the ":%u" format specifier parses * hash-only template specs. If parsing is successfull, ensure * the cumulative sampling percentage does not exceed 100. */ err = EINVAL; if (2 != sscanf(new_rates_usr_str, kvpair_fmt, tpl_spec, &pct, &off)) break; if ((1 > sscanf(tpl_spec, tplspec_fmt, tpl_name, &tpl_hash)) && (1 != sscanf(tpl_spec, ":%u", &tpl_hash))) break; if ((cum_pct += pct) > 100) break; err = 0; /* Strip surrounding "" from template name if present. */ len = strlen(tpl_name); if (len > 0) { if (tpl_name[len - 1] == '"') tpl_name[--len] = '\0'; if (tpl_name[0] == '"') { tpl_name_p++; len--; } } rates = stats_realloc(rates, 0, /* oldsz is unused in kernel. 
*/ (nrates + 1) * sizeof(*rates), M_WAITOK); rates[nrates].tpl_slot_id = stats_tpl_fetch_allocid(len ? tpl_name_p : NULL, tpl_hash); if (rates[nrates].tpl_slot_id < 0) { err = -rates[nrates].tpl_slot_id; break; } rates[nrates].tpl_sample_pct = pct; nrates++; new_rates_usr_str += off; if (*new_rates_usr_str != ',') break; /* End-of-input or malformed. */ new_rates_usr_str++; /* Move past comma to next pair. */ } if (!err) { if ((new_rates_usr_str - buf) < newlen) { /* Entire input has not been consumed. */ err = EINVAL; } else { /* * Give subsystem the new rates. They'll return the * appropriate rates pointer for us to garbage collect. */ err = subsys_cb(TPL_SR_PUT, &rates, &nrates, subsys_ctx); } } stats_free(rates); done: free(buf, M_TEMP); free(subsys_ctx, M_TEMP); return (err); } SYSCTL_NODE(_kern, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "stats(9) MIB"); SYSCTL_PROC(_kern_stats, OID_AUTO, templates, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, stats_tpl_list_available, "A", "list the name/hash of all available stats(9) templates"); #else /* ! _KERNEL */ static void __attribute__ ((constructor)) stats_constructor(void) { pthread_rwlock_init(&tpllistlock, NULL); } static void __attribute__ ((destructor)) stats_destructor(void) { pthread_rwlock_destroy(&tpllistlock); } #endif /* _KERNEL */ diff --git a/sys/sys/tree.h b/sys/sys/tree.h index 74f1f23d3576..e0ab4449bd35 100644 --- a/sys/sys/tree.h +++ b/sys/sys/tree.h @@ -1,945 +1,938 @@ /* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */ /* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */ /* $FreeBSD$ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 2002 Niels Provos * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _SYS_TREE_H_ #define _SYS_TREE_H_ #include /* * This file defines data structures for different types of trees: * splay trees and rank-balanced trees. * * A splay tree is a self-organizing data structure. Every operation * on the tree causes a splay to happen. The splay moves the requested * node to the root of the tree and partly rebalances it. * * This has the benefit that request locality causes faster lookups as * the requested nodes move to the top of the tree. On the other hand, * every lookup causes memory writes. 
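 * Because of those writes, even lookups require exclusive access to a
 * splay tree; it cannot safely be shared between concurrent readers.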
* * The Balance Theorem bounds the total access time for m operations * and n inserts on an initially empty tree as O((m + n)lg n). The * amortized cost for a sequence of m accesses to a splay tree is O(lg n); * * A rank-balanced tree is a binary search tree with an integer * rank-difference as an attribute of each pointer from parent to child. * The sum of the rank-differences on any path from a node down to null is * the same, and defines the rank of that node. The rank of the null node * is -1. * - * Different additional conditions define different sorts of balanced - * trees, including "red-black" and "AVL" trees. The set of conditions - * applied here are the "weak-AVL" conditions of Haeupler, Sen and Tarjan: + * Different additional conditions define different sorts of balanced trees, + * including "red-black" and "AVL" trees. The set of conditions applied here + * are the "weak-AVL" conditions of Haeupler, Sen and Tarjan presented in in + * "Rank Balanced Trees", ACM Transactions on Algorithms Volume 11 Issue 4 June + * 2015 Article No.: 30pp 1–26 https://doi.org/10.1145/2689412 (the HST paper): * - every rank-difference is 1 or 2. * - the rank of any leaf is 1. * * For historical reasons, rank differences that are even are associated * with the color red (Rank-Even-Difference), and the child that a red edge * points to is called a red child. * * Every operation on a rank-balanced tree is bounded as O(lg n). * The maximum height of a rank-balanced tree is 2lg (n+1). */ #define SPLAY_HEAD(name, type) \ struct name { \ struct type *sph_root; /* root of the tree */ \ } #define SPLAY_INITIALIZER(root) \ { NULL } #define SPLAY_INIT(root) do { \ (root)->sph_root = NULL; \ } while (/*CONSTCOND*/ 0) #define SPLAY_ENTRY(type) \ struct { \ struct type *spe_left; /* left element */ \ struct type *spe_right; /* right element */ \ } #define SPLAY_LEFT(elm, field) (elm)->field.spe_left #define SPLAY_RIGHT(elm, field) (elm)->field.spe_right #define SPLAY_ROOT(head) (head)->sph_root #define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL) /* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */ #define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ (head)->sph_root = tmp; \ } while (/*CONSTCOND*/ 0) #define SPLAY_ROTATE_LEFT(head, tmp, field) do { \ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \ SPLAY_LEFT(tmp, field) = (head)->sph_root; \ (head)->sph_root = tmp; \ } while (/*CONSTCOND*/ 0) #define SPLAY_LINKLEFT(head, tmp, field) do { \ SPLAY_LEFT(tmp, field) = (head)->sph_root; \ tmp = (head)->sph_root; \ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \ } while (/*CONSTCOND*/ 0) #define SPLAY_LINKRIGHT(head, tmp, field) do { \ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ tmp = (head)->sph_root; \ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \ } while (/*CONSTCOND*/ 0) #define SPLAY_ASSEMBLE(head, node, left, right, field) do { \ SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \ SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \ } while (/*CONSTCOND*/ 0) /* Generates prototypes and inline functions */ #define SPLAY_PROTOTYPE(name, type, field, cmp) \ void name##_SPLAY(struct name *, struct type *); \ void name##_SPLAY_MINMAX(struct name *, int); \ struct type 
*name##_SPLAY_INSERT(struct name *, struct type *); \ struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \ \ /* Finds the node with the same key as elm */ \ static __unused __inline struct type * \ name##_SPLAY_FIND(struct name *head, struct type *elm) \ { \ if (SPLAY_EMPTY(head)) \ return(NULL); \ name##_SPLAY(head, elm); \ if ((cmp)(elm, (head)->sph_root) == 0) \ return (head->sph_root); \ return (NULL); \ } \ \ static __unused __inline struct type * \ name##_SPLAY_NEXT(struct name *head, struct type *elm) \ { \ name##_SPLAY(head, elm); \ if (SPLAY_RIGHT(elm, field) != NULL) { \ elm = SPLAY_RIGHT(elm, field); \ while (SPLAY_LEFT(elm, field) != NULL) { \ elm = SPLAY_LEFT(elm, field); \ } \ } else \ elm = NULL; \ return (elm); \ } \ \ static __unused __inline struct type * \ name##_SPLAY_MIN_MAX(struct name *head, int val) \ { \ name##_SPLAY_MINMAX(head, val); \ return (SPLAY_ROOT(head)); \ } /* Main splay operation. * Moves node close to the key of elm to top */ #define SPLAY_GENERATE(name, type, field, cmp) \ struct type * \ name##_SPLAY_INSERT(struct name *head, struct type *elm) \ { \ if (SPLAY_EMPTY(head)) { \ SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \ } else { \ __typeof(cmp(NULL, NULL)) __comp; \ name##_SPLAY(head, elm); \ __comp = (cmp)(elm, (head)->sph_root); \ if (__comp < 0) { \ SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\ SPLAY_RIGHT(elm, field) = (head)->sph_root; \ SPLAY_LEFT((head)->sph_root, field) = NULL; \ } else if (__comp > 0) { \ SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\ SPLAY_LEFT(elm, field) = (head)->sph_root; \ SPLAY_RIGHT((head)->sph_root, field) = NULL; \ } else \ return ((head)->sph_root); \ } \ (head)->sph_root = (elm); \ return (NULL); \ } \ \ struct type * \ name##_SPLAY_REMOVE(struct name *head, struct type *elm) \ { \ struct type *__tmp; \ if (SPLAY_EMPTY(head)) \ return (NULL); \ name##_SPLAY(head, elm); \ if ((cmp)(elm, (head)->sph_root) == 0) { \ if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\ } else { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\ name##_SPLAY(head, elm); \ SPLAY_RIGHT((head)->sph_root, field) = __tmp; \ } \ return (elm); \ } \ return (NULL); \ } \ \ void \ name##_SPLAY(struct name *head, struct type *elm) \ { \ struct type __node, *__left, *__right, *__tmp; \ __typeof(cmp(NULL, NULL)) __comp; \ \ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ __left = __right = &__node; \ \ while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) { \ if (__comp < 0) { \ __tmp = SPLAY_LEFT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if ((cmp)(elm, __tmp) < 0){ \ SPLAY_ROTATE_RIGHT(head, __tmp, field); \ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKLEFT(head, __right, field); \ } else if (__comp > 0) { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if ((cmp)(elm, __tmp) > 0){ \ SPLAY_ROTATE_LEFT(head, __tmp, field); \ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKRIGHT(head, __left, field); \ } \ } \ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ } \ \ /* Splay with either the minimum or the maximum element \ * Used to find minimum or maximum element in tree. 
\ */ \ void name##_SPLAY_MINMAX(struct name *head, int __comp) \ { \ struct type __node, *__left, *__right, *__tmp; \ \ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ __left = __right = &__node; \ \ while (1) { \ if (__comp < 0) { \ __tmp = SPLAY_LEFT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if (__comp < 0){ \ SPLAY_ROTATE_RIGHT(head, __tmp, field); \ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKLEFT(head, __right, field); \ } else if (__comp > 0) { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if (__comp > 0) { \ SPLAY_ROTATE_LEFT(head, __tmp, field); \ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKRIGHT(head, __left, field); \ } \ } \ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ } #define SPLAY_NEGINF -1 #define SPLAY_INF 1 #define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) #define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y) #define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y) #define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y) #define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \ : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF)) #define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL \ : name##_SPLAY_MIN_MAX(x, SPLAY_INF)) #define SPLAY_FOREACH(x, name, head) \ for ((x) = SPLAY_MIN(name, head); \ (x) != NULL; \ (x) = SPLAY_NEXT(name, head, x)) /* Macros that define a rank-balanced tree */ #define RB_HEAD(name, type) \ struct name { \ struct type *rbh_root; /* root of the tree */ \ } #define RB_INITIALIZER(root) \ { NULL } #define RB_INIT(root) do { \ (root)->rbh_root = NULL; \ } while (/*CONSTCOND*/ 0) #define RB_ENTRY(type) \ struct { \ - struct type *rbe_left; /* left element */ \ - struct type *rbe_right; /* right element */ \ - struct type *rbe_parent; /* parent element */ \ + struct type *rbe_link[3]; \ } -#define RB_LEFT(elm, field) (elm)->field.rbe_left -#define RB_RIGHT(elm, field) (elm)->field.rbe_right - /* * With the expectation that any object of struct type has an * address that is a multiple of 4, and that therefore the * 2 least significant bits of a pointer to struct type are * always zero, this implementation sets those bits to indicate * that the left or right child of the tree node is "red". 
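 * With the encoding below, for example, (_RB_BITSUP(elm, field) & _RB_L)
 * being non-zero means the left child of elm is red, and _RB_PTR() masks
 * both bits off again so that RB_PARENT() recovers the plain parent
 * pointer.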
*/ -#define RB_UP(elm, field) (elm)->field.rbe_parent -#define RB_BITS(elm, field) (*(__uintptr_t *)&RB_UP(elm, field)) -#define RB_RED_L ((__uintptr_t)1) -#define RB_RED_R ((__uintptr_t)2) -#define RB_RED_MASK ((__uintptr_t)3) -#define RB_FLIP_LEFT(elm, field) (RB_BITS(elm, field) ^= RB_RED_L) -#define RB_FLIP_RIGHT(elm, field) (RB_BITS(elm, field) ^= RB_RED_R) -#define RB_FLIP_ALL(elm, field) (RB_BITS(elm, field) ^= RB_RED_MASK) -#define _RB_PARENT_ONLY(elm) (__typeof(elm)) \ - ((__uintptr_t)elm & ~RB_RED_MASK) -#define RB_PARENT(elm, field) _RB_PARENT_ONLY(RB_UP(elm, field)) +#define _RB_LINK(elm, dir, field) (elm)->field.rbe_link[dir] +#define _RB_UP(elm, field) _RB_LINK(elm, 0, field) +#define _RB_L ((__uintptr_t)1) +#define _RB_R ((__uintptr_t)2) +#define _RB_LR ((__uintptr_t)3) +#define _RB_BITS(elm) (*(__uintptr_t *)&elm) +#define _RB_BITSUP(elm, field) _RB_BITS(_RB_UP(elm, field)) +#define _RB_PTR(elm) (__typeof(elm)) \ + ((__uintptr_t)elm & ~_RB_LR) + +#define RB_PARENT(elm, field) _RB_PTR(_RB_UP(elm, field)) +#define RB_LEFT(elm, field) _RB_LINK(elm, _RB_L, field) +#define RB_RIGHT(elm, field) _RB_LINK(elm, _RB_R, field) #define RB_ROOT(head) (head)->rbh_root #define RB_EMPTY(head) (RB_ROOT(head) == NULL) #define RB_SET_PARENT(dst, src, field) do { \ - RB_BITS(dst, field) = (__uintptr_t)src | \ - (RB_BITS(dst, field) & RB_RED_MASK); \ + _RB_BITSUP(dst, field) = (__uintptr_t)src | \ + (_RB_BITSUP(dst, field) & _RB_LR); \ } while (/*CONSTCOND*/ 0) #define RB_SET(elm, parent, field) do { \ - RB_UP(elm, field) = parent; \ + _RB_UP(elm, field) = parent; \ RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \ } while (/*CONSTCOND*/ 0) /* * Something to be invoked in a loop at the root of every modified subtree, * from the bottom up to the root, to update augmented node data. */ #ifndef RB_AUGMENT #define RB_AUGMENT(x) break #endif #define RB_UPDATE_AUGMENT(elm, field) do { \ __typeof(elm) rb_update_tmp = (elm); \ do { \ RB_AUGMENT(rb_update_tmp); \ } while ((rb_update_tmp = RB_PARENT(rb_update_tmp, field)) != NULL); \ } while (0) #define RB_SWAP_CHILD(head, par, out, in, field) do { \ if (par == NULL) \ RB_ROOT(head) = (in); \ else if ((out) == RB_LEFT(par, field)) \ RB_LEFT(par, field) = (in); \ else \ RB_RIGHT(par, field) = (in); \ } while (/*CONSTCOND*/ 0) /* - * RB_ROTATE macros partially restructure the tree to improve - * balance. The ROTATE_RIGHT case is just a reflection of the - * ROTATE_LEFT case. Initially, tmp is a right child of elm. After - * rotation, elm is a left child of tmp, and the subtree that - * represented the items between them, which formerly hung to the left - * of tmp now hangs to the right of elm. The parent-child - * relationship between elm and its former parent is not changed; - * where this macro once updated those fields, that is now left to the - * caller of RB_ROTATE to clean up, so that a pair of rotations does - * not twice update the same pair of pointer fields with distinct - * values. + * RB_ROTATE macro partially restructures the tree to improve balance. In the + * case when dir is _RB_L, tmp is a right child of elm. After rotation, elm + * is a left child of tmp, and the subtree that represented the items between + * them, which formerly hung to the left of tmp now hangs to the right of elm. 
+ * The parent-child relationship between elm and its former parent is not + * changed; where this macro once updated those fields, that is now left to the + * caller of RB_ROTATE to clean up, so that a pair of rotations does not twice + * update the same pair of pointer fields with distinct values. */ -#define RB_ROTATE_LEFT(elm, tmp, field) do { \ - if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) { \ - RB_SET_PARENT(RB_RIGHT(elm, field), elm, field); \ - } \ - RB_LEFT(tmp, field) = (elm); \ - RB_SET_PARENT(elm, tmp, field); \ -} while (/*CONSTCOND*/ 0) - -#define RB_ROTATE_RIGHT(elm, tmp, field) do { \ - if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) { \ - RB_SET_PARENT(RB_LEFT(elm, field), elm, field); \ - } \ - RB_RIGHT(tmp, field) = (elm); \ +#define RB_ROTATE(elm, tmp, dir, field) do { \ + if ((_RB_LINK(elm, dir ^ _RB_LR, field) = \ + _RB_LINK(tmp, dir, field)) != NULL) \ + RB_SET_PARENT(_RB_LINK(tmp, dir, field), elm, field); \ + _RB_LINK(tmp, dir, field) = (elm); \ RB_SET_PARENT(elm, tmp, field); \ } while (/*CONSTCOND*/ 0) #if defined(_KERNEL) && defined(DIAGNOSTIC) && !defined(_RB_DIAGNOSTIC) #define _RB_DIAGNOSTIC 1 #endif /* Generates prototypes and inline functions */ #define RB_PROTOTYPE(name, type, field, cmp) \ RB_PROTOTYPE_INTERNAL(name, type, field, cmp,) #define RB_PROTOTYPE_STATIC(name, type, field, cmp) \ RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __unused static) #define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \ RB_PROTOTYPE_RANK(name, type, attr) \ RB_PROTOTYPE_INSERT_COLOR(name, type, attr); \ RB_PROTOTYPE_REMOVE_COLOR(name, type, attr); \ RB_PROTOTYPE_INSERT(name, type, attr); \ RB_PROTOTYPE_REMOVE(name, type, attr); \ RB_PROTOTYPE_FIND(name, type, attr); \ RB_PROTOTYPE_NFIND(name, type, attr); \ RB_PROTOTYPE_NEXT(name, type, attr); \ RB_PROTOTYPE_PREV(name, type, attr); \ RB_PROTOTYPE_MINMAX(name, type, attr); \ RB_PROTOTYPE_REINSERT(name, type, attr); #ifdef _RB_DIAGNOSTIC #define RB_PROTOTYPE_RANK(name, type, attr) \ attr int name##_RB_RANK(struct type *); #else #define RB_PROTOTYPE_RANK(name, type, attr) #endif #define RB_PROTOTYPE_INSERT_COLOR(name, type, attr) \ attr void name##_RB_INSERT_COLOR(struct name *, struct type *) #define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr) \ attr void name##_RB_REMOVE_COLOR(struct name *, \ struct type *, struct type *) #define RB_PROTOTYPE_REMOVE(name, type, attr) \ attr struct type *name##_RB_REMOVE(struct name *, struct type *) #define RB_PROTOTYPE_INSERT(name, type, attr) \ attr struct type *name##_RB_INSERT(struct name *, struct type *) #define RB_PROTOTYPE_FIND(name, type, attr) \ attr struct type *name##_RB_FIND(struct name *, struct type *) #define RB_PROTOTYPE_NFIND(name, type, attr) \ attr struct type *name##_RB_NFIND(struct name *, struct type *) #define RB_PROTOTYPE_NEXT(name, type, attr) \ attr struct type *name##_RB_NEXT(struct type *) #define RB_PROTOTYPE_PREV(name, type, attr) \ attr struct type *name##_RB_PREV(struct type *) #define RB_PROTOTYPE_MINMAX(name, type, attr) \ attr struct type *name##_RB_MINMAX(struct name *, int) #define RB_PROTOTYPE_REINSERT(name, type, attr) \ attr struct type *name##_RB_REINSERT(struct name *, struct type *) /* Main rb operation. 
* Moves node close to the key of elm to top */ #define RB_GENERATE(name, type, field, cmp) \ RB_GENERATE_INTERNAL(name, type, field, cmp,) #define RB_GENERATE_STATIC(name, type, field, cmp) \ RB_GENERATE_INTERNAL(name, type, field, cmp, __unused static) #define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \ RB_GENERATE_RANK(name, type, field, attr) \ RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ RB_GENERATE_INSERT(name, type, field, cmp, attr) \ RB_GENERATE_REMOVE(name, type, field, attr) \ RB_GENERATE_FIND(name, type, field, cmp, attr) \ RB_GENERATE_NFIND(name, type, field, cmp, attr) \ RB_GENERATE_NEXT(name, type, field, attr) \ RB_GENERATE_PREV(name, type, field, attr) \ RB_GENERATE_MINMAX(name, type, field, attr) \ RB_GENERATE_REINSERT(name, type, field, cmp, attr) #ifdef _RB_DIAGNOSTIC #define RB_GENERATE_RANK(name, type, field, attr) \ attr int \ name##_RB_RANK(struct type *elm) \ { \ - struct type *left, *right; \ + struct type *left, *right, *up; \ int left_rank, right_rank; \ - __uintptr_t bits; \ \ if (elm == NULL) \ return (0); \ - bits = RB_BITS(elm, field); \ + up = _RB_UP(elm, field); \ left = RB_LEFT(elm, field); \ - left_rank = ((bits & RB_RED_L) ? 2 : 1) + name##_RB_RANK(left); \ + left_rank = ((_RB_BITS(up) & _RB_L) ? 2 : 1) + \ + name##_RB_RANK(left); \ right = RB_RIGHT(elm, field); \ - right_rank = ((bits & RB_RED_R) ? 2 : 1) + name##_RB_RANK(right); \ + right_rank = ((_RB_BITS(up) & _RB_R) ? 2 : 1) + \ + name##_RB_RANK(right); \ if (left_rank != right_rank || \ (left_rank == 2 && left == NULL && right == NULL)) \ return (-1); \ return (left_rank); \ } #else #define RB_GENERATE_RANK(name, type, field, attr) #endif #define RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ attr void \ name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \ { \ /* \ * Initially, elm is a leaf. Either its parent was previously \ * a leaf, with two black null children, or an interior node \ * with a black non-null child and a red null child. The \ * balance criterion "the rank of any leaf is 1" precludes the \ * possibility of two red null children for the initial parent. \ * So the first loop iteration cannot lead to accessing an \ * uninitialized 'child', and a later iteration can only happen \ * when a value has been assigned to 'child' in the previous \ * one. 
\ - */ \ - struct type *child, *gpar = RB_UP(elm, field), *parent; \ - __uintptr_t red; \ + */ \ + struct type *child, *child_up, *gpar, *parent; \ + __uintptr_t elmdir, sibdir; \ \ + gpar = _RB_UP(elm, field); \ while ((parent = gpar) != NULL) { \ - red = RB_BITS(parent, field); \ - gpar = RB_UP(parent, field); \ - if (RB_LEFT(parent, field) == elm) { \ - if (red & RB_RED_L) { \ - RB_FLIP_LEFT(parent, field); \ - return; \ - } \ - RB_FLIP_RIGHT(parent, field); \ - if ((red & RB_RED_MASK) == 0) { \ - child = elm; \ - elm = parent; \ - continue; \ - } \ - red = RB_BITS(elm, field); \ - if (red & RB_RED_R) \ - child = elm; \ - else { \ - /* coverity[uninit_use] */ \ - RB_ROTATE_LEFT(elm, child, field); \ - red = RB_BITS(child, field); \ - if (red & RB_RED_R) \ - RB_FLIP_LEFT(parent, field); \ - if (red & RB_RED_L) \ - RB_FLIP_ALL(elm, field); \ - else \ - RB_FLIP_LEFT(elm, field); \ - if ((red & RB_RED_MASK) == 0) \ - elm = child; \ - } \ - RB_ROTATE_RIGHT(parent, child, field); \ - } else { \ - if (red & RB_RED_R) { \ - RB_FLIP_RIGHT(parent, field); \ - return; \ - } \ - RB_FLIP_LEFT(parent, field); \ - if ((red & RB_RED_MASK) == 0) { \ - child = elm; \ - elm = parent; \ - continue; \ - } \ - red = RB_BITS(elm, field); \ - if (red & RB_RED_L) \ - child = elm; \ - else { \ - /* coverity[uninit_use] */ \ - RB_ROTATE_RIGHT(elm, child, field); \ - red = RB_BITS(child, field); \ - if (red & RB_RED_L) \ - RB_FLIP_RIGHT(parent, field); \ - if (red & RB_RED_R) \ - RB_FLIP_ALL(elm, field); \ - else \ - RB_FLIP_RIGHT(elm, field); \ - if ((red & RB_RED_MASK) == 0) \ - elm = child; \ - } \ - RB_ROTATE_LEFT(parent, child, field); \ + /* the rank of the tree rooted at elm grew */ \ + gpar = _RB_UP(parent, field); \ + elmdir = RB_RIGHT(parent, field) == elm ? _RB_R : _RB_L; \ + if (_RB_BITS(gpar) & elmdir) { \ + /* shorten the parent-elm edge to rebalance */ \ + _RB_BITSUP(parent, field) ^= elmdir; \ + return; \ + } \ + sibdir = elmdir ^ _RB_LR; \ + /* the other edge must change length */ \ + _RB_BITSUP(parent, field) ^= sibdir; \ + if ((_RB_BITS(gpar) & _RB_LR) == 0) { \ + /* both edges now short, retry from parent */ \ + child = elm; \ + elm = parent; \ + continue; \ } \ - gpar = _RB_PARENT_ONLY(gpar); \ - RB_UP(child, field) = gpar; \ + _RB_UP(parent, field) = gpar = _RB_PTR(gpar); \ + if (_RB_BITSUP(elm, field) & elmdir) { \ + /* \ + * Exactly one of the edges descending from elm \ + * is long. The long one is in the same \ + * direction as the edge from parent to elm, \ + * so change that by rotation. The edge from \ + * parent to z was shortened above. Shorten \ + * the long edge down from elm, and adjust \ + * other edge lengths based on the downward \ + * edges from 'child'. \ + * \ + * par par \ + * / \ / \ \ + * elm z / z \ + * / \ child \ + * / child / \ \ + * / / \ elm \ \ + * w / \ / \ y \ + * x y w \ \ + * x \ + */ \ + RB_ROTATE(elm, child, elmdir, field); \ + child_up = _RB_UP(child, field); \ + if (_RB_BITS(child_up) & sibdir) \ + _RB_BITSUP(parent, field) ^= elmdir; \ + if (_RB_BITS(child_up) & elmdir) \ + _RB_BITSUP(elm, field) ^= _RB_LR; \ + else \ + _RB_BITSUP(elm, field) ^= elmdir; \ + /* if child is a leaf, don't augment elm, \ + * since it is restored to be a leaf again. */ \ + if ((_RB_BITS(child_up) & _RB_LR) == 0) \ + elm = child; \ + } else \ + child = elm; \ + \ + /* \ + * The long edge descending from 'child' points back \ + * in the direction of 'parent'. Rotate to make \ + * 'parent' a child of 'child', then make both edges \ + * of 'child' short to rebalance. 
\ + * \ + * par child \ + * / \ / \ \ + * / z x par \ + * child / \ \ + * / \ / z \ + * x \ y \ + * y \ + */ \ + RB_ROTATE(parent, child, sibdir, field); \ + _RB_UP(child, field) = gpar; \ RB_SWAP_CHILD(head, gpar, parent, child, field); \ if (elm != child) \ RB_AUGMENT(elm); \ RB_AUGMENT(parent); \ break; \ } \ } #ifndef RB_STRICT_HST /* * In REMOVE_COLOR, the HST paper, in figure 3, in the single-rotate case, has * 'parent' with one higher rank, and then reduces its rank if 'parent' has * become a leaf. This implementation always has the parent in its new position * with lower rank, to avoid the leaf check. Define RB_STRICT_HST to 1 to get * the behavior that HST describes. */ #define RB_STRICT_HST 0 #endif #define RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ attr void \ name##_RB_REMOVE_COLOR(struct name *head, \ struct type *parent, struct type *elm) \ { \ - struct type *gpar, *sib; \ - __uintptr_t red; \ + struct type *gpar, *sib, *up; \ + __uintptr_t elmdir, sibdir; \ \ - if (RB_LEFT(parent, field) == elm && \ - RB_RIGHT(parent, field) == elm) { \ - RB_BITS(parent, field) &= ~RB_RED_MASK; \ + if (RB_RIGHT(parent, field) == elm && \ + RB_LEFT(parent, field) == elm) { \ + /* Deleting a leaf that is an only-child creates a \ + * rank-2 leaf. Demote that leaf. */ \ + _RB_UP(parent, field) = _RB_PTR(_RB_UP(parent, field)); \ elm = parent; \ - parent = RB_PARENT(elm, field); \ - if (parent == NULL) \ + if ((parent = _RB_UP(elm, field)) == NULL) \ return; \ } \ do { \ - red = RB_BITS(parent, field); \ - gpar = RB_UP(parent, field); \ - if (RB_LEFT(parent, field) == elm) { \ - if (~red & RB_RED_L) { \ - RB_FLIP_LEFT(parent, field); \ - return; \ - } \ - if ((~red & RB_RED_MASK) == 0) { \ - RB_FLIP_RIGHT(parent, field); \ - elm = parent; \ - continue; \ - } \ - sib = RB_RIGHT(parent, field); \ - red = RB_BITS(sib, field); \ - switch (red & RB_RED_MASK) { \ - case RB_RED_MASK: \ - RB_FLIP_ALL(sib, field); \ - elm = parent; \ - continue; \ - case RB_RED_R: \ - elm = RB_LEFT(sib, field); \ - RB_ROTATE_RIGHT(sib, elm, field); \ - red = RB_BITS(elm, field); \ - if (red & RB_RED_L) \ - RB_FLIP_ALL(parent, field); \ - else \ - RB_FLIP_LEFT(parent, field); \ - if (red & RB_RED_R) \ - RB_FLIP_ALL(sib, field); \ - else \ - RB_FLIP_RIGHT(sib, field); \ - RB_BITS(elm, field) |= RB_RED_MASK; \ - break; \ - case RB_RED_L: \ - if (RB_STRICT_HST && elm != NULL) { \ - RB_FLIP_RIGHT(parent, field); \ - RB_FLIP_ALL(sib, field); \ - elm = sib; \ - break; \ - } \ - RB_FLIP_LEFT(parent, field); \ - /* FALLTHROUGH */ \ - default: \ - RB_FLIP_RIGHT(sib, field); \ - elm = sib; \ - break; \ - } \ - RB_ROTATE_LEFT(parent, elm, field); \ + /* the rank of the tree rooted at elm shrank */ \ + gpar = _RB_UP(parent, field); \ + elmdir = RB_RIGHT(parent, field) == elm ? 
_RB_R : _RB_L; \ + _RB_BITS(gpar) ^= elmdir; \ + if (_RB_BITS(gpar) & elmdir) { \ + /* lengthen the parent-elm edge to rebalance */ \ + _RB_UP(parent, field) = gpar; \ + return; \ + } \ + if (_RB_BITS(gpar) & _RB_LR) { \ + /* shorten other edge, retry from parent */ \ + _RB_BITS(gpar) ^= _RB_LR; \ + _RB_UP(parent, field) = gpar; \ + gpar = _RB_PTR(gpar); \ + continue; \ + } \ + sibdir = elmdir ^ _RB_LR; \ + sib = _RB_LINK(parent, sibdir, field); \ + up = _RB_UP(sib, field); \ + _RB_BITS(up) ^= _RB_LR; \ + if ((_RB_BITS(up) & _RB_LR) == 0) { \ + /* shorten edges descending from sib, retry */ \ + _RB_UP(sib, field) = up; \ + continue; \ + } \ + if ((_RB_BITS(up) & sibdir) == 0) { \ + /* \ + * The edge descending from 'sib' away from \ + * 'parent' is long. The short edge descending \ + * from 'sib' toward 'parent' points to 'elm*' \ + * Rotate to make 'sib' a child of 'elm*' \ + * then adjust the lengths of the edges \ + * descending from 'sib' and 'elm*'. \ + * \ + * par par \ + * / \ / \ \ + * / sib elm \ \ + * / / \ elm* \ + * elm elm* \ / \ \ + * / \ \ / \ \ + * / \ z / \ \ + * x y x sib \ + * / \ \ + * / z \ + * y \ + */ \ + elm = _RB_LINK(sib, elmdir, field); \ + /* elm is a 1-child. First rotate at elm. */ \ + RB_ROTATE(sib, elm, sibdir, field); \ + up = _RB_UP(elm, field); \ + _RB_BITSUP(parent, field) ^= \ + (_RB_BITS(up) & elmdir) ? _RB_LR : elmdir; \ + _RB_BITSUP(sib, field) ^= \ + (_RB_BITS(up) & sibdir) ? _RB_LR : sibdir; \ + _RB_BITSUP(elm, field) |= _RB_LR; \ } else { \ - if (~red & RB_RED_R) { \ - RB_FLIP_RIGHT(parent, field); \ - return; \ - } \ - if ((~red & RB_RED_MASK) == 0) { \ - RB_FLIP_LEFT(parent, field); \ - elm = parent; \ - continue; \ - } \ - sib = RB_LEFT(parent, field); \ - red = RB_BITS(sib, field); \ - switch (red & RB_RED_MASK) { \ - case RB_RED_MASK: \ - RB_FLIP_ALL(sib, field); \ - elm = parent; \ - continue; \ - case RB_RED_L: \ - elm = RB_RIGHT(sib, field); \ - RB_ROTATE_LEFT(sib, elm, field); \ - red = RB_BITS(elm, field); \ - if (red & RB_RED_R) \ - RB_FLIP_ALL(parent, field); \ - else \ - RB_FLIP_RIGHT(parent, field); \ - if (red & RB_RED_L) \ - RB_FLIP_ALL(sib, field); \ - else \ - RB_FLIP_LEFT(sib, field); \ - RB_BITS(elm, field) |= RB_RED_MASK; \ - break; \ - case RB_RED_R: \ - if (RB_STRICT_HST && elm != NULL) { \ - RB_FLIP_LEFT(parent, field); \ - RB_FLIP_ALL(sib, field); \ - elm = sib; \ - break; \ - } \ - RB_FLIP_RIGHT(parent, field); \ - /* FALLTHROUGH */ \ - default: \ - RB_FLIP_LEFT(sib, field); \ - elm = sib; \ - break; \ - } \ - RB_ROTATE_RIGHT(parent, elm, field); \ + if ((_RB_BITS(up) & elmdir) == 0 && \ + RB_STRICT_HST && elm != NULL) { \ + /* if parent does not become a leaf, \ + do not demote parent yet. */ \ + _RB_BITSUP(parent, field) ^= sibdir; \ + _RB_BITSUP(sib, field) ^= _RB_LR; \ + } else if ((_RB_BITS(up) & elmdir) == 0) { \ + /* demote parent. */ \ + _RB_BITSUP(parent, field) ^= elmdir; \ + _RB_BITSUP(sib, field) ^= sibdir; \ + } else \ + _RB_BITSUP(sib, field) ^= sibdir; \ + elm = sib; \ } \ - gpar = _RB_PARENT_ONLY(gpar); \ + \ + /* \ + * The edge descending from 'elm' away from 'parent' \ + * is short. Rotate to make 'parent' a child of 'elm', \ + * then lengthen the short edges descending from \ + * 'parent' and 'elm' to rebalance. 
\ + * \ + * par elm \ + * / \ / \ \ + * e \ / \ \ + * elm / \ \ + * / \ par s \ + * / \ / \ \ + * / \ e \ \ + * x s x \ + */ \ + RB_ROTATE(parent, elm, elmdir, field); \ RB_SET_PARENT(elm, gpar, field); \ RB_SWAP_CHILD(head, gpar, parent, elm, field); \ if (sib != elm) \ RB_AUGMENT(sib); \ break; \ - } while ((parent = _RB_PARENT_ONLY(gpar)) != NULL); \ + } while (elm = parent, (parent = gpar) != NULL); \ } #define RB_GENERATE_REMOVE(name, type, field, attr) \ attr struct type * \ name##_RB_REMOVE(struct name *head, struct type *out) \ { \ struct type *child, *in, *opar, *parent; \ \ child = RB_LEFT(out, field); \ in = RB_RIGHT(out, field); \ - opar = RB_UP(out, field); \ + opar = _RB_UP(out, field); \ if (in == NULL || child == NULL) { \ in = child = in == NULL ? child : in; \ - parent = opar = _RB_PARENT_ONLY(opar); \ + parent = opar = _RB_PTR(opar); \ } else { \ parent = in; \ while (RB_LEFT(in, field)) \ in = RB_LEFT(in, field); \ RB_SET_PARENT(child, in, field); \ RB_LEFT(in, field) = child; \ child = RB_RIGHT(in, field); \ if (parent != in) { \ RB_SET_PARENT(parent, in, field); \ RB_RIGHT(in, field) = parent; \ parent = RB_PARENT(in, field); \ RB_LEFT(parent, field) = child; \ } \ - RB_UP(in, field) = opar; \ - opar = _RB_PARENT_ONLY(opar); \ + _RB_UP(in, field) = opar; \ + opar = _RB_PTR(opar); \ } \ RB_SWAP_CHILD(head, opar, out, in, field); \ if (child != NULL) \ - RB_UP(child, field) = parent; \ + _RB_UP(child, field) = parent; \ if (parent != NULL) { \ name##_RB_REMOVE_COLOR(head, parent, child); \ + /* if rotation has made 'parent' the root of the same \ + * subtree as before, don't re-augment it. */ \ if (parent == in && RB_LEFT(parent, field) == NULL) \ parent = RB_PARENT(parent, field); \ RB_UPDATE_AUGMENT(parent, field); \ } \ return (out); \ } #define RB_GENERATE_INSERT(name, type, field, cmp, attr) \ /* Inserts a node into the RB tree */ \ attr struct type * \ name##_RB_INSERT(struct name *head, struct type *elm) \ { \ struct type *tmp; \ struct type **tmpp = &RB_ROOT(head); \ struct type *parent = NULL; \ \ while ((tmp = *tmpp) != NULL) { \ parent = tmp; \ __typeof(cmp(NULL, NULL)) comp = (cmp)(elm, parent); \ if (comp < 0) \ tmpp = &RB_LEFT(parent, field); \ else if (comp > 0) \ tmpp = &RB_RIGHT(parent, field); \ else \ return (parent); \ } \ RB_SET(elm, parent, field); \ *tmpp = elm; \ name##_RB_INSERT_COLOR(head, elm); \ RB_UPDATE_AUGMENT(elm, field); \ return (NULL); \ } #define RB_GENERATE_FIND(name, type, field, cmp, attr) \ /* Finds the node with the same key as elm */ \ attr struct type * \ name##_RB_FIND(struct name *head, struct type *elm) \ { \ struct type *tmp = RB_ROOT(head); \ __typeof(cmp(NULL, NULL)) comp; \ while (tmp) { \ comp = cmp(elm, tmp); \ if (comp < 0) \ tmp = RB_LEFT(tmp, field); \ else if (comp > 0) \ tmp = RB_RIGHT(tmp, field); \ else \ return (tmp); \ } \ return (NULL); \ } #define RB_GENERATE_NFIND(name, type, field, cmp, attr) \ /* Finds the first node greater than or equal to the search key */ \ attr struct type * \ name##_RB_NFIND(struct name *head, struct type *elm) \ { \ struct type *tmp = RB_ROOT(head); \ struct type *res = NULL; \ __typeof(cmp(NULL, NULL)) comp; \ while (tmp) { \ comp = cmp(elm, tmp); \ if (comp < 0) { \ res = tmp; \ tmp = RB_LEFT(tmp, field); \ } \ else if (comp > 0) \ tmp = RB_RIGHT(tmp, field); \ else \ return (tmp); \ } \ return (res); \ } #define RB_GENERATE_NEXT(name, type, field, attr) \ /* ARGSUSED */ \ attr struct type * \ name##_RB_NEXT(struct type *elm) \ { \ if (RB_RIGHT(elm, field)) { \ elm = 
RB_RIGHT(elm, field); \ while (RB_LEFT(elm, field)) \ elm = RB_LEFT(elm, field); \ } else { \ while (RB_PARENT(elm, field) && \ (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \ elm = RB_PARENT(elm, field); \ elm = RB_PARENT(elm, field); \ } \ return (elm); \ } #define RB_GENERATE_PREV(name, type, field, attr) \ /* ARGSUSED */ \ attr struct type * \ name##_RB_PREV(struct type *elm) \ { \ if (RB_LEFT(elm, field)) { \ elm = RB_LEFT(elm, field); \ while (RB_RIGHT(elm, field)) \ elm = RB_RIGHT(elm, field); \ } else { \ while (RB_PARENT(elm, field) && \ (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ elm = RB_PARENT(elm, field); \ elm = RB_PARENT(elm, field); \ } \ return (elm); \ } #define RB_GENERATE_MINMAX(name, type, field, attr) \ attr struct type * \ name##_RB_MINMAX(struct name *head, int val) \ { \ struct type *tmp = RB_ROOT(head); \ struct type *parent = NULL; \ while (tmp) { \ parent = tmp; \ if (val < 0) \ tmp = RB_LEFT(tmp, field); \ else \ tmp = RB_RIGHT(tmp, field); \ } \ return (parent); \ } #define RB_GENERATE_REINSERT(name, type, field, cmp, attr) \ attr struct type * \ name##_RB_REINSERT(struct name *head, struct type *elm) \ { \ struct type *cmpelm; \ if (((cmpelm = RB_PREV(name, head, elm)) != NULL && \ cmp(cmpelm, elm) >= 0) || \ ((cmpelm = RB_NEXT(name, head, elm)) != NULL && \ cmp(elm, cmpelm) >= 0)) { \ /* XXXLAS: Remove/insert is heavy handed. */ \ RB_REMOVE(name, head, elm); \ return (RB_INSERT(name, head, elm)); \ } \ return (NULL); \ } \ #define RB_NEGINF -1 #define RB_INF 1 #define RB_INSERT(name, x, y) name##_RB_INSERT(x, y) #define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y) #define RB_FIND(name, x, y) name##_RB_FIND(x, y) #define RB_NFIND(name, x, y) name##_RB_NFIND(x, y) #define RB_NEXT(name, x, y) name##_RB_NEXT(y) #define RB_PREV(name, x, y) name##_RB_PREV(y) #define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF) #define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF) #define RB_REINSERT(name, x, y) name##_RB_REINSERT(x, y) #define RB_FOREACH(x, name, head) \ for ((x) = RB_MIN(name, head); \ (x) != NULL; \ (x) = name##_RB_NEXT(x)) #define RB_FOREACH_FROM(x, name, y) \ for ((x) = (y); \ ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_SAFE(x, name, head, y) \ for ((x) = RB_MIN(name, head); \ ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_REVERSE(x, name, head) \ for ((x) = RB_MAX(name, head); \ (x) != NULL; \ (x) = name##_RB_PREV(x)) #define RB_FOREACH_REVERSE_FROM(x, name, y) \ for ((x) = (y); \ ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \ for ((x) = RB_MAX(name, head); \ ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \ (x) = (y)) #endif /* _SYS_TREE_H_ */
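
A note on the splay-tree half of the header above: nothing in this change alters how SPLAY_PROTOTYPE()/SPLAY_GENERATE() are consumed. The following is a minimal sketch of that usage with a hypothetical int-keyed node type; the struct, tree, and function names are illustrative, not part of the header:

struct node {
	SPLAY_ENTRY(node) entry;
	int key;
};

static int
node_cmp(struct node *a, struct node *b)
{
	return (a->key < b->key ? -1 : a->key > b->key);
}

SPLAY_HEAD(node_tree, node) nhead = SPLAY_INITIALIZER(&nhead);
SPLAY_PROTOTYPE(node_tree, node, entry, node_cmp);
SPLAY_GENERATE(node_tree, node, entry, node_cmp);

static void
node_example(struct node *n)
{
	struct node query, *found;

	SPLAY_INSERT(node_tree, &nhead, n);
	query.key = n->key;
	found = SPLAY_FIND(node_tree, &nhead, &query);
	SPLAY_FOREACH(found, node_tree, &nhead)
		;	/* visits nodes in key order, splaying as it goes */
}

Every access moves the found node (or its nearest neighbour, on a miss) to the root, which is why SPLAY_FIND() and SPLAY_NEXT() take the tree head and modify it rather than operating on a read-only tree.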
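
The rbe_link[3] layout introduced above keeps the parent pointer in rbe_link[0] and hides the two per-child balance flag bits (_RB_L, _RB_R) in its low-order bits, which the alignment assumption guarantees are otherwise zero. A standalone illustration of that encoding, using hypothetical names rather than the header's internal macros:

#include <stdint.h>

#define	DEMO_L	((uintptr_t)1)
#define	DEMO_R	((uintptr_t)2)
#define	DEMO_LR	((uintptr_t)3)

struct demo_node;		/* assumed to be at least 4-byte aligned */

/* Recover the real parent pointer by masking off the flag bits. */
static inline struct demo_node *
demo_parent(struct demo_node *up)
{
	return ((struct demo_node *)((uintptr_t)up & ~DEMO_LR));
}

/* Pack a parent pointer together with the two flag bits. */
static inline struct demo_node *
demo_pack(struct demo_node *parent, uintptr_t bits)
{
	return ((struct demo_node *)((uintptr_t)parent | (bits & DEMO_LR)));
}

The header itself manipulates the bits in place through _RB_BITS()/_RB_BITSUP(), so a flag can be flipped with a single XOR instead of an unpack/repack cycle.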
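
The RB_ROTATE() comment above describes the dir == _RB_L case, where tmp starts as elm's right child. Stripped of the balance bits and the parent links, the pointer shuffle is an ordinary single rotation; a sketch on a bare node type, for illustration only:

struct tnode {
	struct tnode *left, *right;
};

/*
 * Left rotation: tmp starts as elm's right child; afterwards elm is
 * tmp's left child, and tmp's former left subtree hangs to the right
 * of elm.  As with RB_ROTATE(), reattaching the returned node to
 * elm's former parent is left to the caller.
 */
static struct tnode *
rotate_left(struct tnode *elm)
{
	struct tnode *tmp = elm->right;

	elm->right = tmp->left;
	tmp->left = elm;
	return (tmp);		/* new root of the rotated subtree */
}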
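
RB_AUGMENT defaults to 'break' above, which makes the loop in RB_UPDATE_AUGMENT() exit immediately, so unaugmented trees pay nothing. A consumer that wants per-node augmented data supplies its own definition before RB_GENERATE() is expanded. A sketch, assuming a hypothetical tree that caches subtree node counts:

struct anode {
	RB_ENTRY(anode) entry;
	int		key;
	unsigned	count;	/* number of nodes in this subtree */
};

static void
anode_augment(struct anode *n)
{
	n->count = 1;
	if (RB_LEFT(n, entry) != NULL)
		n->count += RB_LEFT(n, entry)->count;
	if (RB_RIGHT(n, entry) != NULL)
		n->count += RB_RIGHT(n, entry)->count;
}

#undef	RB_AUGMENT		/* drop the default 'break' definition */
#define	RB_AUGMENT(x)	anode_augment(x)

RB_UPDATE_AUGMENT() then walks from the modified node up to the root, invoking the callback on each ancestor, so the callback only needs to recompute one node from its two children.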
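
None of the changes above alter the consumer-facing API that RB_PROTOTYPE()/RB_GENERATE() emit. For reference, a minimal sketch of declaring and using an int-keyed tree, with all names hypothetical:

struct knode {
	RB_ENTRY(knode) entry;
	int key;
};

static int
knode_cmp(struct knode *a, struct knode *b)
{
	return (a->key < b->key ? -1 : a->key > b->key);
}

RB_HEAD(knode_tree, knode) kroot = RB_INITIALIZER(&kroot);
RB_PROTOTYPE(knode_tree, knode, entry, knode_cmp);
RB_GENERATE(knode_tree, knode, entry, knode_cmp);

static void
knode_example(struct knode *n)
{
	struct knode query, *m;

	RB_INSERT(knode_tree, &kroot, n); /* returns NULL, or an equal node */
	query.key = n->key;
	m = RB_FIND(knode_tree, &kroot, &query);
	m = RB_NFIND(knode_tree, &kroot, &query); /* first node >= query */
	RB_FOREACH(m, knode_tree, &kroot)
		;				/* in-order walk */
	RB_REMOVE(knode_tree, &kroot, n);
}

RB_REINSERT() covers the case where a node's key may have changed in place: it compares the node against its neighbours and only performs a full remove/insert when the ordering is actually violated. With _RB_DIAGNOSTIC defined (implied by a DIAGNOSTIC kernel), RB_GENERATE() additionally emits a name##_RB_RANK() checker that returns -1 when the rank-balance invariant is broken, which consumers can use in assertions.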