diff --git a/sys/dev/random/fenestrasX/fx_pool.c b/sys/dev/random/fenestrasX/fx_pool.c index b6cc0dad349d..f2fc15874b67 100644 --- a/sys/dev/random/fenestrasX/fx_pool.c +++ b/sys/dev/random/fenestrasX/fx_pool.c @@ -1,616 +1,619 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 Conrad Meyer * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Timer-based reseed interval growth factor and limit in seconds. (§ 3.2) */ #define FXENT_RESSED_INTVL_GFACT 3 #define FXENT_RESEED_INTVL_MAX 3600 /* * Pool reseed schedule. Initially, only pool 0 is active. Until the timer * interval reaches INTVL_MAX, only pool 0 is used. * * After reaching INTVL_MAX, pool k is either activated (if inactive) or used * (if active) every 3^k timer reseeds. (§ 3.3) * * (Entropy harvesting only round robins across active pools.) */ #define FXENT_RESEED_BASE 3 /* * Number of bytes from high quality sources to allocate to pool 0 before * normal round-robin allocation after each timer reseed. (§ 3.4) */ #define FXENT_HI_SRC_POOL0_BYTES 32 /* * § 3.1 * * Low sources provide unconditioned entropy, such as mouse movements; high * sources are assumed to provide high-quality random bytes. Pull sources are * those which can be polled, i.e., anything randomdev calls a "random_source." * * In the whitepaper, low sources are pull. For us, at least in the existing * design, low-quality sources push into some global ring buffer and then get * forwarded into the RNG by a thread that continually polls. Presumably their * design batches low entopy signals in some way (SHA512?) and only requests * them dynamically on reseed. I'm not sure what the benefit is vs feeding * into the pools directly. 
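
To make the schedule above concrete, the following standalone sketch (not part of the patch) models how the timer interval grows by the factor of 3 until it hits the 3600-second cap, and which pools the k-th timer reseed would draw from once the cap is reached. The macro values mirror FXENT_RESSED_INTVL_GFACT, FXENT_RESEED_INTVL_MAX and FXENT_RESEED_BASE; the pool count and everything else is hypothetical userspace code.

#include <stdio.h>

#define GFACT		3	/* mirrors FXENT_RESSED_INTVL_GFACT */
#define INTVL_MAX	3600	/* mirrors FXENT_RESEED_INTVL_MAX */
#define BASE		3	/* mirrors FXENT_RESEED_BASE */
#define NPOOLS		8	/* assumed pool count, for illustration only */

int
main(void)
{
	unsigned intvl, k, n, pools;

	/* Section 3.2: the interval ramps 1, 3, 9, ... and is capped at 3600 s. */
	intvl = 1;
	do {
		printf("reseed interval %u s\n", intvl);
		intvl *= GFACT;
		if (intvl > INTVL_MAX)
			intvl = INTVL_MAX;
	} while (intvl < INTVL_MAX);
	printf("reseed interval %u s (capped)\n", intvl);

	/* Section 3.3: once capped, timer reseed number k draws from pools [0, n). */
	for (k = 1; k <= 27; k++) {
		pools = 1;
		for (n = k; (n % BASE) == 0 && pools < NPOOLS; n /= BASE)
			pools++;
		printf("timer reseed %2u uses pools 0..%u\n", k, pools - 1);
	}
	return (0);
}
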
*/ enum fxrng_ent_access_cls { FXRNG_PUSH, FXRNG_PULL, }; enum fxrng_ent_source_cls { FXRNG_HI, FXRNG_LO, FXRNG_GARBAGE, }; struct fxrng_ent_cls { enum fxrng_ent_access_cls entc_axx_cls; enum fxrng_ent_source_cls entc_src_cls; }; static const struct fxrng_ent_cls fxrng_hi_pull = { .entc_axx_cls = FXRNG_PULL, .entc_src_cls = FXRNG_HI, }; static const struct fxrng_ent_cls fxrng_hi_push = { .entc_axx_cls = FXRNG_PUSH, .entc_src_cls = FXRNG_HI, }; static const struct fxrng_ent_cls fxrng_lo_push = { .entc_axx_cls = FXRNG_PUSH, .entc_src_cls = FXRNG_LO, }; static const struct fxrng_ent_cls fxrng_garbage = { .entc_axx_cls = FXRNG_PUSH, .entc_src_cls = FXRNG_GARBAGE, }; /* * This table is a mapping of randomdev's current source abstractions to the * designations above; at some point, if the design seems reasonable, it would * make more sense to pull this up into the abstraction layer instead. */ static const struct fxrng_ent_char { const struct fxrng_ent_cls *entc_cls; } fxrng_ent_char[ENTROPYSOURCE] = { [RANDOM_CACHED] = { .entc_cls = &fxrng_hi_push, }, [RANDOM_ATTACH] = { .entc_cls = &fxrng_lo_push, }, [RANDOM_KEYBOARD] = { .entc_cls = &fxrng_lo_push, }, [RANDOM_MOUSE] = { .entc_cls = &fxrng_lo_push, }, [RANDOM_NET_TUN] = { .entc_cls = &fxrng_lo_push, }, [RANDOM_NET_ETHER] = { .entc_cls = &fxrng_lo_push, }, [RANDOM_NET_NG] = { .entc_cls = &fxrng_lo_push, }, [RANDOM_INTERRUPT] = { .entc_cls = &fxrng_lo_push, }, [RANDOM_SWI] = { .entc_cls = &fxrng_lo_push, }, [RANDOM_FS_ATIME] = { .entc_cls = &fxrng_lo_push, }, [RANDOM_UMA] = { .entc_cls = &fxrng_lo_push, }, + [RANDOM_CALLOUT] = { + .entc_cls = &fxrng_lo_push, + }, [RANDOM_PURE_OCTEON] = { .entc_cls = &fxrng_hi_push, /* Could be made pull. */ }, [RANDOM_PURE_SAFE] = { .entc_cls = &fxrng_hi_push, }, [RANDOM_PURE_GLXSB] = { .entc_cls = &fxrng_hi_push, }, [RANDOM_PURE_HIFN] = { .entc_cls = &fxrng_hi_push, }, [RANDOM_PURE_RDRAND] = { .entc_cls = &fxrng_hi_pull, }, [RANDOM_PURE_NEHEMIAH] = { .entc_cls = &fxrng_hi_pull, }, [RANDOM_PURE_RNDTEST] = { .entc_cls = &fxrng_garbage, }, [RANDOM_PURE_VIRTIO] = { .entc_cls = &fxrng_hi_pull, }, [RANDOM_PURE_BROADCOM] = { .entc_cls = &fxrng_hi_push, }, [RANDOM_PURE_CCP] = { .entc_cls = &fxrng_hi_pull, }, [RANDOM_PURE_DARN] = { .entc_cls = &fxrng_hi_pull, }, [RANDOM_PURE_TPM] = { .entc_cls = &fxrng_hi_push, }, [RANDOM_PURE_VMGENID] = { .entc_cls = &fxrng_hi_push, }, }; /* Useful for single-bit-per-source state. */ BITSET_DEFINE(fxrng_bits, ENTROPYSOURCE); /* XXX Borrowed from not-yet-committed D22702. */ #ifndef BIT_TEST_SET_ATOMIC_ACQ #define BIT_TEST_SET_ATOMIC_ACQ(_s, n, p) \ (atomic_testandset_acq_long( \ &(p)->__bits[__bitset_word((_s), (n))], (n)) != 0) #endif #define FXENT_TEST_SET_ATOMIC_ACQ(n, p) \ BIT_TEST_SET_ATOMIC_ACQ(ENTROPYSOURCE, n, p) /* For special behavior on first-time entropy sources. (§ 3.1) */ static struct fxrng_bits __read_mostly fxrng_seen; /* For special behavior for high-entropy sources after a reseed. (§ 3.4) */ _Static_assert(FXENT_HI_SRC_POOL0_BYTES <= UINT8_MAX, ""); static uint8_t __read_mostly fxrng_reseed_seen[ENTROPYSOURCE]; /* Entropy pools. Lock order is ENT -> RNG(root) -> RNG(leaf). 
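
The per-source bookkeeping above reduces to a one-bit-per-source "have we ever seen this source" set with test-and-set semantics: the first event from a source atomically flips its bit, and only that caller observes the bit as previously clear. Below is a minimal userspace sketch of the same idea, using one C11 atomic word in place of the kernel's BITSET/atomic_testandset machinery; all names are hypothetical, and a single word only covers 64 sources, unlike the BITSET over ENTROPYSOURCE.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* One bit per source; a stand-in for BITSET_DEFINE(fxrng_bits, ENTROPYSOURCE). */
static _Atomic unsigned long seen_bits;

/*
 * True only for the very first caller for a given source, mimicking the
 * !FXENT_TEST_SET_ATOMIC_ACQ(...) "first_time" check above.
 */
static bool
first_time_seen(unsigned src)
{
	unsigned long mask = 1UL << src;

	return ((atomic_fetch_or_explicit(&seen_bits, mask,
	    memory_order_acq_rel) & mask) == 0);
}

int
main(void)
{
	printf("src 3 first time? %d\n", first_time_seen(3));	/* 1 */
	printf("src 3 first time? %d\n", first_time_seen(3));	/* 0 */
	return (0);
}
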
*/ static struct mtx fxent_pool_lk; MTX_SYSINIT(fx_pool, &fxent_pool_lk, "fx entropy pool lock", MTX_DEF); #define FXENT_LOCK() mtx_lock(&fxent_pool_lk) #define FXENT_UNLOCK() mtx_unlock(&fxent_pool_lk) #define FXENT_ASSERT(rng) mtx_assert(&fxent_pool_lk, MA_OWNED) #define FXENT_ASSERT_NOT(rng) mtx_assert(&fxent_pool_lk, MA_NOTOWNED) static struct fxrng_hash fxent_pool[FXRNG_NPOOLS]; static unsigned __read_mostly fxent_nactpools = 1; static struct timeout_task fxent_reseed_timer; static int __read_mostly fxent_timer_ready; /* * Track number of bytes of entropy harvested from high-quality sources prior * to initial keying. The idea is to collect more jitter entropy when fewer * high-quality bytes were available and less if we had other good sources. We * want to provide always-on availability but don't necessarily have *any* * great sources on some platforms. * * Like fxrng_ent_char: at some point, if the design seems reasonable, it would * make more sense to pull this up into the abstraction layer instead. * * Jitter entropy is unimplemented for now. */ static unsigned long fxrng_preseed_ent; void fxrng_pools_init(void) { size_t i; for (i = 0; i < nitems(fxent_pool); i++) fxrng_hash_init(&fxent_pool[i]); } static inline bool fxrng_hi_source(enum random_entropy_source src) { return (fxrng_ent_char[src].entc_cls->entc_src_cls == FXRNG_HI); } /* * A racy check that this high-entropy source's event should contribute to * pool0 on the basis of per-source byte count. The check is racy for two * reasons: * - Performance: The vast majority of the time, we've already taken 32 bytes * from any present high quality source and the racy check lets us avoid * dirtying the cache for the global array. * - Correctness: It's fine that the check is racy. The failure modes are: * • False positive: We will detect when we take the lock. * • False negative: We still collect the entropy; it just won't be * preferentially placed in pool0 in this case. */ static inline bool fxrng_hi_pool0_eligible_racy(enum random_entropy_source src) { return (atomic_load_acq_8(&fxrng_reseed_seen[src]) < FXENT_HI_SRC_POOL0_BYTES); } /* * Top level entropy processing API from randomdev. * * Invoked by the core randomdev subsystem both for preload entropy, "push" * sources (like interrupts, keyboard, etc) and pull sources (RDRAND, etc). */ void fxrng_event_processor(struct harvest_event *event) { enum random_entropy_source src; unsigned pool; bool first_time, first_32; src = event->he_source; ASSERT_DEBUG(event->he_size <= sizeof(event->he_entropy), "%s: he_size: %u > sizeof(he_entropy): %zu", __func__, (unsigned)event->he_size, sizeof(event->he_entropy)); /* * Zero bytes of source entropy doesn't count as observing this source * for the first time. We still harvest the counter entropy. */ first_time = event->he_size > 0 && !FXENT_TEST_SET_ATOMIC_ACQ(src, &fxrng_seen); if (__predict_false(first_time)) { /* * "The first time [any source] provides entropy, it is used to * directly reseed the root PRNG. The entropy pools are * bypassed." (§ 3.1) * * Unlike Windows, we cannot rely on loader(8) seed material * being present, so we perform initial keying in the kernel. * We use brng_generation 0 to represent an unkeyed state. * * Prior to initial keying, it doesn't make sense to try to mix * the entropy directly with the root PRNG state, as the root * PRNG is unkeyed. Instead, we collect pre-keying dynamic * entropy in pool0 and do not bump the root PRNG seed version * or set its key. 
Initial keying will incorporate pool0 and * bump the brng_generation (seed version). * * After initial keying, we do directly mix in first-time * entropy sources. We use the root BRNG to generate 32 bytes * and use fxrng_hash to mix it with the new entropy source and * re-key with the first 256 bits of hash output. */ FXENT_LOCK(); FXRNG_BRNG_LOCK(&fxrng_root); if (__predict_true(fxrng_root.brng_generation > 0)) { /* Bypass the pools: */ FXENT_UNLOCK(); fxrng_brng_src_reseed(event); FXRNG_BRNG_ASSERT_NOT(&fxrng_root); return; } /* * Keying the root PRNG requires both FXENT_LOCK and the PRNG's * lock, so we only need to hold on to the pool lock to prevent * initial keying without this entropy. */ FXRNG_BRNG_UNLOCK(&fxrng_root); /* Root PRNG hasn't been keyed yet, just accumulate event. */ fxrng_hash_update(&fxent_pool[0], &event->he_somecounter, sizeof(event->he_somecounter)); fxrng_hash_update(&fxent_pool[0], event->he_entropy, event->he_size); if (fxrng_hi_source(src)) { /* Prevent overflow. */ if (fxrng_preseed_ent <= ULONG_MAX - event->he_size) fxrng_preseed_ent += event->he_size; } FXENT_UNLOCK(); return; } /* !first_time */ /* * "The first 32 bytes produced by a high entropy source after a reseed * from the pools is always put in pool 0." (§ 3.4) * * The first-32-byte tracking data in fxrng_reseed_seen is reset in * fxent_timer_reseed_npools() below. */ first_32 = event->he_size > 0 && fxrng_hi_source(src) && atomic_load_acq_int(&fxent_nactpools) > 1 && fxrng_hi_pool0_eligible_racy(src); if (__predict_false(first_32)) { unsigned rem, seen; FXENT_LOCK(); seen = fxrng_reseed_seen[src]; if (seen == FXENT_HI_SRC_POOL0_BYTES) goto round_robin; rem = FXENT_HI_SRC_POOL0_BYTES - seen; rem = MIN(rem, event->he_size); fxrng_reseed_seen[src] = seen + rem; /* * We put 'rem' bytes in pool0, and any remaining bytes are * round-robin'd across other pools. */ fxrng_hash_update(&fxent_pool[0], ((uint8_t *)event->he_entropy) + event->he_size - rem, rem); if (rem == event->he_size) { fxrng_hash_update(&fxent_pool[0], &event->he_somecounter, sizeof(event->he_somecounter)); FXENT_UNLOCK(); return; } /* * If fewer bytes were needed than this even provied, We only * take the last rem bytes of the entropy buffer and leave the * timecounter to be round-robin'd with the remaining entropy. */ event->he_size -= rem; goto round_robin; } /* !first_32 */ FXENT_LOCK(); round_robin: FXENT_ASSERT(); pool = event->he_destination % fxent_nactpools; fxrng_hash_update(&fxent_pool[pool], event->he_entropy, event->he_size); fxrng_hash_update(&fxent_pool[pool], &event->he_somecounter, sizeof(event->he_somecounter)); if (__predict_false(fxrng_hi_source(src) && atomic_load_acq_64(&fxrng_root_generation) == 0)) { /* Prevent overflow. */ if (fxrng_preseed_ent <= ULONG_MAX - event->he_size) fxrng_preseed_ent += event->he_size; } FXENT_UNLOCK(); } /* * Top level "seeded" API/signal from randomdev. * * This is our warning that a request is coming: we need to be seeded. In * fenestrasX, a request for random bytes _never_ fails. "We (ed: ditto) have * observed that there are many callers that never check for the error code, * even if they are generating cryptographic key material." (§ 1.6) * * If we returned 'false', both read_random(9) and chacha20_randomstir() * (arc4random(9)) will blindly charge on with something almost certainly worse * than what we've got, or are able to get quickly enough. 
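
The section 3.4 accounting handled above is easier to see in isolation: per high-quality source, at most 32 bytes after each timer reseed are steered into pool 0, and anything beyond that falls through to the normal round-robin path. A small standalone sketch of just that bookkeeping follows; the function and variable names are invented for illustration and are not part of the patch.

#include <stdio.h>

#define POOL0_BYTES	32	/* mirrors FXENT_HI_SRC_POOL0_BYTES */
#define MIN(a, b)	((a) < (b) ? (a) : (b))

/*
 * Split an event of 'size' bytes between pool 0 and the round-robin pools,
 * given how many pool-0 bytes this source has already contributed since the
 * last timer reseed.  Returns the pool-0 share and updates *seen, mirroring
 * the fxrng_reseed_seen[] accounting in fxrng_event_processor().
 */
static unsigned
pool0_share(unsigned *seen, unsigned size)
{
	unsigned rem;

	if (*seen >= POOL0_BYTES)
		return (0);
	rem = MIN(POOL0_BYTES - *seen, size);
	*seen += rem;
	return (rem);
}

int
main(void)
{
	unsigned seen = 0;

	/* A 40-byte event: 32 bytes go to pool 0, 8 are left for round-robin. */
	printf("to pool 0: %u\n", pool0_share(&seen, 40));
	/* Later events go entirely round-robin until the next timer reseed. */
	printf("to pool 0: %u\n", pool0_share(&seen, 16));
	return (0);
}
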
*/ bool fxrng_alg_seeded(void) { uint8_t hash[FXRNG_HASH_SZ]; sbintime_t sbt; /* The vast majority of the time, we expect to already be seeded. */ if (__predict_true(atomic_load_acq_64(&fxrng_root_generation) != 0)) return (true); /* * Take the lock and recheck; only one thread needs to do the initial * seeding work. */ FXENT_LOCK(); if (atomic_load_acq_64(&fxrng_root_generation) != 0) { FXENT_UNLOCK(); return (true); } /* XXX Any one-off initial seeding goes here. */ fxrng_hash_finish(&fxent_pool[0], hash, sizeof(hash)); fxrng_hash_init(&fxent_pool[0]); fxrng_brng_reseed(hash, sizeof(hash)); FXENT_UNLOCK(); randomdev_unblock(); explicit_bzero(hash, sizeof(hash)); /* * This may be called too early for taskqueue_thread to be initialized. * fxent_pool_timer_init will detect if we've already unblocked and * queue the first timer reseed at that point. */ if (atomic_load_acq_int(&fxent_timer_ready) != 0) { sbt = SBT_1S; taskqueue_enqueue_timeout_sbt(taskqueue_thread, &fxent_reseed_timer, -sbt, (sbt / 3), C_PREL(2)); } return (true); } /* * Timer-based reseeds and pool expansion. */ static void fxent_timer_reseed_npools(unsigned n) { /* * 64 * 8 => moderately large 512 bytes. Could be static, as we are * only used in a static context. On the other hand, this is in * threadqueue TASK context and we're likely nearly at top of stack * already. */ uint8_t hash[FXRNG_HASH_SZ * FXRNG_NPOOLS]; unsigned i; ASSERT_DEBUG(n > 0 && n <= FXRNG_NPOOLS, "n:%u", n); FXENT_ASSERT(); /* * Collect entropy from pools 0..n-1 by concatenating the output hashes * and then feeding them into fxrng_brng_reseed, which will hash the * aggregate together with the current root PRNG keystate to produce a * new key. It will also bump the global generation counter * appropriately. */ for (i = 0; i < n; i++) { fxrng_hash_finish(&fxent_pool[i], hash + i * FXRNG_HASH_SZ, FXRNG_HASH_SZ); fxrng_hash_init(&fxent_pool[i]); } fxrng_brng_reseed(hash, n * FXRNG_HASH_SZ); explicit_bzero(hash, n * FXRNG_HASH_SZ); /* * "The first 32 bytes produced by a high entropy source after a reseed * from the pools is always put in pool 0." (§ 3.4) * * So here we reset the tracking (somewhat naively given the majority * of sources on most machines are not what we consider "high", but at * 32 bytes it's smaller than a cache line), so the next 32 bytes are * prioritized into pool0. * * See corresponding use of fxrng_reseed_seen in fxrng_event_processor. */ memset(fxrng_reseed_seen, 0, sizeof(fxrng_reseed_seen)); FXENT_ASSERT(); } static void fxent_timer_reseed(void *ctx __unused, int pending __unused) { static unsigned reseed_intvl_sec = 1; /* Only reseeds after FXENT_RESEED_INTVL_MAX is achieved. */ static uint64_t reseed_number = 1; unsigned next_ival, i, k; sbintime_t sbt; if (reseed_intvl_sec < FXENT_RESEED_INTVL_MAX) { next_ival = FXENT_RESSED_INTVL_GFACT * reseed_intvl_sec; if (next_ival > FXENT_RESEED_INTVL_MAX) next_ival = FXENT_RESEED_INTVL_MAX; FXENT_LOCK(); fxent_timer_reseed_npools(1); FXENT_UNLOCK(); } else { /* * The creation of entropy pools beyond 0 is enabled when the * reseed interval hits the maximum. (§ 3.3) */ next_ival = reseed_intvl_sec; /* * Pool 0 is used every reseed; pool 1..0 every 3rd reseed; and in * general, pool n..0 every 3^n reseeds. */ k = reseed_number; reseed_number++; /* Count how many pools, from [0, i), to use for reseed. 
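
As an aside on fxent_timer_reseed_npools() above: the per-pool hashes are laid out back to back in one buffer, and only the first n slots are handed to the reseed as a single aggregate. The trivial userspace sketch below shows that layout with a mock standing in for fxrng_hash_finish(); every name in it is a stand-in, not the kernel API.

#include <stdio.h>
#include <string.h>

#define HASH_SZ	64	/* mirrors FXRNG_HASH_SZ */
#define NPOOLS	8	/* mirrors FXRNG_NPOOLS */

/* Hypothetical stand-in: "finish" pool i by filling its slot in the buffer. */
static void
mock_hash_finish(unsigned pool, unsigned char *out, size_t len)
{
	memset(out, (int)pool + 1, len);
}

int
main(void)
{
	unsigned char hash[HASH_SZ * NPOOLS];	/* the 512 bytes noted above */
	unsigned i, n = 3;

	/* Pool i's output occupies bytes [i * HASH_SZ, (i + 1) * HASH_SZ). */
	for (i = 0; i < n; i++)
		mock_hash_finish(i, hash + i * HASH_SZ, HASH_SZ);
	printf("reseed input: %u bytes from pools 0..%u\n", n * HASH_SZ, n - 1);
	return (0);
}
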
*/ for (i = 1; i < MIN(fxent_nactpools + 1, FXRNG_NPOOLS); i++) { if ((k % FXENT_RESEED_BASE) != 0) break; k /= FXENT_RESEED_BASE; } /* * If we haven't activated pool i yet, activate it and only * reseed from [0, i-1). (§ 3.3) */ FXENT_LOCK(); if (i == fxent_nactpools + 1) { fxent_timer_reseed_npools(fxent_nactpools); fxent_nactpools++; } else { /* Just reseed from [0, i). */ fxent_timer_reseed_npools(i); } FXENT_UNLOCK(); } /* Schedule the next reseed. */ sbt = next_ival * SBT_1S; taskqueue_enqueue_timeout_sbt(taskqueue_thread, &fxent_reseed_timer, -sbt, (sbt / 3), C_PREL(2)); reseed_intvl_sec = next_ival; } static void fxent_pool_timer_init(void *dummy __unused) { sbintime_t sbt; TIMEOUT_TASK_INIT(taskqueue_thread, &fxent_reseed_timer, 0, fxent_timer_reseed, NULL); if (atomic_load_acq_64(&fxrng_root_generation) != 0) { sbt = SBT_1S; taskqueue_enqueue_timeout_sbt(taskqueue_thread, &fxent_reseed_timer, -sbt, (sbt / 3), C_PREL(2)); } atomic_store_rel_int(&fxent_timer_ready, 1); } /* After taskqueue_thread is initialized in SI_SUB_TASKQ:SI_ORDER_SECOND. */ SYSINIT(fxent_pool_timer_init, SI_SUB_TASKQ, SI_ORDER_ANY, fxent_pool_timer_init, NULL); diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c index 46c069126949..aae8ec9172d7 100644 --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -1,669 +1,670 @@ /*- * Copyright (c) 2017 Oliver Pinter * Copyright (c) 2017 W. Dean Freeman * Copyright (c) 2000-2015 Mark R V Murray * Copyright (c) 2013 Arthur Mesh * Copyright (c) 2004 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(RANDOM_ENABLE_ETHER) #define _RANDOM_HARVEST_ETHER_OFF 0 #else #define _RANDOM_HARVEST_ETHER_OFF (1u << RANDOM_NET_ETHER) #endif #if defined(RANDOM_ENABLE_UMA) #define _RANDOM_HARVEST_UMA_OFF 0 #else #define _RANDOM_HARVEST_UMA_OFF (1u << RANDOM_UMA) #endif /* * Note that random_sources_feed() will also use this to try and split up * entropy into a subset of pools per iteration with the goal of feeding * HARVESTSIZE into every pool at least once per second. */ #define RANDOM_KTHREAD_HZ 10 static void random_kthread(void); static void random_sources_feed(void); /* * Random must initialize much earlier than epoch, but we can initialize the * epoch code before SMP starts. Prior to SMP, we can safely bypass * concurrency primitives. */ static __read_mostly bool epoch_inited; static __read_mostly epoch_t rs_epoch; /* * How many events to queue up. We create this many items in * an 'empty' queue, then transfer them to the 'harvest' queue with * supplied junk. When used, they are transferred back to the * 'empty' queue. */ #define RANDOM_RING_MAX 1024 #define RANDOM_ACCUM_MAX 8 /* 1 to let the kernel thread run, 0 to terminate, -1 to mark completion */ volatile int random_kthread_control; /* Allow the sysadmin to select the broad category of * entropy types to harvest. */ __read_frequently u_int hc_source_mask; struct random_sources { CK_LIST_ENTRY(random_sources) rrs_entries; struct random_source *rrs_source; }; static CK_LIST_HEAD(sources_head, random_sources) source_list = CK_LIST_HEAD_INITIALIZER(source_list); SYSCTL_NODE(_kern_random, OID_AUTO, harvest, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Entropy Device Parameters"); /* * Put all the harvest queue context stuff in one place. * this make is a bit easier to lock and protect. */ static struct harvest_context { /* The harvest mutex protects all of harvest_context and * the related data. */ struct mtx hc_mtx; /* Round-robin destination cache. */ u_int hc_destination[ENTROPYSOURCE]; /* The context of the kernel thread processing harvested entropy */ struct proc *hc_kthread_proc; /* * Lockless ring buffer holding entropy events * If ring.in == ring.out, * the buffer is empty. * If ring.in != ring.out, * the buffer contains harvested entropy. * If (ring.in + 1) == ring.out (mod RANDOM_RING_MAX), * the buffer is full. * * NOTE: ring.in points to the last added element, * and ring.out points to the last consumed element. * * The ring.in variable needs locking as there are multiple * sources to the ring. Only the sources may change ring.in, * but the consumer may examine it. * * The ring.out variable does not need locking as there is * only one consumer. Only the consumer may change ring.out, * but the sources may examine it. */ struct entropy_ring { struct harvest_event ring[RANDOM_RING_MAX]; volatile u_int in; volatile u_int out; } hc_entropy_ring; struct fast_entropy_accumulator { volatile u_int pos; uint32_t buf[RANDOM_ACCUM_MAX]; } hc_entropy_fast_accumulator; } harvest_context; static struct kproc_desc random_proc_kp = { "rand_harvestq", random_kthread, &harvest_context.hc_kthread_proc, }; /* Pass the given event straight through to Fortuna/Whatever. 
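
The ring documented above is the classic one-slot-sacrificed circular buffer: in == out means empty, (in + 1) % MAX == out means full, producers advance in, the single consumer advances out, and both indices point at the last element touched rather than the next free one. A compact userspace model of those semantics follows; the kernel serializes its multiple producers with the harvest lock, which this sketch ignores, and all names here are hypothetical.

#include <stdbool.h>
#include <stdio.h>

#define RING_MAX	8	/* small stand-in for RANDOM_RING_MAX */

struct ring {
	int buf[RING_MAX];
	volatile unsigned in;	/* last element added (producer side) */
	volatile unsigned out;	/* last element consumed (consumer side) */
};

/* Producer: mirrors the "is the ring full?" test in random_harvest_queue_(). */
static bool
ring_put(struct ring *r, int v)
{
	unsigned in = (r->in + 1) % RING_MAX;

	if (in == r->out)
		return (false);		/* full; one slot is always sacrificed */
	r->buf[in] = v;
	r->in = in;
	return (true);
}

/* Consumer: mirrors the drain loop in random_kthread(). */
static bool
ring_get(struct ring *r, int *v)
{
	unsigned out;

	if (r->out == r->in)
		return (false);		/* empty */
	out = (r->out + 1) % RING_MAX;
	*v = r->buf[out];
	r->out = out;
	return (true);
}

int
main(void)
{
	struct ring r = { .in = 0, .out = 0 };
	int v;

	ring_put(&r, 42);
	while (ring_get(&r, &v))
		printf("consumed %d\n", v);
	return (0);
}
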
*/ static __inline void random_harvestq_fast_process_event(struct harvest_event *event) { p_random_alg_context->ra_event_processor(event); explicit_bzero(event, sizeof(*event)); } static void random_kthread(void) { u_int maxloop, ring_out, i; /* * Locking is not needed as this is the only place we modify ring.out, and * we only examine ring.in without changing it. Both of these are volatile, * and this is a unique thread. */ for (random_kthread_control = 1; random_kthread_control;) { /* Deal with events, if any. Restrict the number we do in one go. */ maxloop = RANDOM_RING_MAX; while (harvest_context.hc_entropy_ring.out != harvest_context.hc_entropy_ring.in) { ring_out = (harvest_context.hc_entropy_ring.out + 1)%RANDOM_RING_MAX; random_harvestq_fast_process_event(harvest_context.hc_entropy_ring.ring + ring_out); harvest_context.hc_entropy_ring.out = ring_out; if (!--maxloop) break; } random_sources_feed(); /* XXX: FIX!! Increase the high-performance data rate? Need some measurements first. */ for (i = 0; i < RANDOM_ACCUM_MAX; i++) { if (harvest_context.hc_entropy_fast_accumulator.buf[i]) { random_harvest_direct(harvest_context.hc_entropy_fast_accumulator.buf + i, sizeof(harvest_context.hc_entropy_fast_accumulator.buf[0]), RANDOM_UMA); harvest_context.hc_entropy_fast_accumulator.buf[i] = 0; } } /* XXX: FIX!! This is a *great* place to pass hardware/live entropy to random(9) */ tsleep_sbt(&harvest_context.hc_kthread_proc, 0, "-", SBT_1S/RANDOM_KTHREAD_HZ, 0, C_PREL(1)); } random_kthread_control = -1; wakeup(&harvest_context.hc_kthread_proc); kproc_exit(0); /* NOTREACHED */ } /* This happens well after SI_SUB_RANDOM */ SYSINIT(random_device_h_proc, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, kproc_start, &random_proc_kp); static void rs_epoch_init(void *dummy __unused) { rs_epoch = epoch_alloc("Random Sources", EPOCH_PREEMPT); epoch_inited = true; } SYSINIT(rs_epoch_init, SI_SUB_EPOCH, SI_ORDER_ANY, rs_epoch_init, NULL); /* * Run through all fast sources reading entropy for the given * number of rounds, which should be a multiple of the number * of entropy accumulation pools in use; it is 32 for Fortuna. */ static void random_sources_feed(void) { uint32_t entropy[HARVESTSIZE]; struct epoch_tracker et; struct random_sources *rrs; u_int i, n, npools; bool rse_warm; rse_warm = epoch_inited; /* * Evenly-ish distribute pool population across the second based on how * frequently random_kthread iterates. * * For Fortuna, the math currently works out as such: * * 64 bits * 4 pools = 256 bits per iteration * 256 bits * 10 Hz = 2560 bits per second, 320 B/s * */ npools = howmany(p_random_alg_context->ra_poolcount, RANDOM_KTHREAD_HZ); /* * Step over all of live entropy sources, and feed their output * to the system-wide RNG. */ if (rse_warm) epoch_enter_preempt(rs_epoch, &et); CK_LIST_FOREACH(rrs, &source_list, rrs_entries) { for (i = 0; i < npools; i++) { n = rrs->rrs_source->rs_read(entropy, sizeof(entropy)); KASSERT((n <= sizeof(entropy)), ("%s: rs_read returned too much data (%u > %zu)", __func__, n, sizeof(entropy))); /* * Sometimes the HW entropy source doesn't have anything * ready for us. This isn't necessarily untrustworthy. * We don't perform any other verification of an entropy * source (i.e., length is allowed to be anywhere from 1 * to sizeof(entropy), quality is unchecked, etc), so * don't balk verbosely at slow random sources either. * There are reports that RDSEED on x86 metal falls * behind the rate at which we query it, for example. * But it's still a better entropy source than RDRAND. 
*/ if (n == 0) continue; random_harvest_direct(entropy, n, rrs->rrs_source->rs_source); } } if (rse_warm) epoch_exit_preempt(rs_epoch, &et); explicit_bzero(entropy, sizeof(entropy)); } /* ARGSUSED */ static int random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS) { static const u_int user_immutable_mask = (((1 << ENTROPYSOURCE) - 1) & (-1UL << RANDOM_PURE_START)) | _RANDOM_HARVEST_ETHER_OFF | _RANDOM_HARVEST_UMA_OFF; int error; u_int value, orig_value; orig_value = value = hc_source_mask; error = sysctl_handle_int(oidp, &value, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (flsl(value) > ENTROPYSOURCE) return (EINVAL); /* * Disallow userspace modification of pure entropy sources. */ hc_source_mask = (value & ~user_immutable_mask) | (orig_value & user_immutable_mask); return (0); } SYSCTL_PROC(_kern_random_harvest, OID_AUTO, mask, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0, random_check_uint_harvestmask, "IU", "Entropy harvesting mask"); /* ARGSUSED */ static int random_print_harvestmask(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; int error, i; error = sysctl_wire_old_buffer(req, 0); if (error == 0) { sbuf_new_for_sysctl(&sbuf, NULL, 128, req); for (i = ENTROPYSOURCE - 1; i >= 0; i--) sbuf_cat(&sbuf, (hc_source_mask & (1 << i)) ? "1" : "0"); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); } return (error); } SYSCTL_PROC(_kern_random_harvest, OID_AUTO, mask_bin, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, random_print_harvestmask, "A", "Entropy harvesting mask (printable)"); static const char *random_source_descr[ENTROPYSOURCE] = { [RANDOM_CACHED] = "CACHED", [RANDOM_ATTACH] = "ATTACH", [RANDOM_KEYBOARD] = "KEYBOARD", [RANDOM_MOUSE] = "MOUSE", [RANDOM_NET_TUN] = "NET_TUN", [RANDOM_NET_ETHER] = "NET_ETHER", [RANDOM_NET_NG] = "NET_NG", [RANDOM_INTERRUPT] = "INTERRUPT", [RANDOM_SWI] = "SWI", [RANDOM_FS_ATIME] = "FS_ATIME", - [RANDOM_UMA] = "UMA", /* ENVIRONMENTAL_END */ + [RANDOM_UMA] = "UMA", + [RANDOM_CALLOUT] = "CALLOUT", /* ENVIRONMENTAL_END */ [RANDOM_PURE_OCTEON] = "PURE_OCTEON", /* PURE_START */ [RANDOM_PURE_SAFE] = "PURE_SAFE", [RANDOM_PURE_GLXSB] = "PURE_GLXSB", [RANDOM_PURE_HIFN] = "PURE_HIFN", [RANDOM_PURE_RDRAND] = "PURE_RDRAND", [RANDOM_PURE_NEHEMIAH] = "PURE_NEHEMIAH", [RANDOM_PURE_RNDTEST] = "PURE_RNDTEST", [RANDOM_PURE_VIRTIO] = "PURE_VIRTIO", [RANDOM_PURE_BROADCOM] = "PURE_BROADCOM", [RANDOM_PURE_CCP] = "PURE_CCP", [RANDOM_PURE_DARN] = "PURE_DARN", [RANDOM_PURE_TPM] = "PURE_TPM", [RANDOM_PURE_VMGENID] = "VMGENID", /* "ENTROPYSOURCE" */ }; /* ARGSUSED */ static int random_print_harvestmask_symbolic(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; int error, i; bool first; first = true; error = sysctl_wire_old_buffer(req, 0); if (error == 0) { sbuf_new_for_sysctl(&sbuf, NULL, 128, req); for (i = ENTROPYSOURCE - 1; i >= 0; i--) { if (i >= RANDOM_PURE_START && (hc_source_mask & (1 << i)) == 0) continue; if (!first) sbuf_cat(&sbuf, ","); sbuf_cat(&sbuf, !(hc_source_mask & (1 << i)) ? "[" : ""); sbuf_cat(&sbuf, random_source_descr[i]); sbuf_cat(&sbuf, !(hc_source_mask & (1 << i)) ? 
"]" : ""); first = false; } error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); } return (error); } SYSCTL_PROC(_kern_random_harvest, OID_AUTO, mask_symbolic, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, random_print_harvestmask_symbolic, "A", "Entropy harvesting mask (symbolic)"); /* ARGSUSED */ static void random_harvestq_init(void *unused __unused) { static const u_int almost_everything_mask = (((1 << (RANDOM_ENVIRONMENTAL_END + 1)) - 1) & ~_RANDOM_HARVEST_ETHER_OFF & ~_RANDOM_HARVEST_UMA_OFF); hc_source_mask = almost_everything_mask; RANDOM_HARVEST_INIT_LOCK(); harvest_context.hc_entropy_ring.in = harvest_context.hc_entropy_ring.out = 0; } SYSINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_init, NULL); /* * Subroutine to slice up a contiguous chunk of 'entropy' and feed it into the * underlying algorithm. Returns number of bytes actually fed into underlying * algorithm. */ static size_t random_early_prime(char *entropy, size_t len) { struct harvest_event event; size_t i; len = rounddown(len, sizeof(event.he_entropy)); if (len == 0) return (0); for (i = 0; i < len; i += sizeof(event.he_entropy)) { event.he_somecounter = (uint32_t)get_cyclecount(); event.he_size = sizeof(event.he_entropy); event.he_source = RANDOM_CACHED; event.he_destination = harvest_context.hc_destination[RANDOM_CACHED]++; memcpy(event.he_entropy, entropy + i, sizeof(event.he_entropy)); random_harvestq_fast_process_event(&event); } explicit_bzero(entropy, len); return (len); } /* * Subroutine to search for known loader-loaded files in memory and feed them * into the underlying algorithm early in boot. Returns the number of bytes * loaded (zero if none were loaded). */ static size_t random_prime_loader_file(const char *type) { uint8_t *keyfile, *data; size_t size; keyfile = preload_search_by_type(type); if (keyfile == NULL) return (0); data = preload_fetch_addr(keyfile); size = preload_fetch_size(keyfile); if (data == NULL) return (0); return (random_early_prime(data, size)); } /* * This is used to prime the RNG by grabbing any early random stuff * known to the kernel, and inserting it directly into the hashing * module, currently Fortuna. */ /* ARGSUSED */ static void random_harvestq_prime(void *unused __unused) { size_t size; /* * Get entropy that may have been preloaded by loader(8) * and use it to pre-charge the entropy harvest queue. */ size = random_prime_loader_file(RANDOM_CACHED_BOOT_ENTROPY_MODULE); if (bootverbose) { if (size > 0) printf("random: read %zu bytes from preloaded cache\n", size); else printf("random: no preloaded entropy cache\n"); } } SYSINIT(random_device_prime, SI_SUB_RANDOM, SI_ORDER_MIDDLE, random_harvestq_prime, NULL); /* ARGSUSED */ static void random_harvestq_deinit(void *unused __unused) { /* Command the hash/reseed thread to end and wait for it to finish */ random_kthread_control = 0; while (random_kthread_control >= 0) tsleep(&harvest_context.hc_kthread_proc, 0, "harvqterm", hz/5); } SYSUNINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_deinit, NULL); /*- * Entropy harvesting queue routine. * * This is supposed to be fast; do not do anything slow in here! * It is also illegal (and morally reprehensible) to insert any * high-rate data here. "High-rate" is defined as a data source * that will usually cause lots of failures of the "Lockless read" * check a few lines below. This includes the "always-on" sources * like the Intel "rdrand" or the VIA Nehamiah "xstore" sources. 
*/ /* XXXRW: get_cyclecount() is cheap on most modern hardware, where cycle * counters are built in, but on older hardware it will do a real time clock * read which can be quite expensive. */ void random_harvest_queue_(const void *entropy, u_int size, enum random_entropy_source origin) { struct harvest_event *event; u_int ring_in; KASSERT(origin >= RANDOM_START && origin < ENTROPYSOURCE, ("%s: origin %d invalid\n", __func__, origin)); RANDOM_HARVEST_LOCK(); ring_in = (harvest_context.hc_entropy_ring.in + 1)%RANDOM_RING_MAX; if (ring_in != harvest_context.hc_entropy_ring.out) { /* The ring is not full */ event = harvest_context.hc_entropy_ring.ring + ring_in; event->he_somecounter = (uint32_t)get_cyclecount(); event->he_source = origin; event->he_destination = harvest_context.hc_destination[origin]++; if (size <= sizeof(event->he_entropy)) { event->he_size = size; memcpy(event->he_entropy, entropy, size); } else { /* Big event, so squash it */ event->he_size = sizeof(event->he_entropy[0]); event->he_entropy[0] = jenkins_hash(entropy, size, (uint32_t)(uintptr_t)event); } harvest_context.hc_entropy_ring.in = ring_in; } RANDOM_HARVEST_UNLOCK(); } /*- * Entropy harvesting fast routine. * * This is supposed to be very fast; do not do anything slow in here! * This is the right place for high-rate harvested data. */ void random_harvest_fast_(const void *entropy, u_int size) { u_int pos; pos = harvest_context.hc_entropy_fast_accumulator.pos; harvest_context.hc_entropy_fast_accumulator.buf[pos] ^= jenkins_hash(entropy, size, (uint32_t)get_cyclecount()); harvest_context.hc_entropy_fast_accumulator.pos = (pos + 1)%RANDOM_ACCUM_MAX; } /*- * Entropy harvesting direct routine. * * This is not supposed to be fast, but will only be used during * (e.g.) booting when initial entropy is being gathered. 
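
One detail of random_harvest_queue_() above worth spelling out: events larger than the fixed he_entropy buffer are not truncated but squashed to a single 32-bit hash of the whole payload. The self-contained sketch below shows that size decision with an FNV-1a mix standing in for jenkins_hash(); every name in it is a mock, not the kernel API.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EV_ENTROPY_BYTES	32	/* stand-in for sizeof(event->he_entropy) */

struct mock_event {
	unsigned size;
	uint8_t	 entropy[EV_ENTROPY_BYTES];
};

/* Hypothetical stand-in for jenkins_hash(); any 32-bit mix works here. */
static uint32_t
mock_hash32(const void *p, size_t len)
{
	const uint8_t *b = p;
	uint32_t h = 2166136261u;

	while (len-- > 0)
		h = (h ^ *b++) * 16777619u;	/* FNV-1a */
	return (h);
}

/*
 * Mirror the decision in random_harvest_queue_(): small payloads are copied
 * verbatim, oversized ones collapse to one hash word.
 */
static void
fill_event(struct mock_event *ev, const void *entropy, unsigned size)
{
	if (size <= sizeof(ev->entropy)) {
		ev->size = size;
		memcpy(ev->entropy, entropy, size);
	} else {
		uint32_t h = mock_hash32(entropy, size);

		ev->size = sizeof(h);
		memcpy(ev->entropy, &h, sizeof(h));
	}
}

int
main(void)
{
	uint8_t big[128] = { 0 };
	struct mock_event ev;

	fill_event(&ev, big, sizeof(big));
	printf("stored %u bytes\n", ev.size);	/* 4: the event was squashed */
	return (0);
}
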
*/ void random_harvest_direct_(const void *entropy, u_int size, enum random_entropy_source origin) { struct harvest_event event; KASSERT(origin >= RANDOM_START && origin < ENTROPYSOURCE, ("%s: origin %d invalid\n", __func__, origin)); size = MIN(size, sizeof(event.he_entropy)); event.he_somecounter = (uint32_t)get_cyclecount(); event.he_size = size; event.he_source = origin; event.he_destination = harvest_context.hc_destination[origin]++; memcpy(event.he_entropy, entropy, size); random_harvestq_fast_process_event(&event); } void random_harvest_register_source(enum random_entropy_source source) { hc_source_mask |= (1 << source); } void random_harvest_deregister_source(enum random_entropy_source source) { hc_source_mask &= ~(1 << source); } void random_source_register(struct random_source *rsource) { struct random_sources *rrs; KASSERT(rsource != NULL, ("invalid input to %s", __func__)); rrs = malloc(sizeof(*rrs), M_ENTROPY, M_WAITOK); rrs->rrs_source = rsource; random_harvest_register_source(rsource->rs_source); printf("random: registering fast source %s\n", rsource->rs_ident); RANDOM_HARVEST_LOCK(); CK_LIST_INSERT_HEAD(&source_list, rrs, rrs_entries); RANDOM_HARVEST_UNLOCK(); } void random_source_deregister(struct random_source *rsource) { struct random_sources *rrs = NULL; KASSERT(rsource != NULL, ("invalid input to %s", __func__)); random_harvest_deregister_source(rsource->rs_source); RANDOM_HARVEST_LOCK(); CK_LIST_FOREACH(rrs, &source_list, rrs_entries) if (rrs->rrs_source == rsource) { CK_LIST_REMOVE(rrs, rrs_entries); break; } RANDOM_HARVEST_UNLOCK(); if (rrs != NULL && epoch_inited) epoch_wait_preempt(rs_epoch); free(rrs, M_ENTROPY); } static int random_source_handler(SYSCTL_HANDLER_ARGS) { struct epoch_tracker et; struct random_sources *rrs; struct sbuf sbuf; int error, count; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 64, req); count = 0; epoch_enter_preempt(rs_epoch, &et); CK_LIST_FOREACH(rrs, &source_list, rrs_entries) { sbuf_cat(&sbuf, (count++ ? ",'" : "'")); sbuf_cat(&sbuf, rrs->rrs_source->rs_ident); sbuf_cat(&sbuf, "'"); } epoch_exit_preempt(rs_epoch, &et); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } SYSCTL_PROC(_kern_random, OID_AUTO, random_sources, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, random_source_handler, "A", "List of active fast entropy sources."); MODULE_VERSION(random_harvestq, 1); diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index d0fb19661fa4..bc7b50e5ff8f 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -1,1557 +1,1568 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_callout_profiling.h" #include "opt_ddb.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #ifdef DDB #include #include #include #endif #ifdef SMP #include #endif DPCPU_DECLARE(sbintime_t, hardclocktime); SDT_PROVIDER_DEFINE(callout_execute); SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *"); SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *"); static void softclock_thread(void *arg); #ifdef CALLOUT_PROFILING static int avg_depth; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, "Average number of items examined per softclock call. Units = 1/1000"); static int avg_gcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, "Average number of Giant callouts made per softclock call. Units = 1/1000"); static int avg_lockcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, "Average number of lock callouts made per softclock call. Units = 1/1000"); static int avg_mpcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); static int avg_depth_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, "Average number of direct callouts examined per callout_process call. " "Units = 1/1000"); static int avg_lockcalls_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " "callout_process call. Units = 1/1000"); static int avg_mpcalls_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, 0, "Average number of MP direct callouts made per callout_process call. 
" "Units = 1/1000"); #endif static int ncallout; SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0, "Number of entries in callwheel and size of timeout() preallocation"); #ifdef RSS static int pin_default_swi = 1; static int pin_pcpu_swi = 1; #else static int pin_default_swi = 0; static int pin_pcpu_swi = 0; #endif SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi, 0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)"); SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi, 0, "Pin the per-CPU swis (except PCPU 0, which is also default)"); /* * TODO: * allocate more timeout table slots when table overflows. */ static u_int __read_mostly callwheelsize; static u_int __read_mostly callwheelmask; /* * The callout cpu exec entities represent informations necessary for * describing the state of callouts currently running on the CPU and the ones * necessary for migrating callouts to the new callout cpu. In particular, * the first entry of the array cc_exec_entity holds informations for callout * running in SWI thread context, while the second one holds informations * for callout running directly from hardware interrupt context. * The cached informations are very important for deferring migration when * the migrating callout is already running. */ struct cc_exec { struct callout *cc_curr; callout_func_t *cc_drain; void *cc_last_func; void *cc_last_arg; #ifdef SMP callout_func_t *ce_migration_func; void *ce_migration_arg; sbintime_t ce_migration_time; sbintime_t ce_migration_prec; int ce_migration_cpu; #endif bool cc_cancel; bool cc_waiting; }; /* * There is one struct callout_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. 
*/ struct callout_cpu { struct mtx_padalign cc_lock; struct cc_exec cc_exec_entity[2]; struct callout *cc_next; struct callout_list *cc_callwheel; struct callout_tailq cc_expireq; sbintime_t cc_firstevent; sbintime_t cc_lastscan; struct thread *cc_thread; u_int cc_bucket; u_int cc_inited; #ifdef KTR char cc_ktr_event_name[20]; #endif }; #define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION) #define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr #define cc_exec_last_func(cc, dir) cc->cc_exec_entity[dir].cc_last_func #define cc_exec_last_arg(cc, dir) cc->cc_exec_entity[dir].cc_last_arg #define cc_exec_drain(cc, dir) cc->cc_exec_entity[dir].cc_drain #define cc_exec_next(cc) cc->cc_next #define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel #define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting #ifdef SMP #define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func #define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg #define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu #define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time #define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec static struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU #define CC_CPU(cpu) (&cc_cpu[(cpu)]) #define CC_SELF() CC_CPU(PCPU_GET(cpuid)) #else static struct callout_cpu cc_cpu; #define CC_CPU(cpu) (&cc_cpu) #define CC_SELF() (&cc_cpu) #endif #define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) #define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) static int __read_mostly cc_default_cpu; static void callout_cpu_init(struct callout_cpu *cc, int cpu); static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, #ifdef CALLOUT_PROFILING int *mpcalls, int *lockcalls, int *gcalls, #endif int direct); static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); /** * Locked by cc_lock: * cc_curr - If a callout is in progress, it is cc_curr. * If cc_curr is non-NULL, threads waiting in * callout_drain() will be woken up as soon as the * relevant callout completes. * cc_cancel - Changing to 1 with both callout_lock and cc_lock held * guarantees that the current callout will not run. * The softclock_call_cc() function sets this to 0 before it * drops callout_lock to acquire c_lock, and it calls * the handler only if curr_cancelled is still 0 after * cc_lock is successfully acquired. * cc_waiting - If a thread is waiting in callout_drain(), then * callout_wait is nonzero. Set only when * cc_curr is non-NULL. */ /* * Resets the execution entity tied to a specific callout cpu. */ static void cc_cce_cleanup(struct callout_cpu *cc, int direct) { cc_exec_curr(cc, direct) = NULL; cc_exec_cancel(cc, direct) = false; cc_exec_waiting(cc, direct) = false; #ifdef SMP cc_migration_cpu(cc, direct) = CPUBLOCK; cc_migration_time(cc, direct) = 0; cc_migration_prec(cc, direct) = 0; cc_migration_func(cc, direct) = NULL; cc_migration_arg(cc, direct) = NULL; #endif } /* * Checks if migration is requested by a specific callout cpu. */ static int cc_cce_migrating(struct callout_cpu *cc, int direct) { #ifdef SMP return (cc_migration_cpu(cc, direct) != CPUBLOCK); #else return (0); #endif } /* * Kernel low level callwheel initialization * called on the BSP during kernel startup. 
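
For reference, the wheel sizing in callout_callwheel_init() below is just "next power of two above the tunable": fls() returns the 1-based index of the highest set bit, so shifting 1 by it rounds up. The short userspace check below reimplements fls() locally so it stands alone; the 18508 figure is simply the clip value visible in the code, not a claim about any particular machine.

#include <stdio.h>

/* Minimal local fls(): 1-based index of the most significant set bit. */
static int
my_fls(unsigned v)
{
	int b;

	for (b = 0; v != 0; v >>= 1)
		b++;
	return (b);
}

int
main(void)
{
	unsigned ncallout = 18508;	/* the clip applied to 16 + maxproc + maxfiles */
	unsigned callwheelsize, callwheelmask;

	/* Next power of two above ncallout, as in callout_callwheel_init(). */
	callwheelsize = 1U << my_fls(ncallout);
	callwheelmask = callwheelsize - 1;
	printf("ncallout %u -> callwheelsize %u (mask 0x%x)\n",
	    ncallout, callwheelsize, callwheelmask);
	return (0);
}
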
*/ static void callout_callwheel_init(void *dummy) { struct callout_cpu *cc; int cpu; /* * Calculate the size of the callout wheel and the preallocated * timeout() structures. * XXX: Clip callout to result of previous function of maxusers * maximum 384. This is still huge, but acceptable. */ ncallout = imin(16 + maxproc + maxfiles, 18508); TUNABLE_INT_FETCH("kern.ncallout", &ncallout); /* * Calculate callout wheel size, should be next power of two higher * than 'ncallout'. */ callwheelsize = 1 << fls(ncallout); callwheelmask = callwheelsize - 1; /* * Fetch whether we're pinning the swi's or not. */ TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi); TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi); /* * Initialize callout wheels. The software interrupt threads * are created later. */ cc_default_cpu = PCPU_GET(cpuid); CPU_FOREACH(cpu) { cc = CC_CPU(cpu); callout_cpu_init(cc, cpu); } } SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); /* * Initialize the per-cpu callout structures. */ static void callout_cpu_init(struct callout_cpu *cc, int cpu) { int i; mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN); cc->cc_inited = 1; cc->cc_callwheel = malloc_domainset(sizeof(struct callout_list) * callwheelsize, M_CALLOUT, DOMAINSET_PREF(pcpu_find(cpu)->pc_domain), M_WAITOK); for (i = 0; i < callwheelsize; i++) LIST_INIT(&cc->cc_callwheel[i]); TAILQ_INIT(&cc->cc_expireq); cc->cc_firstevent = SBT_MAX; for (i = 0; i < 2; i++) cc_cce_cleanup(cc, i); #ifdef KTR snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), "callwheel cpu %d", cpu); #endif } #ifdef SMP /* * Switches the cpu tied to a specific callout. * The function expects a locked incoming callout cpu and returns with * locked outcoming callout cpu. */ static struct callout_cpu * callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) { struct callout_cpu *new_cc; MPASS(c != NULL && cc != NULL); CC_LOCK_ASSERT(cc); /* * Avoid interrupts and preemption firing after the callout cpu * is blocked in order to avoid deadlocks as the new thread * may be willing to acquire the callout cpu lock. */ c->c_cpu = CPUBLOCK; spinlock_enter(); CC_UNLOCK(cc); new_cc = CC_CPU(new_cpu); CC_LOCK(new_cc); spinlock_exit(); c->c_cpu = new_cpu; return (new_cc); } #endif /* * Start softclock threads. */ static void start_softclock(void *dummy) { struct proc *p; struct thread *td; struct callout_cpu *cc; int cpu, error; bool pin_swi; p = NULL; CPU_FOREACH(cpu) { cc = CC_CPU(cpu); error = kproc_kthread_add(softclock_thread, cc, &p, &td, RFSTOPPED, 0, "clock", "clock (%d)", cpu); if (error != 0) panic("failed to create softclock thread for cpu %d: %d", cpu, error); CC_LOCK(cc); cc->cc_thread = td; thread_lock(td); sched_class(td, PRI_ITHD); sched_prio(td, PI_SWI(SWI_CLOCK)); TD_SET_IWAIT(td); thread_lock_set(td, (struct mtx *)&cc->cc_lock); thread_unlock(td); if (cpu == cc_default_cpu) pin_swi = pin_default_swi; else pin_swi = pin_pcpu_swi; if (pin_swi) { error = cpuset_setithread(td->td_tid, cpu); if (error != 0) printf("%s: %s clock couldn't be pinned to cpu %d: %d\n", __func__, cpu == cc_default_cpu ? 
"default" : "per-cpu", cpu, error); } } } SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); #define CC_HASH_SHIFT 8 static inline u_int callout_hash(sbintime_t sbt) { return (sbt >> (32 - CC_HASH_SHIFT)); } static inline u_int callout_get_bucket(sbintime_t sbt) { return (callout_hash(sbt) & callwheelmask); } void callout_process(sbintime_t now) { + struct callout_entropy { + struct callout_cpu *cc; + struct thread *td; + sbintime_t now; + } entropy; struct callout *tmp, *tmpn; struct callout_cpu *cc; struct callout_list *sc; struct thread *td; sbintime_t first, last, lookahead, max, tmp_max; u_int firstb, lastb, nowb; #ifdef CALLOUT_PROFILING int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; #endif cc = CC_SELF(); mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); /* Compute the buckets of the last scan and present times. */ firstb = callout_hash(cc->cc_lastscan); cc->cc_lastscan = now; nowb = callout_hash(now); /* Compute the last bucket and minimum time of the bucket after it. */ if (nowb == firstb) lookahead = (SBT_1S / 16); else if (nowb - firstb == 1) lookahead = (SBT_1S / 8); else lookahead = SBT_1S; first = last = now; first += (lookahead / 2); last += lookahead; last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); lastb = callout_hash(last) - 1; max = last; /* * Check if we wrapped around the entire wheel from the last scan. * In case, we need to scan entirely the wheel for pending callouts. */ if (lastb - firstb >= callwheelsize) { lastb = firstb + callwheelsize - 1; if (nowb - firstb >= callwheelsize) nowb = lastb; } /* Iterate callwheel from firstb to nowb and then up to lastb. */ do { sc = &cc->cc_callwheel[firstb & callwheelmask]; tmp = LIST_FIRST(sc); while (tmp != NULL) { /* Run the callout if present time within allowed. */ if (tmp->c_time <= now) { /* * Consumer told us the callout may be run * directly from hardware interrupt context. */ if (tmp->c_iflags & CALLOUT_DIRECT) { #ifdef CALLOUT_PROFILING ++depth_dir; #endif cc_exec_next(cc) = LIST_NEXT(tmp, c_links.le); cc->cc_bucket = firstb & callwheelmask; LIST_REMOVE(tmp, c_links.le); softclock_call_cc(tmp, cc, #ifdef CALLOUT_PROFILING &mpcalls_dir, &lockcalls_dir, NULL, #endif 1); tmp = cc_exec_next(cc); cc_exec_next(cc) = NULL; } else { tmpn = LIST_NEXT(tmp, c_links.le); LIST_REMOVE(tmp, c_links.le); TAILQ_INSERT_TAIL(&cc->cc_expireq, tmp, c_links.tqe); tmp->c_iflags |= CALLOUT_PROCESSED; tmp = tmpn; } continue; } /* Skip events from distant future. */ if (tmp->c_time >= max) goto next; /* * Event minimal time is bigger than present maximal * time, so it cannot be aggregated. */ if (tmp->c_time > last) { lastb = nowb; goto next; } /* Update first and last time, respecting this event. */ if (tmp->c_time < first) first = tmp->c_time; tmp_max = tmp->c_time + tmp->c_precision; if (tmp_max < last) last = tmp_max; next: tmp = LIST_NEXT(tmp, c_links.le); } /* Proceed with the next bucket. */ firstb++; /* * Stop if we looked after present time and found * some event we can't execute at now. * Stop if we looked far enough into the future. 
*/ } while (((int)(firstb - lastb)) <= 0); cc->cc_firstevent = last; cpu_new_callout(curcpu, last, first); #ifdef CALLOUT_PROFILING avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; #endif if (!TAILQ_EMPTY(&cc->cc_expireq)) { + entropy.cc = cc; + entropy.td = curthread; + entropy.now = now; + random_harvest_queue(&entropy, sizeof(entropy), RANDOM_CALLOUT); + td = cc->cc_thread; if (TD_AWAITING_INTR(td)) { thread_lock_block_wait(td); THREAD_LOCK_ASSERT(td, MA_OWNED); TD_CLR_IWAIT(td); sched_add(td, SRQ_INTR); } else mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); } else mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); } static struct callout_cpu * callout_lock(struct callout *c) { struct callout_cpu *cc; int cpu; for (;;) { cpu = c->c_cpu; #ifdef SMP if (cpu == CPUBLOCK) { while (c->c_cpu == CPUBLOCK) cpu_spinwait(); continue; } #endif cc = CC_CPU(cpu); CC_LOCK(cc); if (cpu == c->c_cpu) break; CC_UNLOCK(cc); } return (cc); } static void callout_cc_add(struct callout *c, struct callout_cpu *cc, sbintime_t sbt, sbintime_t precision, void (*func)(void *), void *arg, int cpu, int flags) { int bucket; CC_LOCK_ASSERT(cc); if (sbt < cc->cc_lastscan) sbt = cc->cc_lastscan; c->c_arg = arg; c->c_iflags |= CALLOUT_PENDING; c->c_iflags &= ~CALLOUT_PROCESSED; c->c_flags |= CALLOUT_ACTIVE; if (flags & C_DIRECT_EXEC) c->c_iflags |= CALLOUT_DIRECT; c->c_func = func; c->c_time = sbt; c->c_precision = precision; bucket = callout_get_bucket(c->c_time); CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", c, (int)(c->c_precision >> 32), (u_int)(c->c_precision & 0xffffffff)); LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); if (cc->cc_bucket == bucket) cc_exec_next(cc) = c; /* * Inform the eventtimers(4) subsystem there's a new callout * that has been inserted, but only if really required. */ if (SBT_MAX - c->c_time < c->c_precision) c->c_precision = SBT_MAX - c->c_time; sbt = c->c_time + c->c_precision; if (sbt < cc->cc_firstevent) { cc->cc_firstevent = sbt; cpu_new_callout(cpu, sbt, c->c_time); } } static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, #ifdef CALLOUT_PROFILING int *mpcalls, int *lockcalls, int *gcalls, #endif int direct) { struct rm_priotracker tracker; callout_func_t *c_func, *drain; void *c_arg; struct lock_class *class; struct lock_object *c_lock; uintptr_t lock_status; int c_iflags; #ifdef SMP struct callout_cpu *new_cc; callout_func_t *new_func; void *new_arg; int flags, new_cpu; sbintime_t new_prec, new_time; #endif #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbintime_t sbt1, sbt2; struct timespec ts2; static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ static callout_func_t *lastfunc; #endif KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING, ("softclock_call_cc: pend %p %x", c, c->c_iflags)); KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE, ("softclock_call_cc: act %p %x", c, c->c_flags)); class = (c->c_lock != NULL) ? 
LOCK_CLASS(c->c_lock) : NULL; lock_status = 0; if (c->c_flags & CALLOUT_SHAREDLOCK) { if (class == &lock_class_rm) lock_status = (uintptr_t)&tracker; else lock_status = 1; } c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; c_iflags = c->c_iflags; c->c_iflags &= ~CALLOUT_PENDING; cc_exec_curr(cc, direct) = c; cc_exec_last_func(cc, direct) = c_func; cc_exec_last_arg(cc, direct) = c_arg; cc_exec_cancel(cc, direct) = false; cc_exec_drain(cc, direct) = NULL; CC_UNLOCK(cc); if (c_lock != NULL) { class->lc_lock(c_lock, lock_status); /* * The callout may have been cancelled * while we switched locks. */ if (cc_exec_cancel(cc, direct)) { class->lc_unlock(c_lock); goto skip; } /* The callout cannot be stopped now. */ cc_exec_cancel(cc, direct) = true; if (c_lock == &Giant.lock_object) { #ifdef CALLOUT_PROFILING (*gcalls)++; #endif CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", c, c_func, c_arg); } else { #ifdef CALLOUT_PROFILING (*lockcalls)++; #endif CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", c, c_func, c_arg); } } else { #ifdef CALLOUT_PROFILING (*mpcalls)++; #endif CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbt1 = sbinuptime(); #endif THREAD_NO_SLEEPING(); SDT_PROBE1(callout_execute, , , callout__start, c); c_func(c_arg); SDT_PROBE1(callout_execute, , , callout__end, c); THREAD_SLEEPING_OK(); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbt2 = sbinuptime(); sbt2 -= sbt1; if (sbt2 > maxdt) { if (lastfunc != c_func || sbt2 > maxdt * 2) { ts2 = sbttots(sbt2); printf( "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); } maxdt = sbt2; lastfunc = c_func; } #endif KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0) class->lc_unlock(c_lock); skip: CC_LOCK(cc); KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr")); cc_exec_curr(cc, direct) = NULL; if (cc_exec_drain(cc, direct)) { drain = cc_exec_drain(cc, direct); cc_exec_drain(cc, direct) = NULL; CC_UNLOCK(cc); drain(c_arg); CC_LOCK(cc); } if (cc_exec_waiting(cc, direct)) { /* * There is someone waiting for the * callout to complete. * If the callout was scheduled for * migration just cancel it. */ if (cc_cce_migrating(cc, direct)) { cc_cce_cleanup(cc, direct); /* * It should be assert here that the callout is not * destroyed but that is not easy. */ c->c_iflags &= ~CALLOUT_DFRMIGRATION; } cc_exec_waiting(cc, direct) = false; CC_UNLOCK(cc); wakeup(&cc_exec_waiting(cc, direct)); CC_LOCK(cc); } else if (cc_cce_migrating(cc, direct)) { #ifdef SMP /* * If the callout was scheduled for * migration just perform it now. */ new_cpu = cc_migration_cpu(cc, direct); new_time = cc_migration_time(cc, direct); new_prec = cc_migration_prec(cc, direct); new_func = cc_migration_func(cc, direct); new_arg = cc_migration_arg(cc, direct); cc_cce_cleanup(cc, direct); /* * It should be assert here that the callout is not destroyed * but that is not easy. * * As first thing, handle deferred callout stops. */ if (!callout_migrating(c)) { CTR3(KTR_CALLOUT, "deferred cancelled %p func %p arg %p", c, new_func, new_arg); return; } c->c_iflags &= ~CALLOUT_DFRMIGRATION; new_cc = callout_cpu_switch(c, cc, new_cpu); flags = (direct) ? 
C_DIRECT_EXEC : 0; callout_cc_add(c, new_cc, new_time, new_prec, new_func, new_arg, new_cpu, flags); CC_UNLOCK(new_cc); CC_LOCK(cc); #else panic("migration should not happen"); #endif } } /* * The callout mechanism is based on the work of Adam M. Costello and * George Varghese, published in a technical report entitled "Redesigning * the BSD Callout and Timer Facilities" and modified slightly for inclusion * in FreeBSD by Justin T. Gibbs. The original work on the data structures * used in this implementation was published by G. Varghese and T. Lauck in * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for * the Efficient Implementation of a Timer Facility" in the Proceedings of * the 11th ACM Annual Symposium on Operating Systems Principles, * Austin, Texas Nov 1987. */ /* * Software (low priority) clock interrupt thread handler. * Run periodic events from timeout queue. */ static void softclock_thread(void *arg) { struct thread *td = curthread; struct callout_cpu *cc; struct callout *c; #ifdef CALLOUT_PROFILING int depth, gcalls, lockcalls, mpcalls; #endif cc = (struct callout_cpu *)arg; CC_LOCK(cc); for (;;) { while (TAILQ_EMPTY(&cc->cc_expireq)) { /* * Use CC_LOCK(cc) as the thread_lock while * idle. */ thread_lock(td); thread_lock_set(td, (struct mtx *)&cc->cc_lock); TD_SET_IWAIT(td); mi_switch(SW_VOL | SWT_IWAIT); /* mi_switch() drops thread_lock(). */ CC_LOCK(cc); } #ifdef CALLOUT_PROFILING depth = gcalls = lockcalls = mpcalls = 0; #endif while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); softclock_call_cc(c, cc, #ifdef CALLOUT_PROFILING &mpcalls, &lockcalls, &gcalls, #endif 0); #ifdef CALLOUT_PROFILING ++depth; #endif } #ifdef CALLOUT_PROFILING avg_depth += (depth * 1000 - avg_depth) >> 8; avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; #endif } } void callout_when(sbintime_t sbt, sbintime_t precision, int flags, sbintime_t *res, sbintime_t *prec_res) { sbintime_t to_sbt, to_pr; if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) { *res = sbt; *prec_res = precision; return; } if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt) sbt = tick_sbt; if ((flags & C_HARDCLOCK) != 0 || sbt >= sbt_tickthreshold) { /* * Obtain the time of the last hardclock() call on * this CPU directly from the kern_clocksource.c. * This value is per-CPU, but it is equal for all * active ones. */ #ifdef __LP64__ to_sbt = DPCPU_GET(hardclocktime); #else spinlock_enter(); to_sbt = DPCPU_GET(hardclocktime); spinlock_exit(); #endif if (cold && to_sbt == 0) to_sbt = sbinuptime(); if ((flags & C_HARDCLOCK) == 0) to_sbt += tick_sbt; } else to_sbt = sbinuptime(); if (SBT_MAX - to_sbt < sbt) to_sbt = SBT_MAX; else to_sbt += sbt; *res = to_sbt; to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : sbt >> C_PRELGET(flags)); *prec_res = to_pr > precision ? to_pr : precision; } /* * New interface; clients allocate their own callout structures. * * callout_reset() - establish or change a timeout * callout_stop() - disestablish a timeout * callout_init() - initialize a callout structure so that it can * safely be passed to callout_reset() and callout_stop() * * defines three convenience macros: * * callout_active() - returns truth if callout has not been stopped, * drained, or deactivated since the last time the callout was * reset. 
* callout_pending() - returns truth if callout is still waiting for timeout * callout_deactivate() - marks the callout as having been serviced */ int callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec, callout_func_t *ftn, void *arg, int cpu, int flags) { sbintime_t to_sbt, precision; struct callout_cpu *cc; int cancelled, direct; int ignore_cpu=0; cancelled = 0; if (cpu == -1) { ignore_cpu = 1; } else if ((cpu >= MAXCPU) || ((CC_CPU(cpu))->cc_inited == 0)) { /* Invalid CPU spec */ panic("Invalid CPU in callout %d", cpu); } callout_when(sbt, prec, flags, &to_sbt, &precision); /* * This flag used to be added by callout_cc_add, but the * first time you call this we could end up with the * wrong direct flag if we don't do it before we add. */ if (flags & C_DIRECT_EXEC) { direct = 1; } else { direct = 0; } KASSERT(!direct || c->c_lock == NULL || (LOCK_CLASS(c->c_lock)->lc_flags & LC_SPINLOCK), ("%s: direct callout %p has non-spin lock", __func__, c)); cc = callout_lock(c); /* * Don't allow migration if the user does not care. */ if (ignore_cpu) { cpu = c->c_cpu; } if (cc_exec_curr(cc, direct) == c) { /* * We're being asked to reschedule a callout which is * currently in progress. If there is a lock then we * can cancel the callout if it has not really started. */ if (c->c_lock != NULL && !cc_exec_cancel(cc, direct)) cancelled = cc_exec_cancel(cc, direct) = true; if (cc_exec_waiting(cc, direct) || cc_exec_drain(cc, direct)) { /* * Someone has called callout_drain to kill this * callout. Don't reschedule. */ CTR4(KTR_CALLOUT, "%s %p func %p arg %p", cancelled ? "cancelled" : "failed to cancel", c, c->c_func, c->c_arg); CC_UNLOCK(cc); return (cancelled); } #ifdef SMP if (callout_migrating(c)) { /* * This only occurs when a second callout_reset_sbt_on * is made after a previous one moved it into * deferred migration (below). Note we do *not* change * the prev_cpu even though the previous target may * be different. */ cc_migration_cpu(cc, direct) = cpu; cc_migration_time(cc, direct) = to_sbt; cc_migration_prec(cc, direct) = precision; cc_migration_func(cc, direct) = ftn; cc_migration_arg(cc, direct) = arg; cancelled = 1; CC_UNLOCK(cc); return (cancelled); } #endif } if (c->c_iflags & CALLOUT_PENDING) { if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { if (cc_exec_next(cc) == c) cc_exec_next(cc) = LIST_NEXT(c, c_links.le); LIST_REMOVE(c, c_links.le); } else { TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); } cancelled = 1; c->c_iflags &= ~ CALLOUT_PENDING; c->c_flags &= ~ CALLOUT_ACTIVE; } #ifdef SMP /* * If the callout must migrate try to perform it immediately. * If the callout is currently running, just defer the migration * to a more appropriate moment. */ if (c->c_cpu != cpu) { if (cc_exec_curr(cc, direct) == c) { /* * Pending will have been removed since we are * actually executing the callout on another * CPU. That callout should be waiting on the * lock the caller holds. If we set both * active/and/pending after we return and the * lock on the executing callout proceeds, it * will then see pending is true and return. * At the return from the actual callout execution * the migration will occur in softclock_call_cc * and this new callout will be placed on the * new CPU via a call to callout_cpu_switch() which * will get the lock on the right CPU followed * by a call callout_cc_add() which will add it there. * (see above in softclock_call_cc()). 
*/ cc_migration_cpu(cc, direct) = cpu; cc_migration_time(cc, direct) = to_sbt; cc_migration_prec(cc, direct) = precision; cc_migration_func(cc, direct) = ftn; cc_migration_arg(cc, direct) = arg; c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING); c->c_flags |= CALLOUT_ACTIVE; CTR6(KTR_CALLOUT, "migration of %p func %p arg %p in %d.%08x to %u deferred", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), (u_int)(to_sbt & 0xffffffff), cpu); CC_UNLOCK(cc); return (cancelled); } cc = callout_cpu_switch(c, cc, cpu); } #endif callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), (u_int)(to_sbt & 0xffffffff)); CC_UNLOCK(cc); return (cancelled); } /* * Common idioms that can be optimized in the future. */ int callout_schedule_on(struct callout *c, int to_ticks, int cpu) { return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu); } int callout_schedule(struct callout *c, int to_ticks) { return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu); } int _callout_stop_safe(struct callout *c, int flags, callout_func_t *drain) { struct callout_cpu *cc, *old_cc; struct lock_class *class; int direct, sq_locked, use_lock; int cancelled, not_on_a_list; if ((flags & CS_DRAIN) != 0) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock, "calling %s", __func__); KASSERT((flags & CS_DRAIN) == 0 || drain == NULL, ("Cannot set drain callback and CS_DRAIN flag at the same time")); /* * Some old subsystems don't hold Giant while running a callout_stop(), * so just discard this check for the moment. */ if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) { if (c->c_lock == &Giant.lock_object) use_lock = mtx_owned(&Giant); else { use_lock = 1; class = LOCK_CLASS(c->c_lock); class->lc_assert(c->c_lock, LA_XLOCKED); } } else use_lock = 0; if (c->c_iflags & CALLOUT_DIRECT) { direct = 1; } else { direct = 0; } sq_locked = 0; old_cc = NULL; again: cc = callout_lock(c); if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) == (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) && ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) { /* * Special case where this slipped in while we * were migrating *as* the callout is about to * execute. The caller probably holds the lock * the callout wants. * * Get rid of the migration first. Then set * the flag that tells this code *not* to * try to remove it from any lists (its not * on one yet). When the callout wheel runs, * it will ignore this callout. */ c->c_iflags &= ~CALLOUT_PENDING; c->c_flags &= ~CALLOUT_ACTIVE; not_on_a_list = 1; } else { not_on_a_list = 0; } /* * If the callout was migrating while the callout cpu lock was * dropped, just drop the sleepqueue lock and check the states * again. */ if (sq_locked != 0 && cc != old_cc) { #ifdef SMP CC_UNLOCK(cc); sleepq_release(&cc_exec_waiting(old_cc, direct)); sq_locked = 0; old_cc = NULL; goto again; #else panic("migration should not happen"); #endif } /* * If the callout is running, try to stop it or drain it. */ if (cc_exec_curr(cc, direct) == c) { /* * Succeed we to stop it or not, we must clear the * active flag - this is what API users expect. If we're * draining and the callout is currently executing, first wait * until it finishes. */ if ((flags & CS_DRAIN) == 0) c->c_flags &= ~CALLOUT_ACTIVE; if ((flags & CS_DRAIN) != 0) { /* * The current callout is running (or just * about to run) and blocking is allowed, so * just wait for the current invocation to * finish. 
*/ if (cc_exec_curr(cc, direct) == c) { /* * Use direct calls to sleepqueue interface * instead of cv/msleep in order to avoid * a LOR between cc_lock and sleepqueue * chain spinlocks. This piece of code * emulates a msleep_spin() call actually. * * If we already have the sleepqueue chain * locked, then we can safely block. If we * don't already have it locked, however, * we have to drop the cc_lock to lock * it. This opens several races, so we * restart at the beginning once we have * both locks. If nothing has changed, then * we will end up back here with sq_locked * set. */ if (!sq_locked) { CC_UNLOCK(cc); sleepq_lock( &cc_exec_waiting(cc, direct)); sq_locked = 1; old_cc = cc; goto again; } /* * Migration could be cancelled here, but * as long as it is still not sure when it * will be packed up, just let softclock() * take care of it. */ cc_exec_waiting(cc, direct) = true; DROP_GIANT(); CC_UNLOCK(cc); sleepq_add( &cc_exec_waiting(cc, direct), &cc->cc_lock.lock_object, "codrain", SLEEPQ_SLEEP, 0); sleepq_wait( &cc_exec_waiting(cc, direct), 0); sq_locked = 0; old_cc = NULL; /* Reacquire locks previously released. */ PICKUP_GIANT(); goto again; } c->c_flags &= ~CALLOUT_ACTIVE; } else if (use_lock && !cc_exec_cancel(cc, direct) && (drain == NULL)) { /* * The current callout is waiting for its * lock which we hold. Cancel the callout * and return. After our caller drops the * lock, the callout will be skipped in * softclock(). This *only* works with a * callout_stop() *not* callout_drain() or * callout_async_drain(). */ cc_exec_cancel(cc, direct) = true; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); KASSERT(!cc_cce_migrating(cc, direct), ("callout wrongly scheduled for migration")); if (callout_migrating(c)) { c->c_iflags &= ~CALLOUT_DFRMIGRATION; #ifdef SMP cc_migration_cpu(cc, direct) = CPUBLOCK; cc_migration_time(cc, direct) = 0; cc_migration_prec(cc, direct) = 0; cc_migration_func(cc, direct) = NULL; cc_migration_arg(cc, direct) = NULL; #endif } CC_UNLOCK(cc); KASSERT(!sq_locked, ("sleepqueue chain locked")); return (1); } else if (callout_migrating(c)) { /* * The callout is currently being serviced * and the "next" callout is scheduled at * its completion with a migration. We remove * the migration flag so it *won't* get rescheduled, * but we can't stop the one thats running so * we return 0. */ c->c_iflags &= ~CALLOUT_DFRMIGRATION; #ifdef SMP /* * We can't call cc_cce_cleanup here since * if we do it will remove .ce_curr and * its still running. This will prevent a * reschedule of the callout when the * execution completes. 
*/ cc_migration_cpu(cc, direct) = CPUBLOCK; cc_migration_time(cc, direct) = 0; cc_migration_prec(cc, direct) = 0; cc_migration_func(cc, direct) = NULL; cc_migration_arg(cc, direct) = NULL; #endif CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", c, c->c_func, c->c_arg); if (drain) { KASSERT(cc_exec_drain(cc, direct) == NULL, ("callout drain function already set to %p", cc_exec_drain(cc, direct))); cc_exec_drain(cc, direct) = drain; } CC_UNLOCK(cc); return ((flags & CS_EXECUTING) != 0); } else { CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); if (drain) { KASSERT(cc_exec_drain(cc, direct) == NULL, ("callout drain function already set to %p", cc_exec_drain(cc, direct))); cc_exec_drain(cc, direct) = drain; } } KASSERT(!sq_locked, ("sleepqueue chain still locked")); cancelled = ((flags & CS_EXECUTING) != 0); } else cancelled = 1; if (sq_locked) sleepq_release(&cc_exec_waiting(cc, direct)); if ((c->c_iflags & CALLOUT_PENDING) == 0) { CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); /* * For not scheduled and not executing callout return * negative value. */ if (cc_exec_curr(cc, direct) != c) cancelled = -1; CC_UNLOCK(cc); return (cancelled); } c->c_iflags &= ~CALLOUT_PENDING; c->c_flags &= ~CALLOUT_ACTIVE; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); if (not_on_a_list == 0) { if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { if (cc_exec_next(cc) == c) cc_exec_next(cc) = LIST_NEXT(c, c_links.le); LIST_REMOVE(c, c_links.le); } else { TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); } } CC_UNLOCK(cc); return (cancelled); } void callout_init(struct callout *c, int mpsafe) { bzero(c, sizeof *c); if (mpsafe) { c->c_lock = NULL; c->c_iflags = CALLOUT_RETURNUNLOCKED; } else { c->c_lock = &Giant.lock_object; c->c_iflags = 0; } c->c_cpu = cc_default_cpu; } void _callout_init_lock(struct callout *c, struct lock_object *lock, int flags) { bzero(c, sizeof *c); c->c_lock = lock; KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, ("callout_init_lock: bad flags %d", flags)); KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & LC_SLEEPABLE), ("%s: callout %p has sleepable lock", __func__, c)); c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); c->c_cpu = cc_default_cpu; } static int flssbt(sbintime_t sbt) { sbt += (uint64_t)sbt >> 1; if (sizeof(long) >= sizeof(sbintime_t)) return (flsl(sbt)); if (sbt >= SBT_1S) return (flsl(((uint64_t)sbt) >> 32) + 32); return (flsl(sbt)); } /* * Dump immediate statistic snapshot of the scheduled callouts. 
*/ static int sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS) { struct callout *tmp; struct callout_cpu *cc; struct callout_list *sc; sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t; int ct[64], cpr[64], ccpbk[32]; int error, val, i, count, tcum, pcum, maxc, c, medc; int cpu; val = 0; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); count = maxc = 0; st = spr = maxt = maxpr = 0; bzero(ccpbk, sizeof(ccpbk)); bzero(ct, sizeof(ct)); bzero(cpr, sizeof(cpr)); now = sbinuptime(); CPU_FOREACH(cpu) { cc = CC_CPU(cpu); CC_LOCK(cc); for (i = 0; i < callwheelsize; i++) { sc = &cc->cc_callwheel[i]; c = 0; LIST_FOREACH(tmp, sc, c_links.le) { c++; t = tmp->c_time - now; if (t < 0) t = 0; st += t / SBT_1US; spr += tmp->c_precision / SBT_1US; if (t > maxt) maxt = t; if (tmp->c_precision > maxpr) maxpr = tmp->c_precision; ct[flssbt(t)]++; cpr[flssbt(tmp->c_precision)]++; } if (c > maxc) maxc = c; ccpbk[fls(c + c / 2)]++; count += c; } CC_UNLOCK(cc); } for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++) tcum += ct[i]; medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++) pcum += cpr[i]; medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; for (i = 0, c = 0; i < 32 && c < count / 2; i++) c += ccpbk[i]; medc = (i >= 2) ? (1 << (i - 2)) : 0; printf("Scheduled callouts statistic snapshot:\n"); printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n", count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT); printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n", medc, count / callwheelsize / mp_ncpus, (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000, maxc); printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32, (st / count) / 1000000, (st / count) % 1000000, maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32); printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32, (spr / count) / 1000000, (spr / count) % 1000000, maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32); printf(" Distribution: \tbuckets\t time\t tcum\t" " prec\t pcum\n"); for (i = 0, tcum = pcum = 0; i < 64; i++) { if (ct[i] == 0 && cpr[i] == 0) continue; t = (i != 0) ? 
(((sbintime_t)1) << (i - 1)) : 0; tcum += ct[i]; pcum += cpr[i]; printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n", t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32, i - 1 - (32 - CC_HASH_SHIFT), ct[i], tcum, cpr[i], pcum); } return (error); } SYSCTL_PROC(_kern, OID_AUTO, callout_stat, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_callout_stat, "I", "Dump immediate statistic snapshot of the scheduled callouts"); #ifdef DDB static void _show_callout(struct callout *c) { db_printf("callout %p\n", c); #define C_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, c->e); db_printf(" &c_links = %p\n", &(c->c_links)); C_DB_PRINTF("%" PRId64, c_time); C_DB_PRINTF("%" PRId64, c_precision); C_DB_PRINTF("%p", c_arg); C_DB_PRINTF("%p", c_func); C_DB_PRINTF("%p", c_lock); C_DB_PRINTF("%#x", c_flags); C_DB_PRINTF("%#x", c_iflags); C_DB_PRINTF("%d", c_cpu); #undef C_DB_PRINTF } DB_SHOW_COMMAND(callout, db_show_callout) { if (!have_addr) { db_printf("usage: show callout \n"); return; } _show_callout((struct callout *)addr); } static void _show_last_callout(int cpu, int direct, const char *dirstr) { struct callout_cpu *cc; void *func, *arg; cc = CC_CPU(cpu); func = cc_exec_last_func(cc, direct); arg = cc_exec_last_arg(cc, direct); db_printf("cpu %d last%s callout function: %p ", cpu, dirstr, func); db_printsym((db_expr_t)func, DB_STGY_ANY); db_printf("\ncpu %d last%s callout argument: %p\n", cpu, dirstr, arg); } DB_SHOW_COMMAND(callout_last, db_show_callout_last) { int cpu, last; if (have_addr) { if (addr < 0 || addr > mp_maxid || CPU_ABSENT(addr)) { db_printf("no such cpu: %d\n", (int)addr); return; } cpu = last = addr; } else { cpu = 0; last = mp_maxid; } while (cpu <= last) { if (!CPU_ABSENT(cpu)) { _show_last_callout(cpu, 0, ""); _show_last_callout(cpu, 1, " direct"); } cpu++; } } #endif /* DDB */ diff --git a/sys/sys/random.h b/sys/sys/random.h index cfcf4b30e698..6f4f90c9de98 100644 --- a/sys/sys/random.h +++ b/sys/sys/random.h @@ -1,167 +1,168 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2000-2015, 2017 Mark R. V. Murray * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 *
 * $FreeBSD$
 */

#ifndef _SYS_RANDOM_H_
#define	_SYS_RANDOM_H_

#include

#ifdef _KERNEL

struct uio;

/*
 * In the loadable random world, there are set of dangling pointers left in the
 * core kernel:
 *   * read_random, read_random_uio, is_random_seeded are function pointers,
 *     rather than functions.
 *   * p_random_alg_context is a true pointer in loadable random kernels.
 *
 * These are initialized at SI_SUB_RANDOM:SI_ORDER_SECOND during boot.  The
 * read-type pointers are initialized by random_alg_context_init() in
 * randomdev.c and p_random_alg_context in the algorithm, e.g., fortuna.c's
 * random_fortuna_init_alg().  The nice thing about function pointers is they
 * have a similar calling convention to ordinary functions.
 *
 * (In !loadable, the read_random, etc, routines are just plain functions;
 * p_random_alg_context is a macro for the public visibility
 * &random_alg_context.)
 */
#if defined(RANDOM_LOADABLE)
extern void (*_read_random)(void *, u_int);
extern int (*_read_random_uio)(struct uio *, bool);
extern bool (*_is_random_seeded)(void);
#define	read_random(a, b)	(*_read_random)(a, b)
#define	read_random_uio(a, b)	(*_read_random_uio)(a, b)
#define	is_random_seeded()	(*_is_random_seeded)()
#else
void read_random(void *, u_int);
int read_random_uio(struct uio *, bool);
bool is_random_seeded(void);
#endif

/*
 * Note: if you add or remove members of random_entropy_source, remember to
 * also update the strings in the static array random_source_descr[] in
 * random_harvestq.c.
 */
enum random_entropy_source {
	RANDOM_START = 0,
	RANDOM_CACHED = 0,
	/* Environmental sources */
	RANDOM_ATTACH,
	RANDOM_KEYBOARD,
	RANDOM_MOUSE,
	RANDOM_NET_TUN,
	RANDOM_NET_ETHER,
	RANDOM_NET_NG,
	RANDOM_INTERRUPT,
	RANDOM_SWI,
	RANDOM_FS_ATIME,
	RANDOM_UMA,	/* Special!! UMA/SLAB Allocator */
-	RANDOM_ENVIRONMENTAL_END = RANDOM_UMA,
+	RANDOM_CALLOUT,
+	RANDOM_ENVIRONMENTAL_END = RANDOM_CALLOUT,
	/* Fast hardware random-number sources from here on. */
	RANDOM_PURE_START,
	RANDOM_PURE_OCTEON = RANDOM_PURE_START,
	RANDOM_PURE_SAFE,
	RANDOM_PURE_GLXSB,
	RANDOM_PURE_HIFN,
	RANDOM_PURE_RDRAND,
	RANDOM_PURE_NEHEMIAH,
	RANDOM_PURE_RNDTEST,
	RANDOM_PURE_VIRTIO,
	RANDOM_PURE_BROADCOM,
	RANDOM_PURE_CCP,
	RANDOM_PURE_DARN,
	RANDOM_PURE_TPM,
	RANDOM_PURE_VMGENID,
	RANDOM_PURE_QUALCOMM,
	ENTROPYSOURCE
};
_Static_assert(ENTROPYSOURCE <= 32,
    "hardcoded assumption that values fit in a typical word-sized bitset");

#define	RANDOM_CACHED_BOOT_ENTROPY_MODULE	"boot_entropy_cache"

extern u_int hc_source_mask;

void random_harvest_queue_(const void *, u_int, enum random_entropy_source);
void random_harvest_fast_(const void *, u_int);
void random_harvest_direct_(const void *, u_int, enum random_entropy_source);

static __inline void
random_harvest_queue(const void *entropy, u_int size,
    enum random_entropy_source origin)
{
	if (hc_source_mask & (1 << origin))
		random_harvest_queue_(entropy, size, origin);
}

static __inline void
random_harvest_fast(const void *entropy, u_int size,
    enum random_entropy_source origin)
{
	if (hc_source_mask & (1 << origin))
		random_harvest_fast_(entropy, size);
}

static __inline void
random_harvest_direct(const void *entropy, u_int size,
    enum random_entropy_source origin)
{
	if (hc_source_mask & (1 << origin))
		random_harvest_direct_(entropy, size, origin);
}

void random_harvest_register_source(enum random_entropy_source);
void random_harvest_deregister_source(enum random_entropy_source);

#if defined(RANDOM_ENABLE_UMA)
#define	random_harvest_fast_uma(a, b, c)	random_harvest_fast(a, b, c)
#else /* !defined(RANDOM_ENABLE_UMA) */
#define	random_harvest_fast_uma(a, b, c)	do {} while (0)
#endif /* defined(RANDOM_ENABLE_UMA) */

#if defined(RANDOM_ENABLE_ETHER)
#define	random_harvest_queue_ether(a, b)	random_harvest_queue(a, b, RANDOM_NET_ETHER)
#else /* !defined(RANDOM_ENABLE_ETHER) */
#define	random_harvest_queue_ether(a, b)	do {} while (0)
#endif /* defined(RANDOM_ENABLE_ETHER) */

#endif /* _KERNEL */

#define	GRND_NONBLOCK	0x1
#define	GRND_RANDOM	0x2
#define	GRND_INSECURE	0x4

__BEGIN_DECLS
ssize_t getrandom(void *buf, size_t buflen, unsigned int flags);
__END_DECLS

#endif /* _SYS_RANDOM_H_ */
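
For reference, the pattern the callout_process() hunk follows can be written as a small stand-alone sketch.  Everything below is illustrative only: example_harvest_event(), its softc argument, and the anonymous sample struct are hypothetical, while random_harvest_queue(), RANDOM_CALLOUT, and hc_source_mask are the interfaces declared in sys/random.h above.

/*
 * Minimal sketch of feeding a per-event sample into the harvest queue,
 * mirroring the callout_process() change above.  The caller does not claim
 * any entropy; it only hands a small, non-secret payload to the framework.
 */
#include <sys/param.h>
#include <sys/time.h>
#include <sys/random.h>

static void
example_harvest_event(void *softc, sbintime_t now)
{
	struct {
		void		*softc;
		sbintime_t	now;
	} sample;

	/*
	 * random_harvest_queue() is an inline wrapper that returns
	 * immediately unless the source's bit is set in hc_source_mask,
	 * so a disabled source costs only a mask test on this path.
	 */
	sample.softc = softc;
	sample.now = now;
	random_harvest_queue(&sample, sizeof(sample), RANDOM_CALLOUT);
}

Because the wrapper checks hc_source_mask before queueing, the new source can be turned off at run time by clearing its bit in the harvest mask (exposed as the kern.random.harvest.mask sysctl), with no change to the callout code itself.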