Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F111843147
D20313.id58742.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
18 KB
Referenced Files
None
Subscribers
None
D20313.id58742.diff
View Options
Index: sys/dev/random/fortuna.h
===================================================================
--- sys/dev/random/fortuna.h
+++ sys/dev/random/fortuna.h
@@ -36,12 +36,14 @@
#define RANDOM_RESEED_LOCK(x) mtx_lock(&fortuna_state.fs_mtx)
#define RANDOM_RESEED_UNLOCK(x) mtx_unlock(&fortuna_state.fs_mtx)
#define RANDOM_RESEED_ASSERT_LOCK_OWNED(x) mtx_assert(&fortuna_state.fs_mtx, MA_OWNED)
+#define RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED() mtx_assert(&fortuna_state.fs_mtx, MA_NOTOWNED)
#else
#define RANDOM_RESEED_INIT_LOCK(x) mtx_init(&fortuna_state.fs_mtx, mtx_plain)
#define RANDOM_RESEED_DEINIT_LOCK(x) mtx_destroy(&fortuna_state.fs_mtx)
#define RANDOM_RESEED_LOCK(x) mtx_lock(&fortuna_state.fs_mtx)
#define RANDOM_RESEED_UNLOCK(x) mtx_unlock(&fortuna_state.fs_mtx)
#define RANDOM_RESEED_ASSERT_LOCK_OWNED(x)
+#define RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED()
#endif
#endif /* SYS_DEV_RANDOM_FORTUNA_H_INCLUDED */
Index: sys/dev/random/fortuna.c
===================================================================
--- sys/dev/random/fortuna.c
+++ sys/dev/random/fortuna.c
@@ -61,6 +61,7 @@
#include "unit_test.h"
#endif /* _KERNEL */
+#include <crypto/chacha20/chacha.h>
#include <crypto/rijndael/rijndael-api-fst.h>
#include <crypto/sha2/sha256.h>
@@ -75,7 +76,10 @@
/* Defined in FS&K */
#define RANDOM_FORTUNA_NPOOLS 32 /* The number of accumulation pools */
#define RANDOM_FORTUNA_DEFPOOLSIZE 64 /* The default pool size/length for a (re)seed */
-#define RANDOM_FORTUNA_MAX_READ (1 << 20) /* Max bytes in a single read */
+#define RANDOM_FORTUNA_MAX_READ (1 << 20) /* Max bytes from AES before rekeying */
+#define RANDOM_FORTUNA_BLOCKS_PER_KEY (1 << 16) /* Max blocks from AES before rekeying */
+CTASSERT(RANDOM_FORTUNA_BLOCKS_PER_KEY * RANDOM_BLOCKSIZE ==
+ RANDOM_FORTUNA_MAX_READ);
/*
* The allowable range of RANDOM_FORTUNA_DEFPOOLSIZE. The default value is above.
@@ -120,6 +124,26 @@
mtx_t fs_mtx;
} fortuna_state;
+/*
+ * Experimental concurrent reads feature. For now, disabled by default. But
+ * we may enable it in the future.
+ *
+ * The benefit is improved concurrency in Fortuna. That is reflected in two
+ * related aspects:
+ *
+ * 1. Concurrent devrandom readers can achieve similar throughput to a single
+ * reader thread.
+ *
+ * 2. The rand_harvestq process spends much less time spinning when one or more
+ * readers is processing a large request. Partially this is due to
+ * rand_harvestq / ra_event_processor design, which only passes one event at
+ * a time to the underlying algorithm. Each time, Fortuna must take its
+ * global state mutex, potentially blocking on a reader. Our adaptive
+ * mutexes assume that a lock holder currently on CPU will release the lock
+ * quickly, and spin if the owning thread is currently running.
+ */
+static bool fortuna_concurrent_read __read_frequently = false;
+
#ifdef _KERNEL
static struct sysctl_ctx_list random_clist;
RANDOM_CHECK_UINT(fs_minpoolsize, RANDOM_FORTUNA_MINPOOLSIZE, RANDOM_FORTUNA_MAXPOOLSIZE);
@@ -176,6 +200,11 @@
random_check_uint_fs_minpoolsize, "IU",
"Minimum pool size necessary to cause a reseed");
KASSERT(fortuna_state.fs_minpoolsize > 0, ("random: Fortuna threshold must be > 0 at startup"));
+
+ SYSCTL_ADD_BOOL(&random_clist, SYSCTL_CHILDREN(random_fortuna_o),
+ OID_AUTO, "concurrent_read", CTLFLAG_RDTUN,
+ &fortuna_concurrent_read, 0, "If non-zero, enable EXPERIMENTAL "
+ "feature to improve concurrent Fortuna performance.");
#endif
/*-
@@ -305,48 +334,6 @@
uint128_increment(&fortuna_state.fs_counter);
}
-/*-
- * FS&K - PseudoRandomData()
- *
- * If Chacha20 is used, output size is unrestricted. If AES-CTR is used,
- * output size MUST be <= 1MB and a multiple of RANDOM_BLOCKSIZE. The
- * reasoning for this is discussed in FS&K 9.4; the significant distinction
- * between the two ciphers is that AES has a *block* size of 128 bits while
- * Chacha has a *block* size of 512 bits.
- */
-static __inline void
-random_fortuna_genrandom(uint8_t *buf, size_t bytecount)
-{
- uint8_t newkey[RANDOM_KEYSIZE];
-
- RANDOM_RESEED_ASSERT_LOCK_OWNED();
-
- /*-
- * FS&K - assert(n < 2^20 (== 1 MB)) when 128-bit block cipher is used
- * - r = first-n-bytes(GenerateBlocks(ceil(n/16)))
- * - K = GenerateBlocks(2)
- */
- KASSERT(random_chachamode || bytecount <= RANDOM_FORTUNA_MAX_READ,
- ("%s: invalid large read request: %zu bytes", __func__,
- bytecount));
-
- /*
- * This is where FS&K would invoke GenerateBlocks(). GenerateBlocks()
- * doesn't make a lot of sense or have much value if we use bytecount
- * for the API (which is useful for ciphers that do not require
- * block-sized output, like Chacha20).
- *
- * Just invoke our PRF abstraction directly, which is responsible for
- * updating fs_counter ('C').
- */
- randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
- buf, bytecount);
- randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
- newkey, sizeof(newkey));
- randomdev_encrypt_init(&fortuna_state.fs_key, newkey);
- explicit_bzero(newkey, sizeof(newkey));
-}
-
/*-
* FS&K - RandomData() (Part 1)
* Used to return processed entropy from the PRNG. There is a pre_read
@@ -433,70 +420,144 @@
explicit_bzero(temp, sizeof(temp));
}
-/*-
- * FS&K - RandomData() (Part 2)
- * Main read from Fortuna, continued. May be called multiple times after
- * the random_fortuna_pre_read() above.
+/*
+ * This is basically GenerateBlocks() from FS&K.
*
- * The supplied buf MAY not be a multiple of RANDOM_BLOCKSIZE in size; it is
- * the responsibility of the algorithm to accommodate partial block reads, if a
- * block output mode is used.
+ * It differs in two ways:
+ *
+ * 1. Chacha20 is tolerant of non-block-multiple request sizes, so we do not
+ * need to handle any remainder bytes specially and can just pass the length
+ * directly to the PRF construction; and
+ *
+ * 2. Chacha20 is a 512-bit block size cipher (whereas AES has 128-bit block
+ * size, regardless of key size). This means Chacha does not require re-keying
+ * every 1MiB. This is implied by the math in FS&K 9.4 and mentioned
+ * explicitly in the conclusion, "If we had a block cipher with a 256-bit [or
+ * greater] block size, then the collisions would not have been an issue at
+ * all" (p. 144).
+ *
+ * 3. In conventional ("locked") mode, we produce a maximum of PAGE_SIZE output
+ * at a time before dropping the lock, to not bully the lock especially. This
+ * has been the status quo since 2015 (r284959).
+ *
+ * The upstream caller random_fortuna_read is responsible for zeroing out
+ * sensitive buffers provided as parameters to this routine.
*/
-void
-random_fortuna_read(uint8_t *buf, size_t bytecount)
+enum {
+ FORTUNA_UNLOCKED = false,
+ FORTUNA_LOCKED = true
+};
+static void
+random_fortuna_genbytes(uint8_t *buf, size_t bytecount,
+ uint8_t newkey[static RANDOM_KEYSIZE], uint128_t *p_counter,
+ union randomdev_key *p_key, bool locked)
{
uint8_t remainder_buf[RANDOM_BLOCKSIZE];
- size_t read_directly_len, read_chunk;
+ size_t chunk_size;
- /*
- * The underlying AES generator expects multiples of RANDOM_BLOCKSIZE.
- */
- if (random_chachamode)
- read_directly_len = bytecount;
+ if (locked)
+ RANDOM_RESEED_ASSERT_LOCK_OWNED();
else
- read_directly_len = rounddown(bytecount, RANDOM_BLOCKSIZE);
+ RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED();
- RANDOM_RESEED_LOCK();
- KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
+ /*
+ * Easy case: don't have to worry about bullying the global mutex,
+ * don't have to worry about rekeying Chacha; API is byte-oriented.
+ */
+ if (!locked && random_chachamode) {
+ randomdev_keystream(p_key, p_counter, buf, bytecount);
+ return;
+ }
- while (read_directly_len > 0) {
+ if (locked) {
/*
- * 128-bit block ciphers like AES must be re-keyed at 1MB
- * intervals to avoid unacceptable statistical differentiation
- * from true random data.
- *
- * 512-bit block ciphers like Chacha20 do not have this
- * problem. (FS&K 9.4)
+ * While holding the global lock, limit PRF generation to
+ * mitigate, but not eliminate, bullying symptoms.
*/
- if (random_chachamode)
- read_chunk = read_directly_len;
- else
- read_chunk = MIN(read_directly_len,
- RANDOM_FORTUNA_MAX_READ);
-
+ chunk_size = PAGE_SIZE;
+ } else {
/*
- * For now, we hold the global Fortuna mutex, so yield
- * periodically to provide vague availability to other lock
- * users. PAGE_SIZE is chosen to match existing behavior.
- */
- read_chunk = MIN(read_chunk, PAGE_SIZE);
+ * 128-bit block ciphers like AES must be re-keyed at 1MB
+ * intervals to avoid unacceptable statistical differentiation
+ * from true random data (FS&K 9.4, p. 143-144).
+ */
+ MPASS(!random_chachamode);
+ chunk_size = RANDOM_FORTUNA_MAX_READ;
+ }
- random_fortuna_genrandom(buf, read_chunk);
- buf += read_chunk;
- read_directly_len -= read_chunk;
- bytecount -= read_chunk;
+ chunk_size = MIN(bytecount, chunk_size);
+ if (!random_chachamode)
+ chunk_size = rounddown(chunk_size, RANDOM_BLOCKSIZE);
- /* Perform the actual yield. */
- if (read_directly_len != 0) {
- RANDOM_RESEED_UNLOCK();
- RANDOM_RESEED_LOCK();
+ while (bytecount >= chunk_size) {
+ randomdev_keystream(p_key, p_counter, buf, chunk_size);
+
+ buf += chunk_size;
+ bytecount -= chunk_size;
+
+ /* We have to rekey if there is any data remaining to be
+ * generated, in two scenarios:
+ *
+ * locked: we need to rekey before we unlock and release the
+ * global state to another consumer; or
+ *
+ * unlocked: we need to rekey because we're in AES mode and are
+ * required to rekey at chunk_size==1MB. But we do not need to
+ * rekey during the last trailing <1MB chunk.
+ */
+ if (bytecount > 0) {
+ if (locked || chunk_size == RANDOM_FORTUNA_MAX_READ) {
+ randomdev_keystream(p_key, p_counter, newkey,
+ RANDOM_KEYSIZE);
+ randomdev_encrypt_init(p_key, newkey);
+ }
+
+ /*
+ * If we're holding the global lock, yield it briefly
+ * now.
+ */
+ if (locked) {
+ RANDOM_RESEED_UNLOCK();
+ RANDOM_RESEED_LOCK();
+ }
+
+ /*
+ * At the trailing end, scale down chunk_size from 1MB or
+ * PAGE_SIZE to all remaining full blocks (AES) or all
+ * remaining bytes (Chacha).
+ */
+ if (bytecount < chunk_size) {
+ if (random_chachamode)
+ chunk_size = bytecount;
+ else if (bytecount >= RANDOM_BLOCKSIZE)
+ chunk_size = rounddown(bytecount,
+ RANDOM_BLOCKSIZE);
+ else
+ break;
+ }
}
}
- if (bytecount > 0)
- random_fortuna_genrandom(remainder_buf, sizeof(remainder_buf));
+ /*
+ * Generate any partial AES block remaining into a temporary buffer and
+ * copy the desired substring out.
+ */
+ if (bytecount > 0) {
+ MPASS(!random_chachamode);
- RANDOM_RESEED_UNLOCK();
+ randomdev_keystream(p_key, p_counter, remainder_buf,
+ sizeof(remainder_buf));
+ }
+
+ /*
+ * In locked mode, re-key global K before dropping the lock, which we
+ * don't need for memcpy/bzero below.
+ */
+ if (locked) {
+ randomdev_keystream(p_key, p_counter, newkey, RANDOM_KEYSIZE);
+ randomdev_encrypt_init(p_key, newkey);
+ RANDOM_RESEED_UNLOCK();
+ }
if (bytecount > 0) {
memcpy(buf, remainder_buf, bytecount);
@@ -504,6 +565,124 @@
}
}
+
+/*
+ * Handle only "concurrency-enabled" Fortuna reads to simplify logic.
+ *
+ * Caller (random_fortuna_read) is responsible for zeroing out sensitive
+ * buffers provided as parameters to this routine.
+ */
+static void
+random_fortuna_read_concurrent(uint8_t *buf, size_t bytecount,
+ uint8_t newkey[static RANDOM_KEYSIZE])
+{
+ union randomdev_key key_copy;
+ uint128_t counter_copy;
+ size_t blockcount;
+
+ MPASS(fortuna_concurrent_read);
+
+ /*
+ * Compute number of blocks required for the PRF request ('delta C').
+ * We will step the global counter 'C' by this number under lock, and
+ * then actually consume the counter values outside the lock.
+ *
+ * This ensures that contemporaneous but independent requests for
+ * randomness receive distinct 'C' values and thus independent PRF
+ * results.
+ */
+ if (random_chachamode) {
+ blockcount = howmany(bytecount, CHACHA_BLOCKLEN);
+ } else {
+ blockcount = howmany(bytecount, RANDOM_BLOCKSIZE);
+
+ /*
+ * Need to account for the additional blocks generated by
+ * rekeying when updating the global fs_counter.
+ */
+ blockcount += RANDOM_KEYS_PER_BLOCK *
+ (blockcount / RANDOM_FORTUNA_BLOCKS_PER_KEY);
+ }
+
+ RANDOM_RESEED_LOCK();
+ KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
+ /*
+ * Technically, we only need mutual exclusion to update shared state
+ * appropriately. Nothing about updating the shared internal state
+ * requires that we perform (most) expensive cryptographic keystream
+ * generation under lock. (We still need to generate 256 bits of
+ * keystream to re-key between consumers.)
+ *
+ * Save the original counter and key values that will be used as the
+ * PRF for this particular consumer.
+ */
+ memcpy(&counter_copy, &fortuna_state.fs_counter, sizeof(counter_copy));
+ memcpy(&key_copy, &fortuna_state.fs_key, sizeof(key_copy));
+
+ /*
+ * Step the counter as if we had generated 'bytecount' blocks for this
+ * consumer. I.e., ensure that the next consumer gets an independent
+ * range of counter values once we drop the global lock.
+ */
+ uint128_add64(&fortuna_state.fs_counter, blockcount);
+
+ /*
+ * We still need to Rekey the global 'K' between independent calls;
+ * this is no different from conventional Fortuna. Note that
+ * 'randomdev_keystream()' will step the fs_counter 'C' appropriately
+ * for the blocks needed for the 'newkey'.
+ *
+ * (This is part of PseudoRandomData() in FS&K, 9.4.4.)
+ */
+ randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
+ newkey, RANDOM_KEYSIZE);
+ randomdev_encrypt_init(&fortuna_state.fs_key, newkey);
+
+ /*
+ * We have everything we need to generate a unique PRF for this
+ * consumer without touching global state.
+ */
+ RANDOM_RESEED_UNLOCK();
+
+ random_fortuna_genbytes(buf, bytecount, newkey, &counter_copy,
+ &key_copy, FORTUNA_UNLOCKED);
+ RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED();
+
+ explicit_bzero(&counter_copy, sizeof(counter_copy));
+ explicit_bzero(&key_copy, sizeof(key_copy));
+}
+
+/*-
+ * FS&K - RandomData() (Part 2)
+ * Main read from Fortuna, continued. May be called multiple times after
+ * the random_fortuna_pre_read() above.
+ *
+ * The supplied buf MAY not be a multiple of RANDOM_BLOCKSIZE in size; it is
+ * the responsibility of the algorithm to accommodate partial block reads, if a
+ * block output mode is used.
+ */
+void
+random_fortuna_read(uint8_t *buf, size_t bytecount)
+{
+ uint8_t newkey[RANDOM_KEYSIZE];
+
+ if (fortuna_concurrent_read) {
+ random_fortuna_read_concurrent(buf, bytecount, newkey);
+ goto out;
+ }
+
+ RANDOM_RESEED_LOCK();
+ KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
+
+ random_fortuna_genbytes(buf, bytecount, newkey,
+ &fortuna_state.fs_counter, &fortuna_state.fs_key, FORTUNA_LOCKED);
+ /* Returns unlocked */
+ RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED();
+
+out:
+ explicit_bzero(newkey, sizeof(newkey));
+}
+
#ifdef _KERNEL
static bool block_seeded_status = false;
SYSCTL_BOOL(_kern_random, OID_AUTO, block_seeded_status, CTLFLAG_RWTUN,
Index: sys/dev/random/hash.h
===================================================================
--- sys/dev/random/hash.h
+++ sys/dev/random/hash.h
@@ -32,6 +32,10 @@
#include <crypto/chacha20/_chacha.h>
#include <dev/random/uint128.h>
+#ifndef _KERNEL
+#define __read_frequently
+#endif
+
/* Keys are formed from cipher blocks */
#define RANDOM_KEYSIZE 32 /* (in bytes) == 256 bits */
#define RANDOM_KEYSIZE_WORDS (RANDOM_KEYSIZE/sizeof(uint32_t))
Index: sys/dev/random/hash.c
===================================================================
--- sys/dev/random/hash.c
+++ sys/dev/random/hash.c
@@ -74,7 +74,7 @@
* Benefits include somewhat faster keystream generation compared with
* unaccelerated AES-ICM.
*/
-bool random_chachamode = false;
+bool random_chachamode __read_frequently = false;
#ifdef _KERNEL
SYSCTL_BOOL(_kern_random, OID_AUTO, use_chacha20_cipher, CTLFLAG_RDTUN,
&random_chachamode, 0,
Index: sys/dev/random/uint128.h
===================================================================
--- sys/dev/random/uint128.h
+++ sys/dev/random/uint128.h
@@ -65,6 +65,21 @@
#endif
}
+static __inline void
+uint128_add64(uint128_t *big_uintp, uint64_t add)
+{
+#ifdef USE_REAL_UINT128_T
+ (*big_uintp) += add;
+#else
+ uint64_t word0p;
+
+ word0p = big_uintp->u128t_word0 + add;
+ if (word0p < big_uintp->u128t_word0)
+ big_uintp->u128t_word1++;
+ big_uintp->u128t_word0 = word0p;
+#endif
+}
+
static __inline bool
uint128_equals(uint128_t a, uint128_t b)
{
Index: tests/sys/devrandom/uint128_test.c
===================================================================
--- tests/sys/devrandom/uint128_test.c
+++ tests/sys/devrandom/uint128_test.c
@@ -152,6 +152,62 @@
}
}
+ATF_TC_WITHOUT_HEAD(uint128_add64);
+ATF_TC_BODY(uint128_add64, tc)
+{
+ static const struct u128_add64_tc {
+ uint32_t input[4];
+ uint64_t addend;
+ uint32_t expected[4];
+ const char *descr;
+ } tests[] = {
+ {
+ .input = { 0, 0, 0, 0 },
+ .addend = 1,
+ .expected = { 1, 0, 0, 0 },
+ .descr = "0 + 1 -> 1",
+ },
+ {
+ .input = { 1, 0, 0, 0 },
+ .addend = UINT32_MAX,
+ .expected = { 0, 1, 0, 0 },
+ .descr = "1 + (2^32 - 1) -> 2^32 (word carry)",
+ },
+ {
+ .input = { 1, 0, 0, 0 },
+ .addend = UINT64_MAX,
+ .expected = { 0, 0, 1, 0 },
+ .descr = "1 + (2^64 - 1) -> 2^64 (u128t_word0 carry)",
+ },
+ {
+ .input = { 0x11111111, 0x11111111, 0, 0 },
+ .addend = 0xf0123456789abcdeULL,
+ .expected = { 0x89abcdef, 0x01234567, 1, 0 },
+ .descr = "0x1111_1111_1111_1111 +"
+ "0xf012_3456_789a_bcde ->"
+ "0x1_0123_4567_89ab_cdef",
+ },
+ {
+ .input = { 1, 0, UINT32_MAX, 0 },
+ .addend = UINT64_MAX,
+ .expected = { 0, 0, 0, 1 },
+ .descr = "Carry ~2^96",
+ },
+ };
+ uint8_t inputle[16], expectedle[16];
+ uint128_t a;
+ size_t i;
+
+ for (i = 0; i < nitems(tests); i++) {
+ vec_u32_tole128(inputle, tests[i].input);
+ vec_u32_tole128(expectedle, tests[i].expected);
+
+ a = le128dec(inputle);
+ uint128_add64(&a, tests[i].addend);
+ u128_check_equality(le128dec(expectedle), a, tests[i].descr);
+ }
+}
+
/*
* Test assumptions about Chacha incrementing counter in the same way as
* uint128.h
@@ -219,6 +275,7 @@
{
ATF_TP_ADD_TC(tp, uint128_inc);
+ ATF_TP_ADD_TC(tp, uint128_add64);
ATF_TP_ADD_TC(tp, uint128_chacha_ctr);
return (atf_no_error());
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Mar 10, 4:25 AM (4 h, 37 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17079779
Default Alt Text
D20313.id58742.diff (18 KB)
Attached To
Mode
D20313: Fortuna: allow increased concurrency
Attached
Detach File
Event Timeline
Log In to Comment