Page MenuHomeFreeBSD

D20313.id57570.diff
No OneTemporary

D20313.id57570.diff

Index: sys/dev/random/fortuna.c
===================================================================
--- sys/dev/random/fortuna.c
+++ sys/dev/random/fortuna.c
@@ -61,6 +61,7 @@
#include "unit_test.h"
#endif /* _KERNEL */
+#include <crypto/chacha20/chacha.h>
#include <crypto/rijndael/rijndael-api-fst.h>
#include <crypto/sha2/sha256.h>
@@ -75,7 +76,10 @@
/* Defined in FS&K */
#define RANDOM_FORTUNA_NPOOLS 32 /* The number of accumulation pools */
#define RANDOM_FORTUNA_DEFPOOLSIZE 64 /* The default pool size/length for a (re)seed */
-#define RANDOM_FORTUNA_MAX_READ (1 << 20) /* Max bytes in a single read */
+#define RANDOM_FORTUNA_MAX_READ (1 << 20) /* Max bytes from AES before rekeying */
+#define RANDOM_FORTUNA_BLOCKS_PER_KEY (1 << 16) /* Max blocks from AES before rekeying */
+CTASSERT(RANDOM_FORTUNA_BLOCKS_PER_KEY * RANDOM_BLOCKSIZE ==
+ RANDOM_FORTUNA_MAX_READ);
/*
* The allowable range of RANDOM_FORTUNA_DEFPOOLSIZE. The default value is above.
@@ -120,6 +124,26 @@
mtx_t fs_mtx;
} fortuna_state;
+/*
+ * Experimental concurrent reads feature. For now, disabled by default. But
+ * we may enable it in the future.
+ *
+ * The benefit is improved concurrency in Fortuna. That is reflected in two
+ * related aspects:
+ *
+ * 1. Concurrent devrandom readers can achieve similar throughput to a single
+ * reader thread.
+ *
+ * 2. The rand_harvestq process spends much less time spinning when one or more
+ * readers is processing a large request. Partially this is due to
+ * rand_harvestq / ra_event_processor design, which only passes one event at
+ * a time to the underlying algorithm. Each time, Fortuna must take its
+ * global state mutex, potentially blocking on a reader. Our adaptive
+ * mutexes assume that a lock holder currently on CPU will release the lock
+ * quickly, and spin if the owning thread is currently running.
+ */
+static bool fortuna_concurrent_read = false;
+
#ifdef _KERNEL
static struct sysctl_ctx_list random_clist;
RANDOM_CHECK_UINT(fs_minpoolsize, RANDOM_FORTUNA_MINPOOLSIZE, RANDOM_FORTUNA_MAXPOOLSIZE);
@@ -176,6 +200,11 @@
random_check_uint_fs_minpoolsize, "IU",
"Minimum pool size necessary to cause a reseed");
KASSERT(fortuna_state.fs_minpoolsize > 0, ("random: Fortuna threshold must be > 0 at startup"));
+
+ SYSCTL_ADD_BOOL(&random_clist, SYSCTL_CHILDREN(random_fortuna_o),
+ OID_AUTO, "concurrent_read", CTLFLAG_RDTUN,
+ &fortuna_concurrent_read, 0, "If non-zero, enable EXPERIMENTAL "
+ "feature to improve concurrent Fortuna performance.");
#endif
/*-
@@ -305,48 +334,6 @@
uint128_increment(&fortuna_state.fs_counter);
}
-/*-
- * FS&K - PseudoRandomData()
- *
- * If Chacha20 is used, output size is unrestricted. If AES-CTR is used,
- * output size MUST be <= 1MB and a multiple of RANDOM_BLOCKSIZE. The
- * reasoning for this is discussed in FS&K 9.4; the significant distinction
- * between the two ciphers is that AES has a *block* size of 128 bits while
- * Chacha has a *block* size of 256 bits.
- */
-static __inline void
-random_fortuna_genrandom(uint8_t *buf, size_t bytecount)
-{
- uint8_t newkey[RANDOM_KEYSIZE];
-
- RANDOM_RESEED_ASSERT_LOCK_OWNED();
-
- /*-
- * FS&K - assert(n < 2^20 (== 1 MB)) when 128-bit block cipher is used
- * - r = first-n-bytes(GenerateBlocks(ceil(n/16)))
- * - K = GenerateBlocks(2)
- */
- KASSERT(random_chachamode || bytecount <= RANDOM_FORTUNA_MAX_READ,
- ("%s: invalid large read request: %zu bytes", __func__,
- bytecount));
-
- /*
- * This is where FS&K would invoke GenerateBlocks(). GenerateBlocks()
- * doesn't make a lot of sense or have much value if we use bytecount
- * for the API (which is useful for ciphers that do not require
- * block-sized output, like Chacha20).
- *
- * Just invoke our PRF abstraction directly, which is responsible for
- * updating fs_counter ('C').
- */
- randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
- buf, bytecount);
- randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
- newkey, sizeof(newkey));
- randomdev_encrypt_init(&fortuna_state.fs_key, newkey);
- explicit_bzero(newkey, sizeof(newkey));
-}
-
/*-
* FS&K - RandomData() (Part 1)
* Used to return processed entropy from the PRNG. There is a pre_read
@@ -445,47 +432,127 @@
void
random_fortuna_read(uint8_t *buf, size_t bytecount)
{
- uint8_t remainder_buf[RANDOM_BLOCKSIZE];
- size_t read_directly_len, read_chunk;
-
- /*
- * The underlying AES generator expects multiples of RANDOM_BLOCKSIZE.
- */
- if (random_chachamode)
- read_directly_len = bytecount;
- else
+ uint8_t remainder_buf[RANDOM_BLOCKSIZE], newkey[RANDOM_KEYSIZE];
+ size_t read_directly_len, read_chunk, blockcount;
+ uint128_t counter_copy, *p_counter;
+ union randomdev_key key_copy, *p_key;
+
+ if (random_chachamode) {
+ blockcount = howmany(bytecount, CHACHA_BLOCKLEN);
+ /* Unused; squash GCC maybe-uninitialized warning: */
+ read_directly_len = 0;
+ } else {
+ /*
+ * The underlying AES generator expects multiples of
+ * RANDOM_BLOCKSIZE.
+ */
read_directly_len = rounddown(bytecount, RANDOM_BLOCKSIZE);
+ blockcount = howmany(bytecount, RANDOM_BLOCKSIZE);
+
+ /*
+ * Need to account for the additional blocks generated by
+ * rekeying when updating the global fs_counter.
+ */
+ blockcount += RANDOM_KEYS_PER_BLOCK *
+ (blockcount / RANDOM_FORTUNA_BLOCKS_PER_KEY);
+ }
RANDOM_RESEED_LOCK();
KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
- while (read_directly_len > 0) {
+ if (fortuna_concurrent_read) {
/*
- * 128-bit block ciphers like AES must be re-keyed at 1MB
- * intervals to avoid unacceptable statistical differentiation
- * from true random data.
+ * Technically, we only need mutual exclusion to:
+ * 1. Step the counter by the number of output blocks, so that
+ * consumers get unique counter values, and
+ * 2. Rekey the shared key for forward secrecy.
+ *
+ * We can do either of those and drop the global mutex before
+ * we actually generate N bytes of output.
*
- * 256-bit block ciphers like Chacha20 do not have this
- * problem. (FS&K 9.4)
+ * Save the original counter and key values for this consumer.
+ */
+ memcpy(&counter_copy, &fortuna_state.fs_counter, sizeof(counter_copy));
+ memcpy(&key_copy, &fortuna_state.fs_key, sizeof(key_copy));
+
+ /* Step the counter as if we had generated 'bytecount' blocks: */
+ uint128_add(&fortuna_state.fs_counter, blockcount);
+
+ /* Rekey; this is part of PseudoRandomData() in FS&K, 9.4.4 */
+ randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
+ newkey, sizeof(newkey));
+ randomdev_encrypt_init(&fortuna_state.fs_key, newkey);
+
+ p_counter = &counter_copy;
+ p_key = &key_copy;
+
+ /*
+ * We have everything we need to generate a unique PRF for this
+ * consumer.
*/
- if (random_chachamode)
- read_chunk = read_directly_len;
- else
+ RANDOM_RESEED_UNLOCK();
+ } else {
+ p_counter = &fortuna_state.fs_counter;
+ p_key = &fortuna_state.fs_key;
+ }
+
+ /*
+ * The rest of this is basically GenerateBlocks() from FS&K.
+ *
+ * It differs in two ways:
+ *
+ * 1. Chacha20 is tolerant of non-block-multiple request sizes, so we
+ * do not need to handle any remainder bytes specially and can just
+ * pass the length directly to the PRF construction; and
+ *
+ * 2. Chacha20 is a 256-bit block size cipher (whereas AES has 128-bit
+ * block size, regardless of key size). This means Chacha does not
+ * require re-keying every 1MiB. This is implied by the math in FS&K
+ * 9.4 and mentioned explicitly in the conclusion, "If we had a block
+ * cipher with a 256-bit block size, then the collisions would not have
+ * been an issue at all" (p. 144).
+ */
+ if (random_chachamode) {
+ randomdev_keystream(p_key, p_counter, buf, bytecount);
+ } else {
+ while (read_directly_len > 0) {
+ /*
+ * 128-bit block ciphers like AES must be re-keyed at 1MB
+ * intervals to avoid unacceptable statistical differentiation
+ * from true random data (FS&K 9.4, p. 143-144).
+ */
read_chunk = MIN(read_directly_len,
RANDOM_FORTUNA_MAX_READ);
- random_fortuna_genrandom(buf, read_chunk);
- buf += read_chunk;
- read_directly_len -= read_chunk;
- bytecount -= read_chunk;
+ randomdev_keystream(p_key, p_counter, buf, read_chunk);
+
+ /* Rekey AES at 1MB intervals. */
+ if (read_chunk == RANDOM_FORTUNA_MAX_READ &&
+ bytecount > read_chunk) {
+ randomdev_keystream(p_key, p_counter,
+ newkey, sizeof(newkey));
+ randomdev_encrypt_init(p_key, newkey);
+ }
+
+ buf += read_chunk;
+ read_directly_len -= read_chunk;
+ bytecount -= read_chunk;
+ }
+ if (bytecount > 0) {
+ randomdev_keystream(p_key, p_counter,
+ remainder_buf, sizeof(remainder_buf));
+ memcpy(buf, remainder_buf, bytecount);
+ explicit_bzero(remainder_buf, sizeof(remainder_buf));
+ }
}
- if (bytecount > 0) {
- random_fortuna_genrandom(remainder_buf, sizeof(remainder_buf));
- memcpy(buf, remainder_buf, bytecount);
- explicit_bzero(remainder_buf, sizeof(remainder_buf));
+ if (fortuna_concurrent_read) {
+ explicit_bzero(&counter_copy, sizeof(counter_copy));
+ explicit_bzero(&key_copy, sizeof(key_copy));
+ } else {
+ RANDOM_RESEED_UNLOCK();
}
- RANDOM_RESEED_UNLOCK();
+ explicit_bzero(newkey, sizeof(newkey));
}
#ifdef _KERNEL
Index: sys/dev/random/uint128.h
===================================================================
--- sys/dev/random/uint128.h
+++ sys/dev/random/uint128.h
@@ -65,6 +65,21 @@
#endif
}
+static __inline void
+uint128_add(uint128_t *big_uintp, uint64_t add)
+{
+#ifdef USE_REAL_UINT128_T
+ (*big_uintp) += add;
+#else
+ uint64_t word0p;
+
+ word0p = big_uintp->u128t_word0 + add;
+ if (word0p < big_uintp->u128t_word0)
+ big_uintp->u128t_word1++;
+ big_uintp->u128t_word0 = word0p;
+#endif
+}
+
static __inline bool
uint128_equals(uint128_t a, uint128_t b)
{

File Metadata

Mime Type
text/plain
Expires
Sat, Feb 22, 7:55 AM (3 h, 11 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16767805
Default Alt Text
D20313.id57570.diff (9 KB)

Event Timeline