Index: sys/dev/random/fortuna.c =================================================================== --- sys/dev/random/fortuna.c +++ sys/dev/random/fortuna.c @@ -128,7 +128,7 @@ #endif static void random_fortuna_pre_read(void); -static void random_fortuna_read(uint8_t *, u_int); +static void random_fortuna_read(uint8_t *, size_t); static bool random_fortuna_seeded(void); static bool random_fortuna_seeded_internal(void); static void random_fortuna_process_event(struct harvest_event *); @@ -306,50 +306,46 @@ uint128_increment(&fortuna_state.fs_counter); } -/*- - * FS&K - GenerateBlocks() - * Generate a number of complete blocks of random output. - */ -static __inline void -random_fortuna_genblocks(uint8_t *buf, u_int blockcount) -{ - - RANDOM_RESEED_ASSERT_LOCK_OWNED(); - KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0")); - - /* - * Fills buf with RANDOM_BLOCKSIZE * blockcount bytes of keystream. - * Increments fs_counter as it goes. - */ - randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter, - buf, blockcount); -} - /*- * FS&K - PseudoRandomData() - * This generates no more than 2^20 bytes of data, and cleans up its - * internal state when finished. It is assumed that a whole number of - * blocks are available for writing; any excess generated will be - * ignored. + * + * If Chacha20 is used, output size is unrestricted. If AES-CTR is used, + * output size MUST be <= 1MB and a multiple of RANDOM_BLOCKSIZE. The + * reasoning for this is discussed in FS&K 9.4; the significant distinction + * between the two ciphers is that AES has a *block* size of 128 bits while + * Chacha has a *block* size of 512 bits. 
*/ static __inline void -random_fortuna_genrandom(uint8_t *buf, u_int bytecount) +random_fortuna_genrandom(uint8_t *buf, size_t bytecount) { - uint8_t temp[RANDOM_BLOCKSIZE * RANDOM_KEYS_PER_BLOCK]; - u_int blockcount; + uint8_t newkey[RANDOM_KEYSIZE]; RANDOM_RESEED_ASSERT_LOCK_OWNED(); + /*- - * FS&K - assert(n < 2^20 (== 1 MB) + * FS&K - assert(n < 2^20 (== 1 MB)) when 128-bit block cipher is used * - r = first-n-bytes(GenerateBlocks(ceil(n/16))) * - K = GenerateBlocks(2) */ - KASSERT((bytecount <= RANDOM_FORTUNA_MAX_READ), ("invalid single read request to Fortuna of %d bytes", bytecount)); - blockcount = howmany(bytecount, RANDOM_BLOCKSIZE); - random_fortuna_genblocks(buf, blockcount); - random_fortuna_genblocks(temp, RANDOM_KEYS_PER_BLOCK); - randomdev_encrypt_init(&fortuna_state.fs_key, temp); - explicit_bzero(temp, sizeof(temp)); + KASSERT(random_chachamode || bytecount <= RANDOM_FORTUNA_MAX_READ, + ("%s: invalid large read request: %zu bytes", __func__, + bytecount)); + + /* + * This is where FS&K would invoke GenerateBlocks(). GenerateBlocks() + * doesn't make a lot of sense or have much value if we use bytecount + * for the API (which is useful for ciphers that do not require + * block-sized output, like Chacha20). + * + * Just invoke our PRF abstraction directly, which is responsible for + * updating fs_counter ('C'). + */ + randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter, + buf, bytecount); + randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter, + newkey, sizeof(newkey)); + randomdev_encrypt_init(&fortuna_state.fs_key, newkey); + explicit_bzero(newkey, sizeof(newkey)); } /*- @@ -442,17 +438,54 @@ * FS&K - RandomData() (Part 2) * Main read from Fortuna, continued. May be called multiple times after * the random_fortuna_pre_read() above. - * The supplied buf MUST be a multiple of RANDOM_BLOCKSIZE in size. - * Lots of code presumes this for efficiency, both here and in other - * routines. 
You are NOT allowed to break this! + * + * The supplied buf need NOT be a multiple of RANDOM_BLOCKSIZE in size; it is + * the responsibility of the algorithm to accommodate partial block reads, if a + * block output mode is used. */ void -random_fortuna_read(uint8_t *buf, u_int bytecount) +random_fortuna_read(uint8_t *buf, size_t bytecount) { + uint8_t remainder_buf[RANDOM_BLOCKSIZE]; + size_t read_directly_len, read_chunk; + + /* + * The underlying AES generator expects multiples of RANDOM_BLOCKSIZE. + */ + if (random_chachamode) + read_directly_len = bytecount; + else + read_directly_len = rounddown(bytecount, RANDOM_BLOCKSIZE); - KASSERT((bytecount % RANDOM_BLOCKSIZE) == 0, ("%s(): bytecount (= %d) must be a multiple of %d", __func__, bytecount, RANDOM_BLOCKSIZE )); RANDOM_RESEED_LOCK(); - random_fortuna_genrandom(buf, bytecount); + KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0")); + + while (read_directly_len > 0) { + /* + * 128-bit block ciphers like AES must be re-keyed at 1MB + * intervals to avoid unacceptable statistical differentiation + * from true random data. + * + * 512-bit block ciphers like Chacha20 do not have this + * problem. 
(FS&K 9.4) + */ + if (random_chachamode) + read_chunk = read_directly_len; + else + read_chunk = MIN(read_directly_len, + RANDOM_FORTUNA_MAX_READ); + + random_fortuna_genrandom(buf, read_chunk); + buf += read_chunk; + read_directly_len -= read_chunk; + bytecount -= read_chunk; + } + + if (bytecount > 0) { + random_fortuna_genrandom(remainder_buf, sizeof(remainder_buf)); + memcpy(buf, remainder_buf, bytecount); + explicit_bzero(remainder_buf, sizeof(remainder_buf)); + } RANDOM_RESEED_UNLOCK(); } Index: sys/dev/random/hash.h =================================================================== --- sys/dev/random/hash.h +++ sys/dev/random/hash.h @@ -54,14 +54,14 @@ struct chacha_ctx chacha; }; -extern bool fortuna_chachamode; +extern bool random_chachamode; void randomdev_hash_init(struct randomdev_hash *); void randomdev_hash_iterate(struct randomdev_hash *, const void *, size_t); void randomdev_hash_finish(struct randomdev_hash *, void *); void randomdev_encrypt_init(union randomdev_key *, const void *); -void randomdev_keystream(union randomdev_key *context, uint128_t *, void *, u_int); +void randomdev_keystream(union randomdev_key *context, uint128_t *, void *, size_t); void randomdev_getkey(union randomdev_key *, const void **, size_t *); #endif /* SYS_DEV_RANDOM_HASH_H_INCLUDED */ Index: sys/dev/random/hash.c =================================================================== --- sys/dev/random/hash.c +++ sys/dev/random/hash.c @@ -121,16 +121,18 @@ } /* - * Create a psuedorandom output stream of 'blockcount' blocks using a CTR-mode + * Create a pseudorandom output stream of 'bytecount' bytes using a CTR-mode * cipher or similar. The 128-bit counter is supplied in the in-out parmeter - * 'ctr.' The output stream goes to 'd_out.' 'blockcount' RANDOM_BLOCKSIZE - * bytes are generated. + * 'ctr.' The output stream goes to 'd_out.' + * + * If AES is used, 'bytecount' is guaranteed to be a multiple of + * RANDOM_BLOCKSIZE. 
*/ void randomdev_keystream(union randomdev_key *context, uint128_t *ctr, - void *d_out, u_int blockcount) + void *d_out, size_t bytecount) { - u_int i; + size_t i, blockcount, read_chunk; if (random_chachamode) { uint128_t lectr; @@ -143,8 +145,20 @@ le128enc(&lectr, *ctr); chacha_ivsetup(&context->chacha, NULL, (const void *)&lectr); - chacha_encrypt_bytes(&context->chacha, NULL, d_out, - RANDOM_BLOCKSIZE * blockcount); + while (bytecount > 0) { + /* + * We are limited by the chacha_encrypt_bytes API to + * u32 bytes per chunk. + */ + read_chunk = MIN(bytecount, + rounddown((size_t)UINT32_MAX, CHACHA_BLOCKLEN)); + + chacha_encrypt_bytes(&context->chacha, NULL, d_out, + read_chunk); + + d_out = (char *)d_out + read_chunk; + bytecount -= read_chunk; + } /* * Decode Chacha-updated LE counter to native endian and store @@ -152,7 +166,14 @@ */ chacha_ctrsave(&context->chacha, (void *)&lectr); *ctr = le128dec(&lectr); + + explicit_bzero(&lectr, sizeof(lectr)); } else { + KASSERT(bytecount % RANDOM_BLOCKSIZE == 0, + ("%s: AES mode invalid bytecount, not a multiple of native " + "block size", __func__)); + + blockcount = bytecount / RANDOM_BLOCKSIZE; for (i = 0; i < blockcount; i++) { /*- * FS&K - r = r|E(K,C) Index: sys/dev/random/other_algorithm.c =================================================================== --- sys/dev/random/other_algorithm.c +++ sys/dev/random/other_algorithm.c @@ -84,7 +84,7 @@ #endif /* _KERNEL */ static void random_other_pre_read(void); -static void random_other_read(uint8_t *, u_int); +static void random_other_read(uint8_t *, size_t); static bool random_other_seeded(void); static void random_other_process_event(struct harvest_event *); static void random_other_init_alg(void *); @@ -165,10 +165,10 @@ } /* - * void random_other_read(uint8_t *buf, u_int count) + * void random_other_read(uint8_t *buf, size_t count) * * Generate <count> bytes of output into <*buf>. 
- * You may use the fact that <count> will be a multiple of + * You may NOT use the fact that <count> will be a multiple of * RANDOM_BLOCKSIZE for optimization purposes. * * This function will always be called with your generator @@ -176,7 +176,7 @@ * output here, then feel free to KASSERT() or panic(). */ static void -random_other_read(uint8_t *buf, u_int count) +random_other_read(uint8_t *buf, size_t count) { RANDOM_RESEED_LOCK(); Index: sys/dev/random/randomdev.h =================================================================== --- sys/dev/random/randomdev.h +++ sys/dev/random/randomdev.h @@ -68,7 +68,7 @@ typedef void random_alg_init_t(void *); typedef void random_alg_deinit_t(void *); typedef void random_alg_pre_read_t(void); -typedef void random_alg_read_t(uint8_t *, u_int); +typedef void random_alg_read_t(uint8_t *, size_t); typedef bool random_alg_seeded_t(void); typedef void random_alg_reseed_t(void); typedef void random_alg_eventprocessor_t(struct harvest_event *); Index: sys/dev/random/randomdev.c =================================================================== --- sys/dev/random/randomdev.c +++ sys/dev/random/randomdev.c @@ -172,17 +172,21 @@ int READ_RANDOM_UIO(struct uio *uio, bool nonblock) { - uint8_t *random_buf; - int error; - ssize_t read_len, total_read, c; /* 16 MiB takes about 0.08 s CPU time on my 2017 AMD Zen CPU */ #define SIGCHK_PERIOD (16 * 1024 * 1024) const size_t sigchk_period = SIGCHK_PERIOD; - CTASSERT(SIGCHK_PERIOD % PAGE_SIZE == 0); #undef SIGCHK_PERIOD - random_buf = malloc(PAGE_SIZE, M_ENTROPY, M_WAITOK); + uint8_t *random_buf; + size_t total_read, read_len; + ssize_t bufsize; + int error; + + + KASSERT(uio->uio_rw == UIO_READ, ("%s: bogus write", __func__)); + KASSERT(uio->uio_resid >= 0, ("%s: bogus negative resid", __func__)); + p_random_alg_context->ra_pre_read(); error = 0; /* (Un)Blocking logic */ @@ -193,44 +197,64 @@ error = randomdev_wait_until_seeded( SEEDWAIT_INTERRUPTIBLE); } - if (error == 0) { - 
read_rate_increment((uio->uio_resid + sizeof(uint32_t))/sizeof(uint32_t)); - total_read = 0; - while (uio->uio_resid && !error) { - read_len = uio->uio_resid; - /* - * Belt-and-braces. - * Round up the read length to a crypto block size multiple, - * which is what the underlying generator is expecting. - * See the random_buf size requirements in the Fortuna code. - */ - read_len = roundup(read_len, RANDOM_BLOCKSIZE); - /* Work in chunks page-sized or less */ - read_len = MIN(read_len, PAGE_SIZE); - p_random_alg_context->ra_read(random_buf, read_len); - c = MIN(uio->uio_resid, read_len); - /* - * uiomove() may yield the CPU before each 'c' bytes - * (up to PAGE_SIZE) are copied out. - */ - error = uiomove(random_buf, c, uio); - total_read += c; - /* - * Poll for signals every few MBs to avoid very long - * uninterruptible syscalls. - */ - if (error == 0 && uio->uio_resid != 0 && - total_read % sigchk_period == 0) { - error = tsleep_sbt(&random_alg_context, PCATCH, - "randrd", SBT_1NS, 0, C_HARDCLOCK); - /* Squash tsleep timeout condition */ - if (error == EWOULDBLOCK) - error = 0; - } + if (error != 0) + return (error); + + read_rate_increment(howmany(uio->uio_resid + 1, sizeof(uint32_t))); + total_read = 0; + + /* Easy to deal with the trivial 0 byte case. */ + if (__predict_false(uio->uio_resid == 0)) + return (0); + + /* + * If memory is plentiful, use maximally sized requests to avoid + * per-call algorithm overhead. But fall back to a single page + * allocation if the full request isn't immediately available. 
+ */ + bufsize = MIN(sigchk_period, (size_t)uio->uio_resid); + random_buf = malloc(bufsize, M_ENTROPY, M_NOWAIT); + if (random_buf == NULL) { + bufsize = PAGE_SIZE; + random_buf = malloc(bufsize, M_ENTROPY, M_WAITOK); + } + + error = 0; + while (uio->uio_resid > 0 && error == 0) { + read_len = MIN((size_t)uio->uio_resid, bufsize); + + p_random_alg_context->ra_read(random_buf, read_len); + + /* + * uiomove() may yield the CPU before each 'read_len' bytes (up + * to bufsize) are copied out. + */ + error = uiomove(random_buf, read_len, uio); + total_read += read_len; + + /* + * Poll for signals every few MBs to avoid very long + * uninterruptible syscalls. + */ + if (error == 0 && uio->uio_resid != 0 && + total_read % sigchk_period == 0) { + error = tsleep_sbt(&random_alg_context, PCATCH, + "randrd", SBT_1NS, 0, C_HARDCLOCK); + /* Squash tsleep timeout condition */ + if (error == EWOULDBLOCK) + error = 0; } - if (error == ERESTART || error == EINTR) - error = 0; } + + /* + * Short reads due to signal interrupt should not indicate error. + * Instead, the uio will reflect that the read was shorter than + * requested. + */ + if (error == ERESTART || error == EINTR) + error = 0; + + explicit_bzero(random_buf, bufsize); free(random_buf, M_ENTROPY); return (error); } @@ -249,7 +273,6 @@ void READ_RANDOM(void *random_buf, u_int len) { - u_int read_directly_len; KASSERT(random_buf != NULL, ("No suitable random buffer in %s", __func__)); p_random_alg_context->ra_pre_read(); @@ -284,23 +307,7 @@ (void)randomdev_wait_until_seeded(SEEDWAIT_UNINTERRUPTIBLE); } read_rate_increment(roundup2(len, sizeof(uint32_t))); - /* - * The underlying generator expects multiples of - * RANDOM_BLOCKSIZE. 
- */ - read_directly_len = rounddown(len, RANDOM_BLOCKSIZE); - if (read_directly_len > 0) - p_random_alg_context->ra_read(random_buf, read_directly_len); - if (read_directly_len < len) { - uint8_t remainder_buf[RANDOM_BLOCKSIZE]; - - p_random_alg_context->ra_read(remainder_buf, - sizeof(remainder_buf)); - memcpy((char *)random_buf + read_directly_len, remainder_buf, - len - read_directly_len); - - explicit_bzero(remainder_buf, sizeof(remainder_buf)); - } + p_random_alg_context->ra_read(random_buf, len); } bool