Changeset View
Standalone View
lib/libc/gen/arc4random.h
Show All 18 Lines | |||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||||
* | * | ||||
* $FreeBSD$ | * $FreeBSD$ | ||||
*/ | */ | ||||
/* | /* | ||||
* Stub functions for portability. | * Stub functions for portability. | ||||
*/ | */ | ||||
#include <sys/elf.h> | |||||
#include <sys/endian.h> | |||||
#include <sys/mman.h> | #include <sys/mman.h> | ||||
#include <sys/time.h> /* for sys/vdso.h only. */ | |||||
#include <sys/vdso.h> | |||||
#include <machine/atomic.h> | |||||
#include <err.h> | |||||
#include <errno.h> | |||||
#include <signal.h> | #include <signal.h> | ||||
#include <stdbool.h> | |||||
#include <stdint.h> | |||||
/* | |||||
* The kernel root seed version is a 64-bit counter, but we truncate it to a | |||||
* 32-bit value in userspace for the convenience of 32-bit platforms. 32-bit | |||||
* rollover is not possible with the current reseed interval (1 hour at limit) | |||||
* without dynamic addition of new random devices (which also force a reseed in | |||||
* the FXRNG design). We don't have any dynamic device mechanism at this | |||||
* time, and anyway something else is very wrong if billions of new devices are | |||||
* being added. | |||||
* | |||||
* As is, it takes roughly 456,000 years of runtime to overflow the 32-bit | |||||
* version. | |||||
*/ | |||||
#define fxrng_load_acq_generation(x) atomic_load_acq_32(x) | |||||
static struct vdso_fxrng_generation_1 *vdso_fxrngp; | |||||
static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; | static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; | ||||
#define _ARC4_LOCK() \ | #define _ARC4_LOCK() \ | ||||
do { \ | do { \ | ||||
if (__isthreaded) \ | if (__isthreaded) \ | ||||
_pthread_mutex_lock(&arc4random_mtx); \ | _pthread_mutex_lock(&arc4random_mtx); \ | ||||
} while (0) | } while (0) | ||||
#define _ARC4_UNLOCK() \ | #define _ARC4_UNLOCK() \ | ||||
do { \ | do { \ | ||||
if (__isthreaded) \ | if (__isthreaded) \ | ||||
_pthread_mutex_unlock(&arc4random_mtx); \ | _pthread_mutex_unlock(&arc4random_mtx); \ | ||||
} while (0) | } while (0) | ||||
static inline void | static inline void | ||||
_getentropy_fail(void) | _getentropy_fail(void) | ||||
{ | { | ||||
raise(SIGKILL); | raise(SIGKILL); | ||||
} | } | ||||
static inline void | |||||
_rs_initialize_fxrng(void) | |||||
{ | |||||
struct vdso_fxrng_generation_1 *fxrngp; | |||||
int error; | |||||
error = _elf_aux_info(AT_FXRNG, &fxrngp, sizeof(fxrngp)); | |||||
if (error != 0) { | |||||
/* | |||||
* New userspace on an old or !RANDOM_FENESTRASX kernel; or an | |||||
* arch that does not have a VDSO page. | |||||
*/ | |||||
return; | |||||
} | |||||
/* Old userspace on newer kernel. */ | |||||
if (fxrngp->fx_vdso_version != VDSO_FXRNG_VER_1) | |||||
return; | |||||
vdso_fxrngp = fxrngp; | |||||
} | |||||
static inline int | static inline int | ||||
_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) | _rs_allocate(struct _rs **rsp, struct _rsx **rsxp) | ||||
{ | { | ||||
struct { | struct { | ||||
struct _rs rs; | struct _rs rs; | ||||
struct _rsx rsx; | struct _rsx rsx; | ||||
} *p; | } *p; | ||||
if ((p = mmap(NULL, sizeof(*p), PROT_READ|PROT_WRITE, | if ((p = mmap(NULL, sizeof(*p), PROT_READ|PROT_WRITE, | ||||
MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) | MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) | ||||
return (-1); | return (-1); | ||||
/* Allow bootstrapping arc4random.c on Linux/macOS */ | /* Allow bootstrapping arc4random.c on Linux/macOS */ | ||||
kib: Don't you need to recheck the MSW, instead of LSW ? Inconsistent read occurs when the overflow… | |||||
Done Inline ActionsEither recheck is fine. LSW updates so infrequently that we're checking for rollover from 0xffffffff -> 0. 0xffffffff -> 0x1 would be surprising, but still correct. Checking MSW would also be fine. Yes, perhaps we do not need the acq on the 2nd read of the checked word. cem: Either recheck is fine. LSW updates so infrequently that we're checking for rollover from… | |||||
#ifdef INHERIT_ZERO | #ifdef INHERIT_ZERO | ||||
if (minherit(p, sizeof(*p), INHERIT_ZERO) == -1) { | if (minherit(p, sizeof(*p), INHERIT_ZERO) == -1) { | ||||
munmap(p, sizeof(*p)); | munmap(p, sizeof(*p)); | ||||
return (-1); | return (-1); | ||||
} | } | ||||
#endif | #endif | ||||
Done Inline ActionsILP32 check is of course bogus. Even if you decided to broke half of the arches and not implement this feature for them, still it should done as regular fall-out from elf_aux_info returning error. kib: ILP32 check is of course bogus. Even if you decided to broke half of the arches and not… | |||||
Done Inline ActionsYes. I just wanted to get a PoC working on amd64 first and worry about 32-bit later. Initially I did not have the ILP32 check, but lib32 failed to compile due to missing atomic_load_acq_64(). cem: Yes. I just wanted to get a PoC working on amd64 first and worry about 32-bit later. | |||||
Done Inline ActionsOk, I see. How often the gen number is updated ? If it cannot wrap upper word (upper in the sense of containing high-value bits) in a reasonable time, then it is enough to read low, high, re-read low and compare low1 and low2. What for the acquire barrier in the load operation ? Do you order between read of generation and read of the seed from syscall ? kib: Ok, I see.
How often the gen number is updated ? If it cannot wrap upper word (upper in the… | |||||
Not Done Inline ActionsFrom the referenced design documentation, it appears to be a slowly (once an hour), so wrapping is probably not a big deal here. I do have some doubts with the benefit of performing reseeding upon root seed refresh. Effectively, it triggers an immediate reseed so applications that does arc4random(3) a lot would hammer the system entropy source in an hourly basis (with the existing code, they are relatively independent and only reseeds when arc4random(3) thinks necessary, either when we generated enough output, or have forked, whichever comes first). This seems to be a bad design choice for both security (the randomness caused by accumulated scheduling skews between arc4random(3) calls in the system are now gone) and performance (tends to do something computational intensive in hourly basis). delphij: From the referenced design documentation, it appears to be a slowly (once an hour), so wrapping… | |||||
Done Inline Actions@kib, Generation counter bumps fairly infrequently, starting at 1 second, backing off to 1 hour. It can also be bumped a handful of times by new entropy sources showing up; none of that risks overflowing the bottom 32 bits especially quickly, so the L-H-L read scheme proposed is probably fine. Re: acquire, yes. The idea is the seed version is the one this PRNG is seeded with, so we should read the seed from syscall strictly after grabbing the seed version. (Even better would be an interface to provide both the seed version and seed entropy at the same time, like the kernel provides to arc4random(9) in the parent review, but that would require a new syscall, or abusing sysctl, or abusing getrandom(2).) cem: @kib, Generation counter bumps fairly infrequently, starting at 1 second, backing off to 1 hour. | |||||
Done Inline Actions
I should have added: storing a stale seed version due to a race is ok. We just reseed one more time than we would have otherwise. Storing a newer seed version means we missed a reseed, which is bad. cem: > The idea is the seed version is the one this PRNG is seeded with
I should have added… | |||||
_rs_initialize_fxrng(); | |||||
Not Done Inline ActionsWhy use _1 and not _CURR ? kib: Why use _1 and not _CURR ? | |||||
Done Inline ActionsWhy _CURR? This version of the code understands version 1 of the ABI; if CURR is bumped, this code should still be associated with version 1, not whatever moving target CURR is. cem: Why `_CURR`? This version of the code understands version 1 of the ABI; if CURR is bumped… | |||||
Not Done Inline ActionsThis is the common practice, look e.g. at the sys/conf.h. The motivation is that ver 2 typically differs from ver 1 by some adjustments, instead of being rewritten from scratch. Correspondingly, the code gets some fiddling with, which makes one less place to change (and forget). kib: This is the common practice, look e.g. at the sys/conf.h. The motivation is that ver 2… | |||||
*rsp = &p->rs; | *rsp = &p->rs; | ||||
*rsxp = &p->rsx; | *rsxp = &p->rsx; | ||||
return (0); | return (0); | ||||
} | } | ||||
/* | |||||
* This isn't only detecting fork. We're also using the existing callback from | |||||
* _rs_stir_if_needed() to force arc4random(3) to reseed if the fenestrasX root | |||||
* seed version has changed. (That is, the root random(4) has reseeded from | |||||
* pooled entropy.) | |||||
*/ | |||||
static inline void | static inline void | ||||
_rs_forkdetect(void) | _rs_forkdetect(void) | ||||
{ | { | ||||
/* Detect fork (minherit(2) INHERIT_ZERO). */ | |||||
if (__predict_false(rs == NULL || rsx == NULL)) | |||||
return; | |||||
/* If present, detect kernel FenestrasX seed version change. */ | |||||
Done Inline ActionsPlease do not emit messages from libc. kib: Please do not emit messages from libc. | |||||
Done Inline ActionsOk cem: Ok | |||||
if (vdso_fxrngp == NULL) | |||||
return; | |||||
Not Done Inline ActionsThis puns on int/long for ILP32. Since you use #ifdef __LP64 already, I think this should be type-correct as well. kib: This puns on int/long for ILP32. Since you use #ifdef __LP64 already, I think this should be… | |||||
Done Inline ActionsHow does it pun on int/long? fxrng_root_generationp is unsigned long* cem: How does it pun on int/long? `fxrng_root_generationp` is `unsigned long*` | |||||
Not Done Inline ActionsHm, yes, you do type-punning with fxrng_root_generation instead. One more reason to use 32bit IMO. kib: Hm, yes, you do type-punning with fxrng_root_generation instead. One more reason to use 32bit… | |||||
if (__predict_true(rsx->rs_seed_generation == | |||||
Not Done Inline ActionsThere is no check for version there. kib: There is no check for version there. | |||||
fxrng_load_acq_generation(&vdso_fxrngp->fx_generation32))) | |||||
return; | |||||
/* Invalidate rs_buf to force "stir" (reseed). */ | |||||
memset(rs, 0, sizeof(*rs)); | |||||
} | } |
Don't you need to recheck the MSW, instead of LSW ? Inconsistent read occurs when the overflow increases the value of the high word.
Also, I believe that two fences are enough: