Changeset View
Standalone View
lib/libc/gen/arc4random.h
Show All 18 Lines | |||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||||
* | * | ||||
* $FreeBSD$ | * $FreeBSD$ | ||||
*/ | */ | ||||
/* | /* | ||||
* Stub functions for portability. | * Stub functions for portability. | ||||
*/ | */ | ||||
#include <sys/elf.h> | |||||
#include <sys/endian.h> | |||||
#include <sys/mman.h> | #include <sys/mman.h> | ||||
#include <sys/time.h> /* for sys/vdso.h only. */ | |||||
#include <sys/vdso.h> | |||||
#include <machine/atomic.h> | |||||
#include <err.h> | |||||
#include <errno.h> | |||||
#include <signal.h> | #include <signal.h> | ||||
#include <stdbool.h> | |||||
#include <stdint.h> | |||||
/* | |||||
* The root seed version is a 64-bit counter, but we truncate it to a 32-bit | |||||
* value on ILP32 userspace (including compat32). | |||||
*/ | |||||
static unsigned long *fxrng_root_generationp; | |||||
static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; | static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; | ||||
#define _ARC4_LOCK() \ | #define _ARC4_LOCK() \ | ||||
do { \ | do { \ | ||||
if (__isthreaded) \ | if (__isthreaded) \ | ||||
_pthread_mutex_lock(&arc4random_mtx); \ | _pthread_mutex_lock(&arc4random_mtx); \ | ||||
} while (0) | } while (0) | ||||
#define _ARC4_UNLOCK() \ | #define _ARC4_UNLOCK() \ | ||||
do { \ | do { \ | ||||
if (__isthreaded) \ | if (__isthreaded) \ | ||||
_pthread_mutex_unlock(&arc4random_mtx); \ | _pthread_mutex_unlock(&arc4random_mtx); \ | ||||
} while (0) | } while (0) | ||||
static inline void | static inline void | ||||
_getentropy_fail(void) | _getentropy_fail(void) | ||||
{ | { | ||||
raise(SIGKILL); | raise(SIGKILL); | ||||
} | } | ||||
static inline int | static inline int | ||||
_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) | _rs_allocate(struct _rs **rsp, struct _rsx **rsxp) | ||||
{ | { | ||||
struct vdso_fxrng_generation *vdso_fxrngp; | |||||
struct { | struct { | ||||
struct _rs rs; | struct _rs rs; | ||||
struct _rsx rsx; | struct _rsx rsx; | ||||
} *p; | } *p; | ||||
int error; | |||||
if ((p = mmap(NULL, sizeof(*p), PROT_READ|PROT_WRITE, | if ((p = mmap(NULL, sizeof(*p), PROT_READ|PROT_WRITE, | ||||
MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) | MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) | ||||
return (-1); | return (-1); | ||||
if (minherit(p, sizeof(*p), INHERIT_ZERO) == -1) { | if (minherit(p, sizeof(*p), INHERIT_ZERO) == -1) { | ||||
munmap(p, sizeof(*p)); | munmap(p, sizeof(*p)); | ||||
return (-1); | return (-1); | ||||
} | } | ||||
error = _elf_aux_info(AT_FXRNG, &vdso_fxrngp, sizeof(vdso_fxrngp)); | |||||
kib: ILP32 check is of course bogus. Even if you decided to broke half of the arches and not… | |||||
Done Inline ActionsYes. I just wanted to get a PoC working on amd64 first and worry about 32-bit later. Initially I did not have the ILP32 check, but lib32 failed to compile due to missing atomic_load_acq_64(). cem: Yes. I just wanted to get a PoC working on amd64 first and worry about 32-bit later. | |||||
Done Inline ActionsOk, I see. How often the gen number is updated ? If it cannot wrap upper word (upper in the sense of containing high-value bits) in a reasonable time, then it is enough to read low, high, re-read low and compare low1 and low2. What for the acquire barrier in the load operation ? Do you order between read of generation and read of the seed from syscall ? kib: Ok, I see.
How often the gen number is updated ? If it cannot wrap upper word (upper in the… | |||||
Not Done Inline ActionsFrom the referenced design documentation, it appears to be a slowly (once an hour), so wrapping is probably not a big deal here. I do have some doubts with the benefit of performing reseeding upon root seed refresh. Effectively, it triggers an immediate reseed so applications that does arc4random(3) a lot would hammer the system entropy source in an hourly basis (with the existing code, they are relatively independent and only reseeds when arc4random(3) thinks necessary, either when we generated enough output, or have forked, whichever comes first). This seems to be a bad design choice for both security (the randomness caused by accumulated scheduling skews between arc4random(3) calls in the system are now gone) and performance (tends to do something computational intensive in hourly basis). delphij: From the referenced design documentation, it appears to be a slowly (once an hour), so wrapping… | |||||
Done Inline Actions@kib, Generation counter bumps fairly infrequently, starting at 1 second, backing off to 1 hour. It can also be bumped a handful of times by new entropy sources showing up; none of that risks overflowing the bottom 32 bits especially quickly, so the L-H-L read scheme proposed is probably fine. Re: acquire, yes. The idea is the seed version is the one this PRNG is seeded with, so we should read the seed from syscall strictly after grabbing the seed version. (Even better would be an interface to provide both the seed version and seed entropy at the same time, like the kernel provides to arc4random(9) in the parent review, but that would require a new syscall, or abusing sysctl, or abusing getrandom(2).) cem: @kib, Generation counter bumps fairly infrequently, starting at 1 second, backing off to 1 hour. | |||||
Done Inline Actions
I should have added: storing a stale seed version due to a race is ok. We just reseed one more time than we would have otherwise. Storing a newer seed version means we missed a reseed, which is bad. cem: > The idea is the seed version is the one this PRNG is seeded with
I should have added… | |||||
if (error == 0) { | |||||
#ifdef __LP64__ | |||||
Not Done Inline ActionsWhy use _1 and not _CURR ? kib: Why use _1 and not _CURR ? | |||||
Done Inline ActionsWhy _CURR? This version of the code understands version 1 of the ABI; if CURR is bumped, this code should still be associated with version 1, not whatever moving target CURR is. cem: Why `_CURR`? This version of the code understands version 1 of the ABI; if CURR is bumped… | |||||
Not Done Inline ActionsThis is the common practice, look e.g. at the sys/conf.h. The motivation is that ver 2 typically differs from ver 1 by some adjustments, instead of being rewritten from scratch. Correspondingly, the code gets some fiddling with, which makes one less place to change (and forget). kib: This is the common practice, look e.g. at the sys/conf.h. The motivation is that ver 2… | |||||
fxrng_root_generationp = &vdso_fxrngp->fx_generation; | |||||
#else | |||||
/* | |||||
* Dummy cast to appease Clang's integer type literalism -- | |||||
* long and int are both 32-bit on *IL*P*32*, but they are | |||||
* distinct types to Clang. | |||||
*/ | |||||
fxrng_root_generationp = (void *)&vdso_fxrngp->fx_generation32; | |||||
#endif | |||||
} else { | |||||
/* | |||||
* Transition period: new userspace on old kernel. Should | |||||
* become a hard error at some point, if the scheme is adopted. | |||||
*/ | |||||
Not Done Inline ActionsDon't you need to recheck the MSW, instead of LSW ? Inconsistent read occurs when the overflow increases the value of the high word. Also, I believe that two fences are enough: hi1 = atomic_load_32(HI); atomic_thread_fence_acq(); low = atomic_load_32(LO); atomic_thread_fence_acq(); hi2 = atomic_load_32(HI); kib: Don't you need to recheck the MSW, instead of LSW ? Inconsistent read occurs when the overflow… | |||||
Done Inline ActionsEither recheck is fine. LSW updates so infrequently that we're checking for rollover from 0xffffffff -> 0. 0xffffffff -> 0x1 would be surprising, but still correct. Checking MSW would also be fine. Yes, perhaps we do not need the acq on the 2nd read of the checked word. cem: Either recheck is fine. LSW updates so infrequently that we're checking for rollover from… | |||||
errno = error; | |||||
#ifdef NOTYET | |||||
return (-1); | |||||
#else | |||||
warn("Could not retrieve AT_FXRNG auxinfo"); | |||||
kibUnsubmitted Done Inline ActionsPlease do not emit messages from libc. kib: Please do not emit messages from libc. | |||||
cemAuthorUnsubmitted Done Inline ActionsOk cem: Ok | |||||
#endif | |||||
} | |||||
*rsp = &p->rs; | *rsp = &p->rs; | ||||
*rsxp = &p->rsx; | *rsxp = &p->rsx; | ||||
return (0); | return (0); | ||||
} | } | ||||
/* | |||||
* This isn't detecting fork; we're just using the existing callback from | |||||
* _rs_stir_if_needed() to force arc4random(3) to reseed if the fenestrasX root | |||||
* seed version has changed. (That is, the root random(4) has reseeded from | |||||
* pooled entropy.) | |||||
*/ | |||||
static inline void | static inline void | ||||
_rs_forkdetect(void) | _rs_forkdetect(void) | ||||
{ | { | ||||
if (__predict_false(rs == NULL || rsx == NULL)) | |||||
return; | |||||
if (fxrng_root_generationp == NULL) | |||||
return; | |||||
if (__predict_true(rsx->rs_seed_generation == | |||||
atomic_load_acq_long(fxrng_root_generationp))) | |||||
kibUnsubmitted Not Done Inline ActionsThis puns on int/long for ILP32. Since you use #ifdef __LP64 already, I think this should be type-correct as well. kib: This puns on int/long for ILP32. Since you use #ifdef __LP64 already, I think this should be… | |||||
cemAuthorUnsubmitted Done Inline ActionsHow does it pun on int/long? fxrng_root_generationp is unsigned long* cem: How does it pun on int/long? `fxrng_root_generationp` is `unsigned long*` | |||||
kibUnsubmitted Not Done Inline ActionsHm, yes, you do type-punning with fxrng_root_generation instead. One more reason to use 32bit IMO. kib: Hm, yes, you do type-punning with fxrng_root_generation instead. One more reason to use 32bit… | |||||
return; | |||||
Not Done Inline ActionsThere is no check for version there. kib: There is no check for version there. | |||||
/* Invalidate rs_buf to force "stir" (reseed). */ | |||||
memset(rs, 0, sizeof(*rs)); | |||||
} | } |
ILP32 check is of course bogus. Even if you decided to broke half of the arches and not implement this feature for them, still it should done as regular fall-out from elf_aux_info returning error.