Changeset View
Changeset View
Standalone View
Standalone View
libkern/arc4random.c
Context not available. | |||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/time.h> | #include <sys/time.h> | ||||
#include <sys/smp.h> | |||||
#include <sys/malloc.h> | |||||
#define ARC4_RESEED_BYTES 65536 | #define ARC4_RESEED_BYTES 65536 | ||||
#define ARC4_RESEED_SECONDS 300 | #define ARC4_RESEED_SECONDS 300 | ||||
Context not available. | |||||
int arc4rand_iniseed_state = ARC4_ENTR_NONE; | int arc4rand_iniseed_state = ARC4_ENTR_NONE; | ||||
static u_int8_t arc4_i, arc4_j; | MALLOC_DEFINE(M_ARC4RANDOM, "arc4random", "arc4random structures"); | ||||
static int arc4_numruns = 0; | |||||
static u_int8_t arc4_sbox[256]; | |||||
static time_t arc4_t_reseed; | |||||
static struct mtx arc4_mtx; | |||||
static u_int8_t arc4_randbyte(void); | struct arc4_s { | ||||
u_int8_t i, j; | |||||
int numruns; | |||||
u_int8_t sbox[256]; | |||||
time_t t_reseed; | |||||
struct mtx mtx; | |||||
}; | |||||
static struct arc4_s *arc4inst = NULL; | |||||
#define ARC4_FOREACH(_arc4) \ | |||||
for (_arc4 = &arc4inst[0]; _arc4 <= &arc4inst[mp_maxid]; _arc4++) | |||||
static u_int8_t arc4_randbyte(struct arc4_s *arc4); | |||||
static __inline void | static __inline void | ||||
arc4_swap(u_int8_t *a, u_int8_t *b) | arc4_swap(u_int8_t *a, u_int8_t *b) | ||||
{ | { | ||||
Context not available. | |||||
* Stir our S-box. | * Stir our S-box. | ||||
*/ | */ | ||||
static void | static void | ||||
arc4_randomstir(void) | arc4_randomstir(struct arc4_s* arc4) | ||||
{ | { | ||||
u_int8_t key[ARC4_KEYBYTES]; | u_int8_t key[ARC4_KEYBYTES]; | ||||
int n; | int n; | ||||
Context not available. | |||||
*/ | */ | ||||
(void)read_random(key, ARC4_KEYBYTES); | (void)read_random(key, ARC4_KEYBYTES); | ||||
getmicrouptime(&tv_now); | getmicrouptime(&tv_now); | ||||
mtx_lock(&arc4_mtx); | mtx_lock(&arc4->mtx); | ||||
for (n = 0; n < 256; n++) { | for (n = 0; n < 256; n++) { | ||||
arc4_j = (arc4_j + arc4_sbox[n] + key[n]) % 256; | arc4->j = (arc4->j + arc4->sbox[n] + key[n]) % 256; | ||||
arc4_swap(&arc4_sbox[n], &arc4_sbox[arc4_j]); | arc4_swap(&arc4->sbox[n], &arc4->sbox[arc4->j]); | ||||
} | } | ||||
arc4_i = arc4_j = 0; | arc4->i = arc4->j = 0; | ||||
/* Reset for next reseed cycle. */ | /* Reset for next reseed cycle. */ | ||||
arc4_t_reseed = tv_now.tv_sec + ARC4_RESEED_SECONDS; | arc4->t_reseed = tv_now.tv_sec + ARC4_RESEED_SECONDS; | ||||
arc4_numruns = 0; | arc4->numruns = 0; | ||||
/* | /* | ||||
* Throw away the first N words of output, as suggested in the | * Throw away the first N words of output, as suggested in the | ||||
* paper "Weaknesses in the Key Scheduling Algorithm of RC4" | * paper "Weaknesses in the Key Scheduling Algorithm of RC4" | ||||
Context not available. | |||||
* http://dl.acm.org/citation.cfm?id=646557.694759 | * http://dl.acm.org/citation.cfm?id=646557.694759 | ||||
*/ | */ | ||||
for (n = 0; n < 256*4; n++) | for (n = 0; n < 256*4; n++) | ||||
arc4_randbyte(); | arc4_randbyte(arc4); | ||||
mtx_unlock(&arc4_mtx); | |||||
mtx_unlock(&arc4->mtx); | |||||
} | } | ||||
/* | /* | ||||
Context not available. | |||||
static void | static void | ||||
arc4_init(void) | arc4_init(void) | ||||
{ | { | ||||
struct arc4_s *arc4; | |||||
int n; | int n; | ||||
mtx_init(&arc4_mtx, "arc4_mtx", NULL, MTX_DEF); | arc4inst = malloc((mp_maxid + 1) * sizeof(struct arc4_s), | ||||
arc4_i = arc4_j = 0; | M_ARC4RANDOM, M_NOWAIT | M_ZERO); | ||||
for (n = 0; n < 256; n++) | KASSERT(arc4inst != NULL, ("arc4_init: memory allocation error")); | ||||
arc4_sbox[n] = (u_int8_t) n; | |||||
arc4_t_reseed = 0; | ARC4_FOREACH(arc4) { | ||||
mtx_init(&arc4->mtx, "arc4_mtx", NULL, MTX_DEF); | |||||
arc4->i = arc4->j = 0; | |||||
for (n = 0; n < 256; n++) | |||||
arc4->sbox[n] = (u_int8_t) n; | |||||
arc4->t_reseed = -1; | |||||
arc4->numruns = 0; | |||||
} | |||||
} | } | ||||
SYSINIT(arc4, SI_SUB_LOCK, SI_ORDER_ANY, arc4_init, NULL); | |||||
SYSINIT(arc4_init, SI_SUB_LOCK, SI_ORDER_ANY, arc4_init, NULL); | |||||
static void | |||||
arc4_uninit(void) | |||||
{ | |||||
struct arc4_s *arc4; | |||||
ARC4_FOREACH(arc4) { | |||||
mtx_destroy(&arc4->mtx); | |||||
} | |||||
free(arc4inst, M_ARC4RANDOM); | |||||
} | |||||
SYSUNINIT(arc4, SI_SUB_LOCK, SI_ORDER_ANY, arc4_uninit, NULL); | |||||
/* | /* | ||||
* Generate a random byte. | * Generate a random byte. | ||||
*/ | */ | ||||
static u_int8_t | static u_int8_t | ||||
arc4_randbyte(void) | arc4_randbyte(struct arc4_s *arc4) | ||||
{ | { | ||||
u_int8_t arc4_t; | u_int8_t arc4_t; | ||||
arc4_i = (arc4_i + 1) % 256; | arc4->i = (arc4->i + 1) % 256; | ||||
arc4_j = (arc4_j + arc4_sbox[arc4_i]) % 256; | arc4->j = (arc4->j + arc4->sbox[arc4->i]) % 256; | ||||
arc4_swap(&arc4_sbox[arc4_i], &arc4_sbox[arc4_j]); | arc4_swap(&arc4->sbox[arc4->i], &arc4->sbox[arc4->j]); | ||||
arc4_t = (arc4_sbox[arc4_i] + arc4_sbox[arc4_j]) % 256; | arc4_t = (arc4->sbox[arc4->i] + arc4->sbox[arc4->j]) % 256; | ||||
return arc4_sbox[arc4_t]; | return arc4->sbox[arc4_t]; | ||||
} | } | ||||
/* | /* | ||||
Context not available. | |||||
{ | { | ||||
u_char *p; | u_char *p; | ||||
struct timeval tv; | struct timeval tv; | ||||
struct arc4_s *arc4; | |||||
if (reseed || atomic_cmpset_int(&arc4rand_iniseed_state, | |||||
ARC4_ENTR_HAVE, ARC4_ENTR_SEED)) { | |||||
ARC4_FOREACH(arc4) | |||||
arc4_randomstir(arc4); | |||||
} | |||||
arc4 = &arc4inst[curcpu]; | |||||
getmicrouptime(&tv); | getmicrouptime(&tv); | ||||
if (atomic_cmpset_int(&arc4rand_iniseed_state, ARC4_ENTR_HAVE, | if ((arc4->numruns > ARC4_RESEED_BYTES) | ||||
delphij: Note that this is an unlocked read of arc4->numruns; wouldn't this lead to a race window? (Not necessarily a problem in practice -- see the full comment below.)
ARC4_ENTR_SEED) || reseed || | || (tv.tv_sec > arc4->t_reseed)) | ||||
delphij (unsubmitted inline comment): style: the || operator should be at the end of the line, not the beginning.
(arc4_numruns > ARC4_RESEED_BYTES) || | arc4_randomstir(arc4); | ||||
(tv.tv_sec > arc4_t_reseed)) | |||||
arc4_randomstir(); | |||||
mtx_lock(&arc4_mtx); | mtx_lock(&arc4->mtx); | ||||
arc4_numruns += len; | arc4->numruns += len; | ||||
delphij (unsubmitted inline comment): Because the mtx is released and re-acquired, we may no longer be guaranteed to have a satisfactory numruns at this point (and may be running on a different CPU)?
emeric.poupon@stormshield.eu (author, unsubmitted reply): Unfortunately, this is a problem that is already present in the current version. The goal is not to bind the running thread to a given CPU, but just to split the arc4rand state into several instances in order to reduce contention. I used curcpu to choose an instance, but I could as well have done round-robin over a pool of instances whose size is set by a tunable.
p = ptr; | p = ptr; | ||||
while (len--) | while (len--) | ||||
*p++ = arc4_randbyte(); | *p++ = arc4_randbyte(arc4); | ||||
mtx_unlock(&arc4_mtx); | mtx_unlock(&arc4->mtx); | ||||
} | } | ||||
uint32_t | uint32_t | ||||
Context not available. |
Note that this is an unlocked read of arc4->numruns -- wouldn't this lead to a race window? (Not necessarily a problem in practice, but it's worrying me.) @markm and/or @ache -- what's your opinion?