Changeset View
Standalone View
sys/libkern/arc4random.c
/*- | /*- | ||||
* THE BEER-WARE LICENSE | * Copyright (c) 2017 Mark R V Murray | ||||
* All rights reserved. | |||||
* | * | ||||
* <dan@FreeBSD.ORG> wrote this file. As long as you retain this notice you | * Redistribution and use in source and binary forms, with or without | ||||
* can do whatever you want with this stuff. If we meet some day, and you | * modification, are permitted provided that the following conditions | ||||
* think this stuff is worth it, you can buy me a beer in return. | * are met: | ||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer | |||||
* in this position and unchanged. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | * | ||||
* Dan Moschuk | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | ||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
* | |||||
markm: I've emailed Dan to ask permission to change the license. | |||||
*/ | */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <sys/types.h> | #include <sys/types.h> | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/random.h> | #include <sys/random.h> | ||||
#include <sys/libkern.h> | #include <sys/libkern.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/time.h> | #include <sys/time.h> | ||||
#include <sys/smp.h> | #include <sys/smp.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#define ARC4_RESEED_BYTES 65536 | #include <crypto/chacha20/chacha.h> | ||||
#define ARC4_RESEED_SECONDS 300 | |||||
#define ARC4_KEYBYTES 256 | |||||
#define CHACHA20_RESEED_BYTES 65536 | |||||
#define CHACHA20_RESEED_SECONDS 300 | |||||
#define CHACHA20_KEYBYTES 32 | |||||
#define CHACHA20_BUFFER_SIZE 64 | |||||
int arc4rand_iniseed_state = ARC4_ENTR_NONE; | int arc4rand_iniseed_state = ARC4_ENTR_NONE; | ||||
MALLOC_DEFINE(M_ARC4RANDOM, "arc4random", "arc4random structures"); | MALLOC_DEFINE(M_CHACHA20RANDOM, "chacha20random", "chacha20random structures"); | ||||
struct arc4_s { | struct chacha20_s { | ||||
struct mtx mtx; | struct mtx mtx; | ||||
u_int8_t i, j; | |||||
int numruns; | int numruns; | ||||
u_int8_t sbox[256]; | |||||
time_t t_reseed; | time_t t_reseed; | ||||
u_int8_t m_buffer[CHACHA20_BUFFER_SIZE]; | |||||
struct chacha_ctx ctx; | |||||
} __aligned(CACHE_LINE_SIZE); | } __aligned(CACHE_LINE_SIZE); | ||||
static struct arc4_s *arc4inst = NULL; | static struct chacha20_s *chacha20inst = NULL; | ||||
#define ARC4_FOREACH(_arc4) \ | #define CHACHA20_FOREACH(_chacha20) \ | ||||
for (_arc4 = &arc4inst[0]; _arc4 <= &arc4inst[mp_maxid]; _arc4++) | for (_chacha20 = &chacha20inst[0]; \ | ||||
_chacha20 <= &chacha20inst[mp_maxid]; \ | |||||
_chacha20++) | |||||
static u_int8_t arc4_randbyte(struct arc4_s *arc4); | |||||
static __inline void | |||||
arc4_swap(u_int8_t *a, u_int8_t *b) | |||||
{ | |||||
u_int8_t c; | |||||
c = *a; | |||||
*a = *b; | |||||
*b = c; | |||||
} | |||||
/* | /* | ||||
* Stir our S-box. | * Mix up the current context. | ||||
*/ | */ | ||||
static void | static void | ||||
arc4_randomstir(struct arc4_s* arc4) | chacha20_randomstir(struct chacha20_s* chacha20) | ||||
{ | { | ||||
u_int8_t key[ARC4_KEYBYTES]; | u_int8_t key[CHACHA20_KEYBYTES]; | ||||
u_int8_t c_buffer[CHACHA20_BUFFER_SIZE]; | |||||
int n; | int n; | ||||
struct timeval tv_now; | struct timeval tv_now; | ||||
/* | /* | ||||
Done Inline ActionsNote that c_buffer is only used when BURN_OUTPUT_LIKE_RC4. delphij: Note that c_buffer is only used when BURN_OUTPUT_LIKE_RC4. | |||||
* XXX: FIX!! This isn't brilliant. Need more confidence. | * XXX: FIX!! This isn't brilliant. Need more confidence. | ||||
* This returns zero entropy before random(4) is seeded. | * This returns zero entropy before random(4) is seeded. | ||||
* But how? At early boot, there is no entropy, but demand | |||||
* for it? Where else can we grab some cheap garbage? | |||||
*/ | */ | ||||
allanjudeUnsubmitted Done Inline Actionsis the saved /entropy file useful for this? allanjude: is the saved /entropy file useful for this? | |||||
markmAuthorUnsubmitted Done Inline ActionsNo, sorry; files are WAY too late in the game. But it occurs to me I may be able to do something with boot-time stored entropy. markm: No, sorry; files are WAY too late in the game. But it occurs to me I may be able to do… | |||||
delphijUnsubmitted Done Inline ActionsJust an idea -- I wonder if it's possible that we could do something like, if the read_random() failed, we fill CHACHA20_KEYBYTES with some data derived from the loader supplied /boot/entropy plus attach entropy already available to harvest queue? It's not perfect but would be better than just crossing fingers with random data on stack... delphij: Just an idea -- I wonder if it's possible that we could do something like, if the read_random()… | |||||
(void)read_random(key, ARC4_KEYBYTES); | (void)read_random(key, CHACHA20_KEYBYTES); | ||||
Done Inline Actionsthis is where arc4 used to initalize it randomness, this should be carried over to chacha, or at least a description why WHY it is safe to change this behavior should be included. jmg: this is where arc4 used to initalize it randomness, this should be carried over to chacha, or… | |||||
Done Inline ActionsThe initialisation is present. Please look again below the preload block. markm: The initialisation is present. Please look again below the preload block. | |||||
getmicrouptime(&tv_now); | getmicrouptime(&tv_now); | ||||
mtx_lock(&arc4->mtx); | mtx_lock(&chacha20->mtx); | ||||
for (n = 0; n < 256; n++) { | chacha_keysetup(&chacha20->ctx, key, CHACHA20_KEYBYTES*8); | ||||
Done Inline Actions[no action requested] architecturally, this data really "belong" to random_harvestq, should we move the code there? delphij: [no action requested] architecturally, this data really "belong" to random_harvestq, should we… | |||||
Done Inline ActionsI want to keep it independent of the harvestq stuff, as arc4random is now capable of starting quite safely without involving random(9). markm: I want to keep it independent of the harvestq stuff, as arc4random is now capable of starting… | |||||
arc4->j = (arc4->j + arc4->sbox[n] + key[n]) % 256; | chacha_ivsetup(&chacha20->ctx, (u_char *)&tv_now.tv_sec, | ||||
arc4_swap(&arc4->sbox[n], &arc4->sbox[arc4->j]); | (u_char *)&tv_now.tv_usec); | ||||
} | |||||
arc4->i = arc4->j = 0; | |||||
/* Reset for next reseed cycle. */ | /* Reset for next reseed cycle. */ | ||||
arc4->t_reseed = tv_now.tv_sec + ARC4_RESEED_SECONDS; | chacha20->t_reseed = tv_now.tv_sec + CHACHA20_RESEED_SECONDS; | ||||
arc4->numruns = 0; | chacha20->numruns = 0; | ||||
/* | /* Burn the first 4k of output just because. */ | ||||
Done Inline ActionsThis seems to be unused code, and the reasoning for old code was to address RC4 key scheduling weakness, should we just remove the chunk so reader won't get confused by it? delphij: This seems to be unused code, and the reasoning for old code was to address RC4 key scheduling… | |||||
* Throw away the first N words of output, as suggested in the | for (n = 0; n < (1024*4)/CHACHA20_BUFFER_SIZE; n++) | ||||
* paper "Weaknesses in the Key Scheduling Algorithm of RC4" | chacha_encrypt_bytes(&chacha20->ctx, chacha20->m_buffer, | ||||
* by Fluher, Mantin, and Shamir. (N = 768 in our case.) | c_buffer, CHACHA20_BUFFER_SIZE); | ||||
* | memset(c_buffer, 0, CHACHA20_BUFFER_SIZE); | ||||
Done Inline ActionsPlease use explicit_bzero() here. delphij: Please use explicit_bzero() here. | |||||
Done Inline Actionsno initialization is done in the case that there is no entropy module loaded. jmg: no initialization is done in the case that there is no entropy module loaded. | |||||
Done Inline ActionsSee below. look for the ... chacha_keysetup(&chacha20->ctx, key, CHACHA20_KEYBYTES*8); chacha_ivsetup(&chacha20->ctx, (u_char *)&tv_now.tv_sec, ... lines. markm: See below. look for the ...
chacha_keysetup(&chacha20->ctx, key, CHACHA20_KEYBYTES*8)… | |||||
* http://dl.acm.org/citation.cfm?id=646557.694759 | mtx_unlock(&chacha20->mtx); | ||||
*/ | |||||
for (n = 0; n < 768*4; n++) | |||||
arc4_randbyte(arc4); | |||||
mtx_unlock(&arc4->mtx); | |||||
} | } | ||||
Done Inline ActionsIf we preserve BURN_OUTPUT_LIKE_RC4, please move this zeroing to the ifdef'ed block. delphij: If we preserve BURN_OUTPUT_LIKE_RC4, please move this zeroing to the ifdef'ed block. | |||||
/* | /* | ||||
* Initialize our S-box to its beginning defaults. | * Initialize the contexts. | ||||
*/ | */ | ||||
static void | static void | ||||
arc4_init(void) | chacha20_init(void) | ||||
{ | { | ||||
struct arc4_s *arc4; | struct chacha20_s *chacha20; | ||||
int n; | |||||
arc4inst = malloc((mp_maxid + 1) * sizeof(struct arc4_s), | chacha20inst = malloc((mp_maxid + 1) * sizeof(struct chacha20_s), | ||||
Done Inline ActionsRWatson: Look here. I didn't *introduce* per-cpu allocation - it was already there. I'm just modifying the /status quo/. markm: RWatson: Look here. I didn't *introduce* per-cpu allocation - it was already there. I'm just… | |||||
M_ARC4RANDOM, M_NOWAIT | M_ZERO); | M_CHACHA20RANDOM, M_NOWAIT | M_ZERO); | ||||
KASSERT(arc4inst != NULL, ("arc4_init: memory allocation error")); | KASSERT(chacha20inst != NULL, ("chacha20_init: memory allocation error")); | ||||
ARC4_FOREACH(arc4) { | CHACHA20_FOREACH(chacha20) { | ||||
mtx_init(&arc4->mtx, "arc4_mtx", NULL, MTX_DEF); | mtx_init(&chacha20->mtx, "chacha20_mtx", NULL, MTX_DEF); | ||||
chacha20->t_reseed = -1; | |||||
arc4->i = arc4->j = 0; | chacha20->numruns = 0; | ||||
for (n = 0; n < 256; n++) | memset(chacha20->m_buffer, 0, CHACHA20_BUFFER_SIZE); | ||||
Done Inline ActionsPlease use explicit_bzero() delphij: Please use explicit_bzero() | |||||
arc4->sbox[n] = (u_int8_t) n; | memset(&chacha20->ctx, 0, sizeof(chacha20->ctx)); | ||||
Done Inline ActionsDitto. delphij: Ditto. | |||||
arc4->t_reseed = -1; | |||||
arc4->numruns = 0; | |||||
} | } | ||||
} | } | ||||
SYSINIT(arc4, SI_SUB_LOCK, SI_ORDER_ANY, arc4_init, NULL); | SYSINIT(chacha20, SI_SUB_LOCK, SI_ORDER_ANY, chacha20_init, NULL); | ||||
static void | static void | ||||
arc4_uninit(void) | chacha20_uninit(void) | ||||
{ | { | ||||
struct arc4_s *arc4; | struct chacha20_s *chacha20; | ||||
ARC4_FOREACH(arc4) { | CHACHA20_FOREACH(chacha20) | ||||
mtx_destroy(&arc4->mtx); | mtx_destroy(&chacha20->mtx); | ||||
free(chacha20inst, M_CHACHA20RANDOM); | |||||
} | } | ||||
free(arc4inst, M_ARC4RANDOM); | SYSUNINIT(chacha20, SI_SUB_LOCK, SI_ORDER_ANY, chacha20_uninit, NULL); | ||||
} | |||||
SYSUNINIT(arc4, SI_SUB_LOCK, SI_ORDER_ANY, arc4_uninit, NULL); | |||||
/* | /* | ||||
* Generate a random byte. | |||||
*/ | |||||
static u_int8_t | |||||
arc4_randbyte(struct arc4_s *arc4) | |||||
{ | |||||
u_int8_t arc4_t; | |||||
arc4->i = (arc4->i + 1) % 256; | |||||
arc4->j = (arc4->j + arc4->sbox[arc4->i]) % 256; | |||||
arc4_swap(&arc4->sbox[arc4->i], &arc4->sbox[arc4->j]); | |||||
arc4_t = (arc4->sbox[arc4->i] + arc4->sbox[arc4->j]) % 256; | |||||
return arc4->sbox[arc4_t]; | |||||
} | |||||
/* | |||||
* MPSAFE | * MPSAFE | ||||
*/ | */ | ||||
void | void | ||||
arc4rand(void *ptr, u_int len, int reseed) | arc4rand(void *ptr, u_int len, int reseed) | ||||
{ | { | ||||
u_char *p; | struct chacha20_s *chacha20; | ||||
struct timeval tv; | struct timeval tv; | ||||
struct arc4_s *arc4; | u_int length; | ||||
u_int8_t *p; | |||||
if (reseed || atomic_cmpset_int(&arc4rand_iniseed_state, | if (reseed || atomic_cmpset_int(&arc4rand_iniseed_state, | ||||
ARC4_ENTR_HAVE, ARC4_ENTR_SEED)) { | ARC4_ENTR_HAVE, ARC4_ENTR_SEED)) { | ||||
ARC4_FOREACH(arc4) | CHACHA20_FOREACH(chacha20) | ||||
arc4_randomstir(arc4); | chacha20_randomstir(chacha20); | ||||
} | } | ||||
arc4 = &arc4inst[curcpu]; | chacha20 = &chacha20inst[curcpu]; | ||||
getmicrouptime(&tv); | getmicrouptime(&tv); | ||||
if ((arc4->numruns > ARC4_RESEED_BYTES) || | if ((chacha20->numruns > CHACHA20_RESEED_BYTES) || | ||||
(tv.tv_sec > arc4->t_reseed)) | (tv.tv_sec > chacha20->t_reseed)) | ||||
arc4_randomstir(arc4); | chacha20_randomstir(chacha20); | ||||
mtx_lock(&arc4->mtx); | mtx_lock(&chacha20->mtx); | ||||
arc4->numruns += len; | chacha20->numruns += len; | ||||
p = ptr; | p = ptr; | ||||
while (len--) | while (len) { | ||||
*p++ = arc4_randbyte(arc4); | length = MIN(CHACHA20_BUFFER_SIZE, len); | ||||
mtx_unlock(&arc4->mtx); | chacha_encrypt_bytes(&chacha20->ctx, chacha20->m_buffer, p, length); | ||||
p += length; | |||||
len -= length; | |||||
} | } | ||||
mtx_unlock(&chacha20->mtx); | |||||
Done Inline ActionsI think there is a slight chance that, if chacha20->numruns < CHACHA20_RESEED_BYTES, but len + chacha20->numruns > CHACHA20_RESEED_BYTES, we would be owing a reseed in the middle. This reminds me https://reviews.freebsd.org/D8130#168807 and I think the principle still applies here, perhaps we should restructure the code block (line 176-191) like this: getmicrouptime(&tv); /* XXX */ p = ptr; while (len > 0) { critical_enter(); chacha20 = &chacha20inst[curcpu]; if ((chacha20->numruns > CHACHA20_RESEED_BYTES) || (tv.tv_sec > chacha20->t_reseed)) { /* randomstir() may block. Reenter the loop after it as we may be on a different CPU and needs to operate a different chacha20 context at the time. */ critical_exit(); chacha20_randomstir(chacha20); getmicrouptime(&tv); continue; } length = MIN(CHACHA20_BUFFER_SIZE, len); chacha20->numruns += len; chacha_encrypt_bytes(&chacha20->ctx, chacha20->m_buffer, p, length); p += length; len -= length; critical_exit(); } And get rid of the per-chacha20 context mutex. delphij: I think there is a slight chance that, if chacha20->numruns < CHACHA20_RESEED_BYTES, but len +… | |||||
Done Inline Actions(My previous reply got lost somehow) Are you sure about this? Last time I went near critical sections, I got very strong pushback. Removing the mutexes here also means removing them in other places where there may be data structure conflict. Surely that means putting those in critical sections too? Folks won't like that. markm: (My previous reply got lost somehow)
Are you sure about this? Last time I went near critical… | |||||
Done Inline ActionsYeah, that's why I asked for a feedback even if it's a reject :) But TL;DR really -- I'm fine if we stick using mutex, and we can continue revising the existing status quo at a later time after this change is landed. My main concern is that we could be consuming more bytes than we are supposed to, because there is no restir in the middle of while loop. My reasoning for critical section: Basically the idea is to prevent preemption or CPU migrations when we are working on the Chacha20 context (because we are already using a per-CPU context now) and there isn't much point to use a full mutex. The only reason we didn't held a mutex or enter a critical section is because read_random() could potentially block. Perhaps we should not do a direct restir in line 167 (as it might not belong to us), but set numruns to CHACHA20_RESEED_BYTES + 1 with an atomic store, and all reads to an atomic read [atomic_load_acq_int(&chacha20->numruns)]? This way, the stir part of chacha20_randomstir() would become something like: getmicrouptime(&tv_now); atomic_store_rel_int(&chacha20->numruns, CHACHA20_RESEED_BYTES + 1); critical_enter(); /* * We can ONLY do the restir if we are still on the same CPU. * Caller is expected to re-check because they have left critical * section. */ if (chacha20inst[curcpu] == chacha20) { chacha_keysetup(&chacha20->ctx, key, CHACHA20_KEYBYTES*8); chacha_ivsetup(&chacha20->ctx, (u_char *)&tv_now.tv_sec, (u_char *)&tv_now.tv_usec); /* Reset for next reseed cycle. */ chacha20->t_reseed = tv_now.tv_sec + CHACHA20_RESEED_SECONDS; chacha20->numruns = 0; } critical_exit(); delphij: Yeah, that's why I asked for a feedback even if it's a reject :)
But TL;DR really -- I'm fine… | |||||
} | |||||
uint32_t | uint32_t | ||||
arc4random(void) | arc4random(void) | ||||
{ | { | ||||
uint32_t ret; | uint32_t ret; | ||||
arc4rand(&ret, sizeof ret, 0); | arc4rand(&ret, sizeof(ret), 0); | ||||
return ret; | return ret; | ||||
} | } |
I've emailed Dan to ask permission to change the license.