Index: conf/files
===================================================================
--- conf/files
+++ conf/files
@@ -527,6 +527,10 @@
 	ipsec | random | wlan_ccmp
 crypto/rijndael/rijndael-api-fst.c	optional geom_bde | random
 crypto/rijndael/rijndael-api.c	optional crypto | ipsec | wlan_ccmp
+crypto/rnd_core.c	standard
+crypto/rnd_chacha.c	standard
+crypto/chacha.c		standard
+crypto/intrinsics.c	standard
 crypto/sha1.c	optional carp | crypto | ipsec | \
 	netgraph_mppc_encryption | sctp
 crypto/sha2/sha2.c	optional crypto | geom_bde | ipsec | random | \
Index: crypto/chacha.c
===================================================================
--- crypto/chacha.c
+++ crypto/chacha.c
@@ -0,0 +1,206 @@
+/*
+ * Heavily based upon:
+ * https://github.com/bitwiseshiftleft/crandom/blob/master/chacha.c
+ */
+#include "intrinsics.h"
+
+void crandom_chacha_expand(u_int64_t iv, u_int64_t ctr, int nr, int output_size, const unsigned char *key_, unsigned char *output_);
+
+// ------------------------------- Vectorized code -------------------------------
+#define shuffle(x,i) _mm_shuffle_epi32(x, \
+	i + ((i+1)&3)*4 + ((i+2)&3)*16 + ((i+3)&3)*64)
+
+#define add _mm_add_epi32
+#define add64 _mm_add_epi64
+
+#define NEED_XOP   (MIGHT_HAVE_XOP)
+#define NEED_SSSE3 (MIGHT_HAVE_SSSE3 && !MUST_HAVE_XOP)
+#define NEED_SSE2  (MIGHT_HAVE_SSE2 && !MUST_HAVE_SSSE3)
+#define NEED_CONV  (!MUST_HAVE_SSE2)
+
+#if NEED_XOP
+static inline void
+quarter_round_xop(ssereg *a, ssereg *b, ssereg *c, ssereg *d) {
+	*a = add(*a,*b); *d = xop_rotate(16, *d ^ *a);
+	*c = add(*c,*d); *b = xop_rotate(12, *b ^ *c);
+	*a = add(*a,*b); *d = xop_rotate(8, *d ^ *a);
+	*c = add(*c,*d); *b = xop_rotate(7, *b ^ *c);
+}
+#endif
+
+#if NEED_SSSE3
+static const ssereg shuffle8  = { 0x0605040702010003ull, 0x0E0D0C0F0A09080Bull };
+static const ssereg shuffle16 = { 0x0504070601000302ull, 0x0D0C0F0E09080B0Aull };
+
+INTRINSIC ssereg ssse3_rotate_8(ssereg a) {
+	return _mm_shuffle_epi8(a, shuffle8);
+}
+
+INTRINSIC ssereg ssse3_rotate_16(ssereg a) {
+	return _mm_shuffle_epi8(a, shuffle16);
+}
+
+static inline void
+quarter_round_ssse3(ssereg *a, ssereg *b, ssereg *c, ssereg *d) {
+	*a = add(*a,*b); *d = ssse3_rotate_16(*d ^ *a);
+	*c = add(*c,*d); *b = sse2_rotate(12, *b ^ *c);
+	*a = add(*a,*b); *d = ssse3_rotate_8( *d ^ *a);
+	*c = add(*c,*d); *b = sse2_rotate(7, *b ^ *c);
+}
+#endif // MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP)
+
+#if NEED_SSE2
+static inline void
+quarter_round_sse2(ssereg *a, ssereg *b, ssereg *c, ssereg *d) {
+	*a = add(*a,*b); *d = sse2_rotate(16, *d ^ *a);
+	*c = add(*c,*d); *b = sse2_rotate(12, *b ^ *c);
+	*a = add(*a,*b); *d = sse2_rotate(8, *d ^ *a);
+	*c = add(*c,*d); *b = sse2_rotate(7, *b ^ *c);
+}
+#endif
+
+#define DOUBLE_ROUND(qrf) { \
+	qrf(&a1,&b1,&c1,&d1); \
+	qrf(&a2,&b2,&c2,&d2); \
+	b1 = shuffle(b1,1); \
+	c1 = shuffle(c1,2); \
+	d1 = shuffle(d1,3); \
+	b2 = shuffle(b2,1); \
+	c2 = shuffle(c2,2); \
+	d2 = shuffle(d2,3); \
+	\
+	qrf(&a1,&b1,&c1,&d1); \
+	qrf(&a2,&b2,&c2,&d2); \
+	b1 = shuffle(b1,3); \
+	c1 = shuffle(c1,2); \
+	d1 = shuffle(d1,1); \
+	b2 = shuffle(b2,3); \
+	c2 = shuffle(c2,2); \
+	d2 = shuffle(d2,1); \
+	}
+
+#define OUTPUT_FUNCTION { \
+	output[0] = add(a1,aa); \
+	output[1] = add(b1,bb); \
+	output[2] = add(c1,cc); \
+	output[3] = add(d1,dd); \
+	output[4] = add(a2,aa); \
+	output[5] = add(b2,bb); \
+	output[6] = add(c2,add(cc,p)); \
+	output[7] = add(d2,dd); \
+	\
+	output += 8; \
+	\
+	cc = add64(add64(cc,p), p); \
+	a1 = a2 = aa; \
+	b1 = b2 = bb; \
+	c1 = cc; c2 = add64(cc,p); \
+	d1 = d2 = dd; \
+	}
+// -------------------------------------------------------------------------------
+
+INTRINSIC u_int32_t rotate(int r, u_int32_t a) {
+	return a<<r | a>>(32-r);
+}
+
+static inline void
+quarter_round(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d) {
+	*a = *a + *b; *d = rotate(16, *d^*a);
+	*c = *c + *d; *b = rotate(12, *b^*c);
+	*a = *a + *b; *d = rotate(8, *d^*a);
+	*c = *c + *d; *b = rotate(7, *b^*c);
+}
+
+void
+crandom_chacha_expand(u_int64_t iv,
+    u_int64_t ctr,
+    int nr,
+    int output_size,
+    const unsigned char *key_,
+    unsigned char *output_) {
+# if MIGHT_HAVE_SSE2
+	if (CPU_HAVE(SSE2)) {
+		ssereg *key = (ssereg *)key_;
+		ssereg *output = (ssereg *)output_;
+
+		ssereg a1 = key[0], a2 = a1, aa = a1,
+		    b1 = key[1], b2 = b1, bb = b1,
+		    c1 = {iv, ctr}, c2 = {iv, ctr+1}, cc = c1,
+		    d1 = {0x3320646e61707865ull, 0x6b20657479622d32ull}, d2 = d1, dd = d1,
+		    p = {0, 1};
+
+		int i,r;
+# if (NEED_XOP)
+		if (CPU_HAVE(XOP)) {
+			for (i=0; i<output_size; i+=128) {	/* two blocks per pass */
+				for (r=nr; r>0; r-=2)
+					DOUBLE_ROUND(quarter_round_xop);
+				OUTPUT_FUNCTION;
+			}
+			return;
+		}
+# endif
+# if (NEED_SSSE3)
+		if (CPU_HAVE(SSSE3)) {
+			for (i=0; i<output_size; i+=128) {
+				for (r=nr; r>0; r-=2)
+					DOUBLE_ROUND(quarter_round_ssse3);
+				OUTPUT_FUNCTION;
+			}
+			return;
+		}
+# endif
+# if (NEED_SSE2)
+		if (CPU_HAVE(SSE2)) {
+			for (i=0; i<output_size; i+=128) {
+				for (r=nr; r>0; r-=2)
+					DOUBLE_ROUND(quarter_round_sse2);
+				OUTPUT_FUNCTION;
+			}
+			return;
+		}
+# endif
+	}
+# endif
+
+# if NEED_CONV
+	{
+		const u_int32_t *key = (const u_int32_t *)key_;
+		u_int32_t
+		    x[16],
+		    input[16] = {
+			key[0], key[1], key[2], key[3],
+			key[4], key[5], key[6], key[7],
+			iv, iv>>32, ctr, ctr>>32,
+			0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+		    },
+		    *output = (u_int32_t *)output_;
+		int i, r;
+
+		for (i=0; i<output_size; i+=64) {
+			for (r=0; r<16; r++) x[r] = input[r];
+			for (r=nr; r>0; r-=2) {
+				quarter_round(&x[0], &x[4], &x[8], &x[12]);
+				quarter_round(&x[1], &x[5], &x[9], &x[13]);
+				quarter_round(&x[2], &x[6], &x[10], &x[14]);
+				quarter_round(&x[3], &x[7], &x[11], &x[15]);
+
+				quarter_round(&x[0], &x[5], &x[10], &x[15]);
+				quarter_round(&x[1], &x[6], &x[11], &x[12]);
+				quarter_round(&x[2], &x[7], &x[8], &x[13]);
+				quarter_round(&x[3], &x[4], &x[9], &x[14]);
+			}
+			for (r=0; r<16; r++) {
+				output[r] = x[r] + input[r];
+			}
+
+			output += 16;
+			input[10]++;		/* ctr occupies words 10-11 */
+			if (!input[10]) input[11]++;
+		}
+	}
+
+#endif // NEED_CONV
+}
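For review purposes, a minimal userland smoke test of crandom_chacha_expand() might look like the following (hypothetical file, not part of the patch; compile it together with chacha.c and intrinsics.c). Note that output is produced in whole 64-byte blocks (128 bytes per pass on the SSE paths), that key and output must be 16-byte aligned for the aligned SSE loads, and that, because crandom keeps the ChaCha constants in row d rather than row a, the keystream is not byte-compatible with the standard ChaCha test vectors:

	#include <sys/types.h>
	#include <stdio.h>

	void crandom_chacha_expand(u_int64_t iv, u_int64_t ctr, int nr,
	    int output_size, const unsigned char *key_, unsigned char *output_);

	int
	main(void)
	{
		/* all-zero test key; static so alignment is easy to enforce */
		static unsigned char key[32] __attribute__((aligned(16)));
		static unsigned char buf[128] __attribute__((aligned(16)));
		int i;

		/* two 64-byte blocks of 20-round keystream, iv 0, counter 0 */
		crandom_chacha_expand(0, 0, 20, sizeof(buf), key, buf);

		for (i = 0; i < 16; i++)
			printf("%02x", buf[i]);
		printf("\n");
		return (0);
	}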
Index: crypto/chacha_private.h
===================================================================
--- crypto/chacha_private.h
+++ crypto/chacha_private.h
@@ -0,0 +1,233 @@
+/*
+chacha-merged.c version 20080118
+D.J. Bernstein
+Public domain.
+*/
+
+/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+
+typedef struct
+{
+	u32 input[16]; /* could be compressed */
+} chacha_ctx;
+
+#define U8C(v) (v##U)
+#define U32C(v) (v##U)
+
+#define U8V(v) ((u8)(v) & U8C(0xFF))
+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
+
+#define ROTL32(v, n) \
+	(U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+#define U8TO32_LITTLE(p) \
+	(((u32)((p)[0])) | \
+	((u32)((p)[1]) << 8) | \
+	((u32)((p)[2]) << 16) | \
+	((u32)((p)[3]) << 24))
+
+#define U32TO8_LITTLE(p, v) \
+	do { \
+		(p)[0] = U8V((v)); \
+		(p)[1] = U8V((v) >> 8); \
+		(p)[2] = U8V((v) >> 16); \
+		(p)[3] = U8V((v) >> 24); \
+	} while (0)
+
+#define ROTATE(v, c) (ROTL32(v, c))
+#define XOR(v, w) ((v) ^ (w))
+#define PLUS(v, w) (U32V((v) + (w)))
+#define PLUSONE(v) (PLUS((v), 1))
+
+#define QUARTERROUND(a, b, c, d) \
+	a = PLUS(a, b); d = ROTATE(XOR(d, a), 16); \
+	c = PLUS(c, d); b = ROTATE(XOR(b, c), 12); \
+	a = PLUS(a, b); d = ROTATE(XOR(d, a), 8); \
+	c = PLUS(c, d); b = ROTATE(XOR(b, c), 7);
+
+static const char sigma[16] = "expand 32-byte k";
+static const char tau[16] = "expand 16-byte k";
+
+static void
+chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits, u32 ivbits)
+{
+	const char *constants;
+
+	x->input[4] = U8TO32_LITTLE(k + 0);
+	x->input[5] = U8TO32_LITTLE(k + 4);
+	x->input[6] = U8TO32_LITTLE(k + 8);
+	x->input[7] = U8TO32_LITTLE(k + 12);
+
+	if (kbits == 256) { /* recommended */
+		k += 16;
+		constants = sigma;
+	} else { /* kbits == 128 */
+		constants = tau;
+	}
+
+	x->input[8] = U8TO32_LITTLE(k + 0);
+	x->input[9] = U8TO32_LITTLE(k + 4);
+	x->input[10] = U8TO32_LITTLE(k + 8);
+	x->input[11] = U8TO32_LITTLE(k + 12);
+	x->input[0] = U8TO32_LITTLE(constants + 0);
+	x->input[1] = U8TO32_LITTLE(constants + 4);
+	x->input[2] = U8TO32_LITTLE(constants + 8);
+	x->input[3] = U8TO32_LITTLE(constants + 12);
+}
+
+static void
+chacha_ivsetup(chacha_ctx *x, const u8 *iv)
+{
+	x->input[12] = 0;
+	x->input[13] = 0;
+	x->input[14] = U8TO32_LITTLE(iv + 0);
+	x->input[15] = U8TO32_LITTLE(iv + 4);
+}
+
+static void
+chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
+{
+	u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+	u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+	u8 *ctarget = NULL;
+	u8 tmp[64];
+	u_int i;
+
+	if (!bytes)
+		return;
+
+	j0 = x->input[0];
+	j1 = x->input[1];
+	j2 = x->input[2];
+	j3 = x->input[3];
+	j4 = x->input[4];
+	j5 = x->input[5];
+	j6 = x->input[6];
+	j7 = x->input[7];
+	j8 = x->input[8];
+	j9 = x->input[9];
+	j10 = x->input[10];
+	j11 = x->input[11];
+	j12 = x->input[12];
+	j13 = x->input[13];
+	j14 = x->input[14];
+	j15 = x->input[15];
+
+	for (;;) {
+		if (bytes < 64) {
+			for (i = 0; i < bytes; ++i)
+				tmp[i] = m[i];
+			m = tmp;
+			ctarget = c;
+			c = tmp;
+		}
+
+		x0 = j0;
+		x1 = j1;
+		x2 = j2;
+		x3 = j3;
+		x4 = j4;
+		x5 = j5;
+		x6 = j6;
+		x7 = j7;
+		x8 = j8;
+		x9 = j9;
+		x10 = j10;
+		x11 = j11;
+		x12 = j12;
+		x13 = j13;
+		x14 = j14;
+		x15 = j15;
+
+		for (i = 20; i > 0; i -= 2) {
+			QUARTERROUND(x0, x4, x8, x12)
+			QUARTERROUND(x1, x5, x9, x13)
+			QUARTERROUND(x2, x6, x10, x14)
+			QUARTERROUND(x3, x7, x11, x15)
+			QUARTERROUND(x0, x5, x10, x15)
+			QUARTERROUND(x1, x6, x11, x12)
+			QUARTERROUND(x2, x7, x8, x13)
+			QUARTERROUND(x3, x4, x9, x14)
+		}
+
+		x0 = PLUS(x0, j0);
+		x1 = PLUS(x1, j1);
+		x2 = PLUS(x2, j2);
+		x3 = PLUS(x3, j3);
+		x4 = PLUS(x4, j4);
+		x5 = PLUS(x5, j5);
+		x6 = PLUS(x6, j6);
+		x7 = PLUS(x7, j7);
+		x8 = PLUS(x8, j8);
+		x9 = PLUS(x9, j9);
+		x10 = PLUS(x10, j10);
+		x11 = PLUS(x11, j11);
+		x12 = PLUS(x12, j12);
+		x13 = PLUS(x13, j13);
+		x14 = PLUS(x14, j14);
+		x15 = PLUS(x15, j15);
+
+#ifndef KEYSTREAM_ONLY
+		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
+		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
+		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
+		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
+		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
+		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
+		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
+		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
+		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
+		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
+		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
+		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
+		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
+		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
+		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
+		x15 = XOR(x15, U8TO32_LITTLE(m + 60));
+#endif
+
+		j12 = PLUSONE(j12);
+
+		if (!j12) {
+			j13 = PLUSONE(j13);
+			/* stopping at 2^70 bytes per nonce is user responsibility */
+		}
+
+		U32TO8_LITTLE(c + 0, x0);
+		U32TO8_LITTLE(c + 4, x1);
+		U32TO8_LITTLE(c + 8, x2);
+		U32TO8_LITTLE(c + 12, x3);
+		U32TO8_LITTLE(c + 16, x4);
+		U32TO8_LITTLE(c + 20, x5);
+		U32TO8_LITTLE(c + 24, x6);
+		U32TO8_LITTLE(c + 28, x7);
+		U32TO8_LITTLE(c + 32, x8);
+		U32TO8_LITTLE(c + 36, x9);
+		U32TO8_LITTLE(c + 40, x10);
+		U32TO8_LITTLE(c + 44, x11);
+		U32TO8_LITTLE(c + 48, x12);
+		U32TO8_LITTLE(c + 52, x13);
+		U32TO8_LITTLE(c + 56, x14);
+		U32TO8_LITTLE(c + 60, x15);
+
+		if (bytes <= 64) {
+			if (bytes < 64) {
+				for (i = 0; i < bytes; ++i)
+					ctarget[i] = c[i];
+			}
+
+			x->input[12] = j12;
+			x->input[13] = j13;
+			return;
+		}
+
+		bytes -= 64;
+		c += 64;
+#ifndef KEYSTREAM_ONLY
+		m += 64;
+#endif
+	}
+}
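chacha_private.h is the fully portable second implementation; it is meant to be #included into a consumer, since it only defines static functions. A hypothetical userland check (not part of the patch): encrypting 64 zero bytes with a zero key and nonce yields raw keystream, which for this hardcoded 20-round code should begin with the widely published ChaCha20 vector 76 b8 e0 ad a0 f1 3d 90:

	#include <sys/types.h>
	#include <stdio.h>
	#include <string.h>

	#include "chacha_private.h"

	int
	main(void)
	{
		chacha_ctx ctx;
		u8 key[32], iv[8], msg[64], ks[64];
		u_int i;

		memset(key, 0, sizeof(key));
		memset(iv, 0, sizeof(iv));
		memset(msg, 0, sizeof(msg));

		chacha_keysetup(&ctx, key, 256, 0);	/* 256-bit key; ivbits unused */
		chacha_ivsetup(&ctx, iv);		/* also zeroes the block counter */
		chacha_encrypt_bytes(&ctx, msg, ks, sizeof(msg));

		for (i = 0; i < 8; i++)
			printf("%02x", ks[i]);
		printf("\n");
		return (0);
	}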
Index: crypto/intrinsics.h
===================================================================
--- crypto/intrinsics.h
+++ crypto/intrinsics.h
@@ -0,0 +1,161 @@
+#ifndef __CRANDOM_INTRINSICS_H__
+#define __CRANDOM_INTRINSICS_H__
+
+/*
+ * Heavily based upon:
+ * https://github.com/bitwiseshiftleft/crandom/blob/master/intrinsics.h
+ */
+
+#include <sys/types.h>
+
+#define INTRINSIC \
+	static inline __attribute__((__gnu_inline__, __always_inline__))
+
+#define CPU_SSE2	0x001
+#define CPU_SSSE3	0x002
+#define CPU_AESNI	0x004
+#define CPU_AVX		0x008
+#define CPU_AVX2	0x010
+#define CPU_XOP		0x020
+#define CPU_RDRAND	0x040
+#define CPU_TSC		0x080
+
+#if defined(__x86_64__) || defined(__i386__)
+#define MIGHT_HAVE_RDRAND 1
+#else
+#define MIGHT_HAVE_RDRAND 0
+#endif
+
+#ifndef MUST_HAVE_RDRAND
+#define MUST_HAVE_RDRAND 0
+#endif
+
+#if defined(__clang__) && __has_builtin(__builtin_readcyclecounter)
+#define rdtsc __builtin_readcyclecounter
+#define MUST_HAVE_TSC 1
+#define MIGHT_HAVE_TSC 1
+#else
+#define MUST_HAVE_TSC 0
+INTRINSIC uint64_t rdtsc() {
+	uint64_t out = 0;
+# if defined(__i386__)
+#  define MIGHT_HAVE_TSC 1
+	asm __volatile__ ("rdtsc" : "=A"(out));
+# elif defined(__x86_64__)
+#  define MIGHT_HAVE_TSC 1
+	/* "=A" names %rax alone on amd64; read %edx:%eax explicitly */
+	uint32_t lo, hi;
+	asm __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+	out = (uint64_t)hi << 32 | lo;
+# else
+#  define MIGHT_HAVE_TSC 0
+# endif
+	return out;
+}
+#endif
+
+#ifdef __SSE2__
+# define MIGHT_HAVE_SSE2 1
+# ifndef MUST_HAVE_SSE2
+#  ifdef __x86_64__
+#   define MUST_HAVE_SSE2 1
+#  else
+#   define MUST_HAVE_SSE2 0
+#  endif
+# endif
+
+# include <emmintrin.h>
+typedef __m128i ssereg;
+# define pslldq _mm_slli_epi32
+# define pshufd _mm_shuffle_epi32
+
+INTRINSIC ssereg sse2_rotate(int r, ssereg a) {
+	return _mm_slli_epi32(a, r) ^ _mm_srli_epi32(a, 32-r);
+}
+
+#else
+# define MIGHT_HAVE_SSE2 0
+# define MUST_HAVE_SSE2 0
+#endif
+
+#ifdef __SSSE3__
+# include <tmmintrin.h>
+# define MIGHT_HAVE_SSSE3 1
+# ifndef MUST_HAVE_SSSE3
+#  define MUST_HAVE_SSSE3 0
+# endif
+#else
+# define MIGHT_HAVE_SSSE3 0
+# define MUST_HAVE_SSSE3 0
+#endif
+
+#ifdef __AES__
+#include <wmmintrin.h>
+# define MIGHT_HAVE_AESNI 1
+# ifndef MUST_HAVE_AESNI
+#  define MUST_HAVE_AESNI 0
+# endif
+INTRINSIC ssereg aeskeygenassist(int rc, ssereg x) {
+	ssereg out;
+	asm("aeskeygenassist %2, %1, %0" : "=x"(out) : "x"(x), "g"(rc));
+	return out;
+}
+
+INTRINSIC ssereg aesenc(ssereg subkey, ssereg block) {
+	ssereg out = block;
+	asm("aesenc %1, %0" : "+x"(out) : "x"(subkey));
+	return out;
+}
+
+INTRINSIC ssereg aesenclast(ssereg subkey, ssereg block) {
+	ssereg out = block;
+	asm("aesenclast %1, %0" : "+x"(out) : "x"(subkey));
+	return out;
+}
+
+#else
+# define MIGHT_HAVE_AESNI 0
+# define MUST_HAVE_AESNI 0
+#endif
+
+#ifdef __XOP__
+#include <x86intrin.h>
+# define MIGHT_HAVE_XOP 1
+# ifndef MUST_HAVE_XOP
+#  define MUST_HAVE_XOP 0
+# endif
+#define xop_rotate(amount, x) _mm_roti_epi32((x), (amount))
+#else
+# define MIGHT_HAVE_XOP 0
+# define MUST_HAVE_XOP 0
+#endif
+
+#ifndef likely
+# define likely(x)	__builtin_expect((x),1)
+# define unlikely(x)	__builtin_expect((x),0)
+#endif
+
+#ifdef _KERNEL
+# if defined(__i386__) || defined(__x86_64__)
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#define rnd_cpufeats ( \
+	(CPU_SSE2 * !!(cpu_feature & CPUID_SSE2)) | \
+	(CPU_SSSE3 * !!(cpu_feature2 & CPUID2_SSSE3)) | \
+	(CPU_AESNI * !!(cpu_feature2 & CPUID2_AESNI)) | \
+	(CPU_AVX * !!(cpu_feature2 & CPUID2_AVX)) | \
+	(CPU_AVX2 * !!(cpu_stdext_feature & CPUID_STDEXT_AVX2)) | \
+	(CPU_XOP * !!(amd_feature2 & AMDID2_XOP)) | \
+	(CPU_RDRAND * !!(cpu_feature2 & CPUID2_RDRAND)) | \
+	(CPU_TSC * !!(cpu_feature & CPUID_TSC)) \
+	)
+# else
+#  define rnd_cpufeats 0
+# endif
+#else
+extern unsigned int rnd_cpufeats;	/* defined in intrinsics.c */
+#endif
+#define CPU_HAVE(x) (MIGHT_HAVE_ ## x && rnd_cpufeats & CPU_ ## x)
+
+uint64_t cpu_random(void);
+#if MIGHT_HAVE_RDRAND
+uint64_t rdrand(void);
+#endif
+
+#endif // __CRANDOM_INTRINSICS_H__
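The dispatch idiom the header is built around: MIGHT_HAVE_* bounds what the compiler may emit, MUST_HAVE_* what it may assume, and CPU_HAVE() folds the compile-time bound into the runtime rnd_cpufeats mask so impossible branches compile away. A sketch of a caller (chacha_pick_impl() is illustrative only, not in the patch):

	#include "intrinsics.h"

	/* returns which quarter-round flavour chacha.c would select */
	int
	chacha_pick_impl(void)
	{
		if (CPU_HAVE(XOP))
			return (3);	/* _mm_roti_epi32 rotates */
		if (CPU_HAVE(SSSE3))
			return (2);	/* pshufb rotates for 8/16 */
		if (CPU_HAVE(SSE2))
			return (1);	/* shift+xor rotates */
		return (0);		/* portable u_int32_t path */
	}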
Index: crypto/intrinsics.c
===================================================================
--- crypto/intrinsics.c
+++ crypto/intrinsics.c
@@ -0,0 +1,93 @@
+/*
+ * Heavily based upon:
+ * https://github.com/bitwiseshiftleft/crandom/blob/master/intrinsics.c
+ */
+#include "intrinsics.h"
+
+#ifndef _KERNEL
+unsigned int rnd_cpufeats;
+
+__attribute__((constructor))
+void
+detect_features(void)
+{
+#if (defined(__i386__) || defined(__x86_64__))
+	uint32_t a, b, c, d;
+	unsigned int out;
+
+	/* XXX - add support for detecting CPUID instruction */
+
+	out = 0;
+
+	a = 1;
+	__asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d));
+
+	if (c & (1 << 9)) out |= CPU_SSSE3;	/* leaf 1 %ecx, not %edx */
+	if (c & (1 << 25)) out |= CPU_AESNI;
+	if (c & (1 << 28)) out |= CPU_AVX;
+	if (c & (1 << 30)) out |= CPU_RDRAND;	/* leaf 1, not 0x80000001 */
+	if (d & (1 << 26)) out |= CPU_SSE2;
+
+	a = 7; c = 0;	/* AVX2 lives in leaf 7, subleaf 0, %ebx */
+	__asm__("cpuid" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
+
+	if (b & (1 << 5)) out |= CPU_AVX2;
+
+	a = 0x80000001;
+	__asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d));
+
+	if (c & (1 << 11)) out |= CPU_XOP;
+
+	rnd_cpufeats = out;
+#endif
+}
+#endif
+
+#if MIGHT_HAVE_RDRAND
+uint64_t
+rdrand()
+{
+	int a;
+	uint64_t out;
+
+#if defined(__x86_64__)
+	a = 0;
+	while (!a) {
+		__asm__ __volatile__ ("rdrand %0\n setc %%al" :
+		    "=r"(out), "+a"(a) :: "cc");
+	}
+#elif defined(__i386__)
+	uint32_t out1, out2;
+	a = 0;
+	while (!a) {
+		__asm__ __volatile__ ("rdrand %0\n setc %%al" :
+		    "=r"(out1), "+a"(a) :: "cc");
+	}
+	/* XXX - or should we only read once? */
+	a = 0;
+	while (!a) {
+		__asm__ __volatile__ ("rdrand %0\n setc %%al" :
+		    "=r"(out2), "+a"(a) :: "cc");
+	}
+	out = ((uint64_t)out1 << 32) | out2;
+#else
+	abort();
+#endif
+	return out;
+}
+#endif
+
+/*
+ * Best effort randomness
+ */
+uint64_t
+cpu_random(void)
+{
+	uint64_t out;
+
+#if MIGHT_HAVE_RDRAND
+	if (CPU_HAVE(RDRAND)) {
+		out = rdrand();
+		if (!out)
+			out = rdtsc();
+	} else
+#endif
+	if (MUST_HAVE_TSC || CPU_HAVE(TSC))
+		out = rdtsc();
+
+	return out;
+}
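cpu_random() is best-effort uniqueness, not entropy: RDRAND when present, otherwise the TSC, otherwise nothing useful. rnd_core.c below only uses it as a per-fill IV, so two states that were somehow seeded identically still diverge. A hypothetical userland probe (link against intrinsics.c, whose constructor fills rnd_cpufeats):

	#include <stdio.h>

	#include "intrinsics.h"

	int
	main(void)
	{
		printf("%016llx\n%016llx\n",
		    (unsigned long long)cpu_random(),
		    (unsigned long long)cpu_random());
		return (0);
	}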
Index: crypto/rnd.h
===================================================================
--- crypto/rnd.h
+++ crypto/rnd.h
@@ -0,0 +1,61 @@
+#include <sys/types.h>
+#ifndef _KERNEL
+#include <stdint.h>
+#endif
+
+#include "intrinsics.h"
+
+#if 1
+#define RND_STATESIZE	32
+#define RND_BUFSIZE	(384 - RND_STATESIZE)
+#define RND_EXTRASTATE	(0)
+#elif 0
+#define RND_STATESIZE	(15*16)
+#define RND_BUFSIZE	(3200 - RND_STATESIZE)
+#define RND_EXTRASTATE	(0*15*16)
+#elif 1
+#define RND_STATESIZE	32
+#define RND_BUFSIZE	(384 - RND_STATESIZE)
+#define RND_EXTRASTATE	(15*16)
+#endif
+
+
+#define RND_RESEEDINT	((1024*1024 + RND_BUFSIZE - 1) / RND_BUFSIZE)
+
+/* internal support function */
+#ifdef _KERNEL
+#include <sys/random.h>
+#define rnd_truerandom(x, y)	read_random((x), (y))
+#else
+void rnd_truerandom(unsigned char *buf, size_t cnt);
+#endif
+
+/* module provided defines */
+void rndi_init(unsigned char *state);
+void rndi_reseed(unsigned char *state, size_t total);
+void rndi_expand(uint64_t iv, uint64_t *cntr, size_t cnt,
+    unsigned char *instate, unsigned char *outstate);
+void rndi_postrekey(unsigned char *state, size_t total);
+
+#define fitcnt(x, y)	(((x) + (y) - 1) / (y))
+
+struct rnd_state {
+	/* XXX - figure out correct order for these */
+	uint64_t	magic;
+#define RND_MAGIC	(0x20150130l)
+	unsigned int	seedgen;
+	int		toreseed;
+	int		bufbytes;
+	uint64_t	cntr;
+	/*
+	 * NB: following three buffers must be together, as _expand writes
+	 * to them in a single call.
+	 */
+	unsigned char	buf[RND_BUFSIZE] __aligned(16);
+	unsigned char	prngstate[RND_STATESIZE] __aligned(16);
+	unsigned char	extrastate[RND_EXTRASTATE] __aligned(16);
+};
+
+/* public interface */
+void rnd_generate(unsigned char *out, size_t len);
+void rnd_reseed(void);
Index: crypto/rnd_chacha.c
===================================================================
--- crypto/rnd_chacha.c
+++ crypto/rnd_chacha.c
@@ -0,0 +1,44 @@
+#include <sys/param.h>
+
+#include "rnd.h"
+
+void crandom_chacha_expand(u_int64_t iv, u_int64_t ctr, int nr, int output_size, const unsigned char *key_, unsigned char *output_);
+
+#if RND_STATESIZE < 32
+#error RND_STATESIZE not large enough
+#endif
+
+void
+rndi_init(unsigned char *state)
+{
+
+	rnd_truerandom(state, 32);
+}
+
+void
+rndi_expand(uint64_t iv, uint64_t *cntr, size_t cnt,
+    unsigned char *instate, unsigned char *outstate)
+{
+
+	crandom_chacha_expand(iv, *cntr, 12, cnt, instate, outstate);
+	*cntr += cnt / 64;
+}
+
+void
+rndi_postrekey(unsigned char *state, size_t total)
+{
+}
+
+void
+rndi_reseed(unsigned char *state, size_t total)
+{
+	uint64_t buf[total / sizeof(uint64_t)];
+	uint64_t *st;
+	int i;
+
+	rnd_truerandom((unsigned char *)buf, sizeof buf);
+
+	st = (uint64_t *)state;
+	for (i = 0; i < nitems(buf); i++)
+		st[i] ^= buf[i];
+}
Index: crypto/rnd_core.c
===================================================================
--- crypto/rnd_core.c
+++ crypto/rnd_core.c
@@ -0,0 +1,85 @@
+#include <sys/types.h>
+#include <sys/param.h>
+#ifdef _KERNEL
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sched.h>
+#include <machine/atomic.h>
+#else
+#include <stdio.h>
+#include <string.h>
+#include <machine/atomic.h>
+#endif
+
+#include "rnd.h"
+
+#include "intrinsics.h"
+
+static unsigned int rnd_seedgeneration;
+#ifdef _KERNEL
+#include <sys/pcpu.h>
+DPCPU_DEFINE(struct rnd_state, pcpurndstate);
+#else
+static /* XXX - __thread */ struct rnd_state rndstate;
+#endif
+
+void
+rnd_reseed(void)
+{
+
+	atomic_add_rel_int(&rnd_seedgeneration, 1);
+}
+
+void
+rnd_generate(unsigned char *out, size_t len)
+{
+	struct rnd_state *_rs;
+	size_t num;
+
+#ifdef _KERNEL
+	sched_pin();
+	_rs = DPCPU_PTR(pcpurndstate);
+#else
+	_rs = &rndstate;
+#endif
+
+	if (_rs->magic != RND_MAGIC) {
+		*_rs = (struct rnd_state){
+			.magic = RND_MAGIC,
+		};
+		rndi_init(_rs->prngstate);
+	}
+
+	while (len) {
+		if (_rs->toreseed == 0 ||
+		    _rs->seedgen != rnd_seedgeneration) {
+			printf("reseeding: %p, %d, %u\n", _rs, _rs->bufbytes, _rs->seedgen);
+			rndi_reseed(_rs->prngstate, RND_STATESIZE);
+			_rs->toreseed = RND_RESEEDINT;
+			_rs->seedgen = rnd_seedgeneration;
+			_rs->bufbytes = 0;
+		}
+
+		if (_rs->bufbytes == 0) {
+			uint64_t iv;
+			iv = cpu_random();
+
+			/* fill buffer & rekey */
+			rndi_expand(iv, &_rs->cntr, RND_BUFSIZE + RND_STATESIZE,
+			    _rs->prngstate, _rs->buf);
+			rndi_postrekey(_rs->prngstate, RND_STATESIZE);
+			_rs->bufbytes = sizeof _rs->buf;
+			_rs->toreseed--;
+		}
+
+		num = MIN(len, _rs->bufbytes);
+		_rs->bufbytes -= num;
+		memcpy(out, &_rs->buf[_rs->bufbytes], num);
+		memset(&_rs->buf[_rs->bufbytes], 0, num);
+
+		out += num;
+		len -= num;
+	}
+
+#ifdef _KERNEL
+	sched_unpin();
+#endif
+}
Index: dev/random/random_adaptors.c
===================================================================
--- dev/random/random_adaptors.c
+++ dev/random/random_adaptors.c
@@ -366,9 +366,6 @@
 	selwakeuppri(&rsel, PUSER);
 	wakeup(&random_adaptor);
 	printf("random: unblocking device.\n");
-
-	/* Do arc4random(9) a favour while we are about it. */
-	(void)atomic_cmpset_int(&arc4rand_iniseed_state, ARC4_ENTR_NONE, ARC4_ENTR_HAVE);
 }
 
 static int
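Worth spelling out the arithmetic behind the rnd.h constants. With the active #if 1 block, each rndi_expand() fill produces 384 bytes in one call: RND_BUFSIZE (352) of them are handed out to callers and the trailing RND_STATESIZE (32) land directly in prngstate, overwriting the ChaCha key that generated them, so a captured state cannot be run backwards to recover past output. RND_RESEEDINT then forces a rnd_truerandom() reseed roughly once per megabyte. A sketch of that bookkeeping (standalone, illustrative only):

	#include <stdio.h>

	int
	main(void)
	{
		int statesize = 32;			/* RND_STATESIZE */
		int bufsize = 384 - statesize;		/* RND_BUFSIZE */
		int reseedint = (1024*1024 + bufsize - 1) / bufsize;

		printf("%d bytes out + %d bytes new key per fill\n",
		    bufsize, statesize);
		printf("reseed every %d fills (%d bytes)\n",
		    reseedint, reseedint * bufsize);
		return (0);
	}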
Index: libkern/arc4random.c
===================================================================
--- libkern/arc4random.c
+++ libkern/arc4random.c
@@ -1,149 +1,46 @@
 /*-
- * THE BEER-WARE LICENSE
+ * Copyright 2015 John-Mark Gurney.
+ * All rights reserved.
  *
- * <dan@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
- * can do whatever you want with this stuff. If we meet some day, and you
- * think this stuff is worth it, you can buy me a beer in return.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
  *
- * Dan Moschuk
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id$
+ *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/random.h>
 #include <sys/libkern.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/time.h>
+#include <crypto/rnd.h>
 
-#define	ARC4_RESEED_BYTES 65536
-#define	ARC4_RESEED_SECONDS 300
-#define	ARC4_KEYBYTES (256 / 8)
-
-int arc4rand_iniseed_state = ARC4_ENTR_NONE;
-
-static u_int8_t arc4_i, arc4_j;
-static int arc4_numruns = 0;
-static u_int8_t arc4_sbox[256];
-static time_t arc4_t_reseed;
-static struct mtx arc4_mtx;
-
-static u_int8_t arc4_randbyte(void);
-
-static __inline void
-arc4_swap(u_int8_t *a, u_int8_t *b)
-{
-	u_int8_t c;
-
-	c = *a;
-	*a = *b;
-	*b = c;
-}
-
-/*
- * Stir our S-box.
- */
-static void
-arc4_randomstir (void)
-{
-	u_int8_t key[256];
-	int r, n;
-	struct timeval tv_now;
-
-	/*
-	 * XXX read_random() returns unsafe numbers if the entropy
-	 * device is not loaded -- MarkM.
-	 */
-	r = read_random(key, ARC4_KEYBYTES);
-	getmicrouptime(&tv_now);
-	mtx_lock(&arc4_mtx);
-	/* If r == 0 || -1, just use what was on the stack. */
-	if (r > 0) {
-		for (n = r; n < sizeof(key); n++)
-			key[n] = key[n % r];
-	}
-
-	for (n = 0; n < 256; n++) {
-		arc4_j = (arc4_j + arc4_sbox[n] + key[n]) % 256;
-		arc4_swap(&arc4_sbox[n], &arc4_sbox[arc4_j]);
-	}
-	arc4_i = arc4_j = 0;
-
-	/* Reset for next reseed cycle. */
-	arc4_t_reseed = tv_now.tv_sec + ARC4_RESEED_SECONDS;
-	arc4_numruns = 0;
-
-	/*
-	 * Throw away the first N words of output, as suggested in the
-	 * paper "Weaknesses in the Key Scheduling Algorithm of RC4"
-	 * by Fluher, Mantin, and Shamir.  (N = 256 in our case.)
-	 */
-	for (n = 0; n < 256*4; n++)
-		arc4_randbyte();
-	mtx_unlock(&arc4_mtx);
-}
-
-/*
- * Initialize our S-box to its beginning defaults.
- */
-static void
-arc4_init(void)
-{
-	int n;
-
-	mtx_init(&arc4_mtx, "arc4_mtx", NULL, MTX_DEF);
-	arc4_i = arc4_j = 0;
-	for (n = 0; n < 256; n++)
-		arc4_sbox[n] = (u_int8_t) n;
-
-	arc4_t_reseed = 0;
-}
-
-SYSINIT(arc4_init, SI_SUB_LOCK, SI_ORDER_ANY, arc4_init, NULL);
-
-/*
- * Generate a random byte.
- */
-static u_int8_t
-arc4_randbyte(void)
-{
-	u_int8_t arc4_t;
-
-	arc4_i = (arc4_i + 1) % 256;
-	arc4_j = (arc4_j + arc4_sbox[arc4_i]) % 256;
-
-	arc4_swap(&arc4_sbox[arc4_i], &arc4_sbox[arc4_j]);
-
-	arc4_t = (arc4_sbox[arc4_i] + arc4_sbox[arc4_j]) % 256;
-	return arc4_sbox[arc4_t];
-}
-
-/*
- * MPSAFE
- */
 void
 arc4rand(void *ptr, u_int len, int reseed)
 {
-	u_char *p;
-	struct timeval tv;
 
-	getmicrouptime(&tv);
-	if (atomic_cmpset_int(&arc4rand_iniseed_state, ARC4_ENTR_HAVE,
-	    ARC4_ENTR_SEED) || reseed ||
-	    (arc4_numruns > ARC4_RESEED_BYTES) ||
-	    (tv.tv_sec > arc4_t_reseed))
-		arc4_randomstir();
+	if (reseed)
+		rnd_reseed();
 
-	mtx_lock(&arc4_mtx);
-	arc4_numruns += len;
-	p = ptr;
-	while (len--)
-		*p++ = arc4_randbyte();
-	mtx_unlock(&arc4_mtx);
+	rnd_generate(ptr, len);
 }
 
 uint32_t
@@ -151,6 +48,7 @@
 {
 	uint32_t ret;
 
-	arc4rand(&ret, sizeof ret, 0);
-	return ret;
+	rnd_generate((unsigned char *)&ret, sizeof ret);
+
+	return (ret);
 }
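Finally, the whole stack can be exercised outside the kernel, since rnd_core.c falls back to a single static state and rnd.h leaves rnd_truerandom() to the caller. A hypothetical harness, not part of the patch (link with rnd_core.c, rnd_chacha.c, chacha.c and intrinsics.c; the /dev/urandom-backed rnd_truerandom() here is only a stand-in for read_random(9)):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#include "rnd.h"

	void
	rnd_truerandom(unsigned char *buf, size_t cnt)
	{
		FILE *fp;

		fp = fopen("/dev/urandom", "r");
		if (fp == NULL || fread(buf, 1, cnt, fp) != cnt)
			abort();
		fclose(fp);
	}

	int
	main(void)
	{
		unsigned char a[16], b[16];

		rnd_generate(a, sizeof(a));
		rnd_reseed();		/* every state rekeys on next use */
		rnd_generate(b, sizeof(b));

		/* fail if the generator repeats itself across a reseed */
		return (memcmp(a, b, sizeof(a)) == 0);
	}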