diff --git a/lib/libc/gen/arc4random.3 b/lib/libc/gen/arc4random.3 --- a/lib/libc/gen/arc4random.3 +++ b/lib/libc/gen/arc4random.3 @@ -30,7 +30,7 @@ .\" .\" Manual page, using -mandoc macros .\" -.Dd April 13, 2020 +.Dd November 18, 2024 .Dt ARC4RANDOM 3 .Os .Sh NAME @@ -129,6 +129,17 @@ .%O Document ID: 4027b5256e17b9796842e6d0f68b0b5e .%U http://cr.yp.to/papers.html#chacha .Re +.Rs +.%A Daniel Lemire +.%T Fast Random Integer Generation in an Interval +.%D January 2019 +.%J ACM Trans. Model. Comput. Simul. +.%I Association for Computing Machinery +.%C New York, NY, USA +.%V vol. 29 +.%N no. 1 +.%P pp. 1\(en12 +.Re .Sh HISTORY These functions first appeared in .Ox 2.1 . diff --git a/lib/libc/gen/arc4random_uniform.c b/lib/libc/gen/arc4random_uniform.c --- a/lib/libc/gen/arc4random_uniform.c +++ b/lib/libc/gen/arc4random_uniform.c @@ -1,56 +1,47 @@ -/* $OpenBSD: arc4random_uniform.c,v 1.3 2019/01/20 02:59:07 bcook Exp $ */ - -/* - * Copyright (c) 2008, Damien Miller +/*- + * SPDX-License-Identifier: 0BSD * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. + * Copyright (c) Robert Clausecker + * Based on a publication by Daniel Lemire. + * Public domain where applicable. * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * Daniel Lemire, "Fast Random Integer Generation in an Interval", + * Association for Computing Machinery, ACM Trans. Model. Comput. Simul., + * no. 1, vol. 29, pp. 1--12, New York, NY, USA, January 2019. */ #include #include -/* - * Calculate a uniformly distributed random number less than upper_bound - * avoiding "modulo bias". - * - * Uniformity is achieved by generating new random numbers until the one - * returned is outside the range [0, 2**32 % upper_bound). This - * guarantees the selected random number will be inside - * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound) - * after reduction modulo upper_bound. - */ uint32_t arc4random_uniform(uint32_t upper_bound) { - uint32_t r, min; - - if (upper_bound < 2) - return 0; - - /* 2**32 % x == (2**32 - x) % x */ - min = -upper_bound % upper_bound; + uint64_t product; /* - * This could theoretically loop forever but each retry has - * p > 0.5 (worst case, usually far better) of selecting a - * number inside the range we need, so it should rarely need - * to re-roll. + * The paper uses these variable names: + * + * L -- log2(UINT32_MAX+1) + * s -- upper_bound + * x -- arc4random() return value + * m -- product + * l -- (uint32_t)product + * t -- threshold */ - for (;;) { - r = arc4random(); - if (r >= min) - break; + + if (upper_bound <= 1) + return (0); + + product = upper_bound * (uint64_t)arc4random(); + + if ((uint32_t)product < upper_bound) { + uint32_t threshold; + + /* threshold = (2**32 - upper_bound) % upper_bound */ + threshold = -upper_bound % upper_bound; + while ((uint32_t)product < threshold) + product = upper_bound * (uint64_t)arc4random(); } - return r % upper_bound; + return (product >> 32); } diff --git a/lib/libc/tests/gen/arc4random_test.c b/lib/libc/tests/gen/arc4random_test.c --- a/lib/libc/tests/gen/arc4random_test.c +++ b/lib/libc/tests/gen/arc4random_test.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 2011 David Schultz + * Copyright (c) 2024 Robert Clausecker * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -77,10 +79,34 @@ "sequences are the same"); } +/* + * Test whether arc4random_uniform() returns a number below the given threshold. + * Test with various thresholds. + */ +ATF_TC_WITHOUT_HEAD(test_arc4random_uniform); +ATF_TC_BODY(test_arc4random_uniform, tc) +{ + size_t i, j; + static const uint32_t thresholds[] = { + 1, 2, 3, 4, 5, 10, 100, 1000, + INT32_MAX, (uint32_t)INT32_MAX + 1, + UINT32_MAX - 1000000000, UINT32_MAX - 1000000, UINT32_MAX - 1, 0 + }; + + for (i = 0; thresholds[i] != 0; i++) + for (j = 0; j < 10000; j++) + ATF_CHECK(arc4random_uniform(thresholds[i]) < thresholds[i]); + + /* for a threshold of zero, just check that we get zero every time */ + for (j = 0; j < 1000; j++) + ATF_CHECK_EQ(0, arc4random_uniform(0)); +} + ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, test_arc4random); + ATF_TP_ADD_TC(tp, test_arc4random_uniform); return (atf_no_error()); } diff --git a/tools/test/README b/tools/test/README --- a/tools/test/README +++ b/tools/test/README @@ -1,4 +1,3 @@ - This directory is for standalone test programs. For the FreeBSD Test Suite, which uses Kyua, please see /usr/src/tests/ @@ -7,6 +6,7 @@ Please make a subdir per program, and add a brief description to this file. +arc4random Bias test for arc4random_uniform() auxinfo Return information on page sizes, CPUs, and OS release date. devrandom Programs to test /dev/*random. gpioevents Test delivery of gpio pin-change events to userland. diff --git a/tools/test/arc4random/biastest.c b/tools/test/arc4random/biastest.c new file mode 100644 --- /dev/null +++ b/tools/test/arc4random/biastest.c @@ -0,0 +1,216 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Robert Clausecker + * + * biastest.c -- bias test for arc4random_uniform(). + * + * The default configuration of this test has an upper bound of + * (3/4) * UINT32_MAX, which should give a high amount of bias in + * an incorrect implementation. If the range reduction is + * implemented correctly, the parameters of the statistic should + * closely match the expected values. If not, they'll differ. + * + * For memory usage reasons, we use an uchar to track the number of + * observations per bucket. If the number of tries is much larger + * than upper_bound, the buckets likely overflow. This is detected + * by the test, but will lead to incorrect results. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void collect_sample(unsigned char *, long long, uint32_t); +static void analyze_sample(const unsigned char *, long long, uint32_t); + +static atomic_bool complete = false; +static long long tries = 5ULL << 32; +static atomic_llong tries_done = 0; + +static void +usage(const char *argv0) +{ + fprintf(stderr, "usage: %s [-n tries] [-t upper_bound]\n", argv0); + exit(EXIT_FAILURE); +} + +int +main(int argc, char *argv[]) +{ + uint32_t threshold = 3UL << 30; + int ch; + unsigned char *sample; + + while (ch = getopt(argc, argv, "n:t:"), ch != EOF) + switch (ch) { + case 'n': + tries = atoll(optarg); + break; + + case 't': + threshold = (uint32_t)atoll(optarg); + break; + + default: + usage(argv[0]); + } + + if (optind != argc) + usage(argv[0]); + + if (threshold == 0) { + fprintf(stderr, "threshold must be between 1 and %lu\n", (unsigned long)UINT32_MAX); + exit(EXIT_FAILURE); + } + + sample = calloc(threshold, 1); + if (sample == NULL) { + perror("calloc(threshold, 1)"); + return (EXIT_FAILURE); + } + + collect_sample(sample, tries, threshold); + analyze_sample(sample, tries, threshold); +} + +static void +progress(int signo) +{ + (void)signo; + + if (!complete) { + fprintf(stderr, "\r%10lld of %10lld samples taken (%5.2f%% done)", + tries_done, tries, (tries_done * 100.0) / tries); + + signal(SIGALRM, progress); + alarm(1); + } +} + +static void +collect_sample(unsigned char *sample, long long tries, uint32_t threshold) +{ + long long i; + uint32_t x; + bool overflowed = false; + + progress(SIGALRM); + + for (i = 0; i < tries; i++) { + x = arc4random_uniform(threshold); + tries_done++; + assert(x < threshold); + + if (sample[x] == UCHAR_MAX) { + if (!overflowed) { + printf("sample table overflow, results will be incorrect\n"); + overflowed = true; + } + } else + sample[x]++; + } + + progress(SIGALRM); + complete = true; + fputc('\n', stderr); +} + +static void +analyze_sample(const unsigned char *sample, long long tries, uint32_t threshold) +{ + double discrepancy, average, variance, total; + long long histogram[UCHAR_MAX + 1] = { 0 }, sum, n, median; + uint32_t i, i_min, i_max; + int min, max; + + printf("distribution properties:\n"); + + /* find median, average, deviation, smallest, and largest bucket */ + total = 0.0; + for (i = 0; i < threshold; i++) { + histogram[sample[i]]++; + total += (double)i * sample[i]; + } + + average = total / tries; + + variance = 0.0; + median = threshold; + n = 0; + i_min = 0; + i_max = 0; + min = sample[i_min]; + max = sample[i_max]; + + for (i = 0; i < threshold; i++) { + discrepancy = i - average; + variance += sample[i] * discrepancy * discrepancy; + + n += sample[i]; + if (median == threshold && n > tries / 2) + median = i; + + if (sample[i] < min) { + i_min = i; + min = sample[i_min]; + } else if (sample[i] > max) { + i_max = i; + max = sample[i_max]; + } + } + + variance /= tries; + assert(median < threshold); + + printf("\tthreshold: %lu\n", (unsigned long)threshold); + printf("\tobservations: %lld\n", tries); + printf("\tleast common: %lu (%d observations)\n", (unsigned long)i_min, min); + printf("\tmost common: %lu (%d observations)\n", (unsigned long)i_max, max); + printf("\tmedian: %lld (expected %lu)\n", median, (unsigned long)threshold / 2); + printf("\taverage: %f (expected %f)\n", average, 0.5 * (threshold - 1)); + printf("\tdeviation: %f (expected %f)\n\n", sqrt(variance), + sqrt(((double)threshold * threshold - 1.0) / 12)); + + /* build histogram and analyze it */ + printf("sample properties:\n"); + + /* find median, average, and deviation */ + average = (double)tries / threshold; + + variance = 0.0; + for (i = 0; i < UCHAR_MAX; i++) { + discrepancy = i - average; + variance += histogram[i] * discrepancy * discrepancy; + } + + variance /= threshold; + + n = 0; + median = UCHAR_MAX + 1; + for (i = 0; i <= UCHAR_MAX; i++) { + n += histogram[i]; + if (n >= threshold / 2) { + median = i; + break; + } + } + + assert(median <= UCHAR_MAX); /* unreachable */ + + printf("\tmedian: %lld\n", median); + printf("\taverage: %f\n", average); + printf("\tdeviation: %f (expected %f)\n\n", sqrt(variance), sqrt(average * (1.0 - 1.0 / threshold))); + + printf("histogram:\n"); + for (i = 0; i < 256; i++) + if (histogram[i] != 0) + printf("\t%3d:\t%lld\n", (int)i, histogram[i]); +}