diff --git a/lib/libc/stdlib/qsort.c b/lib/libc/stdlib/qsort.c index dc0a5452161e..8a2d5ae97cd5 100644 --- a/lib/libc/stdlib/qsort.c +++ b/lib/libc/stdlib/qsort.c @@ -1,248 +1,235 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include "libc_private.h" #if defined(I_AM_QSORT_R) typedef int cmp_t(void *, const void *, const void *); #elif defined(I_AM_QSORT_S) typedef int cmp_t(const void *, const void *, void *); #else typedef int cmp_t(const void *, const void *); #endif static inline char *med3(char *, char *, char *, cmp_t *, void *); #define MIN(a, b) ((a) < (b) ? a : b) /* * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function". */ static inline void swapfunc(char *a, char *b, size_t es) { char t; do { t = *a; *a++ = *b; *b++ = t; } while (--es > 0); } #define vecswap(a, b, n) \ if ((n) > 0) swapfunc(a, b, n) #if defined(I_AM_QSORT_R) #define CMP(t, x, y) (cmp((t), (x), (y))) #elif defined(I_AM_QSORT_S) #define CMP(t, x, y) (cmp((x), (y), (t))) #else #define CMP(t, x, y) (cmp((x), (y))) #endif static inline char * med3(char *a, char *b, char *c, cmp_t *cmp, void *thunk #if !defined(I_AM_QSORT_R) && !defined(I_AM_QSORT_S) __unused #endif ) { return CMP(thunk, a, b) < 0 ? (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a )) :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c )); } /* * The actual qsort() implementation is static to avoid preemptible calls when * recursing. Also give them different names for improved debugging. */ #if defined(I_AM_QSORT_R) #define local_qsort local_qsort_r #elif defined(I_AM_QSORT_S) #define local_qsort local_qsort_s #endif static void local_qsort(void *a, size_t n, size_t es, cmp_t *cmp, void *thunk) { char *pa, *pb, *pc, *pd, *pl, *pm, *pn; size_t d1, d2; int cmp_result; - int swap_cnt; /* if there are less than 2 elements, then sorting is not needed */ if (__predict_false(n < 2)) return; loop: - swap_cnt = 0; if (n < 7) { for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) for (pl = pm; pl > (char *)a && CMP(thunk, pl - es, pl) > 0; pl -= es) swapfunc(pl, pl - es, es); return; } pm = (char *)a + (n / 2) * es; if (n > 7) { pl = a; pn = (char *)a + (n - 1) * es; if (n > 40) { size_t d = (n / 8) * es; pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk); pm = med3(pm - d, pm, pm + d, cmp, thunk); pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk); } pm = med3(pl, pm, pn, cmp, thunk); } swapfunc(a, pm, es); pa = pb = (char *)a + es; pc = pd = (char *)a + (n - 1) * es; for (;;) { while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) { if (cmp_result == 0) { - swap_cnt = 1; swapfunc(pa, pb, es); pa += es; } pb += es; } while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) { if (cmp_result == 0) { - swap_cnt = 1; swapfunc(pc, pd, es); pd -= es; } pc -= es; } if (pb > pc) break; swapfunc(pb, pc, es); - swap_cnt = 1; pb += es; pc -= es; } - if (swap_cnt == 0) { /* Switch to insertion sort */ - for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) - for (pl = pm; - pl > (char *)a && CMP(thunk, pl - es, pl) > 0; - pl -= es) - swapfunc(pl, pl - es, es); - return; - } pn = (char *)a + n * es; d1 = MIN(pa - (char *)a, pb - pa); vecswap(a, pb - d1, d1); d1 = MIN(pd - pc, pn - pd - es); vecswap(pb, pn - d1, d1); d1 = pb - pa; d2 = pd - pc; if (d1 <= d2) { /* Recurse on left partition, then iterate on right partition */ if (d1 > es) { local_qsort(a, d1 / es, es, cmp, thunk); } if (d2 > es) { /* Iterate rather than recurse to save stack space */ /* qsort(pn - d2, d2 / es, es, cmp); */ a = pn - d2; n = d2 / es; goto loop; } } else { /* Recurse on right partition, then iterate on left partition */ if (d2 > es) { local_qsort(pn - d2, d2 / es, es, cmp, thunk); } if (d1 > es) { /* Iterate rather than recurse to save stack space */ /* qsort(a, d1 / es, es, cmp); */ n = d1 / es; goto loop; } } } #if defined(I_AM_QSORT_R) void qsort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp) { local_qsort_r(a, n, es, cmp, thunk); } #elif defined(I_AM_QSORT_S) errno_t qsort_s(void *a, rsize_t n, rsize_t es, cmp_t *cmp, void *thunk) { if (n > RSIZE_MAX) { __throw_constraint_handler_s("qsort_s : n > RSIZE_MAX", EINVAL); return (EINVAL); } else if (es > RSIZE_MAX) { __throw_constraint_handler_s("qsort_s : es > RSIZE_MAX", EINVAL); return (EINVAL); } else if (n != 0) { if (a == NULL) { __throw_constraint_handler_s("qsort_s : a == NULL", EINVAL); return (EINVAL); } else if (cmp == NULL) { __throw_constraint_handler_s("qsort_s : cmp == NULL", EINVAL); return (EINVAL); } else if (es <= 0) { __throw_constraint_handler_s("qsort_s : es <= 0", EINVAL); return (EINVAL); } } local_qsort_s(a, n, es, cmp, thunk); return (0); } #else void qsort(void *a, size_t n, size_t es, cmp_t *cmp) { local_qsort(a, n, es, cmp, NULL); } #endif diff --git a/lib/libc/tests/stdlib/Makefile b/lib/libc/tests/stdlib/Makefile index 9df98bd4f435..b86dcb28cb1e 100644 --- a/lib/libc/tests/stdlib/Makefile +++ b/lib/libc/tests/stdlib/Makefile @@ -1,83 +1,84 @@ .include ATF_TESTS_C+= cxa_atexit_test ATF_TESTS_C+= dynthr_test ATF_TESTS_C+= heapsort_test ATF_TESTS_C+= mergesort_test ATF_TESTS_C+= qsort_test .if ${COMPILER_TYPE} == "clang" ATF_TESTS_C+= qsort_b_test .endif ATF_TESTS_C+= qsort_r_test ATF_TESTS_C+= qsort_s_test +ATF_TESTS_C+= qsort_bench ATF_TESTS_C+= quick_exit_test ATF_TESTS_C+= set_constraint_handler_s_test ATF_TESTS_C+= strfmon_test ATF_TESTS_C+= tsearch_test ATF_TESTS_CXX+= cxa_thread_atexit_test ATF_TESTS_CXX+= cxa_thread_atexit_nothr_test # All architectures on FreeBSD have fenv.h CFLAGS+= -D__HAVE_FENV # Define __HAVE_LONG_DOUBLE for architectures whose long double has greater # precision than their double. .if ${MACHINE_CPUARCH} == "aarch64" || \ ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_CPUARCH} == "i386" || \ ${MACHINE_CPUARCH} == "riscv" CFLAGS+= -D__HAVE_LONG_DOUBLE .endif # TODO: t_getenv_thread, t_mi_vector_hash, t_strtoi NETBSD_ATF_TESTS_C+= abs_test NETBSD_ATF_TESTS_C+= atoi_test NETBSD_ATF_TESTS_C+= div_test NETBSD_ATF_TESTS_C+= getenv_test NETBSD_ATF_TESTS_C+= exit_test NETBSD_ATF_TESTS_C+= hsearch_test NETBSD_ATF_TESTS_C+= posix_memalign_test NETBSD_ATF_TESTS_C+= random_test NETBSD_ATF_TESTS_C+= strtod_test NETBSD_ATF_TESTS_C+= strtol_test NETBSD_ATF_TESTS_C+= system_test # TODO: need to come up with a correct explanation of what the patch pho does # with h_atexit #ATF_TESTS_SH= atexit_test NETBSD_ATF_TESTS_SH= getopt_test .include "../Makefile.netbsd-tests" BINDIR= ${TESTSDIR} # TODO: see comment above #PROGS+= h_atexit PROGS+= h_getopt h_getopt_long CFLAGS+= -I${.CURDIR} CXXSTD.cxa_thread_atexit_test= c++11 CXXSTD.cxa_thread_atexit_nothr_test= c++11 LIBADD.cxa_thread_atexit_test+= pthread # Tests that requires Blocks feature .for t in qsort_b_test CFLAGS.${t}.c+= -fblocks LIBADD.${t}+= BlocksRuntime .endfor .for t in h_getopt h_getopt_long CFLAGS.$t+= -I${LIBNETBSD_SRCDIR} -I${SRCTOP}/contrib/netbsd-tests LDFLAGS.$t+= -L${LIBNETBSD_OBJDIR} LIBADD.${t}+= netbsd util .endfor LIBADD.strtod_test+= m SUBDIR+= dynthr_mod SUBDIR+= libatexit .include diff --git a/lib/libc/tests/stdlib/qsort_bench.c b/lib/libc/tests/stdlib/qsort_bench.c new file mode 100644 index 000000000000..5f2cfae40140 --- /dev/null +++ b/lib/libc/tests/stdlib/qsort_bench.c @@ -0,0 +1,113 @@ +/*- + * Copyright (c) 2025 Dag-Erling Smørgrav + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include + +/*- + * Measures qsort(3) runtime with pathological input and verify that it + * stays close to N * log2(N). + * + * Thanks to Vivian Hussey for the proof of concept. + * + * The input we construct is similar to a sweep from 0 to N where each + * half, except for the first element, has been reversed; for instance, + * with N = 8, we get { 0, 3, 2, 1, 4, 8, 7, 6 }. This triggers a bug in + * the BSD qsort(3) where it will switch to insertion sort if the pivots + * are sorted. + * + * This article goes into more detail about the bug and its origin: + * + * https://www.raygard.net/2022/02/26/Re-engineering-a-qsort-part-3 + * + * With this optimization (the `if (swap_cnt == 0)` block), qsort(3) needs + * roughly N * N / 4 comparisons to sort our pathological input. Without + * it, it needs only a little more than N * log2(N) comparisons. + */ + +/* we stop testing once a single takes longer than this */ +#define MAXRUNSECS 10 + +static bool debugging; + +static uintmax_t ncmp; + +static int +intcmp(const void *a, const void *b) +{ + ncmp++; + return ((*(int *)a > *(int *)b) - (*(int *)a < *(int *)b)); +} + +static void +qsort_bench(int log2n) +{ + uintmax_t n = 1LLU << log2n; + int *buf; + + /* fill an array with a pathological pattern */ + ATF_REQUIRE(buf = malloc(n * sizeof(*buf))); + buf[0] = 0; + buf[n / 2] = n / 2; + for (unsigned int i = 1; i < n / 2; i++) { + buf[i] = n / 2 - i; + buf[n / 2 + i] = n - i; + } + + ncmp = 0; + qsort(buf, n, sizeof(*buf), intcmp); + + /* check result and free array */ + if (debugging) { + for (unsigned int i = 1; i < n; i++) { + ATF_REQUIRE_MSG(buf[i] > buf[i - 1], + "array is not sorted"); + } + } + free(buf); + + /* check that runtime does not exceed N² */ + ATF_CHECK_MSG(ncmp / n < n, + "runtime %ju exceeds N² for N = %ju", ncmp, n); + + /* check that runtime does not exceed N log N by much */ + ATF_CHECK_MSG(ncmp / n <= log2n + 1, + "runtime %ju exceeds N log N for N = %ju", ncmp, n); +} + +ATF_TC_WITHOUT_HEAD(qsort_bench); +ATF_TC_BODY(qsort_bench, tc) +{ + struct timespec t0, t1; + uintmax_t tus; + + for (int i = 10; i <= 30; i++) { + clock_gettime(CLOCK_UPTIME, &t0); + qsort_bench(i); + clock_gettime(CLOCK_UPTIME, &t1); + tus = t1.tv_sec * 1000000 + t1.tv_nsec / 1000; + tus -= t0.tv_sec * 1000000 + t0.tv_nsec / 1000; + if (debugging) { + fprintf(stderr, "N = 2^%d in %ju.%06jus\n", + i, tus / 1000000, tus % 1000000); + } + /* stop once an individual run exceeds our limit */ + if (tus / 1000000 >= MAXRUNSECS) + break; + } +} + +ATF_TP_ADD_TCS(tp) +{ + debugging = !getenv("__RUNNING_INSIDE_ATF_RUN") && + isatty(STDERR_FILENO); + ATF_TP_ADD_TC(tp, qsort_bench); + return (atf_no_error()); +}