diff --git a/config/always-compiler-options.m4 b/config/always-compiler-options.m4 index 1e7ec3db9f63..6383b12506ee 100644 --- a/config/always-compiler-options.m4 +++ b/config/always-compiler-options.m4 @@ -1,326 +1,357 @@ dnl # dnl # Enable -fsanitize=address if supported by $CC. dnl # dnl # LDFLAGS needs -fsanitize=address at all times so libraries compiled with dnl # it will be linked successfully. CFLAGS will vary by binary being built. dnl # dnl # The ASAN_OPTIONS environment variable can be used to further control dnl # the behavior of binaries and libraries built with -fsanitize=address. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_ASAN], [ AC_MSG_CHECKING([whether to build with -fsanitize=address support]) AC_ARG_ENABLE([asan], [AS_HELP_STRING([--enable-asan], [Enable -fsanitize=address support @<:@default=no@:>@])], [], [enable_asan=no]) AM_CONDITIONAL([ASAN_ENABLED], [test x$enable_asan = xyes]) AC_SUBST([ASAN_ENABLED], [$enable_asan]) AC_MSG_RESULT($enable_asan) AS_IF([ test "$enable_asan" = "yes" ], [ AC_MSG_CHECKING([whether $CC supports -fsanitize=address]) saved_cflags="$CFLAGS" CFLAGS="$CFLAGS -Werror -fsanitize=address" AC_LINK_IFELSE([ AC_LANG_SOURCE([[ int main() { return 0; } ]]) ], [ ASAN_CFLAGS="-fsanitize=address" ASAN_LDFLAGS="-fsanitize=address" ASAN_ZFS="_with_asan" AC_MSG_RESULT([yes]) ], [ AC_MSG_ERROR([$CC does not support -fsanitize=address]) ]) CFLAGS="$saved_cflags" ], [ ASAN_CFLAGS="" ASAN_LDFLAGS="" ASAN_ZFS="_without_asan" ]) AC_SUBST([ASAN_CFLAGS]) AC_SUBST([ASAN_LDFLAGS]) AC_SUBST([ASAN_ZFS]) ]) dnl # dnl # Enable -fsanitize=undefined if supported by cc. dnl # dnl # LDFLAGS needs -fsanitize=undefined at all times so libraries compiled with dnl # it will be linked successfully. CFLAGS will vary by binary being built. dnl # dnl # The UBSAN_OPTIONS environment variable can be used to further control dnl # the behavior of binaries and libraries built with -fsanitize=undefined. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_UBSAN], [ AC_MSG_CHECKING([whether to build with -fsanitize=undefined support]) AC_ARG_ENABLE([ubsan], [AS_HELP_STRING([--enable-ubsan], [Enable -fsanitize=undefined support @<:@default=no@:>@])], [], [enable_ubsan=no]) AM_CONDITIONAL([UBSAN_ENABLED], [test x$enable_ubsan = xyes]) AC_SUBST([UBSAN_ENABLED], [$enable_ubsan]) AC_MSG_RESULT($enable_ubsan) AS_IF([ test "$enable_ubsan" = "yes" ], [ AC_MSG_CHECKING([whether $CC supports -fsanitize=undefined]) saved_cflags="$CFLAGS" CFLAGS="$CFLAGS -Werror -fsanitize=undefined" AC_LINK_IFELSE([ AC_LANG_SOURCE([[ int main() { return 0; } ]]) ], [ UBSAN_CFLAGS="-fsanitize=undefined" UBSAN_LDFLAGS="-fsanitize=undefined" UBSAN_ZFS="_with_ubsan" AC_MSG_RESULT([yes]) ], [ AC_MSG_ERROR([$CC does not support -fsanitize=undefined]) ]) CFLAGS="$saved_cflags" ], [ UBSAN_CFLAGS="" UBSAN_LDFLAGS="" UBSAN_ZFS="_without_ubsan" ]) AC_SUBST([UBSAN_CFLAGS]) AC_SUBST([UBSAN_LDFLAGS]) AC_SUBST([UBSAN_ZFS]) ]) dnl # dnl # Check if cc supports -Wframe-larger-than= option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_FRAME_LARGER_THAN], [ AC_MSG_CHECKING([whether $CC supports -Wframe-larger-than=]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wframe-larger-than=4096" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ FRAME_LARGER_THAN="-Wframe-larger-than=4096" AC_MSG_RESULT([yes]) ], [ FRAME_LARGER_THAN="" AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([FRAME_LARGER_THAN]) ]) dnl # dnl # Check if cc supports -Wno-format-truncation option.
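A note on the sanitizer checks above before the warning-flag checks continue: -fsanitize=address instruments every load and store so the runtime can catch out-of-bounds and use-after-free accesses, which is also why the flag has to reach LDFLAGS. As a minimal stand-alone illustration (my sketch, not part of this diff), the program below links and runs cleanly in a normal build but is reported immediately by an ASAN-instrumented one; the ASAN_OPTIONS variable mentioned above tunes the report, e.g. ASAN_OPTIONS=abort_on_error=1.

	/* demo.c: cc -fsanitize=address -g demo.c && ./a.out */
	#include <stdlib.h>

	int
	main(void)
	{
		char *buf = malloc(8);

		buf[8] = 'x';	/* one byte past the end: heap-buffer-overflow */
		free(buf);
		return (0);
	}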
dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION], [ AC_MSG_CHECKING([whether $CC supports -Wno-format-truncation]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wno-format-truncation" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ NO_FORMAT_TRUNCATION=-Wno-format-truncation AC_MSG_RESULT([yes]) ], [ NO_FORMAT_TRUNCATION= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([NO_FORMAT_TRUNCATION]) ]) dnl # dnl # Check if cc supports -Wno-format-zero-length option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH], [ AC_MSG_CHECKING([whether $CC supports -Wno-format-zero-length]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wno-format-zero-length" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ NO_FORMAT_ZERO_LENGTH=-Wno-format-zero-length AC_MSG_RESULT([yes]) ], [ NO_FORMAT_ZERO_LENGTH= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([NO_FORMAT_ZERO_LENGTH]) ]) dnl # dnl # Check if cc supports -Wno-clobbered option. dnl # dnl # We actually invoke it with the -Wclobbered option dnl # and infer the 'no-' version does or doesn't exist based upon dnl # the results. This is required because when checking any of dnl # no- prefixed options gcc always returns success. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_CLOBBERED], [ AC_MSG_CHECKING([whether $CC supports -Wno-clobbered]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wclobbered" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ NO_CLOBBERED=-Wno-clobbered AC_MSG_RESULT([yes]) ], [ NO_CLOBBERED= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([NO_CLOBBERED]) ]) dnl # dnl # Check if cc supports -Wimplicit-fallthrough option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH], [ AC_MSG_CHECKING([whether $CC supports -Wimplicit-fallthrough]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wimplicit-fallthrough" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ IMPLICIT_FALLTHROUGH=-Wimplicit-fallthrough AC_DEFINE([HAVE_IMPLICIT_FALLTHROUGH], 1, [Define if compiler supports -Wimplicit-fallthrough]) AC_MSG_RESULT([yes]) ], [ IMPLICIT_FALLTHROUGH= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([IMPLICIT_FALLTHROUGH]) ]) dnl # dnl # Check if cc supports -Winfinite-recursion option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_INFINITE_RECURSION], [ AC_MSG_CHECKING([whether $CC supports -Winfinite-recursion]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Winfinite-recursion" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ INFINITE_RECURSION=-Winfinite-recursion AC_DEFINE([HAVE_INFINITE_RECURSION], 1, [Define if compiler supports -Winfinite-recursion]) AC_MSG_RESULT([yes]) ], [ INFINITE_RECURSION= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([INFINITE_RECURSION]) ]) dnl # dnl # Check if kernel cc supports -Winfinite-recursion option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_INFINITE_RECURSION], [ AC_MSG_CHECKING([whether $KERNEL_CC supports -Winfinite-recursion]) saved_cc="$CC" saved_flags="$CFLAGS" CC="gcc" CFLAGS="$CFLAGS -Werror -Winfinite-recursion" AS_IF([ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ]) AS_IF([ test -n "$KERNEL_LLVM" ], [ CC="clang" ]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ KERNEL_INFINITE_RECURSION=-Winfinite-recursion AC_DEFINE([HAVE_KERNEL_INFINITE_RECURSION], 1, [Define if compiler supports -Winfinite-recursion]) AC_MSG_RESULT([yes]) ], [ KERNEL_INFINITE_RECURSION= AC_MSG_RESULT([no]) ]) CC="$saved_cc" CFLAGS="$saved_flags" AC_SUBST([KERNEL_INFINITE_RECURSION]) ]) dnl # dnl # Check if cc supports -Wformat-overflow option. 
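Before moving on to the format checks below, a note on -Wimplicit-fallthrough above: the check also defines HAVE_IMPLICIT_FALLTHROUGH, so the code base can annotate intentional fall-through rather than silence the warning globally. A hedged sketch of the pattern the warning enforces, using the generic GCC/Clang spelling of the annotation (not the project's own macro):

	#include <stdio.h>

	static void
	describe(int n)
	{
		switch (n) {
		case 0:
			printf("zero\n");
			/*
			 * Deliberate fall-through; without an annotation here
			 * -Wimplicit-fallthrough reports the case boundary.
			 */
			__attribute__((fallthrough));
		case 1:
			printf("small\n");
			break;
		default:
			printf("large\n");
			break;
		}
	}

	int
	main(void)
	{
		describe(0);
		return (0);
	}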
dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_FORMAT_OVERFLOW], [ AC_MSG_CHECKING([whether $CC supports -Wformat-overflow]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wformat-overflow" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ FORMAT_OVERFLOW=-Wformat-overflow AC_DEFINE([HAVE_FORMAT_OVERFLOW], 1, [Define if compiler supports -Wformat-overflow]) AC_MSG_RESULT([yes]) ], [ FORMAT_OVERFLOW= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([FORMAT_OVERFLOW]) ]) dnl # dnl # Check if cc supports -fno-omit-frame-pointer option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER], [ AC_MSG_CHECKING([whether $CC supports -fno-omit-frame-pointer]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -fno-omit-frame-pointer" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ NO_OMIT_FRAME_POINTER=-fno-omit-frame-pointer AC_MSG_RESULT([yes]) ], [ NO_OMIT_FRAME_POINTER= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([NO_OMIT_FRAME_POINTER]) ]) dnl # dnl # Check if cc supports -fno-ipa-sra option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA], [ AC_MSG_CHECKING([whether $CC supports -fno-ipa-sra]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -fno-ipa-sra" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ NO_IPA_SRA=-fno-ipa-sra AC_MSG_RESULT([yes]) ], [ NO_IPA_SRA= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([NO_IPA_SRA]) ]) + +dnl # +dnl # Check if kernel cc supports -fno-ipa-sra option. +dnl # +AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA], [ + AC_MSG_CHECKING([whether $KERNEL_CC supports -fno-ipa-sra]) + + saved_cc="$CC" + saved_flags="$CFLAGS" + CC="gcc" + CFLAGS="$CFLAGS -Werror -fno-ipa-sra" + + AS_IF([ test -n "$KERNEL_CC" ], [ + CC="$KERNEL_CC" + ]) + AS_IF([ test -n "$KERNEL_LLVM" ], [ + CC="clang" + ]) + + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ + KERNEL_NO_IPA_SRA=-fno-ipa-sra + AC_MSG_RESULT([yes]) + ], [ + KERNEL_NO_IPA_SRA= + AC_MSG_RESULT([no]) + ]) + + CC="$saved_cc" + CFLAGS="$saved_flags" + AC_SUBST([KERNEL_NO_IPA_SRA]) +]) diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index bb10bec04017..6355952487f7 100644 --- a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -1,643 +1,644 @@ AC_DEFUN([ZFS_AC_LICENSE], [ AC_MSG_CHECKING([zfs author]) AC_MSG_RESULT([$ZFS_META_AUTHOR]) AC_MSG_CHECKING([zfs license]) AC_MSG_RESULT([$ZFS_META_LICENSE]) ]) AC_DEFUN([ZFS_AC_DEBUG_ENABLE], [ DEBUG_CFLAGS="-Werror" DEBUG_CPPFLAGS="-DDEBUG -UNDEBUG" DEBUG_LDFLAGS="" DEBUG_ZFS="_with_debug" WITH_DEBUG="true" AC_DEFINE(ZFS_DEBUG, 1, [zfs debugging enabled]) KERNEL_DEBUG_CFLAGS="-Werror" KERNEL_DEBUG_CPPFLAGS="-DDEBUG -UNDEBUG" ]) AC_DEFUN([ZFS_AC_DEBUG_DISABLE], [ DEBUG_CFLAGS="" DEBUG_CPPFLAGS="-UDEBUG -DNDEBUG" DEBUG_LDFLAGS="" DEBUG_ZFS="_without_debug" WITH_DEBUG="" KERNEL_DEBUG_CFLAGS="" KERNEL_DEBUG_CPPFLAGS="-UDEBUG -DNDEBUG" ]) dnl # dnl # When debugging is enabled: dnl # - Enable all ASSERTs (-DDEBUG) dnl # - Promote all compiler warnings to errors (-Werror) dnl # dnl # (If INVARIANTS is detected, we need to force DEBUG, or strange panics dnl # can ensue.) 
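A note on the new kernel-compiler probe above: ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA tests the kernel compiler separately from $CC because the two can differ; with KERNEL_LLVM set, the probe runs against clang, which does not accept GCC's -fno-ipa-sra, so KERNEL_NO_IPA_SRA is correctly left empty. For background on why debuginfo builds want the flag at all: GCC's IPA-SRA pass may clone a function and rewrite its parameter list, emitting renamed symbols such as foo.isra.0 that confuse symbol-based tracing. A small hedged illustration (GCC at -O2 assumed; the transformation is optimization-dependent):

	/*
	 * With gcc -O2, IPA-SRA may rewrite sum() to take the two scalars
	 * it actually reads instead of the struct pointer; `nm` then shows
	 * a local symbol like "sum.isra.0".  Adding -fno-ipa-sra keeps the
	 * original signature, which is friendlier to kprobes and debuggers.
	 */
	struct pair {
		int	a;
		int	b;
		int	pad[16];
	};

	static int __attribute__((noinline))
	sum(const struct pair *p)
	{
		return (p->a + p->b);
	}

	int
	caller(int x, int y)
	{
		struct pair p = { .a = x, .b = y };

		return (sum(&p));
	}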
dnl # AC_DEFUN([ZFS_AC_DEBUG], [ AC_MSG_CHECKING([whether assertion support will be enabled]) AC_ARG_ENABLE([debug], [AS_HELP_STRING([--enable-debug], [Enable compiler and code assertions @<:@default=no@:>@])], [], [enable_debug=no]) AS_CASE(["x$enable_debug"], ["xyes"], [ZFS_AC_DEBUG_ENABLE], ["xno"], [ZFS_AC_DEBUG_DISABLE], [AC_MSG_ERROR([Unknown option $enable_debug])]) AS_CASE(["x$enable_invariants"], ["xyes"], [], ["xno"], [], [ZFS_AC_DEBUG_INVARIANTS_DETECT]) AS_CASE(["x$enable_invariants"], ["xyes"], [ZFS_AC_DEBUG_ENABLE], ["xno"], [], [AC_MSG_ERROR([Unknown option $enable_invariants])]) AC_SUBST(DEBUG_CFLAGS) AC_SUBST(DEBUG_CPPFLAGS) AC_SUBST(DEBUG_LDFLAGS) AC_SUBST(DEBUG_ZFS) AC_SUBST(WITH_DEBUG) AC_SUBST(KERNEL_DEBUG_CFLAGS) AC_SUBST(KERNEL_DEBUG_CPPFLAGS) AC_MSG_RESULT([$enable_debug]) ]) AC_DEFUN([ZFS_AC_DEBUGINFO_ENABLE], [ DEBUG_CFLAGS="$DEBUG_CFLAGS -g -fno-inline $NO_IPA_SRA" - KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $NO_IPA_SRA" + KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $KERNEL_NO_IPA_SRA" KERNEL_MAKE="$KERNEL_MAKE CONFIG_DEBUG_INFO=y" DEBUGINFO_ZFS="_with_debuginfo" ]) AC_DEFUN([ZFS_AC_DEBUGINFO_DISABLE], [ DEBUGINFO_ZFS="_without_debuginfo" ]) AC_DEFUN([ZFS_AC_DEBUGINFO], [ AC_MSG_CHECKING([whether debuginfo support will be forced]) AC_ARG_ENABLE([debuginfo], [AS_HELP_STRING([--enable-debuginfo], [Force generation of debuginfo @<:@default=no@:>@])], [], [enable_debuginfo=no]) AS_CASE(["x$enable_debuginfo"], ["xyes"], [ZFS_AC_DEBUGINFO_ENABLE], ["xno"], [ZFS_AC_DEBUGINFO_DISABLE], [AC_MSG_ERROR([Unknown option $enable_debuginfo])]) AC_SUBST(DEBUG_CFLAGS) AC_SUBST(DEBUGINFO_ZFS) AC_SUBST(KERNEL_DEBUG_CFLAGS) AC_SUBST(KERNEL_MAKE) AC_MSG_RESULT([$enable_debuginfo]) ]) dnl # dnl # Disabled by default, provides basic memory tracking. Track the total dnl # number of bytes allocated with kmem_alloc() and freed with kmem_free(). dnl # Then at module unload time if any bytes were leaked it will be reported dnl # on the console. dnl # AC_DEFUN([ZFS_AC_DEBUG_KMEM], [ AC_MSG_CHECKING([whether basic kmem accounting is enabled]) AC_ARG_ENABLE([debug-kmem], [AS_HELP_STRING([--enable-debug-kmem], [Enable basic kmem accounting @<:@default=no@:>@])], [], [enable_debug_kmem=no]) AS_IF([test "x$enable_debug_kmem" = xyes], [ KERNEL_DEBUG_CPPFLAGS="${KERNEL_DEBUG_CPPFLAGS} -DDEBUG_KMEM" DEBUG_KMEM_ZFS="_with_debug_kmem" ], [ DEBUG_KMEM_ZFS="_without_debug_kmem" ]) AC_SUBST(KERNEL_DEBUG_CPPFLAGS) AC_SUBST(DEBUG_KMEM_ZFS) AC_MSG_RESULT([$enable_debug_kmem]) ]) dnl # dnl # Disabled by default, provides detailed memory tracking. This feature dnl # also requires --enable-debug-kmem to be set. When enabled not only will dnl # total bytes be tracked but also the location of every kmem_alloc() and dnl # kmem_free(). When the module is unloaded a list of all leaked addresses dnl # and where they were allocated will be dumped to the console. Enabling dnl # this feature has a significant impact on performance but it makes finding dnl # memory leaks straightforward.
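Note: the kmem options above just add -DDEBUG_KMEM (and, below, -DDEBUG_KMEM_TRACKING); conceptually the basic accounting is a running byte counter wrapped around the allocator and checked at module unload. A user-space sketch of the idea (illustrative only, not the SPL implementation):

	#include <stdio.h>
	#include <stdlib.h>

	static long long kmem_used;	/* total outstanding bytes */

	static void *
	dbg_alloc(size_t size)
	{
		/* stash the size so the matching free can subtract it */
		size_t *p = malloc(sizeof (size_t) + size);

		if (p == NULL)
			return (NULL);
		*p = size;
		kmem_used += size;
		return (p + 1);
	}

	static void
	dbg_free(void *ptr)
	{
		size_t *p = (size_t *)ptr - 1;

		kmem_used -= *p;
		free(p);
	}

	int
	main(void)
	{
		dbg_free(dbg_alloc(100));
		/* at "unload" time, a nonzero count means a leak */
		printf("leaked bytes: %lld\n", kmem_used);
		return (0);
	}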
dnl # AC_DEFUN([ZFS_AC_DEBUG_KMEM_TRACKING], [ AC_MSG_CHECKING([whether detailed kmem tracking is enabled]) AC_ARG_ENABLE([debug-kmem-tracking], [AS_HELP_STRING([--enable-debug-kmem-tracking], [Enable detailed kmem tracking @<:@default=no@:>@])], [], [enable_debug_kmem_tracking=no]) AS_IF([test "x$enable_debug_kmem_tracking" = xyes], [ KERNEL_DEBUG_CPPFLAGS="${KERNEL_DEBUG_CPPFLAGS} -DDEBUG_KMEM_TRACKING" DEBUG_KMEM_TRACKING_ZFS="_with_debug_kmem_tracking" ], [ DEBUG_KMEM_TRACKING_ZFS="_without_debug_kmem_tracking" ]) AC_SUBST(KERNEL_DEBUG_CPPFLAGS) AC_SUBST(DEBUG_KMEM_TRACKING_ZFS) AC_MSG_RESULT([$enable_debug_kmem_tracking]) ]) AC_DEFUN([ZFS_AC_DEBUG_INVARIANTS_DETECT_FREEBSD], [ AS_IF([sysctl -n kern.conftxt | grep -Fqx $'options\tINVARIANTS'], [enable_invariants="yes"], [enable_invariants="no"]) ]) AC_DEFUN([ZFS_AC_DEBUG_INVARIANTS_DETECT], [ AM_COND_IF([BUILD_FREEBSD], [ZFS_AC_DEBUG_INVARIANTS_DETECT_FREEBSD], [enable_invariants="no"]) ]) dnl # dnl # Detected for the running kernel by default, enables INVARIANTS features dnl # in the FreeBSD kernel module. This feature must be used when building dnl # for a FreeBSD kernel with "options INVARIANTS" in the KERNCONF and must dnl # not be used when the INVARIANTS option is absent. dnl # AC_DEFUN([ZFS_AC_DEBUG_INVARIANTS], [ AC_MSG_CHECKING([whether FreeBSD kernel INVARIANTS checks are enabled]) AC_ARG_ENABLE([invariants], [AS_HELP_STRING([--enable-invariants], [Enable FreeBSD kernel INVARIANTS checks [[default: detect]]])], [], [ZFS_AC_DEBUG_INVARIANTS_DETECT]) AS_IF([test "x$enable_invariants" = xyes], [WITH_INVARIANTS="true"], [WITH_INVARIANTS=""]) AC_SUBST(WITH_INVARIANTS) AC_MSG_RESULT([$enable_invariants]) ]) AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [ AX_COUNT_CPUS([]) AC_SUBST(CPU_COUNT) ZFS_AC_CONFIG_ALWAYS_CC_NO_CLOBBERED ZFS_AC_CONFIG_ALWAYS_CC_INFINITE_RECURSION ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_INFINITE_RECURSION ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH ZFS_AC_CONFIG_ALWAYS_CC_FRAME_LARGER_THAN ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH ZFS_AC_CONFIG_ALWAYS_CC_FORMAT_OVERFLOW ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA + ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA ZFS_AC_CONFIG_ALWAYS_CC_ASAN ZFS_AC_CONFIG_ALWAYS_CC_UBSAN ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD ZFS_AC_CONFIG_ALWAYS_SYSTEM ZFS_AC_CONFIG_ALWAYS_ARCH ZFS_AC_CONFIG_ALWAYS_PYTHON ZFS_AC_CONFIG_ALWAYS_PYZFS ZFS_AC_CONFIG_ALWAYS_SED ZFS_AC_CONFIG_ALWAYS_CPPCHECK ZFS_AC_CONFIG_ALWAYS_SHELLCHECK ZFS_AC_CONFIG_ALWAYS_PARALLEL ]) AC_DEFUN([ZFS_AC_CONFIG], [ dnl # Remove the previous build test directory. 
rm -Rf build ZFS_CONFIG=all AC_ARG_WITH([config], AS_HELP_STRING([--with-config=CONFIG], [Config file 'kernel|user|all|srpm']), [ZFS_CONFIG="$withval"]) AC_ARG_ENABLE([linux-builtin], [AS_HELP_STRING([--enable-linux-builtin], [Configure for builtin in-tree kernel modules @<:@default=no@:>@])], [], [enable_linux_builtin=no]) AC_MSG_CHECKING([zfs config]) AC_MSG_RESULT([$ZFS_CONFIG]); AC_SUBST(ZFS_CONFIG) ZFS_AC_CONFIG_ALWAYS AM_COND_IF([BUILD_LINUX], [ AC_ARG_VAR([TEST_JOBS], [simultaneous jobs during configure]) if test "x$ac_cv_env_TEST_JOBS_set" != "xset"; then TEST_JOBS=$CPU_COUNT fi AC_SUBST(TEST_JOBS) ]) ZFS_INIT_SYSV= ZFS_INIT_SYSTEMD= ZFS_WANT_MODULES_LOAD_D= case "$ZFS_CONFIG" in kernel) ZFS_AC_CONFIG_KERNEL ;; user) ZFS_AC_CONFIG_USER ;; all) ZFS_AC_CONFIG_USER ZFS_AC_CONFIG_KERNEL ;; dist) ;; srpm) ;; *) AC_MSG_RESULT([Error!]) AC_MSG_ERROR([Bad value "$ZFS_CONFIG" for --with-config, use kernel|user|all|srpm]) ;; esac AM_CONDITIONAL([INIT_SYSV], [test "x$ZFS_INIT_SYSV" = "xyes"]) AM_CONDITIONAL([INIT_SYSTEMD], [test "x$ZFS_INIT_SYSTEMD" = "xyes"]) AM_CONDITIONAL([WANT_MODULES_LOAD_D], [test "x$ZFS_WANT_MODULES_LOAD_D" = "xyes"]) AM_CONDITIONAL([CONFIG_USER], [test "$ZFS_CONFIG" = user -o "$ZFS_CONFIG" = all]) AM_CONDITIONAL([CONFIG_KERNEL], [test "$ZFS_CONFIG" = kernel -o "$ZFS_CONFIG" = all] && [test "x$enable_linux_builtin" != xyes ]) AM_CONDITIONAL([CONFIG_QAT], [test "$ZFS_CONFIG" = kernel -o "$ZFS_CONFIG" = all] && [test "x$qatsrc" != x ]) AM_CONDITIONAL([WANT_DEVNAME2DEVID], [test "x$user_libudev" = xyes ]) AM_CONDITIONAL([WANT_MMAP_LIBAIO], [test "x$user_libaio" = xyes ]) AM_CONDITIONAL([PAM_ZFS_ENABLED], [test "x$enable_pam" = xyes]) ]) dnl # dnl # Check for rpm+rpmbuild to build RPM packages. If these tools dnl # are missing it is non-fatal but you will not be able to build dnl # RPM packages and will be warned if you try to. dnl # dnl # By default the generic spec file will be used because it requires dnl # minimal dependencies. Distribution-specific spec files can be dnl # placed under the 'rpm/' directory and enabled using dnl # the --with-spec= configure option.
dnl # AC_DEFUN([ZFS_AC_RPM], [ RPM=rpm RPMBUILD=rpmbuild AC_MSG_CHECKING([whether $RPM is available]) AS_IF([tmp=$($RPM --version 2>/dev/null)], [ RPM_VERSION=$(echo $tmp | $AWK '/RPM/ { print $[3] }') HAVE_RPM=yes AC_MSG_RESULT([$HAVE_RPM ($RPM_VERSION)]) ],[ HAVE_RPM=no AC_MSG_RESULT([$HAVE_RPM]) ]) AC_MSG_CHECKING([whether $RPMBUILD is available]) AS_IF([tmp=$($RPMBUILD --version 2>/dev/null)], [ RPMBUILD_VERSION=$(echo $tmp | $AWK '/RPM/ { print $[3] }') HAVE_RPMBUILD=yes AC_MSG_RESULT([$HAVE_RPMBUILD ($RPMBUILD_VERSION)]) ],[ HAVE_RPMBUILD=no AC_MSG_RESULT([$HAVE_RPMBUILD]) ]) RPM_DEFINE_COMMON='--define "$(DEBUG_ZFS) 1"' RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(DEBUGINFO_ZFS) 1"' RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(DEBUG_KMEM_ZFS) 1"' RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(DEBUG_KMEM_TRACKING_ZFS) 1"' RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(ASAN_ZFS) 1"' RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(UBSAN_ZFS) 1"' AS_IF([test "x$enable_debuginfo" = xyes], [ RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "__strip /bin/true"' ]) RPM_DEFINE_UTIL=' --define "_initconfdir $(initconfdir)"' dnl # Make the next three RPM_DEFINE_UTIL additions conditional, since dnl # their values may not be set when running: dnl # dnl # ./configure --with-config=srpm dnl # AS_IF([test -n "$dracutdir" ], [ RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_dracutdir $(dracutdir)"' ]) AS_IF([test -n "$udevdir" ], [ RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevdir $(udevdir)"' ]) AS_IF([test -n "$udevruledir" ], [ RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevruledir $(udevruledir)"' ]) RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_SYSTEMD)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYZFS)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PAM)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYTHON_VERSION)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYTHON_PKG_VERSION)' dnl # Override default lib directory on Debian/Ubuntu systems. The dnl # provided /usr/lib/rpm/platform/<arch>/macros files do not dnl # specify the correct path for multiarch systems as described dnl # by the packaging guidelines. dnl # dnl # https://wiki.ubuntu.com/MultiarchSpec dnl # https://wiki.debian.org/Multiarch/Implementation dnl # AS_IF([test "$DEFAULT_PACKAGE" = "deb"], [ MULTIARCH_LIBDIR="lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)" RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_lib $(MULTIARCH_LIBDIR)"' AC_SUBST(MULTIARCH_LIBDIR) ]) dnl # Make RPM_DEFINE_KMOD additions conditional on CONFIG_KERNEL, dnl # since the values will not be set otherwise. The spec files dnl # provide defaults for them.
dnl # RPM_DEFINE_KMOD='--define "_wrong_version_format_terminate_build 0"' AM_COND_IF([CONFIG_KERNEL], [ RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernels $(LINUX_VERSION)"' RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "ksrc $(LINUX)"' RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kobj $(LINUX_OBJ)"' RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_cc KERNEL_CC=$(KERNEL_CC)"' RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_ld KERNEL_LD=$(KERNEL_LD)"' RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_llvm KERNEL_LLVM=$(KERNEL_LLVM)"' ]) RPM_DEFINE_DKMS='' SRPM_DEFINE_COMMON='--define "build_src_rpm 1"' SRPM_DEFINE_UTIL= SRPM_DEFINE_KMOD= SRPM_DEFINE_DKMS= RPM_SPEC_DIR="rpm/generic" AC_ARG_WITH([spec], AS_HELP_STRING([--with-spec=SPEC], [Spec files 'generic|redhat']), [RPM_SPEC_DIR="rpm/$withval"]) AC_MSG_CHECKING([whether spec files are available]) AC_MSG_RESULT([yes ($RPM_SPEC_DIR/*.spec.in)]) AC_SUBST(HAVE_RPM) AC_SUBST(RPM) AC_SUBST(RPM_VERSION) AC_SUBST(HAVE_RPMBUILD) AC_SUBST(RPMBUILD) AC_SUBST(RPMBUILD_VERSION) AC_SUBST(RPM_SPEC_DIR) AC_SUBST(RPM_DEFINE_UTIL) AC_SUBST(RPM_DEFINE_KMOD) AC_SUBST(RPM_DEFINE_DKMS) AC_SUBST(RPM_DEFINE_COMMON) AC_SUBST(SRPM_DEFINE_UTIL) AC_SUBST(SRPM_DEFINE_KMOD) AC_SUBST(SRPM_DEFINE_DKMS) AC_SUBST(SRPM_DEFINE_COMMON) ]) dnl # dnl # Check for dpkg+dpkg-buildpackage to build DEB packages. If these dnl # tools are missing it is non-fatal but you will not be able to build dnl # DEB packages and will be warned if you try to. dnl # AC_DEFUN([ZFS_AC_DPKG], [ DPKG=dpkg DPKGBUILD=dpkg-buildpackage AC_MSG_CHECKING([whether $DPKG is available]) AS_IF([tmp=$($DPKG --version 2>/dev/null)], [ DPKG_VERSION=$(echo $tmp | $AWK '/Debian/ { print $[7] }') HAVE_DPKG=yes AC_MSG_RESULT([$HAVE_DPKG ($DPKG_VERSION)]) ],[ HAVE_DPKG=no AC_MSG_RESULT([$HAVE_DPKG]) ]) AC_MSG_CHECKING([whether $DPKGBUILD is available]) AS_IF([tmp=$($DPKGBUILD --version 2>/dev/null)], [ DPKGBUILD_VERSION=$(echo $tmp | \ $AWK '/Debian/ { print $[4] }' | cut -f-4 -d'.') HAVE_DPKGBUILD=yes AC_MSG_RESULT([$HAVE_DPKGBUILD ($DPKGBUILD_VERSION)]) ],[ HAVE_DPKGBUILD=no AC_MSG_RESULT([$HAVE_DPKGBUILD]) ]) AC_SUBST(HAVE_DPKG) AC_SUBST(DPKG) AC_SUBST(DPKG_VERSION) AC_SUBST(HAVE_DPKGBUILD) AC_SUBST(DPKGBUILD) AC_SUBST(DPKGBUILD_VERSION) ]) dnl # dnl # Until native packaging for various packaging systems dnl # can be added, the least we can do is attempt to use alien to dnl # convert the RPM packages to the needed package type. This is dnl # a hack but so far it has worked reasonably well. dnl # AC_DEFUN([ZFS_AC_ALIEN], [ ALIEN=alien AC_MSG_CHECKING([whether $ALIEN is available]) AS_IF([tmp=$($ALIEN --version 2>/dev/null)], [ ALIEN_VERSION=$(echo $tmp | $AWK '{ print $[3] }') ALIEN_MAJOR=$(echo ${ALIEN_VERSION} | $AWK -F'.' '{ print $[1] }') ALIEN_MINOR=$(echo ${ALIEN_VERSION} | $AWK -F'.' '{ print $[2] }') ALIEN_POINT=$(echo ${ALIEN_VERSION} | $AWK -F'.'
'{ print $[3] }') HAVE_ALIEN=yes AC_MSG_RESULT([$HAVE_ALIEN ($ALIEN_VERSION)]) ],[ HAVE_ALIEN=no AC_MSG_RESULT([$HAVE_ALIEN]) ]) AC_SUBST(HAVE_ALIEN) AC_SUBST(ALIEN) AC_SUBST(ALIEN_VERSION) AC_SUBST(ALIEN_MAJOR) AC_SUBST(ALIEN_MINOR) AC_SUBST(ALIEN_POINT) ]) dnl # dnl # Using the VENDOR tag from config.guess set the default dnl # package type for 'make pkg': (rpm | deb | tgz) dnl # AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ AC_MSG_CHECKING([os distribution]) AC_ARG_WITH([vendor], [AS_HELP_STRING([--with-vendor], [Distribution vendor @<:@default=check@:>@])], [with_vendor=$withval], [with_vendor=check]) AS_IF([test "x$with_vendor" = "xcheck"],[ if test -f /etc/toss-release ; then VENDOR=toss ; elif test -f /etc/fedora-release ; then VENDOR=fedora ; elif test -f /etc/redhat-release ; then VENDOR=redhat ; elif test -f /etc/gentoo-release ; then VENDOR=gentoo ; elif test -f /etc/arch-release ; then VENDOR=arch ; elif test -f /etc/SuSE-release ; then VENDOR=sles ; elif test -f /etc/slackware-version ; then VENDOR=slackware ; elif test -f /etc/lunar.release ; then VENDOR=lunar ; elif test -f /etc/lsb-release ; then VENDOR=ubuntu ; elif test -f /etc/debian_version ; then VENDOR=debian ; elif test -f /etc/alpine-release ; then VENDOR=alpine ; elif test -f /bin/freebsd-version ; then VENDOR=freebsd ; else VENDOR= ; fi], [ test "x${with_vendor}" != x],[ VENDOR="$with_vendor" ], [ VENDOR= ; ] ) AC_MSG_RESULT([$VENDOR]) AC_SUBST(VENDOR) AC_MSG_CHECKING([default package type]) case "$VENDOR" in toss) DEFAULT_PACKAGE=rpm ;; redhat) DEFAULT_PACKAGE=rpm ;; fedora) DEFAULT_PACKAGE=rpm ;; gentoo) DEFAULT_PACKAGE=tgz ;; alpine) DEFAULT_PACKAGE=tgz ;; arch) DEFAULT_PACKAGE=tgz ;; sles) DEFAULT_PACKAGE=rpm ;; slackware) DEFAULT_PACKAGE=tgz ;; lunar) DEFAULT_PACKAGE=tgz ;; ubuntu) DEFAULT_PACKAGE=deb ;; debian) DEFAULT_PACKAGE=deb ;; freebsd) DEFAULT_PACKAGE=pkg ;; *) DEFAULT_PACKAGE=rpm ;; esac AC_MSG_RESULT([$DEFAULT_PACKAGE]) AC_SUBST(DEFAULT_PACKAGE) AC_MSG_CHECKING([default init directory]) case "$VENDOR" in freebsd) initdir=$sysconfdir/rc.d ;; *) initdir=$sysconfdir/init.d;; esac AC_MSG_RESULT([$initdir]) AC_SUBST(initdir) AC_MSG_CHECKING([default init script type and shell]) case "$VENDOR" in toss) DEFAULT_INIT_SCRIPT=redhat ;; redhat) DEFAULT_INIT_SCRIPT=redhat ;; fedora) DEFAULT_INIT_SCRIPT=fedora ;; gentoo) DEFAULT_INIT_SCRIPT=openrc ;; alpine) DEFAULT_INIT_SCRIPT=openrc ;; arch) DEFAULT_INIT_SCRIPT=lsb ;; sles) DEFAULT_INIT_SCRIPT=lsb ;; slackware) DEFAULT_INIT_SCRIPT=lsb ;; lunar) DEFAULT_INIT_SCRIPT=lunar ;; ubuntu) DEFAULT_INIT_SCRIPT=lsb ;; debian) DEFAULT_INIT_SCRIPT=lsb ;; freebsd) DEFAULT_INIT_SCRIPT=freebsd;; *) DEFAULT_INIT_SCRIPT=lsb ;; esac case "$VENDOR" in gentoo) DEFAULT_INIT_SHELL="/sbin/openrc-run";; alpine) DEFAULT_INIT_SHELL="/sbin/openrc-run";; *) DEFAULT_INIT_SHELL="/bin/sh" ;; esac AC_MSG_RESULT([$DEFAULT_INIT_SCRIPT:$DEFAULT_INIT_SHELL]) AC_SUBST(DEFAULT_INIT_SCRIPT) AC_SUBST(DEFAULT_INIT_SHELL) AC_MSG_CHECKING([default nfs server init script]) AS_IF([test "$VENDOR" = "debian"], [DEFAULT_INIT_NFS_SERVER="nfs-kernel-server"], [DEFAULT_INIT_NFS_SERVER="nfs"] ) AC_MSG_RESULT([$DEFAULT_INIT_NFS_SERVER]) AC_SUBST(DEFAULT_INIT_NFS_SERVER) AC_MSG_CHECKING([default init config directory]) case "$VENDOR" in alpine) initconfdir=/etc/conf.d ;; gentoo) initconfdir=/etc/conf.d ;; toss) initconfdir=/etc/sysconfig ;; redhat) initconfdir=/etc/sysconfig ;; fedora) initconfdir=/etc/sysconfig ;; sles) initconfdir=/etc/sysconfig ;; ubuntu) initconfdir=/etc/default ;; debian) initconfdir=/etc/default ;; 
freebsd) initconfdir=$sysconfdir/rc.conf.d;; *) initconfdir=/etc/default ;; esac AC_MSG_RESULT([$initconfdir]) AC_SUBST(initconfdir) AC_MSG_CHECKING([whether initramfs-tools is available]) if test -d /usr/share/initramfs-tools ; then RPM_DEFINE_INITRAMFS='--define "_initramfs 1"' AC_MSG_RESULT([yes]) else RPM_DEFINE_INITRAMFS='' AC_MSG_RESULT([no]) fi AC_SUBST(RPM_DEFINE_INITRAMFS) ]) dnl # dnl # Default ZFS package configuration dnl # AC_DEFUN([ZFS_AC_PACKAGE], [ ZFS_AC_DEFAULT_PACKAGE AS_IF([test x$VENDOR != xfreebsd], [ ZFS_AC_RPM ZFS_AC_DPKG ZFS_AC_ALIEN ]) ]) diff --git a/module/Kbuild.in b/module/Kbuild.in index 581d50e64b42..a39f9d9d0500 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -1,469 +1,475 @@ # When integrated in to a monolithic kernel the spl module must appear # first. This ensures its module initialization function is run before # any of the other module initialization functions which depend on it. ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement ZFS_MODULE_CFLAGS += -Wmissing-prototypes ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @NO_FORMAT_ZERO_LENGTH@ ifneq ($(KBUILD_EXTMOD),) zfs_include = @abs_top_srcdir@/include icp_include = @abs_srcdir@/icp/include zstd_include = @abs_srcdir@/zstd/include ZFS_MODULE_CFLAGS += -include @abs_top_builddir@/zfs_config.h ZFS_MODULE_CFLAGS += -I@abs_top_builddir@/include src = @abs_srcdir@ obj = @abs_builddir@ else zfs_include = $(srctree)/include/zfs icp_include = $(srctree)/$(src)/icp/include zstd_include = $(srctree)/$(src)/zstd/include ZFS_MODULE_CFLAGS += -include $(zfs_include)/zfs_config.h endif ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/kernel ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/spl ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/zfs ZFS_MODULE_CFLAGS += -I$(zfs_include) ZFS_MODULE_CPPFLAGS += -D_KERNEL ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@ # KASAN enables -Werror=frame-larger-than=1024, which # breaks oh so many parts of our build. 
ifeq ($(CONFIG_KASAN),y) ZFS_MODULE_CFLAGS += -Wno-error=frame-larger-than= endif ifneq ($(KBUILD_EXTMOD),) @CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include @CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@ endif asflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) +ifeq ($(CONFIG_ARM64),y) +CFLAGS_REMOVE_zcommon/zfs_fletcher_aarch64_neon.o += -mgeneral-regs-only +CFLAGS_REMOVE_zfs/vdev_raidz_math_aarch64_neon.o += -mgeneral-regs-only +CFLAGS_REMOVE_zfs/vdev_raidz_math_aarch64_neonx2.o += -mgeneral-regs-only +endif + # Suppress unused-value warnings in sparc64 architecture headers ccflags-$(CONFIG_SPARC64) += -Wno-unused-value obj-$(CONFIG_ZFS) := spl.o zfs.o SPL_OBJS := \ spl-atomic.o \ spl-condvar.o \ spl-cred.o \ spl-err.o \ spl-generic.o \ spl-kmem-cache.o \ spl-kmem.o \ spl-kstat.o \ spl-proc.o \ spl-procfs-list.o \ spl-taskq.o \ spl-thread.o \ spl-trace.o \ spl-tsd.o \ spl-vmem.o \ spl-xdr.o \ spl-zlib.o \ spl-zone.o spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS)) zfs-objs += avl/avl.o ICP_OBJS := \ algs/aes/aes_impl.o \ algs/aes/aes_impl_generic.o \ algs/aes/aes_modes.o \ algs/blake3/blake3.o \ algs/blake3/blake3_generic.o \ algs/blake3/blake3_impl.o \ algs/blake3/blake3_x86-64.o \ algs/edonr/edonr.o \ algs/modes/cbc.o \ algs/modes/ccm.o \ algs/modes/ctr.o \ algs/modes/ecb.o \ algs/modes/gcm.o \ algs/modes/gcm_generic.o \ algs/modes/modes.o \ algs/sha2/sha2.o \ algs/skein/skein.o \ algs/skein/skein_block.o \ algs/skein/skein_iv.o \ api/kcf_cipher.o \ api/kcf_ctxops.o \ api/kcf_mac.o \ core/kcf_callprov.o \ core/kcf_mech_tabs.o \ core/kcf_prov_lib.o \ core/kcf_prov_tabs.o \ core/kcf_sched.o \ illumos-crypto.o \ io/aes.o \ io/sha2_mod.o \ io/skein_mod.o \ spi/kcf_spi.o ICP_OBJS_X86_64 := \ asm-x86_64/aes/aes_aesni.o \ asm-x86_64/aes/aes_amd64.o \ asm-x86_64/aes/aeskey.o \ asm-x86_64/blake3/blake3_avx2.o \ asm-x86_64/blake3/blake3_avx512.o \ asm-x86_64/blake3/blake3_sse2.o \ asm-x86_64/blake3/blake3_sse41.o \ asm-x86_64/modes/aesni-gcm-x86_64.o \ asm-x86_64/modes/gcm_pclmulqdq.o \ asm-x86_64/modes/ghash-x86_64.o \ asm-x86_64/sha2/sha256_impl.o \ asm-x86_64/sha2/sha512_impl.o ICP_OBJS_X86 := \ algs/aes/aes_impl_aesni.o \ algs/aes/aes_impl_x86-64.o \ algs/modes/gcm_pclmulqdq.o ICP_OBJS_ARM64 := \ asm-aarch64/blake3/b3_aarch64_sse2.o \ asm-aarch64/blake3/b3_aarch64_sse41.o ICP_OBJS_PPC_PPC64 := \ asm-ppc64/blake3/b3_ppc64le_sse2.o \ asm-ppc64/blake3/b3_ppc64le_sse41.o zfs-objs += $(addprefix icp/,$(ICP_OBJS)) zfs-$(CONFIG_X86) += $(addprefix icp/,$(ICP_OBJS_X86)) zfs-$(CONFIG_UML_X86)+= $(addprefix icp/,$(ICP_OBJS_X86)) zfs-$(CONFIG_X86_64) += $(addprefix icp/,$(ICP_OBJS_X86_64)) zfs-$(CONFIG_ARM64) += $(addprefix icp/,$(ICP_OBJS_ARM64)) zfs-$(CONFIG_PPC) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64)) zfs-$(CONFIG_PPC64) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64)) $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \ $(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : asflags-y += -I$(icp_include) $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \ $(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : ccflags-y += -I$(icp_include) # Suppress objtool "return with modified stack frame" warnings. OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y # Suppress objtool "unsupported stack pointer realignment" warnings. We are # not using a DRAP register while aligning the stack to a 64 byte boundary. # See #6950 for the reasoning. 
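Stepping back to the CFLAGS_REMOVE_* lines added above: arm64 kernels are compiled with -mgeneral-regs-only, which forbids FP/SIMD register use, yet those three objects are precisely the files that need NEON (at runtime the code brackets its SIMD sections with the kernel's FP-state save/restore, so stripping the flag is safe). A stand-alone aarch64 sketch of the conflict (illustrative; user-space compile flags assumed):

	/*
	 * cc -O2 -c neon.c                       builds on aarch64;
	 * cc -O2 -mgeneral-regs-only -c neon.c   fails, since arm_neon.h
	 * requires the SIMD register file that the flag forbids.
	 */
	#include <arm_neon.h>

	uint32_t
	sum4(const uint32_t *v)
	{
		uint32x4_t x = vld1q_u32(v);	/* vector load of 4 lanes */

		return (vaddvq_u32(x));		/* horizontal add across lanes */
	}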
OBJECT_FILES_NON_STANDARD_sha256_impl.o := y OBJECT_FILES_NON_STANDARD_sha512_impl.o := y LUA_OBJS := \ lapi.o \ lauxlib.o \ lbaselib.o \ lcode.o \ lcompat.o \ lcorolib.o \ lctype.o \ ldebug.o \ ldo.o \ lfunc.o \ lgc.o \ llex.o \ lmem.o \ lobject.o \ lopcodes.o \ lparser.o \ lstate.o \ lstring.o \ lstrlib.o \ ltable.o \ ltablib.o \ ltm.o \ lvm.o \ lzio.o \ setjmp/setjmp.o zfs-objs += $(addprefix lua/,$(LUA_OBJS)) NVPAIR_OBJS := \ fnvpair.o \ nvpair.o \ nvpair_alloc_fixed.o \ nvpair_alloc_spl.o zfs-objs += $(addprefix nvpair/,$(NVPAIR_OBJS)) UNICODE_OBJS := \ u8_textprep.o \ uconv.o zfs-objs += $(addprefix unicode/,$(UNICODE_OBJS)) ZCOMMON_OBJS := \ cityhash.o \ zfeature_common.o \ zfs_comutil.o \ zfs_deleg.o \ zfs_fletcher.o \ zfs_fletcher_superscalar.o \ zfs_fletcher_superscalar4.o \ zfs_namecheck.o \ zfs_prop.o \ zpool_prop.o \ zprop_common.o ZCOMMON_OBJS_X86 := \ zfs_fletcher_avx512.o \ zfs_fletcher_intel.o \ zfs_fletcher_sse.o ZCOMMON_OBJS_ARM64 := \ zfs_fletcher_aarch64_neon.o zfs-objs += $(addprefix zcommon/,$(ZCOMMON_OBJS)) zfs-$(CONFIG_X86) += $(addprefix zcommon/,$(ZCOMMON_OBJS_X86)) zfs-$(CONFIG_UML_X86)+= $(addprefix zcommon/,$(ZCOMMON_OBJS_X86)) zfs-$(CONFIG_ARM64) += $(addprefix zcommon/,$(ZCOMMON_OBJS_ARM64)) # Zstd uses -O3 by default, so we should follow ZFS_ZSTD_FLAGS := -O3 # -fno-tree-vectorize gets set for gcc in zstd/common/compiler.h # Set it for other compilers, too. ZFS_ZSTD_FLAGS += -fno-tree-vectorize # SSE register return with SSE disabled if -march=znverX is passed ZFS_ZSTD_FLAGS += -U__BMI__ # Quiet warnings about frame size due to unused code in unmodified zstd lib ZFS_ZSTD_FLAGS += -Wframe-larger-than=20480 ZSTD_OBJS := \ zfs_zstd.o \ zstd_sparc.o ZSTD_UPSTREAM_OBJS := \ lib/common/entropy_common.o \ lib/common/error_private.o \ lib/common/fse_decompress.o \ lib/common/pool.o \ lib/common/zstd_common.o \ lib/compress/fse_compress.o \ lib/compress/hist.o \ lib/compress/huf_compress.o \ lib/compress/zstd_compress.o \ lib/compress/zstd_compress_literals.o \ lib/compress/zstd_compress_sequences.o \ lib/compress/zstd_compress_superblock.o \ lib/compress/zstd_double_fast.o \ lib/compress/zstd_fast.o \ lib/compress/zstd_lazy.o \ lib/compress/zstd_ldm.o \ lib/compress/zstd_opt.o \ lib/decompress/huf_decompress.o \ lib/decompress/zstd_ddict.o \ lib/decompress/zstd_decompress.o \ lib/decompress/zstd_decompress_block.o zfs-objs += $(addprefix zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) # Disable aarch64 neon SIMD instructions for kernel mode $(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -I$(zstd_include) $(ZFS_ZSTD_FLAGS) $(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : asflags-y += -I$(zstd_include) $(addprefix $(obj)/zstd/,$(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -include $(zstd_include)/aarch64_compat.h -include $(zstd_include)/zstd_compat_wrapper.h -Wp,-w $(obj)/zstd/zfs_zstd.o : ccflags-y += -include $(zstd_include)/zstd_compat_wrapper.h ZFS_OBJS := \ abd.o \ aggsum.o \ arc.o \ blake3_zfs.o \ blkptr.o \ bplist.o \ bpobj.o \ bptree.o \ bqueue.o \ btree.o \ dataset_kstats.o \ dbuf.o \ dbuf_stats.o \ ddt.o \ ddt_zap.o \ dmu.o \ dmu_diff.o \ dmu_object.o \ dmu_objset.o \ dmu_recv.o \ dmu_redact.o \ dmu_send.o \ dmu_traverse.o \ dmu_tx.o \ dmu_zfetch.o \ dnode.o \ dnode_sync.o \ dsl_bookmark.o \ dsl_crypt.o \ dsl_dataset.o \ dsl_deadlist.o \ dsl_deleg.o \ dsl_destroy.o \ dsl_dir.o \ dsl_pool.o \ dsl_prop.o \ dsl_scan.o \ dsl_synctask.o \ dsl_userhold.o \ edonr_zfs.o \ fm.o \ gzip.o \ hkdf.o \ lz4.o \ lz4_zfs.o \ lzjb.o \ metaslab.o 
\ mmp.o \ multilist.o \ objlist.o \ pathname.o \ range_tree.o \ refcount.o \ rrwlock.o \ sa.o \ sha256.o \ skein_zfs.o \ spa.o \ spa_checkpoint.o \ spa_config.o \ spa_errlog.o \ spa_history.o \ spa_log_spacemap.o \ spa_misc.o \ spa_stats.o \ space_map.o \ space_reftree.o \ txg.o \ uberblock.o \ unique.o \ vdev.o \ vdev_cache.o \ vdev_draid.o \ vdev_draid_rand.o \ vdev_indirect.o \ vdev_indirect_births.o \ vdev_indirect_mapping.o \ vdev_initialize.o \ vdev_label.o \ vdev_mirror.o \ vdev_missing.o \ vdev_queue.o \ vdev_raidz.o \ vdev_raidz_math.o \ vdev_raidz_math_scalar.o \ vdev_rebuild.o \ vdev_removal.o \ vdev_root.o \ vdev_trim.o \ zap.o \ zap_leaf.o \ zap_micro.o \ zcp.o \ zcp_get.o \ zcp_global.o \ zcp_iter.o \ zcp_set.o \ zcp_synctask.o \ zfeature.o \ zfs_byteswap.o \ zfs_chksum.o \ zfs_fm.o \ zfs_fuid.o \ zfs_ioctl.o \ zfs_log.o \ zfs_onexit.o \ zfs_quota.o \ zfs_ratelimit.o \ zfs_replay.o \ zfs_rlock.o \ zfs_sa.o \ zfs_vnops.o \ zil.o \ zio.o \ zio_checksum.o \ zio_compress.o \ zio_inject.o \ zle.o \ zrlock.o \ zthr.o \ zvol.o ZFS_OBJS_OS := \ abd_os.o \ arc_os.o \ mmp_os.o \ policy.o \ qat.o \ qat_compress.o \ qat_crypt.o \ spa_misc_os.o \ trace.o \ vdev_disk.o \ vdev_file.o \ zfs_acl.o \ zfs_ctldir.o \ zfs_debug.o \ zfs_dir.o \ zfs_file_os.o \ zfs_ioctl_os.o \ zfs_racct.o \ zfs_sysfs.o \ zfs_uio.o \ zfs_vfsops.o \ zfs_vnops_os.o \ zfs_znode.o \ zio_crypt.o \ zpl_ctldir.o \ zpl_export.o \ zpl_file.o \ zpl_inode.o \ zpl_super.o \ zpl_xattr.o \ zvol_os.o ZFS_OBJS_X86 := \ vdev_raidz_math_avx2.o \ vdev_raidz_math_avx512bw.o \ vdev_raidz_math_avx512f.o \ vdev_raidz_math_sse2.o \ vdev_raidz_math_ssse3.o ZFS_OBJS_ARM64 := \ vdev_raidz_math_aarch64_neon.o \ vdev_raidz_math_aarch64_neonx2.o ZFS_OBJS_PPC_PPC64 := \ vdev_raidz_math_powerpc_altivec.o zfs-objs += $(addprefix zfs/,$(ZFS_OBJS)) $(addprefix os/linux/zfs/,$(ZFS_OBJS_OS)) zfs-$(CONFIG_X86) += $(addprefix zfs/,$(ZFS_OBJS_X86)) zfs-$(CONFIG_UML_X86)+= $(addprefix zfs/,$(ZFS_OBJS_X86)) zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64)) zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64)) zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64)) # Suppress incorrect warnings from versions of objtool which are not # aware of x86 EVEX prefix instructions used for AVX512. OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512f.o := y ifeq ($(CONFIG_ALTIVEC),y) $(obj)/zfs/vdev_raidz_math_powerpc_altivec.o : c_flags += -maltivec endif diff --git a/module/icp/algs/edonr/edonr.c b/module/icp/algs/edonr/edonr.c index 345133d7433a..b1f710cc0439 100644 --- a/module/icp/algs/edonr/edonr.c +++ b/module/icp/algs/edonr/edonr.c @@ -1,753 +1,755 @@ /* * IDI,NTNU * * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Copyright (C) 2009, 2010, Jorn Amundsen * Tweaked Edon-R implementation for SUPERCOP, based on NIST API. * * $Id: edonr.c 517 2013-02-17 20:34:39Z joern $ */ /* * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved */ /* * Unlike sha2 or skein, we won't expose edonr via the Kernel Cryptographic * Framework (KCF), because Edon-R is *NOT* suitable for general-purpose * cryptographic use. Users of Edon-R must interface directly to this module. */ #include #include #include /* big endian support, provides no-op's if run on little endian hosts */ #include "edonr_byteorder.h" #define hashState224(x) ((x)->pipe->p256) #define hashState256(x) ((x)->pipe->p256) #define hashState384(x) ((x)->pipe->p512) #define hashState512(x) ((x)->pipe->p512) /* rotate shortcuts */ #define rotl32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) #define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) #define rotl64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) #define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) #if !defined(__C99_RESTRICT) #define restrict /* restrict */ #endif #define EDONR_VALID_HASHBITLEN(x) \ ((x) == 512 || (x) == 384 || (x) == 256 || (x) == 224) /* EdonR224 initial double chaining pipe */ static const uint32_t i224p2[16] = { 0x00010203ul, 0x04050607ul, 0x08090a0bul, 0x0c0d0e0ful, 0x10111213ul, 0x14151617ul, 0x18191a1bul, 0x1c1d1e1ful, 0x20212223ul, 0x24252627ul, 0x28292a2bul, 0x2c2d2e2ful, 0x30313233ul, 0x34353637ul, 0x38393a3bul, 0x3c3d3e3ful, }; /* EdonR256 initial double chaining pipe */ static const uint32_t i256p2[16] = { 0x40414243ul, 0x44454647ul, 0x48494a4bul, 0x4c4d4e4ful, 0x50515253ul, 0x54555657ul, 0x58595a5bul, 0x5c5d5e5ful, 0x60616263ul, 0x64656667ul, 0x68696a6bul, 0x6c6d6e6ful, 0x70717273ul, 0x74757677ul, 0x78797a7bul, 0x7c7d7e7ful, }; /* EdonR384 initial double chaining pipe */ static const uint64_t i384p2[16] = { 0x0001020304050607ull, 0x08090a0b0c0d0e0full, 0x1011121314151617ull, 0x18191a1b1c1d1e1full, 0x2021222324252627ull, 0x28292a2b2c2d2e2full, 0x3031323334353637ull, 0x38393a3b3c3d3e3full, 0x4041424344454647ull, 0x48494a4b4c4d4e4full, 0x5051525354555657ull, 0x58595a5b5c5d5e5full, 0x6061626364656667ull, 0x68696a6b6c6d6e6full, 0x7071727374757677ull, 0x78797a7b7c7d7e7full }; /* EdonR512 initial double chaining pipe */ static const uint64_t i512p2[16] = { 0x8081828384858687ull, 0x88898a8b8c8d8e8full, 0x9091929394959697ull, 0x98999a9b9c9d9e9full, 0xa0a1a2a3a4a5a6a7ull, 0xa8a9aaabacadaeafull, 0xb0b1b2b3b4b5b6b7ull, 0xb8b9babbbcbdbebfull, 0xc0c1c2c3c4c5c6c7ull, 0xc8c9cacbcccdcecfull, 0xd0d1d2d3d4d5d6d7ull, 0xd8d9dadbdcdddedfull, 0xe0e1e2e3e4e5e6e7ull, 0xe8e9eaebecedeeefull, 0xf0f1f2f3f4f5f6f7ull, 0xf8f9fafbfcfdfeffull }; /* * First Latin Square * 0 7 1 3 2 4 6 5 * 4 1 7 6 3 0 5 2 * 7 0 4 2 5 3 1 6 * 1 4 0 5 6 2 7 3 * 2 3 6 7 1 5 0 4 * 5 2 3 1 7 6 4 0 * 3 6 5 0 4 7 2 1 * 6 5 2 4 0 1 3 7 */ #define LS1_256(c, x0, x1, x2, x3, x4, x5, x6, x7) \ { \ uint32_t x04, x17, x23, x56, x07, x26; \ x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \ s0 = c + x07 + x2; \ s1 = rotl32(x07 + x3, 4); \ s2 = rotl32(x07 + x6, 8); \ x23 = x2 + x3; \ s5 = rotl32(x04 + x23 + x5, 22); \ x56 = x5 + x6; \ s6 = rotl32(x17 + x56 + x0, 24); \ x26 = x23+x56; \ s3 = rotl32(x26 + x7, 13); \ s4 = rotl32(x26 + x1, 17); \ s7 = rotl32(x26 + x4, 29); \ } #define LS1_512(c, x0, x1, x2, x3, 
x4, x5, x6, x7) \ { \ uint64_t x04, x17, x23, x56, x07, x26; \ x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \ s0 = c + x07 + x2; \ s1 = rotl64(x07 + x3, 5); \ s2 = rotl64(x07 + x6, 15); \ x23 = x2 + x3; \ s5 = rotl64(x04 + x23 + x5, 40); \ x56 = x5 + x6; \ s6 = rotl64(x17 + x56 + x0, 50); \ x26 = x23+x56; \ s3 = rotl64(x26 + x7, 22); \ s4 = rotl64(x26 + x1, 31); \ s7 = rotl64(x26 + x4, 59); \ } /* * Second Orthogonal Latin Square * 0 4 2 3 1 6 5 7 * 7 6 3 2 5 4 1 0 * 5 3 1 6 0 2 7 4 * 1 0 5 4 3 7 2 6 * 2 1 0 7 4 5 6 3 * 3 5 7 0 6 1 4 2 * 4 7 6 1 2 0 3 5 * 6 2 4 5 7 3 0 1 */ #define LS2_256(c, y0, y1, y2, y3, y4, y5, y6, y7) \ { \ uint32_t y01, y25, y34, y67, y04, y05, y27, y37; \ y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \ t0 = ~c + y05 + y7; \ t2 = rotl32(y05 + y3, 9); \ y34 = y3+y4, y04 = y01+y34; \ t1 = rotl32(y04 + y6, 5); \ t4 = rotl32(y04 + y5, 15); \ y67 = y6+y7, y37 = y34+y67; \ t3 = rotl32(y37 + y2, 11); \ t7 = rotl32(y37 + y0, 27); \ y27 = y25+y67; \ t5 = rotl32(y27 + y4, 20); \ t6 = rotl32(y27 + y1, 25); \ } #define LS2_512(c, y0, y1, y2, y3, y4, y5, y6, y7) \ { \ uint64_t y01, y25, y34, y67, y04, y05, y27, y37; \ y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \ t0 = ~c + y05 + y7; \ t2 = rotl64(y05 + y3, 19); \ y34 = y3+y4, y04 = y01+y34; \ t1 = rotl64(y04 + y6, 10); \ t4 = rotl64(y04 + y5, 36); \ y67 = y6+y7, y37 = y34+y67; \ t3 = rotl64(y37 + y2, 29); \ t7 = rotl64(y37 + y0, 55); \ y27 = y25+y67; \ t5 = rotl64(y27 + y4, 44); \ t6 = rotl64(y27 + y1, 48); \ } #define quasi_exform256(r0, r1, r2, r3, r4, r5, r6, r7) \ { \ uint32_t s04, s17, s23, s56, t01, t25, t34, t67; \ s04 = s0 ^ s4, t01 = t0 ^ t1; \ r0 = (s04 ^ s1) + (t01 ^ t5); \ t67 = t6 ^ t7; \ r1 = (s04 ^ s7) + (t2 ^ t67); \ s23 = s2 ^ s3; \ r7 = (s23 ^ s5) + (t4 ^ t67); \ t34 = t3 ^ t4; \ r3 = (s23 ^ s4) + (t0 ^ t34); \ s56 = s5 ^ s6; \ r5 = (s3 ^ s56) + (t34 ^ t6); \ t25 = t2 ^ t5; \ r6 = (s2 ^ s56) + (t25 ^ t7); \ s17 = s1 ^ s7; \ r4 = (s0 ^ s17) + (t1 ^ t25); \ r2 = (s17 ^ s6) + (t01 ^ t3); \ } #define quasi_exform512(r0, r1, r2, r3, r4, r5, r6, r7) \ { \ uint64_t s04, s17, s23, s56, t01, t25, t34, t67; \ s04 = s0 ^ s4, t01 = t0 ^ t1; \ r0 = (s04 ^ s1) + (t01 ^ t5); \ t67 = t6 ^ t7; \ r1 = (s04 ^ s7) + (t2 ^ t67); \ s23 = s2 ^ s3; \ r7 = (s23 ^ s5) + (t4 ^ t67); \ t34 = t3 ^ t4; \ r3 = (s23 ^ s4) + (t0 ^ t34); \ s56 = s5 ^ s6; \ r5 = (s3 ^ s56) + (t34 ^ t6); \ t25 = t2 ^ t5; \ r6 = (s2 ^ s56) + (t25 ^ t7); \ s17 = s1 ^ s7; \ r4 = (s0 ^ s17) + (t1 ^ t25); \ r2 = (s17 ^ s6) + (t01 ^ t3); \ } static size_t Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p) { size_t bl; for (bl = bitlen; bl >= EdonR256_BLOCK_BITSIZE; bl -= EdonR256_BLOCK_BITSIZE, data += 16) { uint32_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4, t5, t6, t7; uint32_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4, q5, q6, q7; const uint32_t defix = 0xaaaaaaaa; #if defined(MACHINE_IS_BIG_ENDIAN) uint32_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8, swp9, swp10, swp11, swp12, swp13, swp14, swp15; #define d(j) swp ## j #define s32(j) ld_swap32((uint32_t *)data + j, swp ## j) #else #define d(j) data[j] #endif /* First row of quasigroup e-transformations */ #if defined(MACHINE_IS_BIG_ENDIAN) s32(8); s32(9); s32(10); s32(11); s32(12); s32(13); s32(14); s32(15); #endif LS1_256(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9), d(8)); #if defined(MACHINE_IS_BIG_ENDIAN) s32(0); s32(1); s32(2); s32(3); s32(4); s32(5); s32(6); s32(7); #undef s32 #endif LS2_256(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7)); quasi_exform256(p0, p1, 
p2, p3, p4, p5, p6, p7); LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_256(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14), d(15)); quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); /* Second row of quasigroup e-transformations */ LS1_256(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7); quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); /* Third row of quasigroup e-transformations */ LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_256(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]); quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); LS1_256(defix, q0, q1, q2, q3, q4, q5, q6, q7); LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); /* Fourth row of quasigroup e-transformations */ LS1_256(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0)); LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7); quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); /* Edon-R tweak on the original SHA-3 Edon-R submission. */ p[0] ^= d(8) ^ p0; p[1] ^= d(9) ^ p1; p[2] ^= d(10) ^ p2; p[3] ^= d(11) ^ p3; p[4] ^= d(12) ^ p4; p[5] ^= d(13) ^ p5; p[6] ^= d(14) ^ p6; p[7] ^= d(15) ^ p7; p[8] ^= d(0) ^ q0; p[9] ^= d(1) ^ q1; p[10] ^= d(2) ^ q2; p[11] ^= d(3) ^ q3; p[12] ^= d(4) ^ q4; p[13] ^= d(5) ^ q5; p[14] ^= d(6) ^ q6; p[15] ^= d(7) ^ q7; } #undef d return (bitlen - bl); } /* * Why is this #pragma here? * * Checksum functions like this one can go over the stack frame size check * Linux imposes on 32-bit platforms (-Wframe-larger-than=1024). We can * safely ignore the compiler error since we know that in OpenZFS, the * function will be called from a worker thread that won't be using * much stack. The only function that goes over the 1k limit is Q512(), * which only goes over it by a hair (1248 bytes on ARM32).
*/ #include /* for _ILP32 */ -#ifdef _ILP32 /* We're 32-bit, assume small stack frames */ +#if defined(_ILP32) /* We're 32-bit, assume small stack frames */ +#if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic ignored "-Wframe-larger-than=" #endif +#endif #if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__) static inline size_t #else static size_t #endif Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p) { size_t bl; for (bl = bitlen; bl >= EdonR512_BLOCK_BITSIZE; bl -= EdonR512_BLOCK_BITSIZE, data += 16) { uint64_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4, t5, t6, t7; uint64_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4, q5, q6, q7; const uint64_t defix = 0xaaaaaaaaaaaaaaaaull; #if defined(MACHINE_IS_BIG_ENDIAN) uint64_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8, swp9, swp10, swp11, swp12, swp13, swp14, swp15; #define d(j) swp##j #define s64(j) ld_swap64((uint64_t *)data+j, swp##j) #else #define d(j) data[j] #endif /* First row of quasigroup e-transformations */ #if defined(MACHINE_IS_BIG_ENDIAN) s64(8); s64(9); s64(10); s64(11); s64(12); s64(13); s64(14); s64(15); #endif LS1_512(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9), d(8)); #if defined(MACHINE_IS_BIG_ENDIAN) s64(0); s64(1); s64(2); s64(3); s64(4); s64(5); s64(6); s64(7); #undef s64 #endif LS2_512(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7)); quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_512(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14), d(15)); quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); /* Second row of quasigroup e-transformations */ LS1_512(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); /* Third row of quasigroup e-transformations */ LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_512(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]); quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); LS1_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); /* Fourth row of quasigroup e-transformations */ LS1_512(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0)); LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); /* Edon-R tweak on the original SHA-3 Edon-R submission. 
*/ p[0] ^= d(8) ^ p0; p[1] ^= d(9) ^ p1; p[2] ^= d(10) ^ p2; p[3] ^= d(11) ^ p3; p[4] ^= d(12) ^ p4; p[5] ^= d(13) ^ p5; p[6] ^= d(14) ^ p6; p[7] ^= d(15) ^ p7; p[8] ^= d(0) ^ q0; p[9] ^= d(1) ^ q1; p[10] ^= d(2) ^ q2; p[11] ^= d(3) ^ q3; p[12] ^= d(4) ^ q4; p[13] ^= d(5) ^ q5; p[14] ^= d(6) ^ q6; p[15] ^= d(7) ^ q7; } #undef d return (bitlen - bl); } void EdonRInit(EdonRState *state, size_t hashbitlen) { ASSERT(EDONR_VALID_HASHBITLEN(hashbitlen)); switch (hashbitlen) { case 224: state->hashbitlen = 224; state->bits_processed = 0; state->unprocessed_bits = 0; memcpy(hashState224(state)->DoublePipe, i224p2, sizeof (i224p2)); break; case 256: state->hashbitlen = 256; state->bits_processed = 0; state->unprocessed_bits = 0; memcpy(hashState256(state)->DoublePipe, i256p2, sizeof (i256p2)); break; case 384: state->hashbitlen = 384; state->bits_processed = 0; state->unprocessed_bits = 0; memcpy(hashState384(state)->DoublePipe, i384p2, sizeof (i384p2)); break; case 512: state->hashbitlen = 512; state->bits_processed = 0; state->unprocessed_bits = 0; memcpy(hashState512(state)->DoublePipe, i512p2, sizeof (i512p2)); break; } } void EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen) { uint32_t *data32; uint64_t *data64; size_t bits_processed; ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen)); switch (state->hashbitlen) { case 224: case 256: if (state->unprocessed_bits > 0) { /* LastBytes = databitlen / 8 */ int LastBytes = (int)databitlen >> 3; ASSERT(state->unprocessed_bits + databitlen <= EdonR256_BLOCK_SIZE * 8); memcpy(hashState256(state)->LastPart + (state->unprocessed_bits >> 3), data, LastBytes); state->unprocessed_bits += (int)databitlen; databitlen = state->unprocessed_bits; /* LINTED E_BAD_PTR_CAST_ALIGN */ data32 = (uint32_t *)hashState256(state)->LastPart; } else /* LINTED E_BAD_PTR_CAST_ALIGN */ data32 = (uint32_t *)data; bits_processed = Q256(databitlen, data32, hashState256(state)->DoublePipe); state->bits_processed += bits_processed; databitlen -= bits_processed; state->unprocessed_bits = (int)databitlen; if (databitlen > 0) { /* LastBytes = Ceil(databitlen / 8) */ int LastBytes = ((~(((-(int)databitlen) >> 3) & 0x01ff)) + 1) & 0x01ff; data32 += bits_processed >> 5; /* byte size update */ memmove(hashState256(state)->LastPart, data32, LastBytes); } break; case 384: case 512: if (state->unprocessed_bits > 0) { /* LastBytes = databitlen / 8 */ int LastBytes = (int)databitlen >> 3; ASSERT(state->unprocessed_bits + databitlen <= EdonR512_BLOCK_SIZE * 8); memcpy(hashState512(state)->LastPart + (state->unprocessed_bits >> 3), data, LastBytes); state->unprocessed_bits += (int)databitlen; databitlen = state->unprocessed_bits; /* LINTED E_BAD_PTR_CAST_ALIGN */ data64 = (uint64_t *)hashState512(state)->LastPart; } else /* LINTED E_BAD_PTR_CAST_ALIGN */ data64 = (uint64_t *)data; bits_processed = Q512(databitlen, data64, hashState512(state)->DoublePipe); state->bits_processed += bits_processed; databitlen -= bits_processed; state->unprocessed_bits = (int)databitlen; if (databitlen > 0) { /* LastBytes = Ceil(databitlen / 8) */ int LastBytes = ((~(((-(int)databitlen) >> 3) & 0x03ff)) + 1) & 0x03ff; data64 += bits_processed >> 6; /* byte size update */ memmove(hashState512(state)->LastPart, data64, LastBytes); } break; } } void EdonRFinal(EdonRState *state, uint8_t *hashval) { uint32_t *data32; uint64_t *data64, num_bits; size_t databitlen; int LastByte, PadOnePosition; num_bits = state->bits_processed + state->unprocessed_bits; ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen)); 
switch (state->hashbitlen) { case 224: case 256: LastByte = (int)state->unprocessed_bits >> 3; PadOnePosition = 7 - (state->unprocessed_bits & 0x07); hashState256(state)->LastPart[LastByte] = (hashState256(state)->LastPart[LastByte] & (0xff << (PadOnePosition + 1))) ^ (0x01 << PadOnePosition); /* LINTED E_BAD_PTR_CAST_ALIGN */ data64 = (uint64_t *)hashState256(state)->LastPart; if (state->unprocessed_bits < 448) { (void) memset((hashState256(state)->LastPart) + LastByte + 1, 0x00, EdonR256_BLOCK_SIZE - LastByte - 9); databitlen = EdonR256_BLOCK_SIZE * 8; #if defined(MACHINE_IS_BIG_ENDIAN) st_swap64(num_bits, data64 + 7); #else data64[7] = num_bits; #endif } else { (void) memset((hashState256(state)->LastPart) + LastByte + 1, 0x00, EdonR256_BLOCK_SIZE * 2 - LastByte - 9); databitlen = EdonR256_BLOCK_SIZE * 16; #if defined(MACHINE_IS_BIG_ENDIAN) st_swap64(num_bits, data64 + 15); #else data64[15] = num_bits; #endif } /* LINTED E_BAD_PTR_CAST_ALIGN */ data32 = (uint32_t *)hashState256(state)->LastPart; state->bits_processed += Q256(databitlen, data32, hashState256(state)->DoublePipe); break; case 384: case 512: LastByte = (int)state->unprocessed_bits >> 3; PadOnePosition = 7 - (state->unprocessed_bits & 0x07); hashState512(state)->LastPart[LastByte] = (hashState512(state)->LastPart[LastByte] & (0xff << (PadOnePosition + 1))) ^ (0x01 << PadOnePosition); /* LINTED E_BAD_PTR_CAST_ALIGN */ data64 = (uint64_t *)hashState512(state)->LastPart; if (state->unprocessed_bits < 960) { (void) memset((hashState512(state)->LastPart) + LastByte + 1, 0x00, EdonR512_BLOCK_SIZE - LastByte - 9); databitlen = EdonR512_BLOCK_SIZE * 8; #if defined(MACHINE_IS_BIG_ENDIAN) st_swap64(num_bits, data64 + 15); #else data64[15] = num_bits; #endif } else { (void) memset((hashState512(state)->LastPart) + LastByte + 1, 0x00, EdonR512_BLOCK_SIZE * 2 - LastByte - 9); databitlen = EdonR512_BLOCK_SIZE * 16; #if defined(MACHINE_IS_BIG_ENDIAN) st_swap64(num_bits, data64 + 31); #else data64[31] = num_bits; #endif } state->bits_processed += Q512(databitlen, data64, hashState512(state)->DoublePipe); break; } switch (state->hashbitlen) { case 224: { #if defined(MACHINE_IS_BIG_ENDIAN) uint32_t *d32 = (uint32_t *)hashval; uint32_t *s32 = hashState224(state)->DoublePipe + 9; int j; for (j = 0; j < EdonR224_DIGEST_SIZE >> 2; j++) st_swap32(s32[j], d32 + j); #else memcpy(hashval, hashState256(state)->DoublePipe + 9, EdonR224_DIGEST_SIZE); #endif break; } case 256: { #if defined(MACHINE_IS_BIG_ENDIAN) uint32_t *d32 = (uint32_t *)hashval; uint32_t *s32 = hashState224(state)->DoublePipe + 8; int j; for (j = 0; j < EdonR256_DIGEST_SIZE >> 2; j++) st_swap32(s32[j], d32 + j); #else memcpy(hashval, hashState256(state)->DoublePipe + 8, EdonR256_DIGEST_SIZE); #endif break; } case 384: { #if defined(MACHINE_IS_BIG_ENDIAN) uint64_t *d64 = (uint64_t *)hashval; uint64_t *s64 = hashState384(state)->DoublePipe + 10; int j; for (j = 0; j < EdonR384_DIGEST_SIZE >> 3; j++) st_swap64(s64[j], d64 + j); #else memcpy(hashval, hashState384(state)->DoublePipe + 10, EdonR384_DIGEST_SIZE); #endif break; } case 512: { #if defined(MACHINE_IS_BIG_ENDIAN) uint64_t *d64 = (uint64_t *)hashval; uint64_t *s64 = hashState512(state)->DoublePipe + 8; int j; for (j = 0; j < EdonR512_DIGEST_SIZE >> 3; j++) st_swap64(s64[j], d64 + j); #else memcpy(hashval, hashState512(state)->DoublePipe + 8, EdonR512_DIGEST_SIZE); #endif break; } } } void EdonRHash(size_t hashbitlen, const uint8_t *data, size_t databitlen, uint8_t *hashval) { EdonRState state; EdonRInit(&state, hashbitlen); 
EdonRUpdate(&state, data, databitlen); EdonRFinal(&state, hashval); } #ifdef _KERNEL EXPORT_SYMBOL(EdonRInit); EXPORT_SYMBOL(EdonRUpdate); EXPORT_SYMBOL(EdonRHash); EXPORT_SYMBOL(EdonRFinal); #endif diff --git a/module/icp/algs/skein/skein_block.c b/module/icp/algs/skein/skein_block.c index 7ba165a48511..3ad52da5f6a3 100644 --- a/module/icp/algs/skein/skein_block.c +++ b/module/icp/algs/skein/skein_block.c @@ -1,790 +1,792 @@ /* * Implementation of the Skein block functions. * Source code author: Doug Whiting, 2008. * This algorithm and source code is released to the public domain. * Compile-time switches: * SKEIN_USE_ASM -- set bits (256/512/1024) to select which * versions use ASM code for block processing * [default: use C for all block sizes] */ /* Copyright 2013 Doug Whiting. This code is released to the public domain. */ #include #include "skein_impl.h" #include /* for _ILP32 */ #ifndef SKEIN_USE_ASM #define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */ #endif #ifndef SKEIN_LOOP /* * The low-level checksum routines use a lot of stack space. On systems where * small stacks frame are enforced (like 32-bit kernel builds), do not unroll * checksum calculations to save stack space. * * Even with no loops unrolled, we still can exceed the 1k stack frame limit * in Skein1024_Process_Block() (it hits 1272 bytes on ARM32). We can * safely ignore it though, since that the checksum functions will be called * from a worker thread that won't be using much stack. That's why we have * the #pragma here to ignore the warning. */ #if defined(_ILP32) || defined(__powerpc) /* Assume small stack */ +#if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic ignored "-Wframe-larger-than=" +#endif /* * We're running on 32-bit, don't unroll loops to save stack frame space * * Due to the ways the calculations on SKEIN_LOOP are done in * Skein_*_Process_Block(), a value of 111 disables unrolling loops * in any of those functions. */ #define SKEIN_LOOP 111 #else /* We're compiling with large stacks */ #define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */ #endif #endif /* some useful definitions for code here */ #define BLK_BITS (WCNT*64) #define KW_TWK_BASE (0) #define KW_KEY_BASE (3) #define ks (kw + KW_KEY_BASE) #define ts (kw + KW_TWK_BASE) /* no debugging in Illumos version */ #define DebugSaveTweak(ctx) /* Skein_256 */ #if !(SKEIN_USE_ASM & 256) void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr, size_t blkCnt, size_t byteCntAdd) { enum { WCNT = SKEIN_256_STATE_WORDS }; #undef RCNT #define RCNT (SKEIN_256_ROUNDS_TOTAL / 8) #ifdef SKEIN_LOOP /* configure how much to unroll the loop */ #define SKEIN_UNROLL_256 (((SKEIN_LOOP) / 100) % 10) #else #define SKEIN_UNROLL_256 (0) #endif #if SKEIN_UNROLL_256 #if (RCNT % SKEIN_UNROLL_256) #error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */ #endif size_t r; /* key schedule words : chaining vars + tweak + "rotation" */ uint64_t kw[WCNT + 4 + RCNT * 2]; #else uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */ #endif /* local copy of context vars, for speed */ uint64_t X0, X1, X2, X3; uint64_t w[WCNT]; /* local copy of input block */ #ifdef SKEIN_DEBUG /* use for debugging (help compiler put Xn in registers) */ const uint64_t *Xptr[4]; Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; #endif Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ ts[0] = ctx->h.T[0]; ts[1] = ctx->h.T[1]; do { /* * this implementation only supports 2**64 input bytes * (no carry out here) */ ts[0] += byteCntAdd; /* update processed length */ /* precompute the key schedule for this block */ ks[0] = ctx->X[0]; ks[1] = ctx->X[1]; ks[2] = ctx->X[2]; ks[3] = ctx->X[3]; ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY; ts[2] = ts[0] ^ ts[1]; /* get input block in little-endian format */ Skein_Get64_LSB_First(w, blkPtr, WCNT); DebugSaveTweak(ctx); Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); X0 = w[0] + ks[0]; /* do the first full key injection */ X1 = w[1] + ks[1] + ts[0]; X2 = w[2] + ks[2] + ts[1]; X3 = w[3] + ks[3]; Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr); /* show starting state values */ blkPtr += SKEIN_256_BLOCK_BYTES; /* run the rounds */ #define Round256(p0, p1, p2, p3, ROT, rNum) \ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \ #if SKEIN_UNROLL_256 == 0 #define R256(p0, p1, p2, p3, ROT, rNum) /* fully unrolled */ \ Round256(p0, p1, p2, p3, ROT, rNum) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr); #define I256(R) \ X0 += ks[((R) + 1) % 5]; /* inject the key schedule value */ \ X1 += ks[((R) + 2) % 5] + ts[((R) + 1) % 3]; \ X2 += ks[((R) + 3) % 5] + ts[((R) + 2) % 3]; \ X3 += ks[((R) + 4) % 5] + (R) + 1; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); #else /* looping version */ #define R256(p0, p1, p2, p3, ROT, rNum) \ Round256(p0, p1, p2, p3, ROT, rNum) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr); #define I256(R) \ X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \ X1 += ks[r + (R) + 1] + ts[r + (R) + 0]; \ X2 += ks[r + (R) + 2] + ts[r + (R) + 1]; \ X3 += ks[r + (R) + 3] + r + (R); \ ks[r + (R) + 4] = ks[r + (R) - 1]; /* rotate key schedule */ \ ts[r + (R) + 2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); /* loop through it */ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256) #endif { #define R256_8_rounds(R) \ R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1); \ R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2); \ R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3); \ R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4); \ I256(2 * (R)); \ R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5); \ R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6); \ R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7); \ R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8); \ I256(2 * (R) + 1); R256_8_rounds(0); #define R256_Unroll_R(NN) \ ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL / 8 > (NN)) || \ (SKEIN_UNROLL_256 > (NN))) #if R256_Unroll_R(1) R256_8_rounds(1); #endif #if R256_Unroll_R(2) R256_8_rounds(2); #endif #if R256_Unroll_R(3) R256_8_rounds(3); #endif #if R256_Unroll_R(4) R256_8_rounds(4); #endif #if R256_Unroll_R(5) R256_8_rounds(5); #endif #if R256_Unroll_R(6) R256_8_rounds(6); #endif #if R256_Unroll_R(7) R256_8_rounds(7); #endif #if R256_Unroll_R(8) R256_8_rounds(8); #endif #if R256_Unroll_R(9) R256_8_rounds(9); #endif #if R256_Unroll_R(10) R256_8_rounds(10); #endif #if R256_Unroll_R(11) R256_8_rounds(11); #endif #if R256_Unroll_R(12) R256_8_rounds(12); #endif #if R256_Unroll_R(13) R256_8_rounds(13); #endif #if R256_Unroll_R(14) R256_8_rounds(14); #endif #if (SKEIN_UNROLL_256 > 14) #error "need more unrolling in Skein_256_Process_Block" #endif } /* * do the final "feedforward" xor, update context chaining vars */ ctx->X[0] = X0 ^ w[0]; ctx->X[1] = X1 ^ w[1]; ctx->X[2] = X2 ^ w[2]; ctx->X[3] = X3 ^ w[3]; 
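		/*
		 * The XORs above are the feedforward that turns Threefish
		 * into a compression function (a Matyas-Meyer-Oseas style
		 * construction): the block w[] was encrypted under the old
		 * chaining value and tweak, and ciphertext XOR plaintext
		 * becomes the new chaining value in ctx->X[].
		 */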
Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X); ts[1] &= ~SKEIN_T1_FLAG_FIRST; } while (--blkCnt); ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) size_t Skein_256_Process_Block_CodeSize(void) { return ((uint8_t *)Skein_256_Process_Block_CodeSize) - ((uint8_t *)Skein_256_Process_Block); } uint_t Skein_256_Unroll_Cnt(void) { return (SKEIN_UNROLL_256); } #endif #endif /* Skein_512 */ #if !(SKEIN_USE_ASM & 512) void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr, size_t blkCnt, size_t byteCntAdd) { enum { WCNT = SKEIN_512_STATE_WORDS }; #undef RCNT #define RCNT (SKEIN_512_ROUNDS_TOTAL / 8) #ifdef SKEIN_LOOP /* configure how much to unroll the loop */ #define SKEIN_UNROLL_512 (((SKEIN_LOOP) / 10) % 10) #else #define SKEIN_UNROLL_512 (0) #endif #if SKEIN_UNROLL_512 #if (RCNT % SKEIN_UNROLL_512) #error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ #endif size_t r; /* key schedule words : chaining vars + tweak + "rotation" */ uint64_t kw[WCNT + 4 + RCNT * 2]; #else uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */ #endif /* local copy of vars, for speed */ uint64_t X0, X1, X2, X3, X4, X5, X6, X7; uint64_t w[WCNT]; /* local copy of input block */ #ifdef SKEIN_DEBUG /* use for debugging (help compiler put Xn in registers) */ const uint64_t *Xptr[8]; Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7; #endif Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */ ts[0] = ctx->h.T[0]; ts[1] = ctx->h.T[1]; do { /* * this implementation only supports 2**64 input bytes * (no carry out here) */ ts[0] += byteCntAdd; /* update processed length */ /* precompute the key schedule for this block */ ks[0] = ctx->X[0]; ks[1] = ctx->X[1]; ks[2] = ctx->X[2]; ks[3] = ctx->X[3]; ks[4] = ctx->X[4]; ks[5] = ctx->X[5]; ks[6] = ctx->X[6]; ks[7] = ctx->X[7]; ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY; ts[2] = ts[0] ^ ts[1]; /* get input block in little-endian format */ Skein_Get64_LSB_First(w, blkPtr, WCNT); DebugSaveTweak(ctx); Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); X0 = w[0] + ks[0]; /* do the first full key injection */ X1 = w[1] + ks[1]; X2 = w[2] + ks[2]; X3 = w[3] + ks[3]; X4 = w[4] + ks[4]; X5 = w[5] + ks[5] + ts[0]; X6 = w[6] + ks[6] + ts[1]; X7 = w[7] + ks[7]; blkPtr += SKEIN_512_BLOCK_BYTES; Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr); /* run the rounds */ #define Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\ X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\ X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6; #if SKEIN_UNROLL_512 == 0 #define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) /* unrolled */ \ Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr); #define I512(R) \ X0 += ks[((R) + 1) % 9]; /* inject the key schedule value */\ X1 += ks[((R) + 2) % 9]; \ X2 += ks[((R) + 3) % 9]; \ X3 += ks[((R) + 4) % 9]; \ X4 += ks[((R) + 5) % 9]; \ X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3]; \ X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3]; \ X7 += ks[((R) + 8) % 9] + (R) + 1; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); #else /* looping version */ #define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, 
rNum) \ Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr); #define I512(R) \ X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \ X1 += ks[r + (R) + 1]; \ X2 += ks[r + (R) + 2]; \ X3 += ks[r + (R) + 3]; \ X4 += ks[r + (R) + 4]; \ X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \ X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \ X7 += ks[r + (R) + 7] + r + (R); \ ks[r + (R)+8] = ks[r + (R) - 1]; /* rotate key schedule */\ ts[r + (R)+2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); /* loop through it */ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512) #endif /* end of looped code definitions */ { #define R512_8_rounds(R) /* do 8 full rounds */ \ R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1); \ R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2); \ R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3); \ R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4); \ I512(2 * (R)); \ R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5); \ R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6); \ R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7); \ R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8); \ I512(2*(R) + 1); /* and key injection */ R512_8_rounds(0); #define R512_Unroll_R(NN) \ ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL / 8 > (NN)) || \ (SKEIN_UNROLL_512 > (NN))) #if R512_Unroll_R(1) R512_8_rounds(1); #endif #if R512_Unroll_R(2) R512_8_rounds(2); #endif #if R512_Unroll_R(3) R512_8_rounds(3); #endif #if R512_Unroll_R(4) R512_8_rounds(4); #endif #if R512_Unroll_R(5) R512_8_rounds(5); #endif #if R512_Unroll_R(6) R512_8_rounds(6); #endif #if R512_Unroll_R(7) R512_8_rounds(7); #endif #if R512_Unroll_R(8) R512_8_rounds(8); #endif #if R512_Unroll_R(9) R512_8_rounds(9); #endif #if R512_Unroll_R(10) R512_8_rounds(10); #endif #if R512_Unroll_R(11) R512_8_rounds(11); #endif #if R512_Unroll_R(12) R512_8_rounds(12); #endif #if R512_Unroll_R(13) R512_8_rounds(13); #endif #if R512_Unroll_R(14) R512_8_rounds(14); #endif #if (SKEIN_UNROLL_512 > 14) #error "need more unrolling in Skein_512_Process_Block" #endif } /* * do the final "feedforward" xor, update context chaining vars */ ctx->X[0] = X0 ^ w[0]; ctx->X[1] = X1 ^ w[1]; ctx->X[2] = X2 ^ w[2]; ctx->X[3] = X3 ^ w[3]; ctx->X[4] = X4 ^ w[4]; ctx->X[5] = X5 ^ w[5]; ctx->X[6] = X6 ^ w[6]; ctx->X[7] = X7 ^ w[7]; Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X); ts[1] &= ~SKEIN_T1_FLAG_FIRST; } while (--blkCnt); ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) size_t Skein_512_Process_Block_CodeSize(void) { return ((uint8_t *)Skein_512_Process_Block_CodeSize) - ((uint8_t *)Skein_512_Process_Block); } uint_t Skein_512_Unroll_Cnt(void) { return (SKEIN_UNROLL_512); } #endif #endif /* Skein1024 */ #if !(SKEIN_USE_ASM & 1024) void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr, size_t blkCnt, size_t byteCntAdd) { /* do it in C, always looping (unrolled is bigger AND slower!) 
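 * As an aside, the SKEIN_LOOP digits decode as follows: the hundreds
 * digit picks the unroll factor for Skein-256, the tens digit for
 * Skein-512, and the ones digit for Skein-1024, with 0 meaning
 * "fully unroll". Hence the default of 001 unrolls 256/512 but loops
 * 1024, and 111 makes all three block functions loop.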
*/ enum { WCNT = SKEIN1024_STATE_WORDS }; #undef RCNT #define RCNT (SKEIN1024_ROUNDS_TOTAL/8) #ifdef SKEIN_LOOP /* configure how much to unroll the loop */ #define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10) #else #define SKEIN_UNROLL_1024 (0) #endif #if (SKEIN_UNROLL_1024 != 0) #if (RCNT % SKEIN_UNROLL_1024) #error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */ #endif size_t r; /* key schedule words : chaining vars + tweak + "rotation" */ uint64_t kw[WCNT + 4 + RCNT * 2]; #else uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */ #endif /* local copy of vars, for speed */ uint64_t X00, X01, X02, X03, X04, X05, X06, X07, X08, X09, X10, X11, X12, X13, X14, X15; uint64_t w[WCNT]; /* local copy of input block */ #ifdef SKEIN_DEBUG /* use for debugging (help compiler put Xn in registers) */ const uint64_t *Xptr[16]; Xptr[0] = &X00; Xptr[1] = &X01; Xptr[2] = &X02; Xptr[3] = &X03; Xptr[4] = &X04; Xptr[5] = &X05; Xptr[6] = &X06; Xptr[7] = &X07; Xptr[8] = &X08; Xptr[9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11; Xptr[12] = &X12; Xptr[13] = &X13; Xptr[14] = &X14; Xptr[15] = &X15; #endif Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */ ts[0] = ctx->h.T[0]; ts[1] = ctx->h.T[1]; do { /* * this implementation only supports 2**64 input bytes * (no carry out here) */ ts[0] += byteCntAdd; /* update processed length */ /* precompute the key schedule for this block */ ks[0] = ctx->X[0]; ks[1] = ctx->X[1]; ks[2] = ctx->X[2]; ks[3] = ctx->X[3]; ks[4] = ctx->X[4]; ks[5] = ctx->X[5]; ks[6] = ctx->X[6]; ks[7] = ctx->X[7]; ks[8] = ctx->X[8]; ks[9] = ctx->X[9]; ks[10] = ctx->X[10]; ks[11] = ctx->X[11]; ks[12] = ctx->X[12]; ks[13] = ctx->X[13]; ks[14] = ctx->X[14]; ks[15] = ctx->X[15]; ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^ ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY; ts[2] = ts[0] ^ ts[1]; /* get input block in little-endian format */ Skein_Get64_LSB_First(w, blkPtr, WCNT); DebugSaveTweak(ctx); Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); X00 = w[0] + ks[0]; /* do the first full key injection */ X01 = w[1] + ks[1]; X02 = w[2] + ks[2]; X03 = w[3] + ks[3]; X04 = w[4] + ks[4]; X05 = w[5] + ks[5]; X06 = w[6] + ks[6]; X07 = w[7] + ks[7]; X08 = w[8] + ks[8]; X09 = w[9] + ks[9]; X10 = w[10] + ks[10]; X11 = w[11] + ks[11]; X12 = w[12] + ks[12]; X13 = w[13] + ks[13] + ts[0]; X14 = w[14] + ks[14] + ts[1]; X15 = w[15] + ks[15]; Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr); #define Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \ pD, pE, pF, ROT, rNum) \ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\ X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\ X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6;\ X##p8 += X##p9; X##p9 = RotL_64(X##p9, ROT##_4); X##p9 ^= X##p8;\ X##pA += X##pB; X##pB = RotL_64(X##pB, ROT##_5); X##pB ^= X##pA;\ X##pC += X##pD; X##pD = RotL_64(X##pD, ROT##_6); X##pD ^= X##pC;\ X##pE += X##pF; X##pF = RotL_64(X##pF, ROT##_7); X##pF ^= X##pE; #if SKEIN_UNROLL_1024 == 0 #define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, \ pE, pF, ROT, rn) \ Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \ pD, pE, pF, ROT, rn) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rn, Xptr); #define I1024(R) \ X00 += ks[((R) + 1) % 17]; /* inject the key schedule value */\ X01 += ks[((R) + 2) % 17]; \ X02 += ks[((R) + 3) % 17]; \ X03 
+= ks[((R) + 4) % 17]; \ X04 += ks[((R) + 5) % 17]; \ X05 += ks[((R) + 6) % 17]; \ X06 += ks[((R) + 7) % 17]; \ X07 += ks[((R) + 8) % 17]; \ X08 += ks[((R) + 9) % 17]; \ X09 += ks[((R) + 10) % 17]; \ X10 += ks[((R) + 11) % 17]; \ X11 += ks[((R) + 12) % 17]; \ X12 += ks[((R) + 13) % 17]; \ X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3]; \ X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3]; \ X15 += ks[((R) + 16) % 17] + (R) +1; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); #else /* looping version */ #define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, \ pE, pF, ROT, rn) \ Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \ pD, pE, pF, ROT, rn) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, Xptr); #define I1024(R) \ X00 += ks[r + (R) + 0]; /* inject the key schedule value */ \ X01 += ks[r + (R) + 1]; \ X02 += ks[r + (R) + 2]; \ X03 += ks[r + (R) + 3]; \ X04 += ks[r + (R) + 4]; \ X05 += ks[r + (R) + 5]; \ X06 += ks[r + (R) + 6]; \ X07 += ks[r + (R) + 7]; \ X08 += ks[r + (R) + 8]; \ X09 += ks[r + (R) + 9]; \ X10 += ks[r + (R) + 10]; \ X11 += ks[r + (R) + 11]; \ X12 += ks[r + (R) + 12]; \ X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \ X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \ X15 += ks[r + (R) + 15] + r + (R); \ ks[r + (R) + 16] = ks[r + (R) - 1]; /* rotate key schedule */\ ts[r + (R) + 2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); /* loop through it */ for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024) #endif { #define R1024_8_rounds(R) /* do 8 full rounds */ \ R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, \ 14, 15, R1024_0, 8 * (R) + 1); \ R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, \ 08, 01, R1024_1, 8 * (R) + 2); \ R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, \ 10, 09, R1024_2, 8 * (R) + 3); \ R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, \ 12, 07, R1024_3, 8 * (R) + 4); \ I1024(2 * (R)); \ R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, \ 14, 15, R1024_4, 8 * (R) + 5); \ R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, \ 08, 01, R1024_5, 8 * (R) + 6); \ R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, \ 10, 09, R1024_6, 8 * (R) + 7); \ R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, \ 12, 07, R1024_7, 8 * (R) + 8); \ I1024(2 * (R) + 1); R1024_8_rounds(0); #define R1024_Unroll_R(NN) \ ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || \ (SKEIN_UNROLL_1024 > (NN))) #if R1024_Unroll_R(1) R1024_8_rounds(1); #endif #if R1024_Unroll_R(2) R1024_8_rounds(2); #endif #if R1024_Unroll_R(3) R1024_8_rounds(3); #endif #if R1024_Unroll_R(4) R1024_8_rounds(4); #endif #if R1024_Unroll_R(5) R1024_8_rounds(5); #endif #if R1024_Unroll_R(6) R1024_8_rounds(6); #endif #if R1024_Unroll_R(7) R1024_8_rounds(7); #endif #if R1024_Unroll_R(8) R1024_8_rounds(8); #endif #if R1024_Unroll_R(9) R1024_8_rounds(9); #endif #if R1024_Unroll_R(10) R1024_8_rounds(10); #endif #if R1024_Unroll_R(11) R1024_8_rounds(11); #endif #if R1024_Unroll_R(12) R1024_8_rounds(12); #endif #if R1024_Unroll_R(13) R1024_8_rounds(13); #endif #if R1024_Unroll_R(14) R1024_8_rounds(14); #endif #if (SKEIN_UNROLL_1024 > 14) #error "need more unrolling in Skein_1024_Process_Block" #endif } /* * do the final "feedforward" xor, update context chaining vars */ ctx->X[0] = X00 ^ w[0]; ctx->X[1] = X01 ^ w[1]; ctx->X[2] = X02 ^ w[2]; ctx->X[3] = X03 ^ w[3]; ctx->X[4] = X04 ^ w[4]; ctx->X[5] = X05 ^ w[5]; ctx->X[6] = X06 ^ 
w[6]; ctx->X[7] = X07 ^ w[7]; ctx->X[8] = X08 ^ w[8]; ctx->X[9] = X09 ^ w[9]; ctx->X[10] = X10 ^ w[10]; ctx->X[11] = X11 ^ w[11]; ctx->X[12] = X12 ^ w[12]; ctx->X[13] = X13 ^ w[13]; ctx->X[14] = X14 ^ w[14]; ctx->X[15] = X15 ^ w[15]; Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X); ts[1] &= ~SKEIN_T1_FLAG_FIRST; blkPtr += SKEIN1024_BLOCK_BYTES; } while (--blkCnt); ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) size_t Skein1024_Process_Block_CodeSize(void) { return ((uint8_t *)Skein1024_Process_Block_CodeSize) - ((uint8_t *)Skein1024_Process_Block); } uint_t Skein1024_Unroll_Cnt(void) { return (SKEIN_UNROLL_1024); } #endif #endif diff --git a/module/lua/ldo.c b/module/lua/ldo.c index e2a3d0279d7f..291bca044e7b 100644 --- a/module/lua/ldo.c +++ b/module/lua/ldo.c @@ -1,759 +1,760 @@ /* ** $Id: ldo.c,v 2.108.1.3 2013/11/08 18:22:50 roberto Exp $ ** Stack and Call structure of Lua ** See Copyright Notice in lua.h */ #define ldo_c #define LUA_CORE #include #include "lapi.h" #include "ldebug.h" #include "ldo.h" #include "lfunc.h" #include "lgc.h" #include "lmem.h" #include "lobject.h" #include "lopcodes.h" #include "lparser.h" #include "lstate.h" #include "lstring.h" #include "ltable.h" #include "ltm.h" #include "lvm.h" #include "lzio.h" /* Return the number of bytes available on the stack. */ #if defined (_KERNEL) && defined(__linux__) #include static intptr_t stack_remaining(void) { intptr_t local; local = (intptr_t)&local - (intptr_t)current->stack; return local; } #elif defined (_KERNEL) && defined(__FreeBSD__) #include static intptr_t stack_remaining(void) { intptr_t local; local = (intptr_t)&local - (intptr_t)curthread->td_kstack; return local; } #else static intptr_t stack_remaining(void) { return INTPTR_MAX; } #endif /* ** {====================================================== ** Error-recovery functions ** ======================================================= */ /* ** LUAI_THROW/LUAI_TRY define how Lua does exception handling. By ** default, Lua handles errors with exceptions when compiling as ** C++ code, with _longjmp/_setjmp when asked to use them, and with ** longjmp/setjmp otherwise. */ #if !defined(LUAI_THROW) #ifdef _KERNEL #ifdef __linux__ #if defined(__i386__) #define JMP_BUF_CNT 6 #elif defined(__x86_64__) #define JMP_BUF_CNT 8 #elif defined(__sparc__) && defined(__arch64__) #define JMP_BUF_CNT 6 #elif defined(__powerpc__) #define JMP_BUF_CNT 26 #elif defined(__aarch64__) #define JMP_BUF_CNT 64 #elif defined(__arm__) #define JMP_BUF_CNT 65 #elif defined(__mips__) #define JMP_BUF_CNT 12 #elif defined(__s390x__) #define JMP_BUF_CNT 18 #elif defined(__riscv) #define JMP_BUF_CNT 64 #else #define JMP_BUF_CNT 1 #endif typedef struct _label_t { long long unsigned val[JMP_BUF_CNT]; } label_t; int setjmp(label_t *) __attribute__ ((__nothrow__)); extern __attribute__((noreturn)) void longjmp(label_t *); #define LUAI_THROW(L,c) longjmp(&(c)->b) #define LUAI_TRY(L,c,a) if (setjmp(&(c)->b) == 0) { a } #define luai_jmpbuf label_t /* unsupported arches will build but not be able to run lua programs */ #if JMP_BUF_CNT == 1 int setjmp (label_t *buf) { return 1; } void longjmp (label_t * buf) { for (;;); } #endif #else #define LUAI_THROW(L,c) longjmp((c)->b, 1) #define LUAI_TRY(L,c,a) if (setjmp((c)->b) == 0) { a } #define luai_jmpbuf jmp_buf #endif #else /* _KERNEL */ #if defined(__cplusplus) && !defined(LUA_USE_LONGJMP) /* C++ exceptions */ #define LUAI_THROW(L,c) throw(c) #define LUAI_TRY(L,c,a) \ try { a } catch(...) 
{ if ((c)->status == 0) (c)->status = -1; } #define luai_jmpbuf int /* dummy variable */ #elif defined(LUA_USE_ULONGJMP) /* in Unix, try _longjmp/_setjmp (more efficient) */ #define LUAI_THROW(L,c) _longjmp((c)->b, 1) #define LUAI_TRY(L,c,a) if (_setjmp((c)->b) == 0) { a } #define luai_jmpbuf jmp_buf #else /* default handling with long jumps */ #define LUAI_THROW(L,c) longjmp((c)->b, 1) #define LUAI_TRY(L,c,a) if (setjmp((c)->b) == 0) { a } #define luai_jmpbuf jmp_buf #endif #endif /* _KERNEL */ #endif /* LUAI_THROW */ /* chain list of long jump buffers */ struct lua_longjmp { struct lua_longjmp *previous; luai_jmpbuf b; volatile int status; /* error code */ }; static void seterrorobj (lua_State *L, int errcode, StkId oldtop) { switch (errcode) { case LUA_ERRMEM: { /* memory error? */ setsvalue2s(L, oldtop, G(L)->memerrmsg); /* reuse preregistered msg. */ break; } case LUA_ERRERR: { setsvalue2s(L, oldtop, luaS_newliteral(L, "error in error handling")); break; } default: { setobjs2s(L, oldtop, L->top - 1); /* error message on current top */ break; } } L->top = oldtop + 1; } /* * Silence infinite recursion warning which was added to -Wall in gcc 12.1 */ #if defined(__GNUC__) && !defined(__clang__) && \ defined(HAVE_KERNEL_INFINITE_RECURSION) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Winfinite-recursion" #endif l_noret luaD_throw (lua_State *L, int errcode) { if (L->errorJmp) { /* thread has an error handler? */ L->errorJmp->status = errcode; /* set status */ LUAI_THROW(L, L->errorJmp); /* jump to it */ } else { /* thread has no error handler */ L->status = cast_byte(errcode); /* mark it as dead */ if (G(L)->mainthread->errorJmp) { /* main thread has a handler? */ setobjs2s(L, G(L)->mainthread->top++, L->top - 1); /* copy error obj. */ luaD_throw(G(L)->mainthread, errcode); /* re-throw in main thread */ } else { /* no handler at all; abort */ if (G(L)->panic) { /* panic function? 
*/ lua_unlock(L); G(L)->panic(L); /* call it (last chance to jump out) */ } panic("no error handler"); } } } -#if defined(HAVE_INFINITE_RECURSION) +#if defined(__GNUC__) && !defined(__clang__) && \ + defined(HAVE_INFINITE_RECURSION) #pragma GCC diagnostic pop #endif int luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud) { unsigned short oldnCcalls = L->nCcalls; struct lua_longjmp lj; lj.status = LUA_OK; lj.previous = L->errorJmp; /* chain new error handler */ L->errorJmp = &lj; LUAI_TRY(L, &lj, (*f)(L, ud); ); L->errorJmp = lj.previous; /* restore old error handler */ L->nCcalls = oldnCcalls; return lj.status; } /* }====================================================== */ static void correctstack (lua_State *L, TValue *oldstack) { CallInfo *ci; GCObject *up; L->top = (L->top - oldstack) + L->stack; for (up = L->openupval; up != NULL; up = up->gch.next) gco2uv(up)->v = (gco2uv(up)->v - oldstack) + L->stack; for (ci = L->ci; ci != NULL; ci = ci->previous) { ci->top = (ci->top - oldstack) + L->stack; ci->func = (ci->func - oldstack) + L->stack; if (isLua(ci)) ci->u.l.base = (ci->u.l.base - oldstack) + L->stack; } } /* some space for error handling */ #define ERRORSTACKSIZE (LUAI_MAXSTACK + 200) void luaD_reallocstack (lua_State *L, int newsize) { TValue *oldstack = L->stack; int lim = L->stacksize; lua_assert(newsize <= LUAI_MAXSTACK || newsize == ERRORSTACKSIZE); lua_assert(L->stack_last - L->stack == L->stacksize - EXTRA_STACK); luaM_reallocvector(L, L->stack, L->stacksize, newsize, TValue); for (; lim < newsize; lim++) setnilvalue(L->stack + lim); /* erase new segment */ L->stacksize = newsize; L->stack_last = L->stack + newsize - EXTRA_STACK; correctstack(L, oldstack); } void luaD_growstack (lua_State *L, int n) { int size = L->stacksize; if (size > LUAI_MAXSTACK) /* error after extra size? */ luaD_throw(L, LUA_ERRERR); else { int needed = cast_int(L->top - L->stack) + n + EXTRA_STACK; int newsize = 2 * size; if (newsize > LUAI_MAXSTACK) newsize = LUAI_MAXSTACK; if (newsize < needed) newsize = needed; if (newsize > LUAI_MAXSTACK) { /* stack overflow? */ luaD_reallocstack(L, ERRORSTACKSIZE); luaG_runerror(L, "stack overflow"); } else luaD_reallocstack(L, newsize); } } static int stackinuse (lua_State *L) { CallInfo *ci; StkId lim = L->top; for (ci = L->ci; ci != NULL; ci = ci->previous) { lua_assert(ci->top <= L->stack_last); if (lim < ci->top) lim = ci->top; } return cast_int(lim - L->stack) + 1; /* part of stack in use */ } void luaD_shrinkstack (lua_State *L) { int inuse = stackinuse(L); int goodsize = inuse + (inuse / 8) + 2*EXTRA_STACK; if (goodsize > LUAI_MAXSTACK) goodsize = LUAI_MAXSTACK; if (inuse > LUAI_MAXSTACK || /* handling stack overflow? */ goodsize >= L->stacksize) /* would grow instead of shrink? 
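	   (E.g. with inuse == 100 and Lua's EXTRA_STACK of 5, goodsize is
	   100 + 100/8 + 2*5 == 122, so shrinking happens only when the
	   current stacksize exceeds that.)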
*/ condmovestack(L); /* don't change stack (change only for debugging) */ else luaD_reallocstack(L, goodsize); /* shrink it */ } void luaD_hook (lua_State *L, int event, int line) { lua_Hook hook = L->hook; if (hook && L->allowhook) { CallInfo *ci = L->ci; ptrdiff_t top = savestack(L, L->top); ptrdiff_t ci_top = savestack(L, ci->top); lua_Debug ar; ar.event = event; ar.currentline = line; ar.i_ci = ci; luaD_checkstack(L, LUA_MINSTACK); /* ensure minimum stack size */ ci->top = L->top + LUA_MINSTACK; lua_assert(ci->top <= L->stack_last); L->allowhook = 0; /* cannot call hooks inside a hook */ ci->callstatus |= CIST_HOOKED; lua_unlock(L); (*hook)(L, &ar); lua_lock(L); lua_assert(!L->allowhook); L->allowhook = 1; ci->top = restorestack(L, ci_top); L->top = restorestack(L, top); ci->callstatus &= ~CIST_HOOKED; } } static void callhook (lua_State *L, CallInfo *ci) { int hook = LUA_HOOKCALL; ci->u.l.savedpc++; /* hooks assume 'pc' is already incremented */ if (isLua(ci->previous) && GET_OPCODE(*(ci->previous->u.l.savedpc - 1)) == OP_TAILCALL) { ci->callstatus |= CIST_TAIL; hook = LUA_HOOKTAILCALL; } luaD_hook(L, hook, -1); ci->u.l.savedpc--; /* correct 'pc' */ } static StkId adjust_varargs (lua_State *L, Proto *p, int actual) { int i; int nfixargs = p->numparams; StkId base, fixed; lua_assert(actual >= nfixargs); /* move fixed parameters to final position */ luaD_checkstack(L, p->maxstacksize); /* check again for new 'base' */ fixed = L->top - actual; /* first fixed argument */ base = L->top; /* final position of first argument */ for (i=0; itop++, fixed + i); setnilvalue(fixed + i); } return base; } static StkId tryfuncTM (lua_State *L, StkId func) { const TValue *tm = luaT_gettmbyobj(L, func, TM_CALL); StkId p; ptrdiff_t funcr = savestack(L, func); if (!ttisfunction(tm)) luaG_typeerror(L, func, "call"); /* Open a hole inside the stack at `func' */ for (p = L->top; p > func; p--) setobjs2s(L, p, p-1); incr_top(L); func = restorestack(L, funcr); /* previous call may change stack */ setobj2s(L, func, tm); /* tag method is the new function to be called */ return func; } #define next_ci(L) (L->ci = (L->ci->next ? 
L->ci->next : luaE_extendCI(L))) /* ** returns true if function has been executed (C function) */ int luaD_precall (lua_State *L, StkId func, int nresults) { lua_CFunction f; CallInfo *ci; int n; /* number of arguments (Lua) or returns (C) */ ptrdiff_t funcr = savestack(L, func); switch (ttype(func)) { case LUA_TLCF: /* light C function */ f = fvalue(func); goto Cfunc; case LUA_TCCL: { /* C closure */ f = clCvalue(func)->f; Cfunc: luaD_checkstack(L, LUA_MINSTACK); /* ensure minimum stack size */ ci = next_ci(L); /* now 'enter' new function */ ci->nresults = nresults; ci->func = restorestack(L, funcr); ci->top = L->top + LUA_MINSTACK; lua_assert(ci->top <= L->stack_last); ci->callstatus = 0; luaC_checkGC(L); /* stack grow uses memory */ if (L->hookmask & LUA_MASKCALL) luaD_hook(L, LUA_HOOKCALL, -1); lua_unlock(L); n = (*f)(L); /* do the actual call */ lua_lock(L); api_checknelems(L, n); luaD_poscall(L, L->top - n); return 1; } case LUA_TLCL: { /* Lua function: prepare its call */ StkId base; Proto *p = clLvalue(func)->p; n = cast_int(L->top - func) - 1; /* number of real arguments */ luaD_checkstack(L, p->maxstacksize + p->numparams); for (; n < p->numparams; n++) setnilvalue(L->top++); /* complete missing arguments */ if (!p->is_vararg) { func = restorestack(L, funcr); base = func + 1; } else { base = adjust_varargs(L, p, n); func = restorestack(L, funcr); /* previous call can change stack */ } ci = next_ci(L); /* now 'enter' new function */ ci->nresults = nresults; ci->func = func; ci->u.l.base = base; ci->top = base + p->maxstacksize; lua_assert(ci->top <= L->stack_last); ci->u.l.savedpc = p->code; /* starting point */ ci->callstatus = CIST_LUA; L->top = ci->top; luaC_checkGC(L); /* stack grow uses memory */ if (L->hookmask & LUA_MASKCALL) callhook(L, ci); return 0; } default: { /* not a function */ func = tryfuncTM(L, func); /* retry with 'function' tag method */ return luaD_precall(L, func, nresults); /* now it must be a function */ } } } int luaD_poscall (lua_State *L, StkId firstResult) { StkId res; int wanted, i; CallInfo *ci = L->ci; if (L->hookmask & (LUA_MASKRET | LUA_MASKLINE)) { if (L->hookmask & LUA_MASKRET) { ptrdiff_t fr = savestack(L, firstResult); /* hook may change stack */ luaD_hook(L, LUA_HOOKRET, -1); firstResult = restorestack(L, fr); } L->oldpc = ci->previous->u.l.savedpc; /* 'oldpc' for caller function */ } res = ci->func; /* res == final position of 1st result */ wanted = ci->nresults; L->ci = ci->previous; /* back to caller */ /* move results to correct place */ for (i = wanted; i != 0 && firstResult < L->top; i--) setobjs2s(L, res++, firstResult++); while (i-- > 0) setnilvalue(res++); L->top = res; return (wanted - LUA_MULTRET); /* 0 iff wanted == LUA_MULTRET */ } /* ** Call a function (C or Lua). The function to be called is at *func. ** The arguments are on the stack, right after the function. ** When returns, all the results are on the stack, starting at the original ** function position. 
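** The guards below bound recursion two ways: nCcalls raises
** "C stack overflow" at LUAI_MAXCCALLS (escalating to LUA_ERRERR once
** nesting passes LUAI_MAXCCALLS + LUAI_MAXCCALLS/8 while that first
** error is being handled), and stack_remaining() is compared against
** LUAI_MINCSTACK to guard the real C stack.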
*/ void luaD_call (lua_State *L, StkId func, int nResults, int allowyield) { if (++L->nCcalls >= LUAI_MAXCCALLS) { if (L->nCcalls == LUAI_MAXCCALLS) luaG_runerror(L, "C stack overflow"); else if (L->nCcalls >= (LUAI_MAXCCALLS + (LUAI_MAXCCALLS>>3))) luaD_throw(L, LUA_ERRERR); /* error while handling stack error */ } intptr_t remaining = stack_remaining(); if (L->runerror == 0 && remaining < LUAI_MINCSTACK) luaG_runerror(L, "C stack overflow"); if (L->runerror != 0 && remaining < LUAI_MINCSTACK / 2) luaD_throw(L, LUA_ERRERR); /* error while handling stack error */ if (!allowyield) L->nny++; if (!luaD_precall(L, func, nResults)) /* is a Lua function? */ luaV_execute(L); /* call it */ if (!allowyield) L->nny--; L->nCcalls--; } static void finishCcall (lua_State *L) { CallInfo *ci = L->ci; int n; lua_assert(ci->u.c.k != NULL); /* must have a continuation */ lua_assert(L->nny == 0); if (ci->callstatus & CIST_YPCALL) { /* was inside a pcall? */ ci->callstatus &= ~CIST_YPCALL; /* finish 'lua_pcall' */ L->errfunc = ci->u.c.old_errfunc; } /* finish 'lua_callk'/'lua_pcall' */ adjustresults(L, ci->nresults); /* call continuation function */ if (!(ci->callstatus & CIST_STAT)) /* no call status? */ ci->u.c.status = LUA_YIELD; /* 'default' status */ lua_assert(ci->u.c.status != LUA_OK); ci->callstatus = (ci->callstatus & ~(CIST_YPCALL | CIST_STAT)) | CIST_YIELDED; lua_unlock(L); n = (*ci->u.c.k)(L); lua_lock(L); api_checknelems(L, n); /* finish 'luaD_precall' */ luaD_poscall(L, L->top - n); } static void unroll (lua_State *L, void *ud) { UNUSED(ud); for (;;) { if (L->ci == &L->base_ci) /* stack is empty? */ return; /* coroutine finished normally */ if (!isLua(L->ci)) /* C function? */ finishCcall(L); else { /* Lua function */ luaV_finishOp(L); /* finish interrupted instruction */ luaV_execute(L); /* execute down to higher C 'boundary' */ } } } /* ** check whether thread has a suspended protected call */ static CallInfo *findpcall (lua_State *L) { CallInfo *ci; for (ci = L->ci; ci != NULL; ci = ci->previous) { /* search for a pcall */ if (ci->callstatus & CIST_YPCALL) return ci; } return NULL; /* no pending pcall */ } static int recover (lua_State *L, int status) { StkId oldtop; CallInfo *ci = findpcall(L); if (ci == NULL) return 0; /* no recovery point */ /* "finish" luaD_pcall */ oldtop = restorestack(L, ci->extra); luaF_close(L, oldtop); seterrorobj(L, status, oldtop); L->ci = ci; L->allowhook = ci->u.c.old_allowhook; L->nny = 0; /* should be zero to be yieldable */ luaD_shrinkstack(L); L->errfunc = ci->u.c.old_errfunc; ci->callstatus |= CIST_STAT; /* call has error status */ ci->u.c.status = status; /* (here it is) */ return 1; /* continue running the coroutine */ } /* ** signal an error in the call to 'resume', not in the execution of the ** coroutine itself. (Such errors should not be handled by any coroutine ** error handler and should not kill the coroutine.) */ static l_noret resume_error (lua_State *L, const char *msg, StkId firstArg) { L->top = firstArg; /* remove args from the stack */ setsvalue2s(L, L->top, luaS_new(L, msg)); /* push error message */ api_incr_top(L); luaD_throw(L, -1); /* jump back to 'lua_resume' */ } /* ** do the work for 'lua_resume' in protected mode */ static void resume_cb (lua_State *L, void *ud) { int nCcalls = L->nCcalls; StkId firstArg = cast(StkId, ud); CallInfo *ci = L->ci; if (nCcalls >= LUAI_MAXCCALLS) resume_error(L, "C stack overflow", firstArg); if (L->status == LUA_OK) { /* may be starting a coroutine */ if (ci != &L->base_ci) /* not in base level? 
*/ resume_error(L, "cannot resume non-suspended coroutine", firstArg); /* coroutine is in base level; start running it */ if (!luaD_precall(L, firstArg - 1, LUA_MULTRET)) /* Lua function? */ luaV_execute(L); /* call it */ } else if (L->status != LUA_YIELD) resume_error(L, "cannot resume dead coroutine", firstArg); else { /* resuming from previous yield */ L->status = LUA_OK; ci->func = restorestack(L, ci->extra); if (isLua(ci)) /* yielded inside a hook? */ luaV_execute(L); /* just continue running Lua code */ else { /* 'common' yield */ if (ci->u.c.k != NULL) { /* does it have a continuation? */ int n; ci->u.c.status = LUA_YIELD; /* 'default' status */ ci->callstatus |= CIST_YIELDED; lua_unlock(L); n = (*ci->u.c.k)(L); /* call continuation */ lua_lock(L); api_checknelems(L, n); firstArg = L->top - n; /* yield results come from continuation */ } luaD_poscall(L, firstArg); /* finish 'luaD_precall' */ } unroll(L, NULL); } lua_assert(nCcalls == L->nCcalls); } LUA_API int lua_resume (lua_State *L, lua_State *from, int nargs) { int status; int oldnny = L->nny; /* save 'nny' */ lua_lock(L); luai_userstateresume(L, nargs); L->nCcalls = (from) ? from->nCcalls + 1 : 1; L->nny = 0; /* allow yields */ api_checknelems(L, (L->status == LUA_OK) ? nargs + 1 : nargs); status = luaD_rawrunprotected(L, resume_cb, L->top - nargs); if (status == -1) /* error calling 'lua_resume'? */ status = LUA_ERRRUN; else { /* yield or regular error */ while (status != LUA_OK && status != LUA_YIELD) { /* error? */ if (recover(L, status)) /* recover point? */ status = luaD_rawrunprotected(L, unroll, NULL); /* run continuation */ else { /* unrecoverable error */ L->status = cast_byte(status); /* mark thread as `dead' */ seterrorobj(L, status, L->top); L->ci->top = L->top; break; } } lua_assert(status == L->status); } L->nny = oldnny; /* restore 'nny' */ L->nCcalls--; lua_assert(L->nCcalls == ((from) ? from->nCcalls : 0)); lua_unlock(L); return status; } LUA_API int lua_yieldk (lua_State *L, int nresults, int ctx, lua_CFunction k) { CallInfo *ci = L->ci; luai_userstateyield(L, nresults); lua_lock(L); api_checknelems(L, nresults); if (L->nny > 0) { if (L != G(L)->mainthread) luaG_runerror(L, "attempt to yield across a C-call boundary"); else luaG_runerror(L, "attempt to yield from outside a coroutine"); } L->status = LUA_YIELD; ci->extra = savestack(L, ci->func); /* save current 'func' */ if (isLua(ci)) { /* inside a hook? */ api_check(L, k == NULL, "hooks cannot continue after yielding"); } else { if ((ci->u.c.k = k) != NULL) /* is there a continuation? */ ci->u.c.ctx = ctx; /* save context */ ci->func = L->top - nresults - 1; /* protect stack below results */ luaD_throw(L, LUA_YIELD); } lua_assert(ci->callstatus & CIST_HOOKED); /* must be inside a hook */ lua_unlock(L); return 0; /* return to 'luaD_hook' */ } int luaD_pcall (lua_State *L, Pfunc func, void *u, ptrdiff_t old_top, ptrdiff_t ef) { int status; CallInfo *old_ci = L->ci; lu_byte old_allowhooks = L->allowhook; unsigned short old_nny = L->nny; ptrdiff_t old_errfunc = L->errfunc; L->errfunc = ef; status = luaD_rawrunprotected(L, func, u); if (status != LUA_OK) { /* an error occurred? */ StkId oldtop = restorestack(L, old_top); luaF_close(L, oldtop); /* close possible pending closures */ seterrorobj(L, status, oldtop); L->ci = old_ci; L->allowhook = old_allowhooks; L->nny = old_nny; luaD_shrinkstack(L); } L->errfunc = old_errfunc; return status; } /* ** Execute a protected parser. 
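** Only text chunks are accepted here: f_parser() asserts that the
** first byte is not LUA_SIGNATURE[0] (precompiled chunks are not
** supported) and checkmode() rejects a load whose mode string does
** not include "text". luaD_protectedparser() also bumps L->nny for
** the duration, so the parser can never yield part-way through.
*/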
*/ struct SParser { /* data to `f_parser' */ ZIO *z; Mbuffer buff; /* dynamic structure used by the scanner */ Dyndata dyd; /* dynamic structures used by the parser */ const char *mode; const char *name; }; static void checkmode (lua_State *L, const char *mode, const char *x) { if (mode && strchr(mode, x[0]) == NULL) { luaO_pushfstring(L, "attempt to load a %s chunk (mode is " LUA_QS ")", x, mode); luaD_throw(L, LUA_ERRSYNTAX); } } static void f_parser (lua_State *L, void *ud) { int i; Closure *cl; struct SParser *p = cast(struct SParser *, ud); int c = zgetc(p->z); /* read first character */ lua_assert(c != LUA_SIGNATURE[0]); /* binary not supported */ checkmode(L, p->mode, "text"); cl = luaY_parser(L, p->z, &p->buff, &p->dyd, p->name, c); lua_assert(cl->l.nupvalues == cl->l.p->sizeupvalues); for (i = 0; i < cl->l.nupvalues; i++) { /* initialize upvalues */ UpVal *up = luaF_newupval(L); cl->l.upvals[i] = up; luaC_objbarrier(L, cl, up); } } int luaD_protectedparser (lua_State *L, ZIO *z, const char *name, const char *mode) { struct SParser p; int status; L->nny++; /* cannot yield during parsing */ p.z = z; p.name = name; p.mode = mode; p.dyd.actvar.arr = NULL; p.dyd.actvar.size = 0; p.dyd.gt.arr = NULL; p.dyd.gt.size = 0; p.dyd.label.arr = NULL; p.dyd.label.size = 0; luaZ_initbuffer(L, &p.buff); status = luaD_pcall(L, f_parser, &p, savestack(L, L->top), L->errfunc); luaZ_freebuffer(L, &p.buff); luaM_freearray(L, p.dyd.actvar.arr, p.dyd.actvar.size); luaM_freearray(L, p.dyd.gt.arr, p.dyd.gt.size); luaM_freearray(L, p.dyd.label.arr, p.dyd.label.size); L->nny--; return status; } diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index 71eedf635f73..38515023e4b8 100644 --- a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -1,927 +1,931 @@ /* * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. * Copyright (C) 2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Brian Behlendorf . * UCRL-CODE-235197 * * This file is part of the SPL, Solaris Porting Layer. * * The SPL is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * The SPL is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . * * Solaris Porting Layer (SPL) Generic Implementation. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include unsigned long spl_hostid = 0; EXPORT_SYMBOL(spl_hostid); /* CSTYLED */ module_param(spl_hostid, ulong, 0644); MODULE_PARM_DESC(spl_hostid, "The system hostid."); proc_t p0; EXPORT_SYMBOL(p0); /* * xoshiro256++ 1.0 PRNG by David Blackman and Sebastiano Vigna * * "Scrambled Linear Pseudorandom Number Generators∗" * https://vigna.di.unimi.it/ftp/papers/ScrambledLinear.pdf * * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose * is to provide bytes containing random numbers. 
It is mapped to /dev/urandom * on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's * random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so * we can implement it using a fast PRNG that we seed using Linux' actual * equivalent to random_get_pseudo_bytes(). We do this by providing each CPU * with an independent seed so that all calls to random_get_pseudo_bytes() are * free of atomic instructions. * * A consequence of using a fast PRNG is that using random_get_pseudo_bytes() * to generate words larger than 256 bits will paradoxically be limited to * `2^256 - 1` possibilities. This is because we have a sequence of `2^256 - 1` * 256-bit words and selecting the first will implicitly select the second. If * a caller finds this behavior undesirable, random_get_bytes() should be used * instead. * * XXX: Linux interrupt handlers that trigger within the critical section * formed by `s[3] = xp[3];` and `xp[0] = s[0];` and call this function will * see the same numbers. Nothing in the code currently calls this in an * interrupt handler, so this is considered to be okay. If that becomes a * problem, we could create a set of per-cpu variables for interrupt handlers * and use them when in_interrupt() from linux/preempt_mask.h evaluates to * true. */ static void __percpu *spl_pseudo_entropy; /* * rotl()/spl_rand_next()/spl_rand_jump() are copied from the following CC-0 * licensed file: * * https://prng.di.unimi.it/xoshiro256plusplus.c */ static inline uint64_t rotl(const uint64_t x, int k) { return ((x << k) | (x >> (64 - k))); } static inline uint64_t spl_rand_next(uint64_t *s) { const uint64_t result = rotl(s[0] + s[3], 23) + s[0]; const uint64_t t = s[1] << 17; s[2] ^= s[0]; s[3] ^= s[1]; s[1] ^= s[2]; s[0] ^= s[3]; s[2] ^= t; s[3] = rotl(s[3], 45); return (result); } static inline void spl_rand_jump(uint64_t *s) { static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c }; uint64_t s0 = 0; uint64_t s1 = 0; uint64_t s2 = 0; uint64_t s3 = 0; int i, b; for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++) for (b = 0; b < 64; b++) { if (JUMP[i] & 1ULL << b) { s0 ^= s[0]; s1 ^= s[1]; s2 ^= s[2]; s3 ^= s[3]; } (void) spl_rand_next(s); } s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; } int random_get_pseudo_bytes(uint8_t *ptr, size_t len) { uint64_t *xp, s[4]; ASSERT(ptr); xp = get_cpu_ptr(spl_pseudo_entropy); s[0] = xp[0]; s[1] = xp[1]; s[2] = xp[2]; s[3] = xp[3]; while (len) { union { uint64_t ui64; uint8_t byte[sizeof (uint64_t)]; }entropy; int i = MIN(len, sizeof (uint64_t)); len -= i; entropy.ui64 = spl_rand_next(s); /* * xoshiro256++ has low entropy lower bytes, so we copy the * higher order bytes first. */ while (i--) #ifdef _ZFS_BIG_ENDIAN *ptr++ = entropy.byte[i]; #else *ptr++ = entropy.byte[7 - i]; #endif } xp[0] = s[0]; xp[1] = s[1]; xp[2] = s[2]; xp[3] = s[3]; put_cpu_ptr(spl_pseudo_entropy); return (0); } EXPORT_SYMBOL(random_get_pseudo_bytes); #if BITS_PER_LONG == 32 /* * Support 64/64 => 64 division on a 32-bit platform. While the kernel * provides a div64_u64() function for this we do not use it because the * implementation is flawed. There are cases which return incorrect * results as late as linux-2.6.35. Until this is fixed upstream the * spl must provide its own implementation. * * This implementation is a slightly modified version of the algorithm * proposed by the book 'Hacker's Delight'. The original source can be * found here and is available for use without restriction. 
* * http://www.hackersdelight.org/HDcode/newCode/divDouble.c */ /* * Calculate number of leading of zeros for a 64-bit value. */ static int nlz64(uint64_t x) { register int n = 0; if (x == 0) return (64); if (x <= 0x00000000FFFFFFFFULL) { n = n + 32; x = x << 32; } if (x <= 0x0000FFFFFFFFFFFFULL) { n = n + 16; x = x << 16; } if (x <= 0x00FFFFFFFFFFFFFFULL) { n = n + 8; x = x << 8; } if (x <= 0x0FFFFFFFFFFFFFFFULL) { n = n + 4; x = x << 4; } if (x <= 0x3FFFFFFFFFFFFFFFULL) { n = n + 2; x = x << 2; } if (x <= 0x7FFFFFFFFFFFFFFFULL) { n = n + 1; } return (n); } /* * Newer kernels have a div_u64() function but we define our own * to simplify portability between kernel versions. */ static inline uint64_t __div_u64(uint64_t u, uint32_t v) { (void) do_div(u, v); return (u); } /* * Turn off missing prototypes warning for these functions. They are * replacements for libgcc-provided functions and will never be called * directly. */ +#if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmissing-prototypes" +#endif /* * Implementation of 64-bit unsigned division for 32-bit machines. * * First the procedure takes care of the case in which the divisor is a * 32-bit quantity. There are two subcases: (1) If the left half of the * dividend is less than the divisor, one execution of do_div() is all that * is required (overflow is not possible). (2) Otherwise it does two * divisions, using the grade school method. */ uint64_t __udivdi3(uint64_t u, uint64_t v) { uint64_t u0, u1, v1, q0, q1, k; int n; if (v >> 32 == 0) { // If v < 2**32: if (u >> 32 < v) { // If u/v cannot overflow, return (__div_u64(u, v)); // just do one division. } else { // If u/v would overflow: u1 = u >> 32; // Break u into two halves. u0 = u & 0xFFFFFFFF; q1 = __div_u64(u1, v); // First quotient digit. k = u1 - q1 * v; // First remainder, < v. u0 += (k << 32); q0 = __div_u64(u0, v); // Seconds quotient digit. return ((q1 << 32) + q0); } } else { // If v >= 2**32: n = nlz64(v); // 0 <= n <= 31. v1 = (v << n) >> 32; // Normalize divisor, MSB is 1. u1 = u >> 1; // To ensure no overflow. q1 = __div_u64(u1, v1); // Get quotient from q0 = (q1 << n) >> 31; // Undo normalization and // division of u by 2. if (q0 != 0) // Make q0 correct or q0 = q0 - 1; // too small by 1. if ((u - q0 * v) >= v) q0 = q0 + 1; // Now q0 is correct. return (q0); } } EXPORT_SYMBOL(__udivdi3); #ifndef abs64 /* CSTYLED */ #define abs64(x) ({ uint64_t t = (x) >> 63; ((x) ^ t) - t; }) #endif /* * Implementation of 64-bit signed division for 32-bit machines. */ int64_t __divdi3(int64_t u, int64_t v) { int64_t q, t; q = __udivdi3(abs64(u), abs64(v)); t = (u ^ v) >> 63; // If u, v have different return ((q ^ t) - t); // signs, negate q. } EXPORT_SYMBOL(__divdi3); /* * Implementation of 64-bit unsigned modulo for 32-bit machines. */ uint64_t __umoddi3(uint64_t dividend, uint64_t divisor) { return (dividend - (divisor * __udivdi3(dividend, divisor))); } EXPORT_SYMBOL(__umoddi3); /* 64-bit signed modulo for 32-bit machines. */ int64_t __moddi3(int64_t n, int64_t d) { int64_t q; boolean_t nn = B_FALSE; if (n < 0) { nn = B_TRUE; n = -n; } if (d < 0) d = -d; q = __umoddi3(n, d); return (nn ? -q : q); } EXPORT_SYMBOL(__moddi3); /* * Implementation of 64-bit unsigned division/modulo for 32-bit machines. 
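 * (A quick sanity check of the identity used below: for n == 7 and
 * d == 3, __udivdi3() returns q == 2 and the remainder comes out as
 * n - d * q == 7 - 6 == 1.)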
*/ uint64_t __udivmoddi4(uint64_t n, uint64_t d, uint64_t *r) { uint64_t q = __udivdi3(n, d); if (r) *r = n - d * q; return (q); } EXPORT_SYMBOL(__udivmoddi4); /* * Implementation of 64-bit signed division/modulo for 32-bit machines. */ int64_t __divmoddi4(int64_t n, int64_t d, int64_t *r) { int64_t q, rr; boolean_t nn = B_FALSE; boolean_t nd = B_FALSE; if (n < 0) { nn = B_TRUE; n = -n; } if (d < 0) { nd = B_TRUE; d = -d; } q = __udivmoddi4(n, d, (uint64_t *)&rr); if (nn != nd) q = -q; if (nn) rr = -rr; if (r) *r = rr; return (q); } EXPORT_SYMBOL(__divmoddi4); #if defined(__arm) || defined(__arm__) /* * Implementation of 64-bit (un)signed division for 32-bit arm machines. * * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned) * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1}, * and the remainder in {r2, r3}. The return type is specifically left * set to 'void' to ensure the compiler does not overwrite these registers * during the return. All results are in registers as per ABI */ void __aeabi_uldivmod(uint64_t u, uint64_t v) { uint64_t res; uint64_t mod; res = __udivdi3(u, v); mod = __umoddi3(u, v); { register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); register uint32_t r1 asm("r1") = (res >> 32); register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); register uint32_t r3 asm("r3") = (mod >> 32); asm volatile("" : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */ : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ return; /* r0; */ } } EXPORT_SYMBOL(__aeabi_uldivmod); void __aeabi_ldivmod(int64_t u, int64_t v) { int64_t res; uint64_t mod; res = __divdi3(u, v); mod = __umoddi3(u, v); { register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); register uint32_t r1 asm("r1") = (res >> 32); register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); register uint32_t r3 asm("r3") = (mod >> 32); asm volatile("" : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */ : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ return; /* r0; */ } } EXPORT_SYMBOL(__aeabi_ldivmod); #endif /* __arm || __arm__ */ +#if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop +#endif #endif /* BITS_PER_LONG */ /* * NOTE: The strtoxx behavior is solely based on my reading of the Solaris * ddi_strtol(9F) man page. I have not verified the behavior of these * functions against their Solaris counterparts. It is possible that I * may have misinterpreted the man page or the man page is incorrect. 
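 *
 * For example, with base == 0 the parser below auto-detects the base
 * from the prefix: "0x1a" is taken as hex (26), "017" as octal (15),
 * and "17" as decimal. Overflow is detected by checking that the
 * accumulated value never decreases (last_value > value => ERANGE).
 */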
*/ int ddi_strtol(const char *, char **, int, long *); int ddi_strtoull(const char *, char **, int, unsigned long long *); int ddi_strtoll(const char *, char **, int, long long *); #define define_ddi_strtox(type, valtype) \ int ddi_strto##type(const char *str, char **endptr, \ int base, valtype *result) \ { \ valtype last_value, value = 0; \ char *ptr = (char *)str; \ int digit, minus = 0; \ \ while (strchr(" \t\n\r\f", *ptr)) \ ++ptr; \ \ if (strlen(ptr) == 0) \ return (EINVAL); \ \ switch (*ptr) { \ case '-': \ minus = 1; \ zfs_fallthrough; \ case '+': \ ++ptr; \ break; \ } \ \ /* Auto-detect base based on prefix */ \ if (!base) { \ if (str[0] == '0') { \ if (tolower(str[1]) == 'x' && isxdigit(str[2])) { \ base = 16; /* hex */ \ ptr += 2; \ } else if (str[1] >= '0' && str[1] < 8) { \ base = 8; /* octal */ \ ptr += 1; \ } else { \ return (EINVAL); \ } \ } else { \ base = 10; /* decimal */ \ } \ } \ \ while (1) { \ if (isdigit(*ptr)) \ digit = *ptr - '0'; \ else if (isalpha(*ptr)) \ digit = tolower(*ptr) - 'a' + 10; \ else \ break; \ \ if (digit >= base) \ break; \ \ last_value = value; \ value = value * base + digit; \ if (last_value > value) /* Overflow */ \ return (ERANGE); \ \ ptr++; \ } \ \ *result = minus ? -value : value; \ \ if (endptr) \ *endptr = ptr; \ \ return (0); \ } \ define_ddi_strtox(l, long) define_ddi_strtox(ull, unsigned long long) define_ddi_strtox(ll, long long) EXPORT_SYMBOL(ddi_strtol); EXPORT_SYMBOL(ddi_strtoll); EXPORT_SYMBOL(ddi_strtoull); int ddi_copyin(const void *from, void *to, size_t len, int flags) { /* Fake ioctl() issued by kernel, 'from' is a kernel address */ if (flags & FKIOCTL) { memcpy(to, from, len); return (0); } return (copyin(from, to, len)); } EXPORT_SYMBOL(ddi_copyin); #define define_spl_param(type, fmt) \ int \ spl_param_get_##type(char *buf, zfs_kernel_param_t *kp) \ { \ return (scnprintf(buf, PAGE_SIZE, fmt "\n", \ *(type *)kp->arg)); \ } \ int \ spl_param_set_##type(const char *buf, zfs_kernel_param_t *kp) \ { \ return (kstrto##type(buf, 0, (type *)kp->arg)); \ } \ const struct kernel_param_ops spl_param_ops_##type = { \ .set = spl_param_set_##type, \ .get = spl_param_get_##type, \ }; \ EXPORT_SYMBOL(spl_param_get_##type); \ EXPORT_SYMBOL(spl_param_set_##type); \ EXPORT_SYMBOL(spl_param_ops_##type); define_spl_param(s64, "%lld") define_spl_param(u64, "%llu") /* * Post a uevent to userspace whenever a new vdev adds to the pool. It is * necessary to sync blkid information with udev, which zed daemon uses * during device hotplug to identify the vdev. */ void spl_signal_kobj_evt(struct block_device *bdev) { #if defined(HAVE_BDEV_KOBJ) || defined(HAVE_PART_TO_DEV) #ifdef HAVE_BDEV_KOBJ struct kobject *disk_kobj = bdev_kobj(bdev); #else struct kobject *disk_kobj = &part_to_dev(bdev->bd_part)->kobj; #endif if (disk_kobj) { int ret = kobject_uevent(disk_kobj, KOBJ_CHANGE); if (ret) { pr_warn("ZFS: Sending event '%d' to kobject: '%s'" " (%p): failed(ret:%d)\n", KOBJ_CHANGE, kobject_name(disk_kobj), disk_kobj, ret); } } #else /* * This is encountered if neither bdev_kobj() nor part_to_dev() is available * in the kernel - likely due to an API change that needs to be chased down. 
 */
int
ddi_copyin(const void *from, void *to, size_t len, int flags)
{
	/* Fake ioctl() issued by kernel, 'from' is a kernel address */
	if (flags & FKIOCTL) {
		memcpy(to, from, len);
		return (0);
	}

	return (copyin(from, to, len));
}
EXPORT_SYMBOL(ddi_copyin);

#define	define_spl_param(type, fmt)					\
int									\
spl_param_get_##type(char *buf, zfs_kernel_param_t *kp)			\
{									\
	return (scnprintf(buf, PAGE_SIZE, fmt "\n",			\
	    *(type *)kp->arg));						\
}									\
int									\
spl_param_set_##type(const char *buf, zfs_kernel_param_t *kp)		\
{									\
	return (kstrto##type(buf, 0, (type *)kp->arg));			\
}									\
const struct kernel_param_ops spl_param_ops_##type = {			\
	.set = spl_param_set_##type,					\
	.get = spl_param_get_##type,					\
};									\
EXPORT_SYMBOL(spl_param_get_##type);					\
EXPORT_SYMBOL(spl_param_set_##type);					\
EXPORT_SYMBOL(spl_param_ops_##type);

define_spl_param(s64, "%lld")
define_spl_param(u64, "%llu")

/*
 * Post a uevent to userspace whenever a new vdev is added to the pool.
 * This is necessary to sync blkid information with udev, which the zed
 * daemon uses during device hotplug to identify the vdev.
 */
void
spl_signal_kobj_evt(struct block_device *bdev)
{
#if defined(HAVE_BDEV_KOBJ) || defined(HAVE_PART_TO_DEV)
#ifdef HAVE_BDEV_KOBJ
	struct kobject *disk_kobj = bdev_kobj(bdev);
#else
	struct kobject *disk_kobj = &part_to_dev(bdev->bd_part)->kobj;
#endif
	if (disk_kobj) {
		int ret = kobject_uevent(disk_kobj, KOBJ_CHANGE);
		if (ret) {
			pr_warn("ZFS: Sending event '%d' to kobject: '%s'"
			    " (%p): failed(ret:%d)\n", KOBJ_CHANGE,
			    kobject_name(disk_kobj), disk_kobj, ret);
		}
	}
#else
/*
 * This is encountered if neither bdev_kobj() nor part_to_dev() is available
 * in the kernel - likely due to an API change that needs to be chased down.
 */
#error "Unsupported kernel: unable to get struct kobj from bdev"
#endif
}
EXPORT_SYMBOL(spl_signal_kobj_evt);

int
ddi_copyout(const void *from, void *to, size_t len, int flags)
{
	/* Fake ioctl() issued by kernel, 'to' is also a kernel address */
	if (flags & FKIOCTL) {
		memcpy(to, from, len);
		return (0);
	}

	return (copyout(from, to, len));
}
EXPORT_SYMBOL(ddi_copyout);

static ssize_t
spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{
#if defined(HAVE_KERNEL_READ_PPOS)
	return (kernel_read(file, buf, count, pos));
#else
	mm_segment_t saved_fs;
	ssize_t ret;

	saved_fs = get_fs();
	set_fs(KERNEL_DS);

	ret = vfs_read(file, (void __user *)buf, count, pos);

	set_fs(saved_fs);

	return (ret);
#endif
}

static int
spl_getattr(struct file *filp, struct kstat *stat)
{
	int rc;

	ASSERT(filp);
	ASSERT(stat);

#if defined(HAVE_4ARGS_VFS_GETATTR)
	rc = vfs_getattr(&filp->f_path, stat, STATX_BASIC_STATS,
	    AT_STATX_SYNC_AS_STAT);
#elif defined(HAVE_2ARGS_VFS_GETATTR)
	rc = vfs_getattr(&filp->f_path, stat);
#elif defined(HAVE_3ARGS_VFS_GETATTR)
	rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, stat);
#else
#error "No available vfs_getattr()"
#endif
	if (rc)
		return (-rc);

	return (0);
}

/*
 * Read the unique system identifier from the /etc/hostid file.
 *
 * The behavior of /usr/bin/hostid on Linux systems with the
 * regular eglibc and coreutils is:
 *
 * 1. Generate the value if the /etc/hostid file does not exist
 *    or if the /etc/hostid file is less than four bytes in size.
 *
 * 2. If the /etc/hostid file is at least 4 bytes, then return
 *    the first four bytes [0..3] in native endian order.
 *
 * 3. Always ignore bytes [4..] if they exist in the file.
 *
 * Only the first four bytes are significant, even on systems that
 * have a 64-bit word size.
 *
 * See:
 *
 *   eglibc: sysdeps/unix/sysv/linux/gethostid.c
 *   coreutils: src/hostid.c
 *
 * Notes:
 *
 * The /etc/hostid file on Solaris is a text file that often reads:
 *
 *   # DO NOT EDIT
 *   "0123456789"
 *
 * Directly copying this file to Linux results in a constant
 * hostid of 4f442023 because the default comment constitutes
 * the first four bytes of the file.
 */

static char *spl_hostid_path = HW_HOSTID_PATH;
module_param(spl_hostid_path, charp, 0444);
MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)");

static int
hostid_read(uint32_t *hostid)
{
	uint64_t size;
	uint32_t value = 0;
	int error;
	loff_t off;
	struct file *filp;
	struct kstat stat;

	filp = filp_open(spl_hostid_path, 0, 0);

	if (IS_ERR(filp))
		return (ENOENT);

	error = spl_getattr(filp, &stat);
	if (error) {
		filp_close(filp, 0);
		return (error);
	}
	size = stat.size;
	// cppcheck-suppress sizeofwithnumericparameter
	if (size < sizeof (HW_HOSTID_MASK)) {
		filp_close(filp, 0);
		return (EINVAL);
	}

	off = 0;
	/*
	 * Read directly into the variable like eglibc does.
	 * Short reads are okay; native behavior is preserved.
	 */
	error = spl_kernel_read(filp, &value, sizeof (value), &off);
	if (error < 0) {
		filp_close(filp, 0);
		return (EIO);
	}

	/* Mask down to 32 bits like coreutils does. */
	*hostid = (value & HW_HOSTID_MASK);
	filp_close(filp, 0);

	return (0);
}
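/*
 * A self-contained userspace sketch (illustrative names, not part of the
 * diff) of the same /etc/hostid convention hostid_read() implements above:
 * take the first four bytes of the file in native byte order and ignore any
 * trailing bytes, as eglibc's gethostid() and coreutils' hostid do.
 */
#include <stdint.h>
#include <stdio.h>

static int
demo_read_hostid(const char *path, uint32_t *hostid)
{
	FILE *fp = fopen(path, "rb");
	uint32_t value = 0;

	if (fp == NULL)
		return (-1);

	/* Bytes [0..3] in native endian order; bytes [4..] are ignored. */
	if (fread(&value, sizeof (value), 1, fp) != 1) {
		fclose(fp);
		return (-1);	/* file is shorter than four bytes */
	}
	fclose(fp);

	*hostid = value;	/* only the low 32 bits are significant */
	return (0);
}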
/*
 * Return the system hostid. Preferentially use the spl_hostid module option
 * when set, otherwise use the value in the /etc/hostid file.
 */
uint32_t
zone_get_hostid(void *zone)
{
	uint32_t hostid;

	ASSERT3P(zone, ==, NULL);

	if (spl_hostid != 0)
		return ((uint32_t)(spl_hostid & HW_HOSTID_MASK));

	if (hostid_read(&hostid) == 0)
		return (hostid);

	return (0);
}
EXPORT_SYMBOL(zone_get_hostid);

static int
spl_kvmem_init(void)
{
	int rc = 0;

	rc = spl_kmem_init();
	if (rc)
		return (rc);

	rc = spl_vmem_init();
	if (rc) {
		spl_kmem_fini();
		return (rc);
	}

	return (rc);
}

/*
 * We initialize the random number generator with 128 bits of entropy from the
 * system random number generator. In the improbable case that we have a zero
 * seed, we fall back to the system jiffies, unless they are also zero, in
 * which case we use a preprogrammed seed. We step forward by 2^64 iterations
 * to initialize each of the per-cpu seeds so that the sequences generated on
 * each CPU will never overlap in practice.
 */
static int __init
spl_random_init(void)
{
	uint64_t s[4];
	int i = 0;

	spl_pseudo_entropy = __alloc_percpu(4 * sizeof (uint64_t),
	    sizeof (uint64_t));

	if (!spl_pseudo_entropy)
		return (-ENOMEM);

	get_random_bytes(s, sizeof (s));

	if (s[0] == 0 && s[1] == 0 && s[2] == 0 && s[3] == 0) {
		if (jiffies != 0) {
			s[0] = jiffies;
			s[1] = ~0 - jiffies;
			s[2] = ~jiffies;
			s[3] = jiffies - ~0;
		} else {
			(void) memcpy(s, "improbable seed", 16);
		}
		printk("SPL: get_random_bytes() returned 0 "
		    "when generating random seed. Setting initial seed to "
		    "0x%016llx%016llx%016llx%016llx.\n", cpu_to_be64(s[0]),
		    cpu_to_be64(s[1]), cpu_to_be64(s[2]), cpu_to_be64(s[3]));
	}

	for_each_possible_cpu(i) {
		uint64_t *wordp = per_cpu_ptr(spl_pseudo_entropy, i);

		spl_rand_jump(s);

		wordp[0] = s[0];
		wordp[1] = s[1];
		wordp[2] = s[2];
		wordp[3] = s[3];
	}

	return (0);
}

static void
spl_random_fini(void)
{
	free_percpu(spl_pseudo_entropy);
}

static void
spl_kvmem_fini(void)
{
	spl_vmem_fini();
	spl_kmem_fini();
}

static int __init
spl_init(void)
{
	int rc = 0;

	if ((rc = spl_random_init()))
		goto out0;

	if ((rc = spl_kvmem_init()))
		goto out1;

	if ((rc = spl_tsd_init()))
		goto out2;

	if ((rc = spl_taskq_init()))
		goto out3;

	if ((rc = spl_kmem_cache_init()))
		goto out4;

	if ((rc = spl_proc_init()))
		goto out5;

	if ((rc = spl_kstat_init()))
		goto out6;

	if ((rc = spl_zlib_init()))
		goto out7;

	if ((rc = spl_zone_init()))
		goto out8;

	return (rc);

out8:
	spl_zlib_fini();
out7:
	spl_kstat_fini();
out6:
	spl_proc_fini();
out5:
	spl_kmem_cache_fini();
out4:
	spl_taskq_fini();
out3:
	spl_tsd_fini();
out2:
	spl_kvmem_fini();
out1:
	spl_random_fini();
out0:
	return (rc);
}

static void __exit
spl_fini(void)
{
	spl_zone_fini();
	spl_zlib_fini();
	spl_kstat_fini();
	spl_proc_fini();
	spl_kmem_cache_fini();
	spl_taskq_fini();
	spl_tsd_fini();
	spl_kvmem_fini();
	spl_random_fini();
}

module_init(spl_init);
module_exit(spl_fini);

MODULE_DESCRIPTION("Solaris Porting Layer");
MODULE_AUTHOR(ZFS_META_AUTHOR);
MODULE_LICENSE("GPL");
MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
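/*
 * A minimal sketch (hypothetical stage names) of the staged-init pattern
 * used by spl_init() above: subsystems are brought up in order, and on
 * failure the goto ladder unwinds only the stages that already succeeded,
 * in reverse order, before returning the error.
 */
static int stage_a_init(void) { return (0); }
static void stage_a_fini(void) { }
static int stage_b_init(void) { return (-1); }	/* simulate a failure */
static void stage_b_fini(void) { }
static int stage_c_init(void) { return (0); }

static int
demo_module_init(void)
{
	int rc;

	if ((rc = stage_a_init()))
		goto out0;
	if ((rc = stage_b_init()))
		goto out1;
	if ((rc = stage_c_init()))
		goto out2;

	return (0);

out2:
	stage_b_fini();	/* reached when stage_c_init() fails */
out1:
	stage_a_fini();	/* also runs when stage_b_init() fails */
out0:
	return (rc);	/* nothing to undo if stage_a_init() failed */
}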
diff --git a/module/zfs/vdev_raidz_math_aarch64_neonx2.c b/module/zfs/vdev_raidz_math_aarch64_neonx2.c
index 0a1f05fd6664..bd9de91a4ba8 100644
--- a/module/zfs/vdev_raidz_math_aarch64_neonx2.c
+++ b/module/zfs/vdev_raidz_math_aarch64_neonx2.c
@@ -1,232 +1,236 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
 */

#include <sys/isa_defs.h>

#if defined(__aarch64__)

#include "vdev_raidz_math_aarch64_neon_common.h"

#define	SYN_STRIDE		4

#define	ZERO_STRIDE		8
#define	ZERO_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()
#define	ZERO_D			0, 1, 2, 3, 4, 5, 6, 7

#define	COPY_STRIDE		8
#define	COPY_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()
#define	COPY_D			0, 1, 2, 3, 4, 5, 6, 7

#define	ADD_STRIDE		8
#define	ADD_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()
#define	ADD_D			0, 1, 2, 3, 4, 5, 6, 7

#define	MUL_STRIDE		4
#define	MUL_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_33_36()
#define	MUL_D			0, 1, 2, 3

#define	GEN_P_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_33_36()
#define	GEN_P_STRIDE		4
#define	GEN_P_P			0, 1, 2, 3

#define	GEN_PQ_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	GEN_PQ_STRIDE		4
#define	GEN_PQ_D		0, 1, 2, 3
#define	GEN_PQ_C		4, 5, 6, 7

#define	GEN_PQR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	GEN_PQR_STRIDE		4
#define	GEN_PQR_D		0, 1, 2, 3
#define	GEN_PQR_C		4, 5, 6, 7

#define	SYN_Q_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_Q_STRIDE		4
#define	SYN_Q_D			0, 1, 2, 3
#define	SYN_Q_X			4, 5, 6, 7

#define	SYN_R_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_R_STRIDE		4
#define	SYN_R_D			0, 1, 2, 3
#define	SYN_R_X			4, 5, 6, 7

#define	SYN_PQ_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_PQ_STRIDE		4
#define	SYN_PQ_D		0, 1, 2, 3
#define	SYN_PQ_X		4, 5, 6, 7

#define	REC_PQ_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_33_36()
#define	REC_PQ_STRIDE		4
#define	REC_PQ_X		0, 1, 2, 3
#define	REC_PQ_Y		4, 5, 6, 7
#define	REC_PQ_T		8, 9, 22, 23

#define	SYN_PR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_PR_STRIDE		4
#define	SYN_PR_D		0, 1, 2, 3
#define	SYN_PR_X		4, 5, 6, 7

#define	REC_PR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_33_36()
#define	REC_PR_STRIDE		4
#define	REC_PR_X		0, 1, 2, 3
#define	REC_PR_Y		4, 5, 6, 7
#define	REC_PR_T		8, 9, 22, 23

#define	SYN_QR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_QR_STRIDE		4
#define	SYN_QR_D		0, 1, 2, 3
#define	SYN_QR_X		4, 5, 6, 7

#define	REC_QR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_33_36()
#define	REC_QR_STRIDE		4
#define	REC_QR_X		0, 1, 2, 3
#define	REC_QR_Y		4, 5, 6, 7
#define	REC_QR_T		8, 9, 22, 23

#define	SYN_PQR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_PQR_STRIDE		4
#define	SYN_PQR_D		0, 1, 2, 3
#define	SYN_PQR_X		4, 5, 6, 7

#define	REC_PQR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_31()	\
	GEN_X_DEFINE_32()	\
	GEN_X_DEFINE_33_36()
#define	REC_PQR_STRIDE		2
#define	REC_PQR_X		0, 1
#define	REC_PQR_Y		2, 3
#define	REC_PQR_Z		4, 5
#define	REC_PQR_XS		6, 7
#define	REC_PQR_YS		8, 9

#include <sys/types.h>
#include "vdev_raidz_math_impl.h"

DEFINE_GEN_METHODS(aarch64_neonx2);

/*
 * If compiled with -O0, gcc doesn't do any stack frame coalescing
 * and -Wframe-larger-than=1024 is triggered in debug mode.
 */
+#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
DEFINE_REC_METHODS(aarch64_neonx2);
+#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
+#endif

static boolean_t
raidz_will_aarch64_neonx2_work(void)
{
	return (kfpu_allowed());
}

const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl = {
	.init = NULL,
	.fini = NULL,
	.gen = RAIDZ_GEN_METHODS(aarch64_neonx2),
	.rec = RAIDZ_REC_METHODS(aarch64_neonx2),
	.is_supported = &raidz_will_aarch64_neonx2_work,
	.name = "aarch64_neonx2"
};

#endif /* defined(__aarch64__) */
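/*
 * A standalone sketch of the guard idiom the hunks above introduce
 * (illustrative only; the function name is hypothetical). Clang also
 * defines __GNUC__, so testing !defined(__clang__) is needed to scope
 * GCC-specific diagnostic pragmas to GCC alone; clang may not recognize
 * option spellings such as "-Wframe-larger-than=" with no value and could
 * otherwise emit -Wunknown-warning-option. Pairing push with pop keeps the
 * suppression local to the code between them.
 */
#include <stdio.h>

#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wframe-larger-than="
#endif

static void
demo_big_frame(void)
{
	char scratch[2048];	/* would trip -Wframe-larger-than=1024 */

	scratch[0] = '\0';
	printf("%s\n", scratch);
}

#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
#endif

int
main(void)
{
	demo_big_frame();
	return (0);
}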