diff --git a/config/always-compiler-options.m4 b/config/always-compiler-options.m4 index 1e7ec3db9f63..6383b12506ee 100644 --- a/config/always-compiler-options.m4 +++ b/config/always-compiler-options.m4 @@ -1,326 +1,357 @@ dnl # dnl # Enable -fsanitize=address if supported by $CC. dnl # dnl # LDFLAGS needs -fsanitize=address at all times so libraries compiled with dnl # it will be linked successfully. CFLAGS will vary by binary being built. dnl # dnl # The ASAN_OPTIONS environment variable can be used to further control dnl # the behavior of binaries and libraries built with -fsanitize=address. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_ASAN], [ AC_MSG_CHECKING([whether to build with -fsanitize=address support]) AC_ARG_ENABLE([asan], [AS_HELP_STRING([--enable-asan], [Enable -fsanitize=address support @<:@default=no@:>@])], [], [enable_asan=no]) AM_CONDITIONAL([ASAN_ENABLED], [test x$enable_asan = xyes]) AC_SUBST([ASAN_ENABLED], [$enable_asan]) AC_MSG_RESULT($enable_asan) AS_IF([ test "$enable_asan" = "yes" ], [ AC_MSG_CHECKING([whether $CC supports -fsanitize=address]) saved_cflags="$CFLAGS" CFLAGS="$CFLAGS -Werror -fsanitize=address" AC_LINK_IFELSE([ AC_LANG_SOURCE([[ int main() { return 0; } ]]) ], [ ASAN_CFLAGS="-fsanitize=address" ASAN_LDFLAGS="-fsanitize=address" ASAN_ZFS="_with_asan" AC_MSG_RESULT([yes]) ], [ AC_MSG_ERROR([$CC does not support -fsanitize=address]) ]) CFLAGS="$saved_cflags" ], [ ASAN_CFLAGS="" ASAN_LDFLAGS="" ASAN_ZFS="_without_asan" ]) AC_SUBST([ASAN_CFLAGS]) AC_SUBST([ASAN_LDFLAGS]) AC_SUBST([ASAN_ZFS]) ]) dnl # dnl # Enable -fsanitize=undefined if supported by cc. dnl # dnl # LDFLAGS needs -fsanitize=undefined at all times so libraries compiled with dnl # it will be linked successfully. CFLAGS will vary by binary being built. dnl # dnl # The UBSAN_OPTIONS environment variable can be used to further control dnl # the behavior of binaries and libraries built with -fsanitize=undefined. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_UBSAN], [ AC_MSG_CHECKING([whether to build with -fsanitize=undefined support]) AC_ARG_ENABLE([ubsan], [AS_HELP_STRING([--enable-ubsan], [Enable -fsanitize=undefined support @<:@default=no@:>@])], [], [enable_ubsan=no]) AM_CONDITIONAL([UBSAN_ENABLED], [test x$enable_ubsan = xyes]) AC_SUBST([UBSAN_ENABLED], [$enable_ubsan]) AC_MSG_RESULT($enable_ubsan) AS_IF([ test "$enable_ubsan" = "yes" ], [ AC_MSG_CHECKING([whether $CC supports -fsanitize=undefined]) saved_cflags="$CFLAGS" CFLAGS="$CFLAGS -Werror -fsanitize=undefined" AC_LINK_IFELSE([ AC_LANG_SOURCE([[ int main() { return 0; } ]]) ], [ UBSAN_CFLAGS="-fsanitize=undefined" UBSAN_LDFLAGS="-fsanitize=undefined" UBSAN_ZFS="_with_ubsan" AC_MSG_RESULT([yes]) ], [ AC_MSG_ERROR([$CC does not support -fsanitize=undefined]) ]) CFLAGS="$saved_cflags" ], [ UBSAN_CFLAGS="" UBSAN_LDFLAGS="" UBSAN_ZFS="_without_ubsan" ]) AC_SUBST([UBSAN_CFLAGS]) AC_SUBST([UBSAN_LDFLAGS]) AC_SUBST([UBSAN_ZFS]) ]) dnl # dnl # Check if cc supports -Wframe-larger-than= option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_FRAME_LARGER_THAN], [ AC_MSG_CHECKING([whether $CC supports -Wframe-larger-than=]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wframe-larger-than=4096" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ FRAME_LARGER_THAN="-Wframe-larger-than=4096" AC_MSG_RESULT([yes]) ], [ FRAME_LARGER_THAN="" AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([FRAME_LARGER_THAN]) ]) dnl # dnl # Check if cc supports -Wno-format-truncation option.
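A note on the sanitizer checks above before the warning-flag checks continue: -fsanitize=address instruments every load and store so the runtime can catch out-of-bounds and use-after-free accesses, which is also why the flag has to reach LDFLAGS. As a minimal stand-alone illustration (my sketch, not part of this diff), the program below links and runs cleanly in a normal build but is reported immediately by an ASAN-instrumented one; the ASAN_OPTIONS variable mentioned above tunes the report, e.g. ASAN_OPTIONS=abort_on_error=1.

	/* demo.c: cc -fsanitize=address -g demo.c && ./a.out */
	#include <stdlib.h>

	int
	main(void)
	{
		char *buf = malloc(8);

		buf[8] = 'x';	/* one byte past the end: heap-buffer-overflow */
		free(buf);
		return (0);
	}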
dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION], [ AC_MSG_CHECKING([whether $CC supports -Wno-format-truncation]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wno-format-truncation" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ NO_FORMAT_TRUNCATION=-Wno-format-truncation AC_MSG_RESULT([yes]) ], [ NO_FORMAT_TRUNCATION= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([NO_FORMAT_TRUNCATION]) ]) dnl # dnl # Check if cc supports -Wno-format-zero-length option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH], [ AC_MSG_CHECKING([whether $CC supports -Wno-format-zero-length]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wno-format-zero-length" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ NO_FORMAT_ZERO_LENGTH=-Wno-format-zero-length AC_MSG_RESULT([yes]) ], [ NO_FORMAT_ZERO_LENGTH= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([NO_FORMAT_ZERO_LENGTH]) ]) dnl # dnl # Check if cc supports -Wno-clobbered option. dnl # dnl # We actually invoke it with the -Wclobbered option dnl # and infer the 'no-' version does or doesn't exist based upon dnl # the results. This is required because when checking any of dnl # no- prefixed options gcc always returns success. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_CLOBBERED], [ AC_MSG_CHECKING([whether $CC supports -Wno-clobbered]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wclobbered" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ NO_CLOBBERED=-Wno-clobbered AC_MSG_RESULT([yes]) ], [ NO_CLOBBERED= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([NO_CLOBBERED]) ]) dnl # dnl # Check if cc supports -Wimplicit-fallthrough option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH], [ AC_MSG_CHECKING([whether $CC supports -Wimplicit-fallthrough]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wimplicit-fallthrough" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ IMPLICIT_FALLTHROUGH=-Wimplicit-fallthrough AC_DEFINE([HAVE_IMPLICIT_FALLTHROUGH], 1, [Define if compiler supports -Wimplicit-fallthrough]) AC_MSG_RESULT([yes]) ], [ IMPLICIT_FALLTHROUGH= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([IMPLICIT_FALLTHROUGH]) ]) dnl # dnl # Check if cc supports -Winfinite-recursion option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_INFINITE_RECURSION], [ AC_MSG_CHECKING([whether $CC supports -Winfinite-recursion]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Winfinite-recursion" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ INFINITE_RECURSION=-Winfinite-recursion AC_DEFINE([HAVE_INFINITE_RECURSION], 1, [Define if compiler supports -Winfinite-recursion]) AC_MSG_RESULT([yes]) ], [ INFINITE_RECURSION= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([INFINITE_RECURSION]) ]) dnl # dnl # Check if kernel cc supports -Winfinite-recursion option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_INFINITE_RECURSION], [ AC_MSG_CHECKING([whether $KERNEL_CC supports -Winfinite-recursion]) saved_cc="$CC" saved_flags="$CFLAGS" CC="gcc" CFLAGS="$CFLAGS -Werror -Winfinite-recursion" AS_IF([ test -n "$KERNEL_CC" ], [ CC="$KERNEL_CC" ]) AS_IF([ test -n "$KERNEL_LLVM" ], [ CC="clang" ]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ KERNEL_INFINITE_RECURSION=-Winfinite-recursion AC_DEFINE([HAVE_KERNEL_INFINITE_RECURSION], 1, [Define if compiler supports -Winfinite-recursion]) AC_MSG_RESULT([yes]) ], [ KERNEL_INFINITE_RECURSION= AC_MSG_RESULT([no]) ]) CC="$saved_cc" CFLAGS="$saved_flags" AC_SUBST([KERNEL_INFINITE_RECURSION]) ]) dnl # dnl # Check if cc supports -Wformat-overflow option. 
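Before moving on to the format checks below, a note on -Wimplicit-fallthrough above: the check also defines HAVE_IMPLICIT_FALLTHROUGH, so the code base can annotate intentional fall-through rather than silence the warning globally. A hedged sketch of the pattern the warning enforces, using the generic GCC/Clang spelling of the annotation (not the project's own macro):

	#include <stdio.h>

	static void
	describe(int n)
	{
		switch (n) {
		case 0:
			printf("zero\n");
			/*
			 * Deliberate fall-through; without an annotation here
			 * -Wimplicit-fallthrough reports the case boundary.
			 */
			__attribute__((fallthrough));
		case 1:
			printf("small\n");
			break;
		default:
			printf("large\n");
			break;
		}
	}

	int
	main(void)
	{
		describe(0);
		return (0);
	}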
dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_FORMAT_OVERFLOW], [ AC_MSG_CHECKING([whether $CC supports -Wformat-overflow]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -Wformat-overflow" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ FORMAT_OVERFLOW=-Wformat-overflow AC_DEFINE([HAVE_FORMAT_OVERFLOW], 1, [Define if compiler supports -Wformat-overflow]) AC_MSG_RESULT([yes]) ], [ FORMAT_OVERFLOW= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([FORMAT_OVERFLOW]) ]) dnl # dnl # Check if cc supports -fno-omit-frame-pointer option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER], [ AC_MSG_CHECKING([whether $CC supports -fno-omit-frame-pointer]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -fno-omit-frame-pointer" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ NO_OMIT_FRAME_POINTER=-fno-omit-frame-pointer AC_MSG_RESULT([yes]) ], [ NO_OMIT_FRAME_POINTER= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([NO_OMIT_FRAME_POINTER]) ]) dnl # dnl # Check if cc supports -fno-ipa-sra option. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA], [ AC_MSG_CHECKING([whether $CC supports -fno-ipa-sra]) saved_flags="$CFLAGS" CFLAGS="$CFLAGS -Werror -fno-ipa-sra" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ NO_IPA_SRA=-fno-ipa-sra AC_MSG_RESULT([yes]) ], [ NO_IPA_SRA= AC_MSG_RESULT([no]) ]) CFLAGS="$saved_flags" AC_SUBST([NO_IPA_SRA]) ]) + +dnl # +dnl # Check if kernel cc supports -fno-ipa-sra option. +dnl # +AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA], [ + AC_MSG_CHECKING([whether $KERNEL_CC supports -fno-ipa-sra]) + + saved_cc="$CC" + saved_flags="$CFLAGS" + CC="gcc" + CFLAGS="$CFLAGS -Werror -fno-ipa-sra" + + AS_IF([ test -n "$KERNEL_CC" ], [ + CC="$KERNEL_CC" + ]) + AS_IF([ test -n "$KERNEL_LLVM" ], [ + CC="clang" + ]) + + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ + KERNEL_NO_IPA_SRA=-fno-ipa-sra + AC_MSG_RESULT([yes]) + ], [ + KERNEL_NO_IPA_SRA= + AC_MSG_RESULT([no]) + ]) + + CC="$saved_cc" + CFLAGS="$saved_flags" + AC_SUBST([KERNEL_NO_IPA_SRA]) +]) diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index bb10bec04017..6355952487f7 100644 --- a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -1,643 +1,644 @@ AC_DEFUN([ZFS_AC_LICENSE], [ AC_MSG_CHECKING([zfs author]) AC_MSG_RESULT([$ZFS_META_AUTHOR]) AC_MSG_CHECKING([zfs license]) AC_MSG_RESULT([$ZFS_META_LICENSE]) ]) AC_DEFUN([ZFS_AC_DEBUG_ENABLE], [ DEBUG_CFLAGS="-Werror" DEBUG_CPPFLAGS="-DDEBUG -UNDEBUG" DEBUG_LDFLAGS="" DEBUG_ZFS="_with_debug" WITH_DEBUG="true" AC_DEFINE(ZFS_DEBUG, 1, [zfs debugging enabled]) KERNEL_DEBUG_CFLAGS="-Werror" KERNEL_DEBUG_CPPFLAGS="-DDEBUG -UNDEBUG" ]) AC_DEFUN([ZFS_AC_DEBUG_DISABLE], [ DEBUG_CFLAGS="" DEBUG_CPPFLAGS="-UDEBUG -DNDEBUG" DEBUG_LDFLAGS="" DEBUG_ZFS="_without_debug" WITH_DEBUG="" KERNEL_DEBUG_CFLAGS="" KERNEL_DEBUG_CPPFLAGS="-UDEBUG -DNDEBUG" ]) dnl # dnl # When debugging is enabled: dnl # - Enable all ASSERTs (-DDEBUG) dnl # - Promote all compiler warnings to errors (-Werror) dnl # dnl # (If INVARIANTS is detected, we need to force DEBUG, or strange panics dnl # can ensue.) 
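A note on the new kernel-compiler probe above: ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA tests the kernel compiler separately from $CC because the two can differ; with KERNEL_LLVM set, the probe runs against clang, which does not accept GCC's -fno-ipa-sra, so KERNEL_NO_IPA_SRA is correctly left empty. For background on why debuginfo builds want the flag at all: GCC's IPA-SRA pass may clone a function and rewrite its parameter list, emitting renamed symbols such as foo.isra.0 that confuse symbol-based tracing. A small hedged illustration (GCC at -O2 assumed; the transformation is optimization-dependent):

	/*
	 * With gcc -O2, IPA-SRA may rewrite sum() to take the two scalars
	 * it actually reads instead of the struct pointer; `nm` then shows
	 * a local symbol like "sum.isra.0".  Adding -fno-ipa-sra keeps the
	 * original signature, which is friendlier to kprobes and debuggers.
	 */
	struct pair {
		int	a;
		int	b;
		int	pad[16];
	};

	static int __attribute__((noinline))
	sum(const struct pair *p)
	{
		return (p->a + p->b);
	}

	int
	caller(int x, int y)
	{
		struct pair p = { .a = x, .b = y };

		return (sum(&p));
	}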
dnl # AC_DEFUN([ZFS_AC_DEBUG], [ AC_MSG_CHECKING([whether assertion support will be enabled]) AC_ARG_ENABLE([debug], [AS_HELP_STRING([--enable-debug], [Enable compiler and code assertions @<:@default=no@:>@])], [], [enable_debug=no]) AS_CASE(["x$enable_debug"], ["xyes"], [ZFS_AC_DEBUG_ENABLE], ["xno"], [ZFS_AC_DEBUG_DISABLE], [AC_MSG_ERROR([Unknown option $enable_debug])]) AS_CASE(["x$enable_invariants"], ["xyes"], [], ["xno"], [], [ZFS_AC_DEBUG_INVARIANTS_DETECT]) AS_CASE(["x$enable_invariants"], ["xyes"], [ZFS_AC_DEBUG_ENABLE], ["xno"], [], [AC_MSG_ERROR([Unknown option $enable_invariants])]) AC_SUBST(DEBUG_CFLAGS) AC_SUBST(DEBUG_CPPFLAGS) AC_SUBST(DEBUG_LDFLAGS) AC_SUBST(DEBUG_ZFS) AC_SUBST(WITH_DEBUG) AC_SUBST(KERNEL_DEBUG_CFLAGS) AC_SUBST(KERNEL_DEBUG_CPPFLAGS) AC_MSG_RESULT([$enable_debug]) ]) AC_DEFUN([ZFS_AC_DEBUGINFO_ENABLE], [ DEBUG_CFLAGS="$DEBUG_CFLAGS -g -fno-inline $NO_IPA_SRA" - KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $NO_IPA_SRA" + KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $KERNEL_NO_IPA_SRA" KERNEL_MAKE="$KERNEL_MAKE CONFIG_DEBUG_INFO=y" DEBUGINFO_ZFS="_with_debuginfo" ]) AC_DEFUN([ZFS_AC_DEBUGINFO_DISABLE], [ DEBUGINFO_ZFS="_without_debuginfo" ]) AC_DEFUN([ZFS_AC_DEBUGINFO], [ AC_MSG_CHECKING([whether debuginfo support will be forced]) AC_ARG_ENABLE([debuginfo], [AS_HELP_STRING([--enable-debuginfo], [Force generation of debuginfo @<:@default=no@:>@])], [], [enable_debuginfo=no]) AS_CASE(["x$enable_debuginfo"], ["xyes"], [ZFS_AC_DEBUGINFO_ENABLE], ["xno"], [ZFS_AC_DEBUGINFO_DISABLE], [AC_MSG_ERROR([Unknown option $enable_debuginfo])]) AC_SUBST(DEBUG_CFLAGS) AC_SUBST(DEBUGINFO_ZFS) AC_SUBST(KERNEL_DEBUG_CFLAGS) AC_SUBST(KERNEL_MAKE) AC_MSG_RESULT([$enable_debuginfo]) ]) dnl # dnl # Disabled by default, provides basic memory tracking. Track the total dnl # number of bytes allocated with kmem_alloc() and freed with kmem_free(). dnl # Then at module unload time if any bytes were leaked it will be reported dnl # on the console. dnl # AC_DEFUN([ZFS_AC_DEBUG_KMEM], [ AC_MSG_CHECKING([whether basic kmem accounting is enabled]) AC_ARG_ENABLE([debug-kmem], [AS_HELP_STRING([--enable-debug-kmem], [Enable basic kmem accounting @<:@default=no@:>@])], [], [enable_debug_kmem=no]) AS_IF([test "x$enable_debug_kmem" = xyes], [ KERNEL_DEBUG_CPPFLAGS="${KERNEL_DEBUG_CPPFLAGS} -DDEBUG_KMEM" DEBUG_KMEM_ZFS="_with_debug_kmem" ], [ DEBUG_KMEM_ZFS="_without_debug_kmem" ]) AC_SUBST(KERNEL_DEBUG_CPPFLAGS) AC_SUBST(DEBUG_KMEM_ZFS) AC_MSG_RESULT([$enable_debug_kmem]) ]) dnl # dnl # Disabled by default, provides detailed memory tracking. This feature dnl # also requires --enable-debug-kmem to be set. When enabled not only will dnl # total bytes be tracked but also the location of every kmem_alloc() and dnl # kmem_free(). When the module is unloaded a list of all leaked addresses dnl # and where they were allocated will be dumped to the console. Enabling dnl # this feature has a significant impact on performance but it makes finding dnl # memory leaks straightforward.
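Note: the kmem options above just add -DDEBUG_KMEM (and, below, -DDEBUG_KMEM_TRACKING); conceptually the basic accounting is a running byte counter wrapped around the allocator and checked at module unload. A user-space sketch of the idea (illustrative only, not the SPL implementation):

	#include <stdio.h>
	#include <stdlib.h>

	static long long kmem_used;	/* total outstanding bytes */

	static void *
	dbg_alloc(size_t size)
	{
		/* stash the size so the matching free can subtract it */
		size_t *p = malloc(sizeof (size_t) + size);

		if (p == NULL)
			return (NULL);
		*p = size;
		kmem_used += size;
		return (p + 1);
	}

	static void
	dbg_free(void *ptr)
	{
		size_t *p = (size_t *)ptr - 1;

		kmem_used -= *p;
		free(p);
	}

	int
	main(void)
	{
		dbg_free(dbg_alloc(100));
		/* at "unload" time, a nonzero count means a leak */
		printf("leaked bytes: %lld\n", kmem_used);
		return (0);
	}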
dnl # AC_DEFUN([ZFS_AC_DEBUG_KMEM_TRACKING], [ AC_MSG_CHECKING([whether detailed kmem tracking is enabled]) AC_ARG_ENABLE([debug-kmem-tracking], [AS_HELP_STRING([--enable-debug-kmem-tracking], [Enable detailed kmem tracking @<:@default=no@:>@])], [], [enable_debug_kmem_tracking=no]) AS_IF([test "x$enable_debug_kmem_tracking" = xyes], [ KERNEL_DEBUG_CPPFLAGS="${KERNEL_DEBUG_CPPFLAGS} -DDEBUG_KMEM_TRACKING" DEBUG_KMEM_TRACKING_ZFS="_with_debug_kmem_tracking" ], [ DEBUG_KMEM_TRACKING_ZFS="_without_debug_kmem_tracking" ]) AC_SUBST(KERNEL_DEBUG_CPPFLAGS) AC_SUBST(DEBUG_KMEM_TRACKING_ZFS) AC_MSG_RESULT([$enable_debug_kmem_tracking]) ]) AC_DEFUN([ZFS_AC_DEBUG_INVARIANTS_DETECT_FREEBSD], [ AS_IF([sysctl -n kern.conftxt | grep -Fqx $'options\tINVARIANTS'], [enable_invariants="yes"], [enable_invariants="no"]) ]) AC_DEFUN([ZFS_AC_DEBUG_INVARIANTS_DETECT], [ AM_COND_IF([BUILD_FREEBSD], [ZFS_AC_DEBUG_INVARIANTS_DETECT_FREEBSD], [enable_invariants="no"]) ]) dnl # dnl # Detected for the running kernel by default, enables INVARIANTS features dnl # in the FreeBSD kernel module. This feature must be used when building dnl # for a FreeBSD kernel with "options INVARIANTS" in the KERNCONF and must dnl # not be used when the INVARIANTS option is absent. dnl # AC_DEFUN([ZFS_AC_DEBUG_INVARIANTS], [ AC_MSG_CHECKING([whether FreeBSD kernel INVARIANTS checks are enabled]) AC_ARG_ENABLE([invariants], [AS_HELP_STRING([--enable-invariants], [Enable FreeBSD kernel INVARIANTS checks [[default: detect]]])], [], [ZFS_AC_DEBUG_INVARIANTS_DETECT]) AS_IF([test "x$enable_invariants" = xyes], [WITH_INVARIANTS="true"], [WITH_INVARIANTS=""]) AC_SUBST(WITH_INVARIANTS) AC_MSG_RESULT([$enable_invariants]) ]) AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [ AX_COUNT_CPUS([]) AC_SUBST(CPU_COUNT) ZFS_AC_CONFIG_ALWAYS_CC_NO_CLOBBERED ZFS_AC_CONFIG_ALWAYS_CC_INFINITE_RECURSION ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_INFINITE_RECURSION ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH ZFS_AC_CONFIG_ALWAYS_CC_FRAME_LARGER_THAN ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH ZFS_AC_CONFIG_ALWAYS_CC_FORMAT_OVERFLOW ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA + ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA ZFS_AC_CONFIG_ALWAYS_CC_ASAN ZFS_AC_CONFIG_ALWAYS_CC_UBSAN ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD ZFS_AC_CONFIG_ALWAYS_SYSTEM ZFS_AC_CONFIG_ALWAYS_ARCH ZFS_AC_CONFIG_ALWAYS_PYTHON ZFS_AC_CONFIG_ALWAYS_PYZFS ZFS_AC_CONFIG_ALWAYS_SED ZFS_AC_CONFIG_ALWAYS_CPPCHECK ZFS_AC_CONFIG_ALWAYS_SHELLCHECK ZFS_AC_CONFIG_ALWAYS_PARALLEL ]) AC_DEFUN([ZFS_AC_CONFIG], [ dnl # Remove the previous build test directory. 
rm -Rf build ZFS_CONFIG=all AC_ARG_WITH([config], AS_HELP_STRING([--with-config=CONFIG], [Config file 'kernel|user|all|srpm']), [ZFS_CONFIG="$withval"]) AC_ARG_ENABLE([linux-builtin], [AS_HELP_STRING([--enable-linux-builtin], [Configure for builtin in-tree kernel modules @<:@default=no@:>@])], [], [enable_linux_builtin=no]) AC_MSG_CHECKING([zfs config]) AC_MSG_RESULT([$ZFS_CONFIG]); AC_SUBST(ZFS_CONFIG) ZFS_AC_CONFIG_ALWAYS AM_COND_IF([BUILD_LINUX], [ AC_ARG_VAR([TEST_JOBS], [simultaneous jobs during configure]) if test "x$ac_cv_env_TEST_JOBS_set" != "xset"; then TEST_JOBS=$CPU_COUNT fi AC_SUBST(TEST_JOBS) ]) ZFS_INIT_SYSV= ZFS_INIT_SYSTEMD= ZFS_WANT_MODULES_LOAD_D= case "$ZFS_CONFIG" in kernel) ZFS_AC_CONFIG_KERNEL ;; user) ZFS_AC_CONFIG_USER ;; all) ZFS_AC_CONFIG_USER ZFS_AC_CONFIG_KERNEL ;; dist) ;; srpm) ;; *) AC_MSG_RESULT([Error!]) AC_MSG_ERROR([Bad value "$ZFS_CONFIG" for --with-config, use kernel|user|all|srpm]) ;; esac AM_CONDITIONAL([INIT_SYSV], [test "x$ZFS_INIT_SYSV" = "xyes"]) AM_CONDITIONAL([INIT_SYSTEMD], [test "x$ZFS_INIT_SYSTEMD" = "xyes"]) AM_CONDITIONAL([WANT_MODULES_LOAD_D], [test "x$ZFS_WANT_MODULES_LOAD_D" = "xyes"]) AM_CONDITIONAL([CONFIG_USER], [test "$ZFS_CONFIG" = user -o "$ZFS_CONFIG" = all]) AM_CONDITIONAL([CONFIG_KERNEL], [test "$ZFS_CONFIG" = kernel -o "$ZFS_CONFIG" = all] && [test "x$enable_linux_builtin" != xyes ]) AM_CONDITIONAL([CONFIG_QAT], [test "$ZFS_CONFIG" = kernel -o "$ZFS_CONFIG" = all] && [test "x$qatsrc" != x ]) AM_CONDITIONAL([WANT_DEVNAME2DEVID], [test "x$user_libudev" = xyes ]) AM_CONDITIONAL([WANT_MMAP_LIBAIO], [test "x$user_libaio" = xyes ]) AM_CONDITIONAL([PAM_ZFS_ENABLED], [test "x$enable_pam" = xyes]) ]) dnl # dnl # Check for rpm+rpmbuild to build RPM packages. If these tools dnl # are missing it is non-fatal but you will not be able to build dnl # RPM packages and will be warned if you try to. dnl # dnl # By default the generic spec file will be used because it requires dnl # minimal dependencies. Distribution-specific spec files can be dnl # placed under the 'rpm/' directory and enabled using dnl # the --with-spec= configure option.
dnl # AC_DEFUN([ZFS_AC_RPM], [ RPM=rpm RPMBUILD=rpmbuild AC_MSG_CHECKING([whether $RPM is available]) AS_IF([tmp=$($RPM --version 2>/dev/null)], [ RPM_VERSION=$(echo $tmp | $AWK '/RPM/ { print $[3] }') HAVE_RPM=yes AC_MSG_RESULT([$HAVE_RPM ($RPM_VERSION)]) ],[ HAVE_RPM=no AC_MSG_RESULT([$HAVE_RPM]) ]) AC_MSG_CHECKING([whether $RPMBUILD is available]) AS_IF([tmp=$($RPMBUILD --version 2>/dev/null)], [ RPMBUILD_VERSION=$(echo $tmp | $AWK '/RPM/ { print $[3] }') HAVE_RPMBUILD=yes AC_MSG_RESULT([$HAVE_RPMBUILD ($RPMBUILD_VERSION)]) ],[ HAVE_RPMBUILD=no AC_MSG_RESULT([$HAVE_RPMBUILD]) ]) RPM_DEFINE_COMMON='--define "$(DEBUG_ZFS) 1"' RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(DEBUGINFO_ZFS) 1"' RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(DEBUG_KMEM_ZFS) 1"' RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(DEBUG_KMEM_TRACKING_ZFS) 1"' RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(ASAN_ZFS) 1"' RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(UBSAN_ZFS) 1"' AS_IF([test "x$enable_debuginfo" = xyes], [ RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "__strip /bin/true"' ]) RPM_DEFINE_UTIL=' --define "_initconfdir $(initconfdir)"' dnl # Make the next three RPM_DEFINE_UTIL additions conditional, since dnl # their values may not be set when running: dnl # dnl # ./configure --with-config=srpm dnl # AS_IF([test -n "$dracutdir" ], [ RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_dracutdir $(dracutdir)"' ]) AS_IF([test -n "$udevdir" ], [ RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevdir $(udevdir)"' ]) AS_IF([test -n "$udevruledir" ], [ RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevruledir $(udevruledir)"' ]) RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_SYSTEMD)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYZFS)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PAM)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYTHON_VERSION)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYTHON_PKG_VERSION)' dnl # Override default lib directory on Debian/Ubuntu systems. The dnl # provided /usr/lib/rpm/platform/<arch>/macros files do not dnl # specify the correct path for multiarch systems as described dnl # by the packaging guidelines. dnl # dnl # https://wiki.ubuntu.com/MultiarchSpec dnl # https://wiki.debian.org/Multiarch/Implementation dnl # AS_IF([test "$DEFAULT_PACKAGE" = "deb"], [ MULTIARCH_LIBDIR="lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)" RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_lib $(MULTIARCH_LIBDIR)"' AC_SUBST(MULTIARCH_LIBDIR) ]) dnl # Make RPM_DEFINE_KMOD additions conditional on CONFIG_KERNEL, dnl # since the values will not be set otherwise. The spec files dnl # provide defaults for them.
dnl # RPM_DEFINE_KMOD='--define "_wrong_version_format_terminate_build 0"' AM_COND_IF([CONFIG_KERNEL], [ RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernels $(LINUX_VERSION)"' RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "ksrc $(LINUX)"' RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kobj $(LINUX_OBJ)"' RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_cc KERNEL_CC=$(KERNEL_CC)"' RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_ld KERNEL_LD=$(KERNEL_LD)"' RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_llvm KERNEL_LLVM=$(KERNEL_LLVM)"' ]) RPM_DEFINE_DKMS='' SRPM_DEFINE_COMMON='--define "build_src_rpm 1"' SRPM_DEFINE_UTIL= SRPM_DEFINE_KMOD= SRPM_DEFINE_DKMS= RPM_SPEC_DIR="rpm/generic" AC_ARG_WITH([spec], AS_HELP_STRING([--with-spec=SPEC], [Spec files 'generic|redhat']), [RPM_SPEC_DIR="rpm/$withval"]) AC_MSG_CHECKING([whether spec files are available]) AC_MSG_RESULT([yes ($RPM_SPEC_DIR/*.spec.in)]) AC_SUBST(HAVE_RPM) AC_SUBST(RPM) AC_SUBST(RPM_VERSION) AC_SUBST(HAVE_RPMBUILD) AC_SUBST(RPMBUILD) AC_SUBST(RPMBUILD_VERSION) AC_SUBST(RPM_SPEC_DIR) AC_SUBST(RPM_DEFINE_UTIL) AC_SUBST(RPM_DEFINE_KMOD) AC_SUBST(RPM_DEFINE_DKMS) AC_SUBST(RPM_DEFINE_COMMON) AC_SUBST(SRPM_DEFINE_UTIL) AC_SUBST(SRPM_DEFINE_KMOD) AC_SUBST(SRPM_DEFINE_DKMS) AC_SUBST(SRPM_DEFINE_COMMON) ]) dnl # dnl # Check for dpkg+dpkg-buildpackage to build DEB packages. If these dnl # tools are missing it is non-fatal but you will not be able to build dnl # DEB packages and will be warned if you try to. dnl # AC_DEFUN([ZFS_AC_DPKG], [ DPKG=dpkg DPKGBUILD=dpkg-buildpackage AC_MSG_CHECKING([whether $DPKG is available]) AS_IF([tmp=$($DPKG --version 2>/dev/null)], [ DPKG_VERSION=$(echo $tmp | $AWK '/Debian/ { print $[7] }') HAVE_DPKG=yes AC_MSG_RESULT([$HAVE_DPKG ($DPKG_VERSION)]) ],[ HAVE_DPKG=no AC_MSG_RESULT([$HAVE_DPKG]) ]) AC_MSG_CHECKING([whether $DPKGBUILD is available]) AS_IF([tmp=$($DPKGBUILD --version 2>/dev/null)], [ DPKGBUILD_VERSION=$(echo $tmp | \ $AWK '/Debian/ { print $[4] }' | cut -f-4 -d'.') HAVE_DPKGBUILD=yes AC_MSG_RESULT([$HAVE_DPKGBUILD ($DPKGBUILD_VERSION)]) ],[ HAVE_DPKGBUILD=no AC_MSG_RESULT([$HAVE_DPKGBUILD]) ]) AC_SUBST(HAVE_DPKG) AC_SUBST(DPKG) AC_SUBST(DPKG_VERSION) AC_SUBST(HAVE_DPKGBUILD) AC_SUBST(DPKGBUILD) AC_SUBST(DPKGBUILD_VERSION) ]) dnl # dnl # Until native packaging for various packaging systems dnl # can be added, the least we can do is attempt to use alien to dnl # convert the RPM packages to the needed package type. This is dnl # a hack but so far it has worked reasonably well. dnl # AC_DEFUN([ZFS_AC_ALIEN], [ ALIEN=alien AC_MSG_CHECKING([whether $ALIEN is available]) AS_IF([tmp=$($ALIEN --version 2>/dev/null)], [ ALIEN_VERSION=$(echo $tmp | $AWK '{ print $[3] }') ALIEN_MAJOR=$(echo ${ALIEN_VERSION} | $AWK -F'.' '{ print $[1] }') ALIEN_MINOR=$(echo ${ALIEN_VERSION} | $AWK -F'.' '{ print $[2] }') ALIEN_POINT=$(echo ${ALIEN_VERSION} | $AWK -F'.'
'{ print $[3] }') HAVE_ALIEN=yes AC_MSG_RESULT([$HAVE_ALIEN ($ALIEN_VERSION)]) ],[ HAVE_ALIEN=no AC_MSG_RESULT([$HAVE_ALIEN]) ]) AC_SUBST(HAVE_ALIEN) AC_SUBST(ALIEN) AC_SUBST(ALIEN_VERSION) AC_SUBST(ALIEN_MAJOR) AC_SUBST(ALIEN_MINOR) AC_SUBST(ALIEN_POINT) ]) dnl # dnl # Using the VENDOR tag from config.guess set the default dnl # package type for 'make pkg': (rpm | deb | tgz) dnl # AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ AC_MSG_CHECKING([os distribution]) AC_ARG_WITH([vendor], [AS_HELP_STRING([--with-vendor], [Distribution vendor @<:@default=check@:>@])], [with_vendor=$withval], [with_vendor=check]) AS_IF([test "x$with_vendor" = "xcheck"],[ if test -f /etc/toss-release ; then VENDOR=toss ; elif test -f /etc/fedora-release ; then VENDOR=fedora ; elif test -f /etc/redhat-release ; then VENDOR=redhat ; elif test -f /etc/gentoo-release ; then VENDOR=gentoo ; elif test -f /etc/arch-release ; then VENDOR=arch ; elif test -f /etc/SuSE-release ; then VENDOR=sles ; elif test -f /etc/slackware-version ; then VENDOR=slackware ; elif test -f /etc/lunar.release ; then VENDOR=lunar ; elif test -f /etc/lsb-release ; then VENDOR=ubuntu ; elif test -f /etc/debian_version ; then VENDOR=debian ; elif test -f /etc/alpine-release ; then VENDOR=alpine ; elif test -f /bin/freebsd-version ; then VENDOR=freebsd ; else VENDOR= ; fi], [ test "x${with_vendor}" != x],[ VENDOR="$with_vendor" ], [ VENDOR= ; ] ) AC_MSG_RESULT([$VENDOR]) AC_SUBST(VENDOR) AC_MSG_CHECKING([default package type]) case "$VENDOR" in toss) DEFAULT_PACKAGE=rpm ;; redhat) DEFAULT_PACKAGE=rpm ;; fedora) DEFAULT_PACKAGE=rpm ;; gentoo) DEFAULT_PACKAGE=tgz ;; alpine) DEFAULT_PACKAGE=tgz ;; arch) DEFAULT_PACKAGE=tgz ;; sles) DEFAULT_PACKAGE=rpm ;; slackware) DEFAULT_PACKAGE=tgz ;; lunar) DEFAULT_PACKAGE=tgz ;; ubuntu) DEFAULT_PACKAGE=deb ;; debian) DEFAULT_PACKAGE=deb ;; freebsd) DEFAULT_PACKAGE=pkg ;; *) DEFAULT_PACKAGE=rpm ;; esac AC_MSG_RESULT([$DEFAULT_PACKAGE]) AC_SUBST(DEFAULT_PACKAGE) AC_MSG_CHECKING([default init directory]) case "$VENDOR" in freebsd) initdir=$sysconfdir/rc.d ;; *) initdir=$sysconfdir/init.d;; esac AC_MSG_RESULT([$initdir]) AC_SUBST(initdir) AC_MSG_CHECKING([default init script type and shell]) case "$VENDOR" in toss) DEFAULT_INIT_SCRIPT=redhat ;; redhat) DEFAULT_INIT_SCRIPT=redhat ;; fedora) DEFAULT_INIT_SCRIPT=fedora ;; gentoo) DEFAULT_INIT_SCRIPT=openrc ;; alpine) DEFAULT_INIT_SCRIPT=openrc ;; arch) DEFAULT_INIT_SCRIPT=lsb ;; sles) DEFAULT_INIT_SCRIPT=lsb ;; slackware) DEFAULT_INIT_SCRIPT=lsb ;; lunar) DEFAULT_INIT_SCRIPT=lunar ;; ubuntu) DEFAULT_INIT_SCRIPT=lsb ;; debian) DEFAULT_INIT_SCRIPT=lsb ;; freebsd) DEFAULT_INIT_SCRIPT=freebsd;; *) DEFAULT_INIT_SCRIPT=lsb ;; esac case "$VENDOR" in gentoo) DEFAULT_INIT_SHELL="/sbin/openrc-run";; alpine) DEFAULT_INIT_SHELL="/sbin/openrc-run";; *) DEFAULT_INIT_SHELL="/bin/sh" ;; esac AC_MSG_RESULT([$DEFAULT_INIT_SCRIPT:$DEFAULT_INIT_SHELL]) AC_SUBST(DEFAULT_INIT_SCRIPT) AC_SUBST(DEFAULT_INIT_SHELL) AC_MSG_CHECKING([default nfs server init script]) AS_IF([test "$VENDOR" = "debian"], [DEFAULT_INIT_NFS_SERVER="nfs-kernel-server"], [DEFAULT_INIT_NFS_SERVER="nfs"] ) AC_MSG_RESULT([$DEFAULT_INIT_NFS_SERVER]) AC_SUBST(DEFAULT_INIT_NFS_SERVER) AC_MSG_CHECKING([default init config directory]) case "$VENDOR" in alpine) initconfdir=/etc/conf.d ;; gentoo) initconfdir=/etc/conf.d ;; toss) initconfdir=/etc/sysconfig ;; redhat) initconfdir=/etc/sysconfig ;; fedora) initconfdir=/etc/sysconfig ;; sles) initconfdir=/etc/sysconfig ;; ubuntu) initconfdir=/etc/default ;; debian) initconfdir=/etc/default ;; 
freebsd) initconfdir=$sysconfdir/rc.conf.d;; *) initconfdir=/etc/default ;; esac AC_MSG_RESULT([$initconfdir]) AC_SUBST(initconfdir) AC_MSG_CHECKING([whether initramfs-tools is available]) if test -d /usr/share/initramfs-tools ; then RPM_DEFINE_INITRAMFS='--define "_initramfs 1"' AC_MSG_RESULT([yes]) else RPM_DEFINE_INITRAMFS='' AC_MSG_RESULT([no]) fi AC_SUBST(RPM_DEFINE_INITRAMFS) ]) dnl # dnl # Default ZFS package configuration dnl # AC_DEFUN([ZFS_AC_PACKAGE], [ ZFS_AC_DEFAULT_PACKAGE AS_IF([test x$VENDOR != xfreebsd], [ ZFS_AC_RPM ZFS_AC_DPKG ZFS_AC_ALIEN ]) ]) diff --git a/module/Kbuild.in b/module/Kbuild.in index 581d50e64b42..a39f9d9d0500 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -1,469 +1,475 @@ # When integrated in to a monolithic kernel the spl module must appear # first. This ensures its module initialization function is run before # any of the other module initialization functions which depend on it. ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement ZFS_MODULE_CFLAGS += -Wmissing-prototypes ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @NO_FORMAT_ZERO_LENGTH@ ifneq ($(KBUILD_EXTMOD),) zfs_include = @abs_top_srcdir@/include icp_include = @abs_srcdir@/icp/include zstd_include = @abs_srcdir@/zstd/include ZFS_MODULE_CFLAGS += -include @abs_top_builddir@/zfs_config.h ZFS_MODULE_CFLAGS += -I@abs_top_builddir@/include src = @abs_srcdir@ obj = @abs_builddir@ else zfs_include = $(srctree)/include/zfs icp_include = $(srctree)/$(src)/icp/include zstd_include = $(srctree)/$(src)/zstd/include ZFS_MODULE_CFLAGS += -include $(zfs_include)/zfs_config.h endif ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/kernel ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/spl ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/zfs ZFS_MODULE_CFLAGS += -I$(zfs_include) ZFS_MODULE_CPPFLAGS += -D_KERNEL ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@ # KASAN enables -Werror=frame-larger-than=1024, which # breaks oh so many parts of our build. 
ifeq ($(CONFIG_KASAN),y) ZFS_MODULE_CFLAGS += -Wno-error=frame-larger-than= endif ifneq ($(KBUILD_EXTMOD),) @CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include @CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@ endif asflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) +ifeq ($(CONFIG_ARM64),y) +CFLAGS_REMOVE_zcommon/zfs_fletcher_aarch64_neon.o += -mgeneral-regs-only +CFLAGS_REMOVE_zfs/vdev_raidz_math_aarch64_neon.o += -mgeneral-regs-only +CFLAGS_REMOVE_zfs/vdev_raidz_math_aarch64_neonx2.o += -mgeneral-regs-only +endif + # Suppress unused-value warnings in sparc64 architecture headers ccflags-$(CONFIG_SPARC64) += -Wno-unused-value obj-$(CONFIG_ZFS) := spl.o zfs.o SPL_OBJS := \ spl-atomic.o \ spl-condvar.o \ spl-cred.o \ spl-err.o \ spl-generic.o \ spl-kmem-cache.o \ spl-kmem.o \ spl-kstat.o \ spl-proc.o \ spl-procfs-list.o \ spl-taskq.o \ spl-thread.o \ spl-trace.o \ spl-tsd.o \ spl-vmem.o \ spl-xdr.o \ spl-zlib.o \ spl-zone.o spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS)) zfs-objs += avl/avl.o ICP_OBJS := \ algs/aes/aes_impl.o \ algs/aes/aes_impl_generic.o \ algs/aes/aes_modes.o \ algs/blake3/blake3.o \ algs/blake3/blake3_generic.o \ algs/blake3/blake3_impl.o \ algs/blake3/blake3_x86-64.o \ algs/edonr/edonr.o \ algs/modes/cbc.o \ algs/modes/ccm.o \ algs/modes/ctr.o \ algs/modes/ecb.o \ algs/modes/gcm.o \ algs/modes/gcm_generic.o \ algs/modes/modes.o \ algs/sha2/sha2.o \ algs/skein/skein.o \ algs/skein/skein_block.o \ algs/skein/skein_iv.o \ api/kcf_cipher.o \ api/kcf_ctxops.o \ api/kcf_mac.o \ core/kcf_callprov.o \ core/kcf_mech_tabs.o \ core/kcf_prov_lib.o \ core/kcf_prov_tabs.o \ core/kcf_sched.o \ illumos-crypto.o \ io/aes.o \ io/sha2_mod.o \ io/skein_mod.o \ spi/kcf_spi.o ICP_OBJS_X86_64 := \ asm-x86_64/aes/aes_aesni.o \ asm-x86_64/aes/aes_amd64.o \ asm-x86_64/aes/aeskey.o \ asm-x86_64/blake3/blake3_avx2.o \ asm-x86_64/blake3/blake3_avx512.o \ asm-x86_64/blake3/blake3_sse2.o \ asm-x86_64/blake3/blake3_sse41.o \ asm-x86_64/modes/aesni-gcm-x86_64.o \ asm-x86_64/modes/gcm_pclmulqdq.o \ asm-x86_64/modes/ghash-x86_64.o \ asm-x86_64/sha2/sha256_impl.o \ asm-x86_64/sha2/sha512_impl.o ICP_OBJS_X86 := \ algs/aes/aes_impl_aesni.o \ algs/aes/aes_impl_x86-64.o \ algs/modes/gcm_pclmulqdq.o ICP_OBJS_ARM64 := \ asm-aarch64/blake3/b3_aarch64_sse2.o \ asm-aarch64/blake3/b3_aarch64_sse41.o ICP_OBJS_PPC_PPC64 := \ asm-ppc64/blake3/b3_ppc64le_sse2.o \ asm-ppc64/blake3/b3_ppc64le_sse41.o zfs-objs += $(addprefix icp/,$(ICP_OBJS)) zfs-$(CONFIG_X86) += $(addprefix icp/,$(ICP_OBJS_X86)) zfs-$(CONFIG_UML_X86)+= $(addprefix icp/,$(ICP_OBJS_X86)) zfs-$(CONFIG_X86_64) += $(addprefix icp/,$(ICP_OBJS_X86_64)) zfs-$(CONFIG_ARM64) += $(addprefix icp/,$(ICP_OBJS_ARM64)) zfs-$(CONFIG_PPC) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64)) zfs-$(CONFIG_PPC64) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64)) $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \ $(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : asflags-y += -I$(icp_include) $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \ $(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : ccflags-y += -I$(icp_include) # Suppress objtool "return with modified stack frame" warnings. OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y # Suppress objtool "unsupported stack pointer realignment" warnings. We are # not using a DRAP register while aligning the stack to a 64 byte boundary. # See #6950 for the reasoning. 
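Stepping back to the CFLAGS_REMOVE_* lines added above: arm64 kernels are compiled with -mgeneral-regs-only, which forbids FP/SIMD register use, yet those three objects are precisely the files that need NEON (at runtime the code brackets its SIMD sections with the kernel's FP-state save/restore, so stripping the flag is safe). A stand-alone aarch64 sketch of the conflict (illustrative; user-space compile flags assumed):

	/*
	 * cc -O2 -c neon.c                       builds on aarch64;
	 * cc -O2 -mgeneral-regs-only -c neon.c   fails, since arm_neon.h
	 * requires the SIMD register file that the flag forbids.
	 */
	#include <arm_neon.h>

	uint32_t
	sum4(const uint32_t *v)
	{
		uint32x4_t x = vld1q_u32(v);	/* vector load of 4 lanes */

		return (vaddvq_u32(x));		/* horizontal add across lanes */
	}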
OBJECT_FILES_NON_STANDARD_sha256_impl.o := y OBJECT_FILES_NON_STANDARD_sha512_impl.o := y LUA_OBJS := \ lapi.o \ lauxlib.o \ lbaselib.o \ lcode.o \ lcompat.o \ lcorolib.o \ lctype.o \ ldebug.o \ ldo.o \ lfunc.o \ lgc.o \ llex.o \ lmem.o \ lobject.o \ lopcodes.o \ lparser.o \ lstate.o \ lstring.o \ lstrlib.o \ ltable.o \ ltablib.o \ ltm.o \ lvm.o \ lzio.o \ setjmp/setjmp.o zfs-objs += $(addprefix lua/,$(LUA_OBJS)) NVPAIR_OBJS := \ fnvpair.o \ nvpair.o \ nvpair_alloc_fixed.o \ nvpair_alloc_spl.o zfs-objs += $(addprefix nvpair/,$(NVPAIR_OBJS)) UNICODE_OBJS := \ u8_textprep.o \ uconv.o zfs-objs += $(addprefix unicode/,$(UNICODE_OBJS)) ZCOMMON_OBJS := \ cityhash.o \ zfeature_common.o \ zfs_comutil.o \ zfs_deleg.o \ zfs_fletcher.o \ zfs_fletcher_superscalar.o \ zfs_fletcher_superscalar4.o \ zfs_namecheck.o \ zfs_prop.o \ zpool_prop.o \ zprop_common.o ZCOMMON_OBJS_X86 := \ zfs_fletcher_avx512.o \ zfs_fletcher_intel.o \ zfs_fletcher_sse.o ZCOMMON_OBJS_ARM64 := \ zfs_fletcher_aarch64_neon.o zfs-objs += $(addprefix zcommon/,$(ZCOMMON_OBJS)) zfs-$(CONFIG_X86) += $(addprefix zcommon/,$(ZCOMMON_OBJS_X86)) zfs-$(CONFIG_UML_X86)+= $(addprefix zcommon/,$(ZCOMMON_OBJS_X86)) zfs-$(CONFIG_ARM64) += $(addprefix zcommon/,$(ZCOMMON_OBJS_ARM64)) # Zstd uses -O3 by default, so we should follow ZFS_ZSTD_FLAGS := -O3 # -fno-tree-vectorize gets set for gcc in zstd/common/compiler.h # Set it for other compilers, too. ZFS_ZSTD_FLAGS += -fno-tree-vectorize # SSE register return with SSE disabled if -march=znverX is passed ZFS_ZSTD_FLAGS += -U__BMI__ # Quiet warnings about frame size due to unused code in unmodified zstd lib ZFS_ZSTD_FLAGS += -Wframe-larger-than=20480 ZSTD_OBJS := \ zfs_zstd.o \ zstd_sparc.o ZSTD_UPSTREAM_OBJS := \ lib/common/entropy_common.o \ lib/common/error_private.o \ lib/common/fse_decompress.o \ lib/common/pool.o \ lib/common/zstd_common.o \ lib/compress/fse_compress.o \ lib/compress/hist.o \ lib/compress/huf_compress.o \ lib/compress/zstd_compress.o \ lib/compress/zstd_compress_literals.o \ lib/compress/zstd_compress_sequences.o \ lib/compress/zstd_compress_superblock.o \ lib/compress/zstd_double_fast.o \ lib/compress/zstd_fast.o \ lib/compress/zstd_lazy.o \ lib/compress/zstd_ldm.o \ lib/compress/zstd_opt.o \ lib/decompress/huf_decompress.o \ lib/decompress/zstd_ddict.o \ lib/decompress/zstd_decompress.o \ lib/decompress/zstd_decompress_block.o zfs-objs += $(addprefix zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) # Disable aarch64 neon SIMD instructions for kernel mode $(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -I$(zstd_include) $(ZFS_ZSTD_FLAGS) $(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : asflags-y += -I$(zstd_include) $(addprefix $(obj)/zstd/,$(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -include $(zstd_include)/aarch64_compat.h -include $(zstd_include)/zstd_compat_wrapper.h -Wp,-w $(obj)/zstd/zfs_zstd.o : ccflags-y += -include $(zstd_include)/zstd_compat_wrapper.h ZFS_OBJS := \ abd.o \ aggsum.o \ arc.o \ blake3_zfs.o \ blkptr.o \ bplist.o \ bpobj.o \ bptree.o \ bqueue.o \ btree.o \ dataset_kstats.o \ dbuf.o \ dbuf_stats.o \ ddt.o \ ddt_zap.o \ dmu.o \ dmu_diff.o \ dmu_object.o \ dmu_objset.o \ dmu_recv.o \ dmu_redact.o \ dmu_send.o \ dmu_traverse.o \ dmu_tx.o \ dmu_zfetch.o \ dnode.o \ dnode_sync.o \ dsl_bookmark.o \ dsl_crypt.o \ dsl_dataset.o \ dsl_deadlist.o \ dsl_deleg.o \ dsl_destroy.o \ dsl_dir.o \ dsl_pool.o \ dsl_prop.o \ dsl_scan.o \ dsl_synctask.o \ dsl_userhold.o \ edonr_zfs.o \ fm.o \ gzip.o \ hkdf.o \ lz4.o \ lz4_zfs.o \ lzjb.o \ metaslab.o 
\ mmp.o \ multilist.o \ objlist.o \ pathname.o \ range_tree.o \ refcount.o \ rrwlock.o \ sa.o \ sha256.o \ skein_zfs.o \ spa.o \ spa_checkpoint.o \ spa_config.o \ spa_errlog.o \ spa_history.o \ spa_log_spacemap.o \ spa_misc.o \ spa_stats.o \ space_map.o \ space_reftree.o \ txg.o \ uberblock.o \ unique.o \ vdev.o \ vdev_cache.o \ vdev_draid.o \ vdev_draid_rand.o \ vdev_indirect.o \ vdev_indirect_births.o \ vdev_indirect_mapping.o \ vdev_initialize.o \ vdev_label.o \ vdev_mirror.o \ vdev_missing.o \ vdev_queue.o \ vdev_raidz.o \ vdev_raidz_math.o \ vdev_raidz_math_scalar.o \ vdev_rebuild.o \ vdev_removal.o \ vdev_root.o \ vdev_trim.o \ zap.o \ zap_leaf.o \ zap_micro.o \ zcp.o \ zcp_get.o \ zcp_global.o \ zcp_iter.o \ zcp_set.o \ zcp_synctask.o \ zfeature.o \ zfs_byteswap.o \ zfs_chksum.o \ zfs_fm.o \ zfs_fuid.o \ zfs_ioctl.o \ zfs_log.o \ zfs_onexit.o \ zfs_quota.o \ zfs_ratelimit.o \ zfs_replay.o \ zfs_rlock.o \ zfs_sa.o \ zfs_vnops.o \ zil.o \ zio.o \ zio_checksum.o \ zio_compress.o \ zio_inject.o \ zle.o \ zrlock.o \ zthr.o \ zvol.o ZFS_OBJS_OS := \ abd_os.o \ arc_os.o \ mmp_os.o \ policy.o \ qat.o \ qat_compress.o \ qat_crypt.o \ spa_misc_os.o \ trace.o \ vdev_disk.o \ vdev_file.o \ zfs_acl.o \ zfs_ctldir.o \ zfs_debug.o \ zfs_dir.o \ zfs_file_os.o \ zfs_ioctl_os.o \ zfs_racct.o \ zfs_sysfs.o \ zfs_uio.o \ zfs_vfsops.o \ zfs_vnops_os.o \ zfs_znode.o \ zio_crypt.o \ zpl_ctldir.o \ zpl_export.o \ zpl_file.o \ zpl_inode.o \ zpl_super.o \ zpl_xattr.o \ zvol_os.o ZFS_OBJS_X86 := \ vdev_raidz_math_avx2.o \ vdev_raidz_math_avx512bw.o \ vdev_raidz_math_avx512f.o \ vdev_raidz_math_sse2.o \ vdev_raidz_math_ssse3.o ZFS_OBJS_ARM64 := \ vdev_raidz_math_aarch64_neon.o \ vdev_raidz_math_aarch64_neonx2.o ZFS_OBJS_PPC_PPC64 := \ vdev_raidz_math_powerpc_altivec.o zfs-objs += $(addprefix zfs/,$(ZFS_OBJS)) $(addprefix os/linux/zfs/,$(ZFS_OBJS_OS)) zfs-$(CONFIG_X86) += $(addprefix zfs/,$(ZFS_OBJS_X86)) zfs-$(CONFIG_UML_X86)+= $(addprefix zfs/,$(ZFS_OBJS_X86)) zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64)) zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64)) zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64)) # Suppress incorrect warnings from versions of objtool which are not # aware of x86 EVEX prefix instructions used for AVX512. OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512f.o := y ifeq ($(CONFIG_ALTIVEC),y) $(obj)/zfs/vdev_raidz_math_powerpc_altivec.o : c_flags += -maltivec endif diff --git a/module/icp/algs/edonr/edonr.c b/module/icp/algs/edonr/edonr.c index 345133d7433a..b1f710cc0439 100644 --- a/module/icp/algs/edonr/edonr.c +++ b/module/icp/algs/edonr/edonr.c @@ -1,753 +1,755 @@ /* * IDI,NTNU * * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Copyright (C) 2009, 2010, Jorn Amundsen * Tweaked Edon-R implementation for SUPERCOP, based on NIST API. * * $Id: edonr.c 517 2013-02-17 20:34:39Z joern $ */ /* * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved */ /* * Unlike sha2 or skein, we won't expose edonr via the Kernel Cryptographic * Framework (KCF), because Edon-R is *NOT* suitable for general-purpose * cryptographic use. Users of Edon-R must interface directly to this module. */ #include #include #include /* big endian support, provides no-op's if run on little endian hosts */ #include "edonr_byteorder.h" #define hashState224(x) ((x)->pipe->p256) #define hashState256(x) ((x)->pipe->p256) #define hashState384(x) ((x)->pipe->p512) #define hashState512(x) ((x)->pipe->p512) /* rotate shortcuts */ #define rotl32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) #define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) #define rotl64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) #define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) #if !defined(__C99_RESTRICT) #define restrict /* restrict */ #endif #define EDONR_VALID_HASHBITLEN(x) \ ((x) == 512 || (x) == 384 || (x) == 256 || (x) == 224) /* EdonR224 initial double chaining pipe */ static const uint32_t i224p2[16] = { 0x00010203ul, 0x04050607ul, 0x08090a0bul, 0x0c0d0e0ful, 0x10111213ul, 0x14151617ul, 0x18191a1bul, 0x1c1d1e1ful, 0x20212223ul, 0x24252627ul, 0x28292a2bul, 0x2c2d2e2ful, 0x30313233ul, 0x34353637ul, 0x38393a3bul, 0x3c3d3e3ful, }; /* EdonR256 initial double chaining pipe */ static const uint32_t i256p2[16] = { 0x40414243ul, 0x44454647ul, 0x48494a4bul, 0x4c4d4e4ful, 0x50515253ul, 0x54555657ul, 0x58595a5bul, 0x5c5d5e5ful, 0x60616263ul, 0x64656667ul, 0x68696a6bul, 0x6c6d6e6ful, 0x70717273ul, 0x74757677ul, 0x78797a7bul, 0x7c7d7e7ful, }; /* EdonR384 initial double chaining pipe */ static const uint64_t i384p2[16] = { 0x0001020304050607ull, 0x08090a0b0c0d0e0full, 0x1011121314151617ull, 0x18191a1b1c1d1e1full, 0x2021222324252627ull, 0x28292a2b2c2d2e2full, 0x3031323334353637ull, 0x38393a3b3c3d3e3full, 0x4041424344454647ull, 0x48494a4b4c4d4e4full, 0x5051525354555657ull, 0x58595a5b5c5d5e5full, 0x6061626364656667ull, 0x68696a6b6c6d6e6full, 0x7071727374757677ull, 0x78797a7b7c7d7e7full }; /* EdonR512 initial double chaining pipe */ static const uint64_t i512p2[16] = { 0x8081828384858687ull, 0x88898a8b8c8d8e8full, 0x9091929394959697ull, 0x98999a9b9c9d9e9full, 0xa0a1a2a3a4a5a6a7ull, 0xa8a9aaabacadaeafull, 0xb0b1b2b3b4b5b6b7ull, 0xb8b9babbbcbdbebfull, 0xc0c1c2c3c4c5c6c7ull, 0xc8c9cacbcccdcecfull, 0xd0d1d2d3d4d5d6d7ull, 0xd8d9dadbdcdddedfull, 0xe0e1e2e3e4e5e6e7ull, 0xe8e9eaebecedeeefull, 0xf0f1f2f3f4f5f6f7ull, 0xf8f9fafbfcfdfeffull }; /* * First Latin Square * 0 7 1 3 2 4 6 5 * 4 1 7 6 3 0 5 2 * 7 0 4 2 5 3 1 6 * 1 4 0 5 6 2 7 3 * 2 3 6 7 1 5 0 4 * 5 2 3 1 7 6 4 0 * 3 6 5 0 4 7 2 1 * 6 5 2 4 0 1 3 7 */ #define LS1_256(c, x0, x1, x2, x3, x4, x5, x6, x7) \ { \ uint32_t x04, x17, x23, x56, x07, x26; \ x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \ s0 = c + x07 + x2; \ s1 = rotl32(x07 + x3, 4); \ s2 = rotl32(x07 + x6, 8); \ x23 = x2 + x3; \ s5 = rotl32(x04 + x23 + x5, 22); \ x56 = x5 + x6; \ s6 = rotl32(x17 + x56 + x0, 24); \ x26 = x23+x56; \ s3 = rotl32(x26 + x7, 13); \ s4 = rotl32(x26 + x1, 17); \ s7 = rotl32(x26 + x4, 29); \ } #define LS1_512(c, x0, x1, x2, x3, 
x4, x5, x6, x7) \ { \ uint64_t x04, x17, x23, x56, x07, x26; \ x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \ s0 = c + x07 + x2; \ s1 = rotl64(x07 + x3, 5); \ s2 = rotl64(x07 + x6, 15); \ x23 = x2 + x3; \ s5 = rotl64(x04 + x23 + x5, 40); \ x56 = x5 + x6; \ s6 = rotl64(x17 + x56 + x0, 50); \ x26 = x23+x56; \ s3 = rotl64(x26 + x7, 22); \ s4 = rotl64(x26 + x1, 31); \ s7 = rotl64(x26 + x4, 59); \ } /* * Second Orthogonal Latin Square * 0 4 2 3 1 6 5 7 * 7 6 3 2 5 4 1 0 * 5 3 1 6 0 2 7 4 * 1 0 5 4 3 7 2 6 * 2 1 0 7 4 5 6 3 * 3 5 7 0 6 1 4 2 * 4 7 6 1 2 0 3 5 * 6 2 4 5 7 3 0 1 */ #define LS2_256(c, y0, y1, y2, y3, y4, y5, y6, y7) \ { \ uint32_t y01, y25, y34, y67, y04, y05, y27, y37; \ y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \ t0 = ~c + y05 + y7; \ t2 = rotl32(y05 + y3, 9); \ y34 = y3+y4, y04 = y01+y34; \ t1 = rotl32(y04 + y6, 5); \ t4 = rotl32(y04 + y5, 15); \ y67 = y6+y7, y37 = y34+y67; \ t3 = rotl32(y37 + y2, 11); \ t7 = rotl32(y37 + y0, 27); \ y27 = y25+y67; \ t5 = rotl32(y27 + y4, 20); \ t6 = rotl32(y27 + y1, 25); \ } #define LS2_512(c, y0, y1, y2, y3, y4, y5, y6, y7) \ { \ uint64_t y01, y25, y34, y67, y04, y05, y27, y37; \ y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \ t0 = ~c + y05 + y7; \ t2 = rotl64(y05 + y3, 19); \ y34 = y3+y4, y04 = y01+y34; \ t1 = rotl64(y04 + y6, 10); \ t4 = rotl64(y04 + y5, 36); \ y67 = y6+y7, y37 = y34+y67; \ t3 = rotl64(y37 + y2, 29); \ t7 = rotl64(y37 + y0, 55); \ y27 = y25+y67; \ t5 = rotl64(y27 + y4, 44); \ t6 = rotl64(y27 + y1, 48); \ } #define quasi_exform256(r0, r1, r2, r3, r4, r5, r6, r7) \ { \ uint32_t s04, s17, s23, s56, t01, t25, t34, t67; \ s04 = s0 ^ s4, t01 = t0 ^ t1; \ r0 = (s04 ^ s1) + (t01 ^ t5); \ t67 = t6 ^ t7; \ r1 = (s04 ^ s7) + (t2 ^ t67); \ s23 = s2 ^ s3; \ r7 = (s23 ^ s5) + (t4 ^ t67); \ t34 = t3 ^ t4; \ r3 = (s23 ^ s4) + (t0 ^ t34); \ s56 = s5 ^ s6; \ r5 = (s3 ^ s56) + (t34 ^ t6); \ t25 = t2 ^ t5; \ r6 = (s2 ^ s56) + (t25 ^ t7); \ s17 = s1 ^ s7; \ r4 = (s0 ^ s17) + (t1 ^ t25); \ r2 = (s17 ^ s6) + (t01 ^ t3); \ } #define quasi_exform512(r0, r1, r2, r3, r4, r5, r6, r7) \ { \ uint64_t s04, s17, s23, s56, t01, t25, t34, t67; \ s04 = s0 ^ s4, t01 = t0 ^ t1; \ r0 = (s04 ^ s1) + (t01 ^ t5); \ t67 = t6 ^ t7; \ r1 = (s04 ^ s7) + (t2 ^ t67); \ s23 = s2 ^ s3; \ r7 = (s23 ^ s5) + (t4 ^ t67); \ t34 = t3 ^ t4; \ r3 = (s23 ^ s4) + (t0 ^ t34); \ s56 = s5 ^ s6; \ r5 = (s3 ^ s56) + (t34 ^ t6); \ t25 = t2 ^ t5; \ r6 = (s2 ^ s56) + (t25 ^ t7); \ s17 = s1 ^ s7; \ r4 = (s0 ^ s17) + (t1 ^ t25); \ r2 = (s17 ^ s6) + (t01 ^ t3); \ } static size_t Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p) { size_t bl; for (bl = bitlen; bl >= EdonR256_BLOCK_BITSIZE; bl -= EdonR256_BLOCK_BITSIZE, data += 16) { uint32_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4, t5, t6, t7; uint32_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4, q5, q6, q7; const uint32_t defix = 0xaaaaaaaa; #if defined(MACHINE_IS_BIG_ENDIAN) uint32_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8, swp9, swp10, swp11, swp12, swp13, swp14, swp15; #define d(j) swp ## j #define s32(j) ld_swap32((uint32_t *)data + j, swp ## j) #else #define d(j) data[j] #endif /* First row of quasigroup e-transformations */ #if defined(MACHINE_IS_BIG_ENDIAN) s32(8); s32(9); s32(10); s32(11); s32(12); s32(13); s32(14); s32(15); #endif LS1_256(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9), d(8)); #if defined(MACHINE_IS_BIG_ENDIAN) s32(0); s32(1); s32(2); s32(3); s32(4); s32(5); s32(6); s32(7); #undef s32 #endif LS2_256(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7)); quasi_exform256(p0, p1, 
p2, p3, p4, p5, p6, p7); LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_256(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14), d(15)); quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); /* Second row of quasigroup e-transformations */ LS1_256(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7); quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); /* Third row of quasigroup e-transformations */ LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_256(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]); quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); LS1_256(defix, q0, q1, q2, q3, q4, q5, q6, q7); LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); /* Fourth row of quasigroup e-transformations */ LS1_256(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0)); LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7); quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); /* Edon-R tweak on the original SHA-3 Edon-R submission. */ p[0] ^= d(8) ^ p0; p[1] ^= d(9) ^ p1; p[2] ^= d(10) ^ p2; p[3] ^= d(11) ^ p3; p[4] ^= d(12) ^ p4; p[5] ^= d(13) ^ p5; p[6] ^= d(14) ^ p6; p[7] ^= d(15) ^ p7; p[8] ^= d(0) ^ q0; p[9] ^= d(1) ^ q1; p[10] ^= d(2) ^ q2; p[11] ^= d(3) ^ q3; p[12] ^= d(4) ^ q4; p[13] ^= d(5) ^ q5; p[14] ^= d(6) ^ q6; p[15] ^= d(7) ^ q7; } #undef d return (bitlen - bl); } /* * Why is this #pragma here? * * Checksum functions like this one can go over the stack frame size check * Linux imposes on 32-bit platforms (-Wframe-larger-than=1024). We can * safely ignore the compiler error since we know that in OpenZFS, the * function will be called from a worker thread that won't be using * much stack. The only function that goes over the 1k limit is Q512(), * which only goes over it by a hair (1248 bytes on ARM32).
*/ #include /* for _ILP32 */ -#ifdef _ILP32 /* We're 32-bit, assume small stack frames */ +#if defined(_ILP32) /* We're 32-bit, assume small stack frames */ +#if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic ignored "-Wframe-larger-than=" #endif +#endif #if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__) static inline size_t #else static size_t #endif Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p) { size_t bl; for (bl = bitlen; bl >= EdonR512_BLOCK_BITSIZE; bl -= EdonR512_BLOCK_BITSIZE, data += 16) { uint64_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4, t5, t6, t7; uint64_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4, q5, q6, q7; const uint64_t defix = 0xaaaaaaaaaaaaaaaaull; #if defined(MACHINE_IS_BIG_ENDIAN) uint64_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8, swp9, swp10, swp11, swp12, swp13, swp14, swp15; #define d(j) swp##j #define s64(j) ld_swap64((uint64_t *)data+j, swp##j) #else #define d(j) data[j] #endif /* First row of quasigroup e-transformations */ #if defined(MACHINE_IS_BIG_ENDIAN) s64(8); s64(9); s64(10); s64(11); s64(12); s64(13); s64(14); s64(15); #endif LS1_512(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9), d(8)); #if defined(MACHINE_IS_BIG_ENDIAN) s64(0); s64(1); s64(2); s64(3); s64(4); s64(5); s64(6); s64(7); #undef s64 #endif LS2_512(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7)); quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_512(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14), d(15)); quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); /* Second row of quasigroup e-transformations */ LS1_512(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); /* Third row of quasigroup e-transformations */ LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_512(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]); quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); LS1_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); /* Fourth row of quasigroup e-transformations */ LS1_512(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0)); LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); /* Edon-R tweak on the original SHA-3 Edon-R submission. 
*/ p[0] ^= d(8) ^ p0; p[1] ^= d(9) ^ p1; p[2] ^= d(10) ^ p2; p[3] ^= d(11) ^ p3; p[4] ^= d(12) ^ p4; p[5] ^= d(13) ^ p5; p[6] ^= d(14) ^ p6; p[7] ^= d(15) ^ p7; p[8] ^= d(0) ^ q0; p[9] ^= d(1) ^ q1; p[10] ^= d(2) ^ q2; p[11] ^= d(3) ^ q3; p[12] ^= d(4) ^ q4; p[13] ^= d(5) ^ q5; p[14] ^= d(6) ^ q6; p[15] ^= d(7) ^ q7; } #undef d return (bitlen - bl); } void EdonRInit(EdonRState *state, size_t hashbitlen) { ASSERT(EDONR_VALID_HASHBITLEN(hashbitlen)); switch (hashbitlen) { case 224: state->hashbitlen = 224; state->bits_processed = 0; state->unprocessed_bits = 0; memcpy(hashState224(state)->DoublePipe, i224p2, sizeof (i224p2)); break; case 256: state->hashbitlen = 256; state->bits_processed = 0; state->unprocessed_bits = 0; memcpy(hashState256(state)->DoublePipe, i256p2, sizeof (i256p2)); break; case 384: state->hashbitlen = 384; state->bits_processed = 0; state->unprocessed_bits = 0; memcpy(hashState384(state)->DoublePipe, i384p2, sizeof (i384p2)); break; case 512: state->hashbitlen = 512; state->bits_processed = 0; state->unprocessed_bits = 0; memcpy(hashState512(state)->DoublePipe, i512p2, sizeof (i512p2)); break; } } void EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen) { uint32_t *data32; uint64_t *data64; size_t bits_processed; ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen)); switch (state->hashbitlen) { case 224: case 256: if (state->unprocessed_bits > 0) { /* LastBytes = databitlen / 8 */ int LastBytes = (int)databitlen >> 3; ASSERT(state->unprocessed_bits + databitlen <= EdonR256_BLOCK_SIZE * 8); memcpy(hashState256(state)->LastPart + (state->unprocessed_bits >> 3), data, LastBytes); state->unprocessed_bits += (int)databitlen; databitlen = state->unprocessed_bits; /* LINTED E_BAD_PTR_CAST_ALIGN */ data32 = (uint32_t *)hashState256(state)->LastPart; } else /* LINTED E_BAD_PTR_CAST_ALIGN */ data32 = (uint32_t *)data; bits_processed = Q256(databitlen, data32, hashState256(state)->DoublePipe); state->bits_processed += bits_processed; databitlen -= bits_processed; state->unprocessed_bits = (int)databitlen; if (databitlen > 0) { /* LastBytes = Ceil(databitlen / 8) */ int LastBytes = ((~(((-(int)databitlen) >> 3) & 0x01ff)) + 1) & 0x01ff; data32 += bits_processed >> 5; /* byte size update */ memmove(hashState256(state)->LastPart, data32, LastBytes); } break; case 384: case 512: if (state->unprocessed_bits > 0) { /* LastBytes = databitlen / 8 */ int LastBytes = (int)databitlen >> 3; ASSERT(state->unprocessed_bits + databitlen <= EdonR512_BLOCK_SIZE * 8); memcpy(hashState512(state)->LastPart + (state->unprocessed_bits >> 3), data, LastBytes); state->unprocessed_bits += (int)databitlen; databitlen = state->unprocessed_bits; /* LINTED E_BAD_PTR_CAST_ALIGN */ data64 = (uint64_t *)hashState512(state)->LastPart; } else /* LINTED E_BAD_PTR_CAST_ALIGN */ data64 = (uint64_t *)data; bits_processed = Q512(databitlen, data64, hashState512(state)->DoublePipe); state->bits_processed += bits_processed; databitlen -= bits_processed; state->unprocessed_bits = (int)databitlen; if (databitlen > 0) { /* LastBytes = Ceil(databitlen / 8) */ int LastBytes = ((~(((-(int)databitlen) >> 3) & 0x03ff)) + 1) & 0x03ff; data64 += bits_processed >> 6; /* byte size update */ memmove(hashState512(state)->LastPart, data64, LastBytes); } break; } } void EdonRFinal(EdonRState *state, uint8_t *hashval) { uint32_t *data32; uint64_t *data64, num_bits; size_t databitlen; int LastByte, PadOnePosition; num_bits = state->bits_processed + state->unprocessed_bits; ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen)); 
switch (state->hashbitlen) { case 224: case 256: LastByte = (int)state->unprocessed_bits >> 3; PadOnePosition = 7 - (state->unprocessed_bits & 0x07); hashState256(state)->LastPart[LastByte] = (hashState256(state)->LastPart[LastByte] & (0xff << (PadOnePosition + 1))) ^ (0x01 << PadOnePosition); /* LINTED E_BAD_PTR_CAST_ALIGN */ data64 = (uint64_t *)hashState256(state)->LastPart; if (state->unprocessed_bits < 448) { (void) memset((hashState256(state)->LastPart) + LastByte + 1, 0x00, EdonR256_BLOCK_SIZE - LastByte - 9); databitlen = EdonR256_BLOCK_SIZE * 8; #if defined(MACHINE_IS_BIG_ENDIAN) st_swap64(num_bits, data64 + 7); #else data64[7] = num_bits; #endif } else { (void) memset((hashState256(state)->LastPart) + LastByte + 1, 0x00, EdonR256_BLOCK_SIZE * 2 - LastByte - 9); databitlen = EdonR256_BLOCK_SIZE * 16; #if defined(MACHINE_IS_BIG_ENDIAN) st_swap64(num_bits, data64 + 15); #else data64[15] = num_bits; #endif } /* LINTED E_BAD_PTR_CAST_ALIGN */ data32 = (uint32_t *)hashState256(state)->LastPart; state->bits_processed += Q256(databitlen, data32, hashState256(state)->DoublePipe); break; case 384: case 512: LastByte = (int)state->unprocessed_bits >> 3; PadOnePosition = 7 - (state->unprocessed_bits & 0x07); hashState512(state)->LastPart[LastByte] = (hashState512(state)->LastPart[LastByte] & (0xff << (PadOnePosition + 1))) ^ (0x01 << PadOnePosition); /* LINTED E_BAD_PTR_CAST_ALIGN */ data64 = (uint64_t *)hashState512(state)->LastPart; if (state->unprocessed_bits < 960) { (void) memset((hashState512(state)->LastPart) + LastByte + 1, 0x00, EdonR512_BLOCK_SIZE - LastByte - 9); databitlen = EdonR512_BLOCK_SIZE * 8; #if defined(MACHINE_IS_BIG_ENDIAN) st_swap64(num_bits, data64 + 15); #else data64[15] = num_bits; #endif } else { (void) memset((hashState512(state)->LastPart) + LastByte + 1, 0x00, EdonR512_BLOCK_SIZE * 2 - LastByte - 9); databitlen = EdonR512_BLOCK_SIZE * 16; #if defined(MACHINE_IS_BIG_ENDIAN) st_swap64(num_bits, data64 + 31); #else data64[31] = num_bits; #endif } state->bits_processed += Q512(databitlen, data64, hashState512(state)->DoublePipe); break; } switch (state->hashbitlen) { case 224: { #if defined(MACHINE_IS_BIG_ENDIAN) uint32_t *d32 = (uint32_t *)hashval; uint32_t *s32 = hashState224(state)->DoublePipe + 9; int j; for (j = 0; j < EdonR224_DIGEST_SIZE >> 2; j++) st_swap32(s32[j], d32 + j); #else memcpy(hashval, hashState256(state)->DoublePipe + 9, EdonR224_DIGEST_SIZE); #endif break; } case 256: { #if defined(MACHINE_IS_BIG_ENDIAN) uint32_t *d32 = (uint32_t *)hashval; uint32_t *s32 = hashState224(state)->DoublePipe + 8; int j; for (j = 0; j < EdonR256_DIGEST_SIZE >> 2; j++) st_swap32(s32[j], d32 + j); #else memcpy(hashval, hashState256(state)->DoublePipe + 8, EdonR256_DIGEST_SIZE); #endif break; } case 384: { #if defined(MACHINE_IS_BIG_ENDIAN) uint64_t *d64 = (uint64_t *)hashval; uint64_t *s64 = hashState384(state)->DoublePipe + 10; int j; for (j = 0; j < EdonR384_DIGEST_SIZE >> 3; j++) st_swap64(s64[j], d64 + j); #else memcpy(hashval, hashState384(state)->DoublePipe + 10, EdonR384_DIGEST_SIZE); #endif break; } case 512: { #if defined(MACHINE_IS_BIG_ENDIAN) uint64_t *d64 = (uint64_t *)hashval; uint64_t *s64 = hashState512(state)->DoublePipe + 8; int j; for (j = 0; j < EdonR512_DIGEST_SIZE >> 3; j++) st_swap64(s64[j], d64 + j); #else memcpy(hashval, hashState512(state)->DoublePipe + 8, EdonR512_DIGEST_SIZE); #endif break; } } } void EdonRHash(size_t hashbitlen, const uint8_t *data, size_t databitlen, uint8_t *hashval) { EdonRState state; EdonRInit(&state, hashbitlen); 
EdonRUpdate(&state, data, databitlen); EdonRFinal(&state, hashval); } #ifdef _KERNEL EXPORT_SYMBOL(EdonRInit); EXPORT_SYMBOL(EdonRUpdate); EXPORT_SYMBOL(EdonRHash); EXPORT_SYMBOL(EdonRFinal); #endif diff --git a/module/icp/algs/skein/skein_block.c b/module/icp/algs/skein/skein_block.c index 7ba165a48511..3ad52da5f6a3 100644 --- a/module/icp/algs/skein/skein_block.c +++ b/module/icp/algs/skein/skein_block.c @@ -1,790 +1,792 @@ /* * Implementation of the Skein block functions. * Source code author: Doug Whiting, 2008. * This algorithm and source code is released to the public domain. * Compile-time switches: * SKEIN_USE_ASM -- set bits (256/512/1024) to select which * versions use ASM code for block processing * [default: use C for all block sizes] */ /* Copyright 2013 Doug Whiting. This code is released to the public domain. */ #include #include "skein_impl.h" #include /* for _ILP32 */ #ifndef SKEIN_USE_ASM #define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */ #endif #ifndef SKEIN_LOOP /* * The low-level checksum routines use a lot of stack space. On systems where * small stacks frame are enforced (like 32-bit kernel builds), do not unroll * checksum calculations to save stack space. * * Even with no loops unrolled, we still can exceed the 1k stack frame limit * in Skein1024_Process_Block() (it hits 1272 bytes on ARM32). We can * safely ignore it though, since that the checksum functions will be called * from a worker thread that won't be using much stack. That's why we have * the #pragma here to ignore the warning. */ #if defined(_ILP32) || defined(__powerpc) /* Assume small stack */ +#if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic ignored "-Wframe-larger-than=" +#endif /* * We're running on 32-bit, don't unroll loops to save stack frame space * * Due to the ways the calculations on SKEIN_LOOP are done in * Skein_*_Process_Block(), a value of 111 disables unrolling loops * in any of those functions. */ #define SKEIN_LOOP 111 #else /* We're compiling with large stacks */ #define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */ #endif #endif /* some useful definitions for code here */ #define BLK_BITS (WCNT*64) #define KW_TWK_BASE (0) #define KW_KEY_BASE (3) #define ks (kw + KW_KEY_BASE) #define ts (kw + KW_TWK_BASE) /* no debugging in Illumos version */ #define DebugSaveTweak(ctx) /* Skein_256 */ #if !(SKEIN_USE_ASM & 256) void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr, size_t blkCnt, size_t byteCntAdd) { enum { WCNT = SKEIN_256_STATE_WORDS }; #undef RCNT #define RCNT (SKEIN_256_ROUNDS_TOTAL / 8) #ifdef SKEIN_LOOP /* configure how much to unroll the loop */ #define SKEIN_UNROLL_256 (((SKEIN_LOOP) / 100) % 10) #else #define SKEIN_UNROLL_256 (0) #endif #if SKEIN_UNROLL_256 #if (RCNT % SKEIN_UNROLL_256) #error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */ #endif size_t r; /* key schedule words : chaining vars + tweak + "rotation" */ uint64_t kw[WCNT + 4 + RCNT * 2]; #else uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */ #endif /* local copy of context vars, for speed */ uint64_t X0, X1, X2, X3; uint64_t w[WCNT]; /* local copy of input block */ #ifdef SKEIN_DEBUG /* use for debugging (help compiler put Xn in registers) */ const uint64_t *Xptr[4]; Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; #endif Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ ts[0] = ctx->h.T[0]; ts[1] = ctx->h.T[1]; do { /* * this implementation only supports 2**64 input bytes * (no carry out here) */ ts[0] += byteCntAdd; /* update processed length */ /* precompute the key schedule for this block */ ks[0] = ctx->X[0]; ks[1] = ctx->X[1]; ks[2] = ctx->X[2]; ks[3] = ctx->X[3]; ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY; ts[2] = ts[0] ^ ts[1]; /* get input block in little-endian format */ Skein_Get64_LSB_First(w, blkPtr, WCNT); DebugSaveTweak(ctx); Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); X0 = w[0] + ks[0]; /* do the first full key injection */ X1 = w[1] + ks[1] + ts[0]; X2 = w[2] + ks[2] + ts[1]; X3 = w[3] + ks[3]; Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr); /* show starting state values */ blkPtr += SKEIN_256_BLOCK_BYTES; /* run the rounds */ #define Round256(p0, p1, p2, p3, ROT, rNum) \ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \ #if SKEIN_UNROLL_256 == 0 #define R256(p0, p1, p2, p3, ROT, rNum) /* fully unrolled */ \ Round256(p0, p1, p2, p3, ROT, rNum) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr); #define I256(R) \ X0 += ks[((R) + 1) % 5]; /* inject the key schedule value */ \ X1 += ks[((R) + 2) % 5] + ts[((R) + 1) % 3]; \ X2 += ks[((R) + 3) % 5] + ts[((R) + 2) % 3]; \ X3 += ks[((R) + 4) % 5] + (R) + 1; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); #else /* looping version */ #define R256(p0, p1, p2, p3, ROT, rNum) \ Round256(p0, p1, p2, p3, ROT, rNum) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr); #define I256(R) \ X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \ X1 += ks[r + (R) + 1] + ts[r + (R) + 0]; \ X2 += ks[r + (R) + 2] + ts[r + (R) + 1]; \ X3 += ks[r + (R) + 3] + r + (R); \ ks[r + (R) + 4] = ks[r + (R) - 1]; /* rotate key schedule */ \ ts[r + (R) + 2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); /* loop through it */ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256) #endif { #define R256_8_rounds(R) \ R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1); \ R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2); \ R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3); \ R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4); \ I256(2 * (R)); \ R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5); \ R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6); \ R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7); \ R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8); \ I256(2 * (R) + 1); R256_8_rounds(0); #define R256_Unroll_R(NN) \ ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL / 8 > (NN)) || \ (SKEIN_UNROLL_256 > (NN))) #if R256_Unroll_R(1) R256_8_rounds(1); #endif #if R256_Unroll_R(2) R256_8_rounds(2); #endif #if R256_Unroll_R(3) R256_8_rounds(3); #endif #if R256_Unroll_R(4) R256_8_rounds(4); #endif #if R256_Unroll_R(5) R256_8_rounds(5); #endif #if R256_Unroll_R(6) R256_8_rounds(6); #endif #if R256_Unroll_R(7) R256_8_rounds(7); #endif #if R256_Unroll_R(8) R256_8_rounds(8); #endif #if R256_Unroll_R(9) R256_8_rounds(9); #endif #if R256_Unroll_R(10) R256_8_rounds(10); #endif #if R256_Unroll_R(11) R256_8_rounds(11); #endif #if R256_Unroll_R(12) R256_8_rounds(12); #endif #if R256_Unroll_R(13) R256_8_rounds(13); #endif #if R256_Unroll_R(14) R256_8_rounds(14); #endif #if (SKEIN_UNROLL_256 > 14) #error "need more unrolling in Skein_256_Process_Block" #endif } /* * do the final "feedforward" xor, update context chaining vars */ ctx->X[0] = X0 ^ w[0]; ctx->X[1] = X1 ^ w[1]; ctx->X[2] = X2 ^ w[2]; ctx->X[3] = X3 ^ w[3]; 
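		/*
		 * The XORs above are the feedforward that turns Threefish
		 * into a compression function (a Matyas-Meyer-Oseas style
		 * construction): the block w[] was encrypted under the old
		 * chaining value and tweak, and ciphertext XOR plaintext
		 * becomes the new chaining value in ctx->X[].
		 */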
Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X); ts[1] &= ~SKEIN_T1_FLAG_FIRST; } while (--blkCnt); ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) size_t Skein_256_Process_Block_CodeSize(void) { return ((uint8_t *)Skein_256_Process_Block_CodeSize) - ((uint8_t *)Skein_256_Process_Block); } uint_t Skein_256_Unroll_Cnt(void) { return (SKEIN_UNROLL_256); } #endif #endif /* Skein_512 */ #if !(SKEIN_USE_ASM & 512) void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr, size_t blkCnt, size_t byteCntAdd) { enum { WCNT = SKEIN_512_STATE_WORDS }; #undef RCNT #define RCNT (SKEIN_512_ROUNDS_TOTAL / 8) #ifdef SKEIN_LOOP /* configure how much to unroll the loop */ #define SKEIN_UNROLL_512 (((SKEIN_LOOP) / 10) % 10) #else #define SKEIN_UNROLL_512 (0) #endif #if SKEIN_UNROLL_512 #if (RCNT % SKEIN_UNROLL_512) #error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ #endif size_t r; /* key schedule words : chaining vars + tweak + "rotation" */ uint64_t kw[WCNT + 4 + RCNT * 2]; #else uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */ #endif /* local copy of vars, for speed */ uint64_t X0, X1, X2, X3, X4, X5, X6, X7; uint64_t w[WCNT]; /* local copy of input block */ #ifdef SKEIN_DEBUG /* use for debugging (help compiler put Xn in registers) */ const uint64_t *Xptr[8]; Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7; #endif Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */ ts[0] = ctx->h.T[0]; ts[1] = ctx->h.T[1]; do { /* * this implementation only supports 2**64 input bytes * (no carry out here) */ ts[0] += byteCntAdd; /* update processed length */ /* precompute the key schedule for this block */ ks[0] = ctx->X[0]; ks[1] = ctx->X[1]; ks[2] = ctx->X[2]; ks[3] = ctx->X[3]; ks[4] = ctx->X[4]; ks[5] = ctx->X[5]; ks[6] = ctx->X[6]; ks[7] = ctx->X[7]; ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY; ts[2] = ts[0] ^ ts[1]; /* get input block in little-endian format */ Skein_Get64_LSB_First(w, blkPtr, WCNT); DebugSaveTweak(ctx); Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); X0 = w[0] + ks[0]; /* do the first full key injection */ X1 = w[1] + ks[1]; X2 = w[2] + ks[2]; X3 = w[3] + ks[3]; X4 = w[4] + ks[4]; X5 = w[5] + ks[5] + ts[0]; X6 = w[6] + ks[6] + ts[1]; X7 = w[7] + ks[7]; blkPtr += SKEIN_512_BLOCK_BYTES; Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr); /* run the rounds */ #define Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\ X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\ X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6; #if SKEIN_UNROLL_512 == 0 #define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) /* unrolled */ \ Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr); #define I512(R) \ X0 += ks[((R) + 1) % 9]; /* inject the key schedule value */\ X1 += ks[((R) + 2) % 9]; \ X2 += ks[((R) + 3) % 9]; \ X3 += ks[((R) + 4) % 9]; \ X4 += ks[((R) + 5) % 9]; \ X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3]; \ X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3]; \ X7 += ks[((R) + 8) % 9] + (R) + 1; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); #else /* looping version */ #define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, 
rNum) \ Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr); #define I512(R) \ X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \ X1 += ks[r + (R) + 1]; \ X2 += ks[r + (R) + 2]; \ X3 += ks[r + (R) + 3]; \ X4 += ks[r + (R) + 4]; \ X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \ X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \ X7 += ks[r + (R) + 7] + r + (R); \ ks[r + (R)+8] = ks[r + (R) - 1]; /* rotate key schedule */\ ts[r + (R)+2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); /* loop through it */ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512) #endif /* end of looped code definitions */ { #define R512_8_rounds(R) /* do 8 full rounds */ \ R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1); \ R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2); \ R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3); \ R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4); \ I512(2 * (R)); \ R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5); \ R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6); \ R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7); \ R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8); \ I512(2*(R) + 1); /* and key injection */ R512_8_rounds(0); #define R512_Unroll_R(NN) \ ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL / 8 > (NN)) || \ (SKEIN_UNROLL_512 > (NN))) #if R512_Unroll_R(1) R512_8_rounds(1); #endif #if R512_Unroll_R(2) R512_8_rounds(2); #endif #if R512_Unroll_R(3) R512_8_rounds(3); #endif #if R512_Unroll_R(4) R512_8_rounds(4); #endif #if R512_Unroll_R(5) R512_8_rounds(5); #endif #if R512_Unroll_R(6) R512_8_rounds(6); #endif #if R512_Unroll_R(7) R512_8_rounds(7); #endif #if R512_Unroll_R(8) R512_8_rounds(8); #endif #if R512_Unroll_R(9) R512_8_rounds(9); #endif #if R512_Unroll_R(10) R512_8_rounds(10); #endif #if R512_Unroll_R(11) R512_8_rounds(11); #endif #if R512_Unroll_R(12) R512_8_rounds(12); #endif #if R512_Unroll_R(13) R512_8_rounds(13); #endif #if R512_Unroll_R(14) R512_8_rounds(14); #endif #if (SKEIN_UNROLL_512 > 14) #error "need more unrolling in Skein_512_Process_Block" #endif } /* * do the final "feedforward" xor, update context chaining vars */ ctx->X[0] = X0 ^ w[0]; ctx->X[1] = X1 ^ w[1]; ctx->X[2] = X2 ^ w[2]; ctx->X[3] = X3 ^ w[3]; ctx->X[4] = X4 ^ w[4]; ctx->X[5] = X5 ^ w[5]; ctx->X[6] = X6 ^ w[6]; ctx->X[7] = X7 ^ w[7]; Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X); ts[1] &= ~SKEIN_T1_FLAG_FIRST; } while (--blkCnt); ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) size_t Skein_512_Process_Block_CodeSize(void) { return ((uint8_t *)Skein_512_Process_Block_CodeSize) - ((uint8_t *)Skein_512_Process_Block); } uint_t Skein_512_Unroll_Cnt(void) { return (SKEIN_UNROLL_512); } #endif #endif /* Skein1024 */ #if !(SKEIN_USE_ASM & 1024) void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr, size_t blkCnt, size_t byteCntAdd) { /* do it in C, always looping (unrolled is bigger AND slower!) 
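 * As an aside, the SKEIN_LOOP digits decode as follows: the hundreds
 * digit picks the unroll factor for Skein-256, the tens digit for
 * Skein-512, and the ones digit for Skein-1024, with 0 meaning
 * "fully unroll". Hence the default of 001 unrolls 256/512 but loops
 * 1024, and 111 makes all three block functions loop.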
*/ enum { WCNT = SKEIN1024_STATE_WORDS }; #undef RCNT #define RCNT (SKEIN1024_ROUNDS_TOTAL/8) #ifdef SKEIN_LOOP /* configure how much to unroll the loop */ #define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10) #else #define SKEIN_UNROLL_1024 (0) #endif #if (SKEIN_UNROLL_1024 != 0) #if (RCNT % SKEIN_UNROLL_1024) #error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */ #endif size_t r; /* key schedule words : chaining vars + tweak + "rotation" */ uint64_t kw[WCNT + 4 + RCNT * 2]; #else uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */ #endif /* local copy of vars, for speed */ uint64_t X00, X01, X02, X03, X04, X05, X06, X07, X08, X09, X10, X11, X12, X13, X14, X15; uint64_t w[WCNT]; /* local copy of input block */ #ifdef SKEIN_DEBUG /* use for debugging (help compiler put Xn in registers) */ const uint64_t *Xptr[16]; Xptr[0] = &X00; Xptr[1] = &X01; Xptr[2] = &X02; Xptr[3] = &X03; Xptr[4] = &X04; Xptr[5] = &X05; Xptr[6] = &X06; Xptr[7] = &X07; Xptr[8] = &X08; Xptr[9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11; Xptr[12] = &X12; Xptr[13] = &X13; Xptr[14] = &X14; Xptr[15] = &X15; #endif Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */ ts[0] = ctx->h.T[0]; ts[1] = ctx->h.T[1]; do { /* * this implementation only supports 2**64 input bytes * (no carry out here) */ ts[0] += byteCntAdd; /* update processed length */ /* precompute the key schedule for this block */ ks[0] = ctx->X[0]; ks[1] = ctx->X[1]; ks[2] = ctx->X[2]; ks[3] = ctx->X[3]; ks[4] = ctx->X[4]; ks[5] = ctx->X[5]; ks[6] = ctx->X[6]; ks[7] = ctx->X[7]; ks[8] = ctx->X[8]; ks[9] = ctx->X[9]; ks[10] = ctx->X[10]; ks[11] = ctx->X[11]; ks[12] = ctx->X[12]; ks[13] = ctx->X[13]; ks[14] = ctx->X[14]; ks[15] = ctx->X[15]; ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^ ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY; ts[2] = ts[0] ^ ts[1]; /* get input block in little-endian format */ Skein_Get64_LSB_First(w, blkPtr, WCNT); DebugSaveTweak(ctx); Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); X00 = w[0] + ks[0]; /* do the first full key injection */ X01 = w[1] + ks[1]; X02 = w[2] + ks[2]; X03 = w[3] + ks[3]; X04 = w[4] + ks[4]; X05 = w[5] + ks[5]; X06 = w[6] + ks[6]; X07 = w[7] + ks[7]; X08 = w[8] + ks[8]; X09 = w[9] + ks[9]; X10 = w[10] + ks[10]; X11 = w[11] + ks[11]; X12 = w[12] + ks[12]; X13 = w[13] + ks[13] + ts[0]; X14 = w[14] + ks[14] + ts[1]; X15 = w[15] + ks[15]; Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr); #define Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \ pD, pE, pF, ROT, rNum) \ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\ X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\ X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6;\ X##p8 += X##p9; X##p9 = RotL_64(X##p9, ROT##_4); X##p9 ^= X##p8;\ X##pA += X##pB; X##pB = RotL_64(X##pB, ROT##_5); X##pB ^= X##pA;\ X##pC += X##pD; X##pD = RotL_64(X##pD, ROT##_6); X##pD ^= X##pC;\ X##pE += X##pF; X##pF = RotL_64(X##pF, ROT##_7); X##pF ^= X##pE; #if SKEIN_UNROLL_1024 == 0 #define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, \ pE, pF, ROT, rn) \ Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \ pD, pE, pF, ROT, rn) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rn, Xptr); #define I1024(R) \ X00 += ks[((R) + 1) % 17]; /* inject the key schedule value */\ X01 += ks[((R) + 2) % 17]; \ X02 += ks[((R) + 3) % 17]; \ X03 
+= ks[((R) + 4) % 17]; \ X04 += ks[((R) + 5) % 17]; \ X05 += ks[((R) + 6) % 17]; \ X06 += ks[((R) + 7) % 17]; \ X07 += ks[((R) + 8) % 17]; \ X08 += ks[((R) + 9) % 17]; \ X09 += ks[((R) + 10) % 17]; \ X10 += ks[((R) + 11) % 17]; \ X11 += ks[((R) + 12) % 17]; \ X12 += ks[((R) + 13) % 17]; \ X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3]; \ X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3]; \ X15 += ks[((R) + 16) % 17] + (R) +1; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); #else /* looping version */ #define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, \ pE, pF, ROT, rn) \ Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \ pD, pE, pF, ROT, rn) \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, Xptr); #define I1024(R) \ X00 += ks[r + (R) + 0]; /* inject the key schedule value */ \ X01 += ks[r + (R) + 1]; \ X02 += ks[r + (R) + 2]; \ X03 += ks[r + (R) + 3]; \ X04 += ks[r + (R) + 4]; \ X05 += ks[r + (R) + 5]; \ X06 += ks[r + (R) + 6]; \ X07 += ks[r + (R) + 7]; \ X08 += ks[r + (R) + 8]; \ X09 += ks[r + (R) + 9]; \ X10 += ks[r + (R) + 10]; \ X11 += ks[r + (R) + 11]; \ X12 += ks[r + (R) + 12]; \ X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \ X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \ X15 += ks[r + (R) + 15] + r + (R); \ ks[r + (R) + 16] = ks[r + (R) - 1]; /* rotate key schedule */\ ts[r + (R) + 2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); /* loop through it */ for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024) #endif { #define R1024_8_rounds(R) /* do 8 full rounds */ \ R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, \ 14, 15, R1024_0, 8 * (R) + 1); \ R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, \ 08, 01, R1024_1, 8 * (R) + 2); \ R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, \ 10, 09, R1024_2, 8 * (R) + 3); \ R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, \ 12, 07, R1024_3, 8 * (R) + 4); \ I1024(2 * (R)); \ R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, \ 14, 15, R1024_4, 8 * (R) + 5); \ R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, \ 08, 01, R1024_5, 8 * (R) + 6); \ R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, \ 10, 09, R1024_6, 8 * (R) + 7); \ R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, \ 12, 07, R1024_7, 8 * (R) + 8); \ I1024(2 * (R) + 1); R1024_8_rounds(0); #define R1024_Unroll_R(NN) \ ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || \ (SKEIN_UNROLL_1024 > (NN))) #if R1024_Unroll_R(1) R1024_8_rounds(1); #endif #if R1024_Unroll_R(2) R1024_8_rounds(2); #endif #if R1024_Unroll_R(3) R1024_8_rounds(3); #endif #if R1024_Unroll_R(4) R1024_8_rounds(4); #endif #if R1024_Unroll_R(5) R1024_8_rounds(5); #endif #if R1024_Unroll_R(6) R1024_8_rounds(6); #endif #if R1024_Unroll_R(7) R1024_8_rounds(7); #endif #if R1024_Unroll_R(8) R1024_8_rounds(8); #endif #if R1024_Unroll_R(9) R1024_8_rounds(9); #endif #if R1024_Unroll_R(10) R1024_8_rounds(10); #endif #if R1024_Unroll_R(11) R1024_8_rounds(11); #endif #if R1024_Unroll_R(12) R1024_8_rounds(12); #endif #if R1024_Unroll_R(13) R1024_8_rounds(13); #endif #if R1024_Unroll_R(14) R1024_8_rounds(14); #endif #if (SKEIN_UNROLL_1024 > 14) #error "need more unrolling in Skein_1024_Process_Block" #endif } /* * do the final "feedforward" xor, update context chaining vars */ ctx->X[0] = X00 ^ w[0]; ctx->X[1] = X01 ^ w[1]; ctx->X[2] = X02 ^ w[2]; ctx->X[3] = X03 ^ w[3]; ctx->X[4] = X04 ^ w[4]; ctx->X[5] = X05 ^ w[5]; ctx->X[6] = X06 ^ 
w[6]; ctx->X[7] = X07 ^ w[7]; ctx->X[8] = X08 ^ w[8]; ctx->X[9] = X09 ^ w[9]; ctx->X[10] = X10 ^ w[10]; ctx->X[11] = X11 ^ w[11]; ctx->X[12] = X12 ^ w[12]; ctx->X[13] = X13 ^ w[13]; ctx->X[14] = X14 ^ w[14]; ctx->X[15] = X15 ^ w[15]; Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X); ts[1] &= ~SKEIN_T1_FLAG_FIRST; blkPtr += SKEIN1024_BLOCK_BYTES; } while (--blkCnt); ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) size_t Skein1024_Process_Block_CodeSize(void) { return ((uint8_t *)Skein1024_Process_Block_CodeSize) - ((uint8_t *)Skein1024_Process_Block); } uint_t Skein1024_Unroll_Cnt(void) { return (SKEIN_UNROLL_1024); } #endif #endif diff --git a/module/lua/ldo.c b/module/lua/ldo.c index e2a3d0279d7f..291bca044e7b 100644 --- a/module/lua/ldo.c +++ b/module/lua/ldo.c @@ -1,759 +1,760 @@ /* ** $Id: ldo.c,v 2.108.1.3 2013/11/08 18:22:50 roberto Exp $ ** Stack and Call structure of Lua ** See Copyright Notice in lua.h */ #define ldo_c #define LUA_CORE #include #include "lapi.h" #include "ldebug.h" #include "ldo.h" #include "lfunc.h" #include "lgc.h" #include "lmem.h" #include "lobject.h" #include "lopcodes.h" #include "lparser.h" #include "lstate.h" #include "lstring.h" #include "ltable.h" #include "ltm.h" #include "lvm.h" #include "lzio.h" /* Return the number of bytes available on the stack. */ #if defined (_KERNEL) && defined(__linux__) #include static intptr_t stack_remaining(void) { intptr_t local; local = (intptr_t)&local - (intptr_t)current->stack; return local; } #elif defined (_KERNEL) && defined(__FreeBSD__) #include static intptr_t stack_remaining(void) { intptr_t local; local = (intptr_t)&local - (intptr_t)curthread->td_kstack; return local; } #else static intptr_t stack_remaining(void) { return INTPTR_MAX; } #endif /* ** {====================================================== ** Error-recovery functions ** ======================================================= */ /* ** LUAI_THROW/LUAI_TRY define how Lua does exception handling. By ** default, Lua handles errors with exceptions when compiling as ** C++ code, with _longjmp/_setjmp when asked to use them, and with ** longjmp/setjmp otherwise. */ #if !defined(LUAI_THROW) #ifdef _KERNEL #ifdef __linux__ #if defined(__i386__) #define JMP_BUF_CNT 6 #elif defined(__x86_64__) #define JMP_BUF_CNT 8 #elif defined(__sparc__) && defined(__arch64__) #define JMP_BUF_CNT 6 #elif defined(__powerpc__) #define JMP_BUF_CNT 26 #elif defined(__aarch64__) #define JMP_BUF_CNT 64 #elif defined(__arm__) #define JMP_BUF_CNT 65 #elif defined(__mips__) #define JMP_BUF_CNT 12 #elif defined(__s390x__) #define JMP_BUF_CNT 18 #elif defined(__riscv) #define JMP_BUF_CNT 64 #else #define JMP_BUF_CNT 1 #endif typedef struct _label_t { long long unsigned val[JMP_BUF_CNT]; } label_t; int setjmp(label_t *) __attribute__ ((__nothrow__)); extern __attribute__((noreturn)) void longjmp(label_t *); #define LUAI_THROW(L,c) longjmp(&(c)->b) #define LUAI_TRY(L,c,a) if (setjmp(&(c)->b) == 0) { a } #define luai_jmpbuf label_t /* unsupported arches will build but not be able to run lua programs */ #if JMP_BUF_CNT == 1 int setjmp (label_t *buf) { return 1; } void longjmp (label_t * buf) { for (;;); } #endif #else #define LUAI_THROW(L,c) longjmp((c)->b, 1) #define LUAI_TRY(L,c,a) if (setjmp((c)->b) == 0) { a } #define luai_jmpbuf jmp_buf #endif #else /* _KERNEL */ #if defined(__cplusplus) && !defined(LUA_USE_LONGJMP) /* C++ exceptions */ #define LUAI_THROW(L,c) throw(c) #define LUAI_TRY(L,c,a) \ try { a } catch(...) 
{ if ((c)->status == 0) (c)->status = -1; } #define luai_jmpbuf int /* dummy variable */ #elif defined(LUA_USE_ULONGJMP) /* in Unix, try _longjmp/_setjmp (more efficient) */ #define LUAI_THROW(L,c) _longjmp((c)->b, 1) #define LUAI_TRY(L,c,a) if (_setjmp((c)->b) == 0) { a } #define luai_jmpbuf jmp_buf #else /* default handling with long jumps */ #define LUAI_THROW(L,c) longjmp((c)->b, 1) #define LUAI_TRY(L,c,a) if (setjmp((c)->b) == 0) { a } #define luai_jmpbuf jmp_buf #endif #endif /* _KERNEL */ #endif /* LUAI_THROW */ /* chain list of long jump buffers */ struct lua_longjmp { struct lua_longjmp *previous; luai_jmpbuf b; volatile int status; /* error code */ }; static void seterrorobj (lua_State *L, int errcode, StkId oldtop) { switch (errcode) { case LUA_ERRMEM: { /* memory error? */ setsvalue2s(L, oldtop, G(L)->memerrmsg); /* reuse preregistered msg. */ break; } case LUA_ERRERR: { setsvalue2s(L, oldtop, luaS_newliteral(L, "error in error handling")); break; } default: { setobjs2s(L, oldtop, L->top - 1); /* error message on current top */ break; } } L->top = oldtop + 1; } /* * Silence infinite recursion warning which was added to -Wall in gcc 12.1 */ #if defined(__GNUC__) && !defined(__clang__) && \ defined(HAVE_KERNEL_INFINITE_RECURSION) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Winfinite-recursion" #endif l_noret luaD_throw (lua_State *L, int errcode) { if (L->errorJmp) { /* thread has an error handler? */ L->errorJmp->status = errcode; /* set status */ LUAI_THROW(L, L->errorJmp); /* jump to it */ } else { /* thread has no error handler */ L->status = cast_byte(errcode); /* mark it as dead */ if (G(L)->mainthread->errorJmp) { /* main thread has a handler? */ setobjs2s(L, G(L)->mainthread->top++, L->top - 1); /* copy error obj. */ luaD_throw(G(L)->mainthread, errcode); /* re-throw in main thread */ } else { /* no handler at all; abort */ if (G(L)->panic) { /* panic function? 
*/ lua_unlock(L); G(L)->panic(L); /* call it (last chance to jump out) */ } panic("no error handler"); } } } -#if defined(HAVE_INFINITE_RECURSION) +#if defined(__GNUC__) && !defined(__clang__) && \ + defined(HAVE_INFINITE_RECURSION) #pragma GCC diagnostic pop #endif int luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud) { unsigned short oldnCcalls = L->nCcalls; struct lua_longjmp lj; lj.status = LUA_OK; lj.previous = L->errorJmp; /* chain new error handler */ L->errorJmp = &lj; LUAI_TRY(L, &lj, (*f)(L, ud); ); L->errorJmp = lj.previous; /* restore old error handler */ L->nCcalls = oldnCcalls; return lj.status; } /* }====================================================== */ static void correctstack (lua_State *L, TValue *oldstack) { CallInfo *ci; GCObject *up; L->top = (L->top - oldstack) + L->stack; for (up = L->openupval; up != NULL; up = up->gch.next) gco2uv(up)->v = (gco2uv(up)->v - oldstack) + L->stack; for (ci = L->ci; ci != NULL; ci = ci->previous) { ci->top = (ci->top - oldstack) + L->stack; ci->func = (ci->func - oldstack) + L->stack; if (isLua(ci)) ci->u.l.base = (ci->u.l.base - oldstack) + L->stack; } } /* some space for error handling */ #define ERRORSTACKSIZE (LUAI_MAXSTACK + 200) void luaD_reallocstack (lua_State *L, int newsize) { TValue *oldstack = L->stack; int lim = L->stacksize; lua_assert(newsize <= LUAI_MAXSTACK || newsize == ERRORSTACKSIZE); lua_assert(L->stack_last - L->stack == L->stacksize - EXTRA_STACK); luaM_reallocvector(L, L->stack, L->stacksize, newsize, TValue); for (; lim < newsize; lim++) setnilvalue(L->stack + lim); /* erase new segment */ L->stacksize = newsize; L->stack_last = L->stack + newsize - EXTRA_STACK; correctstack(L, oldstack); } void luaD_growstack (lua_State *L, int n) { int size = L->stacksize; if (size > LUAI_MAXSTACK) /* error after extra size? */ luaD_throw(L, LUA_ERRERR); else { int needed = cast_int(L->top - L->stack) + n + EXTRA_STACK; int newsize = 2 * size; if (newsize > LUAI_MAXSTACK) newsize = LUAI_MAXSTACK; if (newsize < needed) newsize = needed; if (newsize > LUAI_MAXSTACK) { /* stack overflow? */ luaD_reallocstack(L, ERRORSTACKSIZE); luaG_runerror(L, "stack overflow"); } else luaD_reallocstack(L, newsize); } } static int stackinuse (lua_State *L) { CallInfo *ci; StkId lim = L->top; for (ci = L->ci; ci != NULL; ci = ci->previous) { lua_assert(ci->top <= L->stack_last); if (lim < ci->top) lim = ci->top; } return cast_int(lim - L->stack) + 1; /* part of stack in use */ } void luaD_shrinkstack (lua_State *L) { int inuse = stackinuse(L); int goodsize = inuse + (inuse / 8) + 2*EXTRA_STACK; if (goodsize > LUAI_MAXSTACK) goodsize = LUAI_MAXSTACK; if (inuse > LUAI_MAXSTACK || /* handling stack overflow? */ goodsize >= L->stacksize) /* would grow instead of shrink? 
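	   (E.g. with inuse == 100 and Lua's EXTRA_STACK of 5, goodsize is
	   100 + 100/8 + 2*5 == 122, so shrinking happens only when the
	   current stacksize exceeds that.)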
*/ condmovestack(L); /* don't change stack (change only for debugging) */ else luaD_reallocstack(L, goodsize); /* shrink it */ } void luaD_hook (lua_State *L, int event, int line) { lua_Hook hook = L->hook; if (hook && L->allowhook) { CallInfo *ci = L->ci; ptrdiff_t top = savestack(L, L->top); ptrdiff_t ci_top = savestack(L, ci->top); lua_Debug ar; ar.event = event; ar.currentline = line; ar.i_ci = ci; luaD_checkstack(L, LUA_MINSTACK); /* ensure minimum stack size */ ci->top = L->top + LUA_MINSTACK; lua_assert(ci->top <= L->stack_last); L->allowhook = 0; /* cannot call hooks inside a hook */ ci->callstatus |= CIST_HOOKED; lua_unlock(L); (*hook)(L, &ar); lua_lock(L); lua_assert(!L->allowhook); L->allowhook = 1; ci->top = restorestack(L, ci_top); L->top = restorestack(L, top); ci->callstatus &= ~CIST_HOOKED; } } static void callhook (lua_State *L, CallInfo *ci) { int hook = LUA_HOOKCALL; ci->u.l.savedpc++; /* hooks assume 'pc' is already incremented */ if (isLua(ci->previous) && GET_OPCODE(*(ci->previous->u.l.savedpc - 1)) == OP_TAILCALL) { ci->callstatus |= CIST_TAIL; hook = LUA_HOOKTAILCALL; } luaD_hook(L, hook, -1); ci->u.l.savedpc--; /* correct 'pc' */ } static StkId adjust_varargs (lua_State *L, Proto *p, int actual) { int i; int nfixargs = p->numparams; StkId base, fixed; lua_assert(actual >= nfixargs); /* move fixed parameters to final position */ luaD_checkstack(L, p->maxstacksize); /* check again for new 'base' */ fixed = L->top - actual; /* first fixed argument */ base = L->top; /* final position of first argument */ for (i=0; itop++, fixed + i); setnilvalue(fixed + i); } return base; } static StkId tryfuncTM (lua_State *L, StkId func) { const TValue *tm = luaT_gettmbyobj(L, func, TM_CALL); StkId p; ptrdiff_t funcr = savestack(L, func); if (!ttisfunction(tm)) luaG_typeerror(L, func, "call"); /* Open a hole inside the stack at `func' */ for (p = L->top; p > func; p--) setobjs2s(L, p, p-1); incr_top(L); func = restorestack(L, funcr); /* previous call may change stack */ setobj2s(L, func, tm); /* tag method is the new function to be called */ return func; } #define next_ci(L) (L->ci = (L->ci->next ? 
L->ci->next : luaE_extendCI(L))) /* ** returns true if function has been executed (C function) */ int luaD_precall (lua_State *L, StkId func, int nresults) { lua_CFunction f; CallInfo *ci; int n; /* number of arguments (Lua) or returns (C) */ ptrdiff_t funcr = savestack(L, func); switch (ttype(func)) { case LUA_TLCF: /* light C function */ f = fvalue(func); goto Cfunc; case LUA_TCCL: { /* C closure */ f = clCvalue(func)->f; Cfunc: luaD_checkstack(L, LUA_MINSTACK); /* ensure minimum stack size */ ci = next_ci(L); /* now 'enter' new function */ ci->nresults = nresults; ci->func = restorestack(L, funcr); ci->top = L->top + LUA_MINSTACK; lua_assert(ci->top <= L->stack_last); ci->callstatus = 0; luaC_checkGC(L); /* stack grow uses memory */ if (L->hookmask & LUA_MASKCALL) luaD_hook(L, LUA_HOOKCALL, -1); lua_unlock(L); n = (*f)(L); /* do the actual call */ lua_lock(L); api_checknelems(L, n); luaD_poscall(L, L->top - n); return 1; } case LUA_TLCL: { /* Lua function: prepare its call */ StkId base; Proto *p = clLvalue(func)->p; n = cast_int(L->top - func) - 1; /* number of real arguments */ luaD_checkstack(L, p->maxstacksize + p->numparams); for (; n < p->numparams; n++) setnilvalue(L->top++); /* complete missing arguments */ if (!p->is_vararg) { func = restorestack(L, funcr); base = func + 1; } else { base = adjust_varargs(L, p, n); func = restorestack(L, funcr); /* previous call can change stack */ } ci = next_ci(L); /* now 'enter' new function */ ci->nresults = nresults; ci->func = func; ci->u.l.base = base; ci->top = base + p->maxstacksize; lua_assert(ci->top <= L->stack_last); ci->u.l.savedpc = p->code; /* starting point */ ci->callstatus = CIST_LUA; L->top = ci->top; luaC_checkGC(L); /* stack grow uses memory */ if (L->hookmask & LUA_MASKCALL) callhook(L, ci); return 0; } default: { /* not a function */ func = tryfuncTM(L, func); /* retry with 'function' tag method */ return luaD_precall(L, func, nresults); /* now it must be a function */ } } } int luaD_poscall (lua_State *L, StkId firstResult) { StkId res; int wanted, i; CallInfo *ci = L->ci; if (L->hookmask & (LUA_MASKRET | LUA_MASKLINE)) { if (L->hookmask & LUA_MASKRET) { ptrdiff_t fr = savestack(L, firstResult); /* hook may change stack */ luaD_hook(L, LUA_HOOKRET, -1); firstResult = restorestack(L, fr); } L->oldpc = ci->previous->u.l.savedpc; /* 'oldpc' for caller function */ } res = ci->func; /* res == final position of 1st result */ wanted = ci->nresults; L->ci = ci->previous; /* back to caller */ /* move results to correct place */ for (i = wanted; i != 0 && firstResult < L->top; i--) setobjs2s(L, res++, firstResult++); while (i-- > 0) setnilvalue(res++); L->top = res; return (wanted - LUA_MULTRET); /* 0 iff wanted == LUA_MULTRET */ } /* ** Call a function (C or Lua). The function to be called is at *func. ** The arguments are on the stack, right after the function. ** When returns, all the results are on the stack, starting at the original ** function position. 
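** The guards below bound recursion two ways: nCcalls raises
** "C stack overflow" at LUAI_MAXCCALLS (escalating to LUA_ERRERR once
** nesting passes LUAI_MAXCCALLS + LUAI_MAXCCALLS/8 while that first
** error is being handled), and stack_remaining() is compared against
** LUAI_MINCSTACK to guard the real C stack.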
*/ void luaD_call (lua_State *L, StkId func, int nResults, int allowyield) { if (++L->nCcalls >= LUAI_MAXCCALLS) { if (L->nCcalls == LUAI_MAXCCALLS) luaG_runerror(L, "C stack overflow"); else if (L->nCcalls >= (LUAI_MAXCCALLS + (LUAI_MAXCCALLS>>3))) luaD_throw(L, LUA_ERRERR); /* error while handling stack error */ } intptr_t remaining = stack_remaining(); if (L->runerror == 0 && remaining < LUAI_MINCSTACK) luaG_runerror(L, "C stack overflow"); if (L->runerror != 0 && remaining < LUAI_MINCSTACK / 2) luaD_throw(L, LUA_ERRERR); /* error while handling stack error */ if (!allowyield) L->nny++; if (!luaD_precall(L, func, nResults)) /* is a Lua function? */ luaV_execute(L); /* call it */ if (!allowyield) L->nny--; L->nCcalls--; } static void finishCcall (lua_State *L) { CallInfo *ci = L->ci; int n; lua_assert(ci->u.c.k != NULL); /* must have a continuation */ lua_assert(L->nny == 0); if (ci->callstatus & CIST_YPCALL) { /* was inside a pcall? */ ci->callstatus &= ~CIST_YPCALL; /* finish 'lua_pcall' */ L->errfunc = ci->u.c.old_errfunc; } /* finish 'lua_callk'/'lua_pcall' */ adjustresults(L, ci->nresults); /* call continuation function */ if (!(ci->callstatus & CIST_STAT)) /* no call status? */ ci->u.c.status = LUA_YIELD; /* 'default' status */ lua_assert(ci->u.c.status != LUA_OK); ci->callstatus = (ci->callstatus & ~(CIST_YPCALL | CIST_STAT)) | CIST_YIELDED; lua_unlock(L); n = (*ci->u.c.k)(L); lua_lock(L); api_checknelems(L, n); /* finish 'luaD_precall' */ luaD_poscall(L, L->top - n); } static void unroll (lua_State *L, void *ud) { UNUSED(ud); for (;;) { if (L->ci == &L->base_ci) /* stack is empty? */ return; /* coroutine finished normally */ if (!isLua(L->ci)) /* C function? */ finishCcall(L); else { /* Lua function */ luaV_finishOp(L); /* finish interrupted instruction */ luaV_execute(L); /* execute down to higher C 'boundary' */ } } } /* ** check whether thread has a suspended protected call */ static CallInfo *findpcall (lua_State *L) { CallInfo *ci; for (ci = L->ci; ci != NULL; ci = ci->previous) { /* search for a pcall */ if (ci->callstatus & CIST_YPCALL) return ci; } return NULL; /* no pending pcall */ } static int recover (lua_State *L, int status) { StkId oldtop; CallInfo *ci = findpcall(L); if (ci == NULL) return 0; /* no recovery point */ /* "finish" luaD_pcall */ oldtop = restorestack(L, ci->extra); luaF_close(L, oldtop); seterrorobj(L, status, oldtop); L->ci = ci; L->allowhook = ci->u.c.old_allowhook; L->nny = 0; /* should be zero to be yieldable */ luaD_shrinkstack(L); L->errfunc = ci->u.c.old_errfunc; ci->callstatus |= CIST_STAT; /* call has error status */ ci->u.c.status = status; /* (here it is) */ return 1; /* continue running the coroutine */ } /* ** signal an error in the call to 'resume', not in the execution of the ** coroutine itself. (Such errors should not be handled by any coroutine ** error handler and should not kill the coroutine.) */ static l_noret resume_error (lua_State *L, const char *msg, StkId firstArg) { L->top = firstArg; /* remove args from the stack */ setsvalue2s(L, L->top, luaS_new(L, msg)); /* push error message */ api_incr_top(L); luaD_throw(L, -1); /* jump back to 'lua_resume' */ } /* ** do the work for 'lua_resume' in protected mode */ static void resume_cb (lua_State *L, void *ud) { int nCcalls = L->nCcalls; StkId firstArg = cast(StkId, ud); CallInfo *ci = L->ci; if (nCcalls >= LUAI_MAXCCALLS) resume_error(L, "C stack overflow", firstArg); if (L->status == LUA_OK) { /* may be starting a coroutine */ if (ci != &L->base_ci) /* not in base level? 
*/ resume_error(L, "cannot resume non-suspended coroutine", firstArg); /* coroutine is in base level; start running it */ if (!luaD_precall(L, firstArg - 1, LUA_MULTRET)) /* Lua function? */ luaV_execute(L); /* call it */ } else if (L->status != LUA_YIELD) resume_error(L, "cannot resume dead coroutine", firstArg); else { /* resuming from previous yield */ L->status = LUA_OK; ci->func = restorestack(L, ci->extra); if (isLua(ci)) /* yielded inside a hook? */ luaV_execute(L); /* just continue running Lua code */ else { /* 'common' yield */ if (ci->u.c.k != NULL) { /* does it have a continuation? */ int n; ci->u.c.status = LUA_YIELD; /* 'default' status */ ci->callstatus |= CIST_YIELDED; lua_unlock(L); n = (*ci->u.c.k)(L); /* call continuation */ lua_lock(L); api_checknelems(L, n); firstArg = L->top - n; /* yield results come from continuation */ } luaD_poscall(L, firstArg); /* finish 'luaD_precall' */ } unroll(L, NULL); } lua_assert(nCcalls == L->nCcalls); } LUA_API int lua_resume (lua_State *L, lua_State *from, int nargs) { int status; int oldnny = L->nny; /* save 'nny' */ lua_lock(L); luai_userstateresume(L, nargs); L->nCcalls = (from) ? from->nCcalls + 1 : 1; L->nny = 0; /* allow yields */ api_checknelems(L, (L->status == LUA_OK) ? nargs + 1 : nargs); status = luaD_rawrunprotected(L, resume_cb, L->top - nargs); if (status == -1) /* error calling 'lua_resume'? */ status = LUA_ERRRUN; else { /* yield or regular error */ while (status != LUA_OK && status != LUA_YIELD) { /* error? */ if (recover(L, status)) /* recover point? */ status = luaD_rawrunprotected(L, unroll, NULL); /* run continuation */ else { /* unrecoverable error */ L->status = cast_byte(status); /* mark thread as `dead' */ seterrorobj(L, status, L->top); L->ci->top = L->top; break; } } lua_assert(status == L->status); } L->nny = oldnny; /* restore 'nny' */ L->nCcalls--; lua_assert(L->nCcalls == ((from) ? from->nCcalls : 0)); lua_unlock(L); return status; } LUA_API int lua_yieldk (lua_State *L, int nresults, int ctx, lua_CFunction k) { CallInfo *ci = L->ci; luai_userstateyield(L, nresults); lua_lock(L); api_checknelems(L, nresults); if (L->nny > 0) { if (L != G(L)->mainthread) luaG_runerror(L, "attempt to yield across a C-call boundary"); else luaG_runerror(L, "attempt to yield from outside a coroutine"); } L->status = LUA_YIELD; ci->extra = savestack(L, ci->func); /* save current 'func' */ if (isLua(ci)) { /* inside a hook? */ api_check(L, k == NULL, "hooks cannot continue after yielding"); } else { if ((ci->u.c.k = k) != NULL) /* is there a continuation? */ ci->u.c.ctx = ctx; /* save context */ ci->func = L->top - nresults - 1; /* protect stack below results */ luaD_throw(L, LUA_YIELD); } lua_assert(ci->callstatus & CIST_HOOKED); /* must be inside a hook */ lua_unlock(L); return 0; /* return to 'luaD_hook' */ } int luaD_pcall (lua_State *L, Pfunc func, void *u, ptrdiff_t old_top, ptrdiff_t ef) { int status; CallInfo *old_ci = L->ci; lu_byte old_allowhooks = L->allowhook; unsigned short old_nny = L->nny; ptrdiff_t old_errfunc = L->errfunc; L->errfunc = ef; status = luaD_rawrunprotected(L, func, u); if (status != LUA_OK) { /* an error occurred? */ StkId oldtop = restorestack(L, old_top); luaF_close(L, oldtop); /* close possible pending closures */ seterrorobj(L, status, oldtop); L->ci = old_ci; L->allowhook = old_allowhooks; L->nny = old_nny; luaD_shrinkstack(L); } L->errfunc = old_errfunc; return status; } /* ** Execute a protected parser. 
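** Only text chunks are accepted here: f_parser() asserts that the
** first byte is not LUA_SIGNATURE[0] (precompiled chunks are not
** supported) and checkmode() rejects a load whose mode string does
** not include "text". luaD_protectedparser() also bumps L->nny for
** the duration, so the parser can never yield part-way through.
*/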
*/ struct SParser { /* data to `f_parser' */ ZIO *z; Mbuffer buff; /* dynamic structure used by the scanner */ Dyndata dyd; /* dynamic structures used by the parser */ const char *mode; const char *name; }; static void checkmode (lua_State *L, const char *mode, const char *x) { if (mode && strchr(mode, x[0]) == NULL) { luaO_pushfstring(L, "attempt to load a %s chunk (mode is " LUA_QS ")", x, mode); luaD_throw(L, LUA_ERRSYNTAX); } } static void f_parser (lua_State *L, void *ud) { int i; Closure *cl; struct SParser *p = cast(struct SParser *, ud); int c = zgetc(p->z); /* read first character */ lua_assert(c != LUA_SIGNATURE[0]); /* binary not supported */ checkmode(L, p->mode, "text"); cl = luaY_parser(L, p->z, &p->buff, &p->dyd, p->name, c); lua_assert(cl->l.nupvalues == cl->l.p->sizeupvalues); for (i = 0; i < cl->l.nupvalues; i++) { /* initialize upvalues */ UpVal *up = luaF_newupval(L); cl->l.upvals[i] = up; luaC_objbarrier(L, cl, up); } } int luaD_protectedparser (lua_State *L, ZIO *z, const char *name, const char *mode) { struct SParser p; int status; L->nny++; /* cannot yield during parsing */ p.z = z; p.name = name; p.mode = mode; p.dyd.actvar.arr = NULL; p.dyd.actvar.size = 0; p.dyd.gt.arr = NULL; p.dyd.gt.size = 0; p.dyd.label.arr = NULL; p.dyd.label.size = 0; luaZ_initbuffer(L, &p.buff); status = luaD_pcall(L, f_parser, &p, savestack(L, L->top), L->errfunc); luaZ_freebuffer(L, &p.buff); luaM_freearray(L, p.dyd.actvar.arr, p.dyd.actvar.size); luaM_freearray(L, p.dyd.gt.arr, p.dyd.gt.size); luaM_freearray(L, p.dyd.label.arr, p.dyd.label.size); L->nny--; return status; } diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index 71eedf635f73..38515023e4b8 100644 --- a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -1,927 +1,931 @@ /* * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. * Copyright (C) 2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Brian Behlendorf . * UCRL-CODE-235197 * * This file is part of the SPL, Solaris Porting Layer. * * The SPL is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * The SPL is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . * * Solaris Porting Layer (SPL) Generic Implementation. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include unsigned long spl_hostid = 0; EXPORT_SYMBOL(spl_hostid); /* CSTYLED */ module_param(spl_hostid, ulong, 0644); MODULE_PARM_DESC(spl_hostid, "The system hostid."); proc_t p0; EXPORT_SYMBOL(p0); /* * xoshiro256++ 1.0 PRNG by David Blackman and Sebastiano Vigna * * "Scrambled Linear Pseudorandom Number Generators∗" * https://vigna.di.unimi.it/ftp/papers/ScrambledLinear.pdf * * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose * is to provide bytes containing random numbers. 
It is mapped to /dev/urandom * on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's * random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so * we can implement it using a fast PRNG that we seed using Linux' actual * equivalent to random_get_pseudo_bytes(). We do this by providing each CPU * with an independent seed so that all calls to random_get_pseudo_bytes() are * free of atomic instructions. * * A consequence of using a fast PRNG is that using random_get_pseudo_bytes() * to generate words larger than 256 bits will paradoxically be limited to * `2^256 - 1` possibilities. This is because we have a sequence of `2^256 - 1` * 256-bit words and selecting the first will implicitly select the second. If * a caller finds this behavior undesirable, random_get_bytes() should be used * instead. * * XXX: Linux interrupt handlers that trigger within the critical section * formed by `s[3] = xp[3];` and `xp[0] = s[0];` and call this function will * see the same numbers. Nothing in the code currently calls this in an * interrupt handler, so this is considered to be okay. If that becomes a * problem, we could create a set of per-cpu variables for interrupt handlers * and use them when in_interrupt() from linux/preempt_mask.h evaluates to * true. */ static void __percpu *spl_pseudo_entropy; /* * rotl()/spl_rand_next()/spl_rand_jump() are copied from the following CC-0 * licensed file: * * https://prng.di.unimi.it/xoshiro256plusplus.c */ static inline uint64_t rotl(const uint64_t x, int k) { return ((x << k) | (x >> (64 - k))); } static inline uint64_t spl_rand_next(uint64_t *s) { const uint64_t result = rotl(s[0] + s[3], 23) + s[0]; const uint64_t t = s[1] << 17; s[2] ^= s[0]; s[3] ^= s[1]; s[1] ^= s[2]; s[0] ^= s[3]; s[2] ^= t; s[3] = rotl(s[3], 45); return (result); } static inline void spl_rand_jump(uint64_t *s) { static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c }; uint64_t s0 = 0; uint64_t s1 = 0; uint64_t s2 = 0; uint64_t s3 = 0; int i, b; for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++) for (b = 0; b < 64; b++) { if (JUMP[i] & 1ULL << b) { s0 ^= s[0]; s1 ^= s[1]; s2 ^= s[2]; s3 ^= s[3]; } (void) spl_rand_next(s); } s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; } int random_get_pseudo_bytes(uint8_t *ptr, size_t len) { uint64_t *xp, s[4]; ASSERT(ptr); xp = get_cpu_ptr(spl_pseudo_entropy); s[0] = xp[0]; s[1] = xp[1]; s[2] = xp[2]; s[3] = xp[3]; while (len) { union { uint64_t ui64; uint8_t byte[sizeof (uint64_t)]; }entropy; int i = MIN(len, sizeof (uint64_t)); len -= i; entropy.ui64 = spl_rand_next(s); /* * xoshiro256++ has low entropy lower bytes, so we copy the * higher order bytes first. */ while (i--) #ifdef _ZFS_BIG_ENDIAN *ptr++ = entropy.byte[i]; #else *ptr++ = entropy.byte[7 - i]; #endif } xp[0] = s[0]; xp[1] = s[1]; xp[2] = s[2]; xp[3] = s[3]; put_cpu_ptr(spl_pseudo_entropy); return (0); } EXPORT_SYMBOL(random_get_pseudo_bytes); #if BITS_PER_LONG == 32 /* * Support 64/64 => 64 division on a 32-bit platform. While the kernel * provides a div64_u64() function for this we do not use it because the * implementation is flawed. There are cases which return incorrect * results as late as linux-2.6.35. Until this is fixed upstream the * spl must provide its own implementation. * * This implementation is a slightly modified version of the algorithm * proposed by the book 'Hacker's Delight'. The original source can be * found here and is available for use without restriction. 
* * http://www.hackersdelight.org/HDcode/newCode/divDouble.c */ /* * Calculate number of leading of zeros for a 64-bit value. */ static int nlz64(uint64_t x) { register int n = 0; if (x == 0) return (64); if (x <= 0x00000000FFFFFFFFULL) { n = n + 32; x = x << 32; } if (x <= 0x0000FFFFFFFFFFFFULL) { n = n + 16; x = x << 16; } if (x <= 0x00FFFFFFFFFFFFFFULL) { n = n + 8; x = x << 8; } if (x <= 0x0FFFFFFFFFFFFFFFULL) { n = n + 4; x = x << 4; } if (x <= 0x3FFFFFFFFFFFFFFFULL) { n = n + 2; x = x << 2; } if (x <= 0x7FFFFFFFFFFFFFFFULL) { n = n + 1; } return (n); } /* * Newer kernels have a div_u64() function but we define our own * to simplify portability between kernel versions. */ static inline uint64_t __div_u64(uint64_t u, uint32_t v) { (void) do_div(u, v); return (u); } /* * Turn off missing prototypes warning for these functions. They are * replacements for libgcc-provided functions and will never be called * directly. */ +#if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmissing-prototypes" +#endif /* * Implementation of 64-bit unsigned division for 32-bit machines. * * First the procedure takes care of the case in which the divisor is a * 32-bit quantity. There are two subcases: (1) If the left half of the * dividend is less than the divisor, one execution of do_div() is all that * is required (overflow is not possible). (2) Otherwise it does two * divisions, using the grade school method. */ uint64_t __udivdi3(uint64_t u, uint64_t v) { uint64_t u0, u1, v1, q0, q1, k; int n; if (v >> 32 == 0) { // If v < 2**32: if (u >> 32 < v) { // If u/v cannot overflow, return (__div_u64(u, v)); // just do one division. } else { // If u/v would overflow: u1 = u >> 32; // Break u into two halves. u0 = u & 0xFFFFFFFF; q1 = __div_u64(u1, v); // First quotient digit. k = u1 - q1 * v; // First remainder, < v. u0 += (k << 32); q0 = __div_u64(u0, v); // Seconds quotient digit. return ((q1 << 32) + q0); } } else { // If v >= 2**32: n = nlz64(v); // 0 <= n <= 31. v1 = (v << n) >> 32; // Normalize divisor, MSB is 1. u1 = u >> 1; // To ensure no overflow. q1 = __div_u64(u1, v1); // Get quotient from q0 = (q1 << n) >> 31; // Undo normalization and // division of u by 2. if (q0 != 0) // Make q0 correct or q0 = q0 - 1; // too small by 1. if ((u - q0 * v) >= v) q0 = q0 + 1; // Now q0 is correct. return (q0); } } EXPORT_SYMBOL(__udivdi3); #ifndef abs64 /* CSTYLED */ #define abs64(x) ({ uint64_t t = (x) >> 63; ((x) ^ t) - t; }) #endif /* * Implementation of 64-bit signed division for 32-bit machines. */ int64_t __divdi3(int64_t u, int64_t v) { int64_t q, t; q = __udivdi3(abs64(u), abs64(v)); t = (u ^ v) >> 63; // If u, v have different return ((q ^ t) - t); // signs, negate q. } EXPORT_SYMBOL(__divdi3); /* * Implementation of 64-bit unsigned modulo for 32-bit machines. */ uint64_t __umoddi3(uint64_t dividend, uint64_t divisor) { return (dividend - (divisor * __udivdi3(dividend, divisor))); } EXPORT_SYMBOL(__umoddi3); /* 64-bit signed modulo for 32-bit machines. */ int64_t __moddi3(int64_t n, int64_t d) { int64_t q; boolean_t nn = B_FALSE; if (n < 0) { nn = B_TRUE; n = -n; } if (d < 0) d = -d; q = __umoddi3(n, d); return (nn ? -q : q); } EXPORT_SYMBOL(__moddi3); /* * Implementation of 64-bit unsigned division/modulo for 32-bit machines. 
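 * (A quick sanity check of the identity used below: for n == 7 and
 * d == 3, __udivdi3() returns q == 2 and the remainder comes out as
 * n - d * q == 7 - 6 == 1.)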
*/ uint64_t __udivmoddi4(uint64_t n, uint64_t d, uint64_t *r) { uint64_t q = __udivdi3(n, d); if (r) *r = n - d * q; return (q); } EXPORT_SYMBOL(__udivmoddi4); /* * Implementation of 64-bit signed division/modulo for 32-bit machines. */ int64_t __divmoddi4(int64_t n, int64_t d, int64_t *r) { int64_t q, rr; boolean_t nn = B_FALSE; boolean_t nd = B_FALSE; if (n < 0) { nn = B_TRUE; n = -n; } if (d < 0) { nd = B_TRUE; d = -d; } q = __udivmoddi4(n, d, (uint64_t *)&rr); if (nn != nd) q = -q; if (nn) rr = -rr; if (r) *r = rr; return (q); } EXPORT_SYMBOL(__divmoddi4); #if defined(__arm) || defined(__arm__) /* * Implementation of 64-bit (un)signed division for 32-bit arm machines. * * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned) * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1}, * and the remainder in {r2, r3}. The return type is specifically left * set to 'void' to ensure the compiler does not overwrite these registers * during the return. All results are in registers as per ABI */ void __aeabi_uldivmod(uint64_t u, uint64_t v) { uint64_t res; uint64_t mod; res = __udivdi3(u, v); mod = __umoddi3(u, v); { register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); register uint32_t r1 asm("r1") = (res >> 32); register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); register uint32_t r3 asm("r3") = (mod >> 32); asm volatile("" : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */ : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ return; /* r0; */ } } EXPORT_SYMBOL(__aeabi_uldivmod); void __aeabi_ldivmod(int64_t u, int64_t v) { int64_t res; uint64_t mod; res = __divdi3(u, v); mod = __umoddi3(u, v); { register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); register uint32_t r1 asm("r1") = (res >> 32); register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); register uint32_t r3 asm("r3") = (mod >> 32); asm volatile("" : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */ : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ return; /* r0; */ } } EXPORT_SYMBOL(__aeabi_ldivmod); #endif /* __arm || __arm__ */ +#if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop +#endif #endif /* BITS_PER_LONG */ /* * NOTE: The strtoxx behavior is solely based on my reading of the Solaris * ddi_strtol(9F) man page. I have not verified the behavior of these * functions against their Solaris counterparts. It is possible that I * may have misinterpreted the man page or the man page is incorrect. 
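 *
 * For example, with base == 0 the parser below auto-detects the base
 * from the prefix: "0x1a" is taken as hex (26), "017" as octal (15),
 * and "17" as decimal. Overflow is detected by checking that the
 * accumulated value never decreases (last_value > value => ERANGE).
 */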
*/ int ddi_strtol(const char *, char **, int, long *); int ddi_strtoull(const char *, char **, int, unsigned long long *); int ddi_strtoll(const char *, char **, int, long long *); #define define_ddi_strtox(type, valtype) \ int ddi_strto##type(const char *str, char **endptr, \ int base, valtype *result) \ { \ valtype last_value, value = 0; \ char *ptr = (char *)str; \ int digit, minus = 0; \ \ while (strchr(" \t\n\r\f", *ptr)) \ ++ptr; \ \ if (strlen(ptr) == 0) \ return (EINVAL); \ \ switch (*ptr) { \ case '-': \ minus = 1; \ zfs_fallthrough; \ case '+': \ ++ptr; \ break; \ } \ \ /* Auto-detect base based on prefix */ \ if (!base) { \ if (str[0] == '0') { \ if (tolower(str[1]) == 'x' && isxdigit(str[2])) { \ base = 16; /* hex */ \ ptr += 2; \ } else if (str[1] >= '0' && str[1] < 8) { \ base = 8; /* octal */ \ ptr += 1; \ } else { \ return (EINVAL); \ } \ } else { \ base = 10; /* decimal */ \ } \ } \ \ while (1) { \ if (isdigit(*ptr)) \ digit = *ptr - '0'; \ else if (isalpha(*ptr)) \ digit = tolower(*ptr) - 'a' + 10; \ else \ break; \ \ if (digit >= base) \ break; \ \ last_value = value; \ value = value * base + digit; \ if (last_value > value) /* Overflow */ \ return (ERANGE); \ \ ptr++; \ } \ \ *result = minus ? -value : value; \ \ if (endptr) \ *endptr = ptr; \ \ return (0); \ } \ define_ddi_strtox(l, long) define_ddi_strtox(ull, unsigned long long) define_ddi_strtox(ll, long long) EXPORT_SYMBOL(ddi_strtol); EXPORT_SYMBOL(ddi_strtoll); EXPORT_SYMBOL(ddi_strtoull); int ddi_copyin(const void *from, void *to, size_t len, int flags) { /* Fake ioctl() issued by kernel, 'from' is a kernel address */ if (flags & FKIOCTL) { memcpy(to, from, len); return (0); } return (copyin(from, to, len)); } EXPORT_SYMBOL(ddi_copyin); #define define_spl_param(type, fmt) \ int \ spl_param_get_##type(char *buf, zfs_kernel_param_t *kp) \ { \ return (scnprintf(buf, PAGE_SIZE, fmt "\n", \ *(type *)kp->arg)); \ } \ int \ spl_param_set_##type(const char *buf, zfs_kernel_param_t *kp) \ { \ return (kstrto##type(buf, 0, (type *)kp->arg)); \ } \ const struct kernel_param_ops spl_param_ops_##type = { \ .set = spl_param_set_##type, \ .get = spl_param_get_##type, \ }; \ EXPORT_SYMBOL(spl_param_get_##type); \ EXPORT_SYMBOL(spl_param_set_##type); \ EXPORT_SYMBOL(spl_param_ops_##type); define_spl_param(s64, "%lld") define_spl_param(u64, "%llu") /* * Post a uevent to userspace whenever a new vdev adds to the pool. It is * necessary to sync blkid information with udev, which zed daemon uses * during device hotplug to identify the vdev. */ void spl_signal_kobj_evt(struct block_device *bdev) { #if defined(HAVE_BDEV_KOBJ) || defined(HAVE_PART_TO_DEV) #ifdef HAVE_BDEV_KOBJ struct kobject *disk_kobj = bdev_kobj(bdev); #else struct kobject *disk_kobj = &part_to_dev(bdev->bd_part)->kobj; #endif if (disk_kobj) { int ret = kobject_uevent(disk_kobj, KOBJ_CHANGE); if (ret) { pr_warn("ZFS: Sending event '%d' to kobject: '%s'" " (%p): failed(ret:%d)\n", KOBJ_CHANGE, kobject_name(disk_kobj), disk_kobj, ret); } } #else /* * This is encountered if neither bdev_kobj() nor part_to_dev() is available * in the kernel - likely due to an API change that needs to be chased down. 
 */
int
ddi_copyin(const void *from, void *to, size_t len, int flags)
{
	/* Fake ioctl() issued by kernel, 'from' is a kernel address */
	if (flags & FKIOCTL) {
		memcpy(to, from, len);
		return (0);
	}

	return (copyin(from, to, len));
}
EXPORT_SYMBOL(ddi_copyin);

#define	define_spl_param(type, fmt)					\
int									\
spl_param_get_##type(char *buf, zfs_kernel_param_t *kp)			\
{									\
	return (scnprintf(buf, PAGE_SIZE, fmt "\n",			\
	    *(type *)kp->arg));						\
}									\
int									\
spl_param_set_##type(const char *buf, zfs_kernel_param_t *kp)		\
{									\
	return (kstrto##type(buf, 0, (type *)kp->arg));			\
}									\
const struct kernel_param_ops spl_param_ops_##type = {			\
	.set = spl_param_set_##type,					\
	.get = spl_param_get_##type,					\
};									\
EXPORT_SYMBOL(spl_param_get_##type);					\
EXPORT_SYMBOL(spl_param_set_##type);					\
EXPORT_SYMBOL(spl_param_ops_##type);

define_spl_param(s64, "%lld")
define_spl_param(u64, "%llu")

/*
 * Post a uevent to userspace whenever a new vdev is added to the pool.
 * This is necessary to sync blkid information with udev, which the zed
 * daemon uses during device hotplug to identify the vdev.
 */
void
spl_signal_kobj_evt(struct block_device *bdev)
{
#if defined(HAVE_BDEV_KOBJ) || defined(HAVE_PART_TO_DEV)
#ifdef HAVE_BDEV_KOBJ
	struct kobject *disk_kobj = bdev_kobj(bdev);
#else
	struct kobject *disk_kobj = &part_to_dev(bdev->bd_part)->kobj;
#endif
	if (disk_kobj) {
		int ret = kobject_uevent(disk_kobj, KOBJ_CHANGE);
		if (ret) {
			pr_warn("ZFS: Sending event '%d' to kobject: '%s'"
			    " (%p): failed(ret:%d)\n", KOBJ_CHANGE,
			    kobject_name(disk_kobj), disk_kobj, ret);
		}
	}
#else
/*
 * This is encountered if neither bdev_kobj() nor part_to_dev() is available
 * in the kernel - likely due to an API change that needs to be chased down.
 */
#error "Unsupported kernel: unable to get struct kobj from bdev"
#endif
}
EXPORT_SYMBOL(spl_signal_kobj_evt);

int
ddi_copyout(const void *from, void *to, size_t len, int flags)
{
	/* Fake ioctl() issued by kernel, 'to' is also a kernel address */
	if (flags & FKIOCTL) {
		memcpy(to, from, len);
		return (0);
	}

	return (copyout(from, to, len));
}
EXPORT_SYMBOL(ddi_copyout);

static ssize_t
spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{
#if defined(HAVE_KERNEL_READ_PPOS)
	return (kernel_read(file, buf, count, pos));
#else
	mm_segment_t saved_fs;
	ssize_t ret;

	saved_fs = get_fs();
	set_fs(KERNEL_DS);

	ret = vfs_read(file, (void __user *)buf, count, pos);

	set_fs(saved_fs);

	return (ret);
#endif
}

static int
spl_getattr(struct file *filp, struct kstat *stat)
{
	int rc;

	ASSERT(filp);
	ASSERT(stat);

#if defined(HAVE_4ARGS_VFS_GETATTR)
	rc = vfs_getattr(&filp->f_path, stat, STATX_BASIC_STATS,
	    AT_STATX_SYNC_AS_STAT);
#elif defined(HAVE_2ARGS_VFS_GETATTR)
	rc = vfs_getattr(&filp->f_path, stat);
#elif defined(HAVE_3ARGS_VFS_GETATTR)
	rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, stat);
#else
#error "No available vfs_getattr()"
#endif
	if (rc)
		return (-rc);

	return (0);
}

/*
 * Read the unique system identifier from the /etc/hostid file.
 *
 * The behavior of /usr/bin/hostid on Linux systems with the
 * regular eglibc and coreutils is:
 *
 * 1. Generate the value if the /etc/hostid file does not exist
 *    or if the /etc/hostid file is less than four bytes in size.
 *
 * 2. If the /etc/hostid file is at least 4 bytes, then return
 *    the first four bytes [0..3] in native endian order.
 *
 * 3. Always ignore bytes [4..] if they exist in the file.
 *
 * Only the first four bytes are significant, even on systems that
 * have a 64-bit word size.
 *
 * See:
 *
 *   eglibc: sysdeps/unix/sysv/linux/gethostid.c
 *   coreutils: src/hostid.c
 *
 * Notes:
 *
 * The /etc/hostid file on Solaris is a text file that often reads:
 *
 *   # DO NOT EDIT
 *   "0123456789"
 *
 * Directly copying this file to Linux results in a constant
 * hostid of 4f442023 because the default comment constitutes
 * the first four bytes of the file.
 */

static char *spl_hostid_path = HW_HOSTID_PATH;
module_param(spl_hostid_path, charp, 0444);
MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)");

static int
hostid_read(uint32_t *hostid)
{
	uint64_t size;
	uint32_t value = 0;
	int error;
	loff_t off;
	struct file *filp;
	struct kstat stat;

	filp = filp_open(spl_hostid_path, 0, 0);

	if (IS_ERR(filp))
		return (ENOENT);

	error = spl_getattr(filp, &stat);
	if (error) {
		filp_close(filp, 0);
		return (error);
	}
	size = stat.size;
	// cppcheck-suppress sizeofwithnumericparameter
	if (size < sizeof (HW_HOSTID_MASK)) {
		filp_close(filp, 0);
		return (EINVAL);
	}

	off = 0;
	/*
	 * Read directly into the variable like eglibc does.
	 * Short reads are okay; native behavior is preserved.
	 */
	error = spl_kernel_read(filp, &value, sizeof (value), &off);
	if (error < 0) {
		filp_close(filp, 0);
		return (EIO);
	}

	/* Mask down to 32 bits like coreutils does. */
	*hostid = (value & HW_HOSTID_MASK);
	filp_close(filp, 0);

	return (0);
}
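/*
 * A self-contained userspace sketch (illustrative names, not part of the
 * diff) of the same /etc/hostid convention hostid_read() implements above:
 * take the first four bytes of the file in native byte order and ignore any
 * trailing bytes, as eglibc's gethostid() and coreutils' hostid do.
 */
#include <stdint.h>
#include <stdio.h>

static int
demo_read_hostid(const char *path, uint32_t *hostid)
{
	FILE *fp = fopen(path, "rb");
	uint32_t value = 0;

	if (fp == NULL)
		return (-1);

	/* Bytes [0..3] in native endian order; bytes [4..] are ignored. */
	if (fread(&value, sizeof (value), 1, fp) != 1) {
		fclose(fp);
		return (-1);	/* file is shorter than four bytes */
	}
	fclose(fp);

	*hostid = value;	/* only the low 32 bits are significant */
	return (0);
}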
/*
 * Return the system hostid. Preferentially use the spl_hostid module option
 * when set, otherwise use the value in the /etc/hostid file.
 */
uint32_t
zone_get_hostid(void *zone)
{
	uint32_t hostid;

	ASSERT3P(zone, ==, NULL);

	if (spl_hostid != 0)
		return ((uint32_t)(spl_hostid & HW_HOSTID_MASK));

	if (hostid_read(&hostid) == 0)
		return (hostid);

	return (0);
}
EXPORT_SYMBOL(zone_get_hostid);

static int
spl_kvmem_init(void)
{
	int rc = 0;

	rc = spl_kmem_init();
	if (rc)
		return (rc);

	rc = spl_vmem_init();
	if (rc) {
		spl_kmem_fini();
		return (rc);
	}

	return (rc);
}

/*
 * We initialize the random number generator with 128 bits of entropy from the
 * system random number generator. In the improbable case that we have a zero
 * seed, we fall back to the system jiffies, unless they are also zero, in
 * which case we use a preprogrammed seed. We step forward by 2^64 iterations
 * to initialize each of the per-cpu seeds so that the sequences generated on
 * each CPU will never overlap in practice.
 */
static int __init
spl_random_init(void)
{
	uint64_t s[4];
	int i = 0;

	spl_pseudo_entropy = __alloc_percpu(4 * sizeof (uint64_t),
	    sizeof (uint64_t));

	if (!spl_pseudo_entropy)
		return (-ENOMEM);

	get_random_bytes(s, sizeof (s));

	if (s[0] == 0 && s[1] == 0 && s[2] == 0 && s[3] == 0) {
		if (jiffies != 0) {
			s[0] = jiffies;
			s[1] = ~0 - jiffies;
			s[2] = ~jiffies;
			s[3] = jiffies - ~0;
		} else {
			(void) memcpy(s, "improbable seed", 16);
		}
		printk("SPL: get_random_bytes() returned 0 "
		    "when generating random seed. Setting initial seed to "
		    "0x%016llx%016llx%016llx%016llx.\n", cpu_to_be64(s[0]),
		    cpu_to_be64(s[1]), cpu_to_be64(s[2]), cpu_to_be64(s[3]));
	}

	for_each_possible_cpu(i) {
		uint64_t *wordp = per_cpu_ptr(spl_pseudo_entropy, i);

		spl_rand_jump(s);

		wordp[0] = s[0];
		wordp[1] = s[1];
		wordp[2] = s[2];
		wordp[3] = s[3];
	}

	return (0);
}

static void
spl_random_fini(void)
{
	free_percpu(spl_pseudo_entropy);
}

static void
spl_kvmem_fini(void)
{
	spl_vmem_fini();
	spl_kmem_fini();
}

static int __init
spl_init(void)
{
	int rc = 0;

	if ((rc = spl_random_init()))
		goto out0;

	if ((rc = spl_kvmem_init()))
		goto out1;

	if ((rc = spl_tsd_init()))
		goto out2;

	if ((rc = spl_taskq_init()))
		goto out3;

	if ((rc = spl_kmem_cache_init()))
		goto out4;

	if ((rc = spl_proc_init()))
		goto out5;

	if ((rc = spl_kstat_init()))
		goto out6;

	if ((rc = spl_zlib_init()))
		goto out7;

	if ((rc = spl_zone_init()))
		goto out8;

	return (rc);

out8:
	spl_zlib_fini();
out7:
	spl_kstat_fini();
out6:
	spl_proc_fini();
out5:
	spl_kmem_cache_fini();
out4:
	spl_taskq_fini();
out3:
	spl_tsd_fini();
out2:
	spl_kvmem_fini();
out1:
	spl_random_fini();
out0:
	return (rc);
}

static void __exit
spl_fini(void)
{
	spl_zone_fini();
	spl_zlib_fini();
	spl_kstat_fini();
	spl_proc_fini();
	spl_kmem_cache_fini();
	spl_taskq_fini();
	spl_tsd_fini();
	spl_kvmem_fini();
	spl_random_fini();
}

module_init(spl_init);
module_exit(spl_fini);

MODULE_DESCRIPTION("Solaris Porting Layer");
MODULE_AUTHOR(ZFS_META_AUTHOR);
MODULE_LICENSE("GPL");
MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
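/*
 * A minimal sketch (hypothetical stage names) of the staged-init pattern
 * used by spl_init() above: subsystems are brought up in order, and on
 * failure the goto ladder unwinds only the stages that already succeeded,
 * in reverse order, before returning the error.
 */
static int stage_a_init(void) { return (0); }
static void stage_a_fini(void) { }
static int stage_b_init(void) { return (-1); }	/* simulate a failure */
static void stage_b_fini(void) { }
static int stage_c_init(void) { return (0); }

static int
demo_module_init(void)
{
	int rc;

	if ((rc = stage_a_init()))
		goto out0;
	if ((rc = stage_b_init()))
		goto out1;
	if ((rc = stage_c_init()))
		goto out2;

	return (0);

out2:
	stage_b_fini();	/* reached when stage_c_init() fails */
out1:
	stage_a_fini();	/* also runs when stage_b_init() fails */
out0:
	return (rc);	/* nothing to undo if stage_a_init() failed */
}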
diff --git a/module/zfs/vdev_raidz_math_aarch64_neonx2.c b/module/zfs/vdev_raidz_math_aarch64_neonx2.c
index 0a1f05fd6664..bd9de91a4ba8 100644
--- a/module/zfs/vdev_raidz_math_aarch64_neonx2.c
+++ b/module/zfs/vdev_raidz_math_aarch64_neonx2.c
@@ -1,232 +1,236 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
 */

#include <sys/isa_defs.h>

#if defined(__aarch64__)

#include "vdev_raidz_math_aarch64_neon_common.h"

#define	SYN_STRIDE		4

#define	ZERO_STRIDE		8
#define	ZERO_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()
#define	ZERO_D			0, 1, 2, 3, 4, 5, 6, 7

#define	COPY_STRIDE		8
#define	COPY_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()
#define	COPY_D			0, 1, 2, 3, 4, 5, 6, 7

#define	ADD_STRIDE		8
#define	ADD_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()
#define	ADD_D			0, 1, 2, 3, 4, 5, 6, 7

#define	MUL_STRIDE		4
#define	MUL_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_33_36()
#define	MUL_D			0, 1, 2, 3

#define	GEN_P_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_33_36()
#define	GEN_P_STRIDE		4
#define	GEN_P_P			0, 1, 2, 3

#define	GEN_PQ_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	GEN_PQ_STRIDE		4
#define	GEN_PQ_D		0, 1, 2, 3
#define	GEN_PQ_C		4, 5, 6, 7

#define	GEN_PQR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	GEN_PQR_STRIDE		4
#define	GEN_PQR_D		0, 1, 2, 3
#define	GEN_PQR_C		4, 5, 6, 7

#define	SYN_Q_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_Q_STRIDE		4
#define	SYN_Q_D			0, 1, 2, 3
#define	SYN_Q_X			4, 5, 6, 7

#define	SYN_R_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_R_STRIDE		4
#define	SYN_R_D			0, 1, 2, 3
#define	SYN_R_X			4, 5, 6, 7

#define	SYN_PQ_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_PQ_STRIDE		4
#define	SYN_PQ_D		0, 1, 2, 3
#define	SYN_PQ_X		4, 5, 6, 7

#define	REC_PQ_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_33_36()
#define	REC_PQ_STRIDE		4
#define	REC_PQ_X		0, 1, 2, 3
#define	REC_PQ_Y		4, 5, 6, 7
#define	REC_PQ_T		8, 9, 22, 23

#define	SYN_PR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_PR_STRIDE		4
#define	SYN_PR_D		0, 1, 2, 3
#define	SYN_PR_X		4, 5, 6, 7

#define	REC_PR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_33_36()
#define	REC_PR_STRIDE		4
#define	REC_PR_X		0, 1, 2, 3
#define	REC_PR_Y		4, 5, 6, 7
#define	REC_PR_T		8, 9, 22, 23

#define	SYN_QR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_QR_STRIDE		4
#define	SYN_QR_D		0, 1, 2, 3
#define	SYN_QR_X		4, 5, 6, 7

#define	REC_QR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_33_36()
#define	REC_QR_STRIDE		4
#define	REC_QR_X		0, 1, 2, 3
#define	REC_QR_Y		4, 5, 6, 7
#define	REC_QR_T		8, 9, 22, 23

#define	SYN_PQR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_PQR_STRIDE		4
#define	SYN_PQR_D		0, 1, 2, 3
#define	SYN_PQR_X		4, 5, 6, 7

#define	REC_PQR_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_31()	\
	GEN_X_DEFINE_32()	\
	GEN_X_DEFINE_33_36()
#define	REC_PQR_STRIDE		2
#define	REC_PQR_X		0, 1
#define	REC_PQR_Y		2, 3
#define	REC_PQR_Z		4, 5
#define	REC_PQR_XS		6, 7
#define	REC_PQR_YS		8, 9

#include <sys/types.h>
#include "vdev_raidz_math_impl.h"

DEFINE_GEN_METHODS(aarch64_neonx2);

/*
 * If compiled with -O0, gcc doesn't do any stack frame coalescing
 * and -Wframe-larger-than=1024 is triggered in debug mode.
 */
+#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
DEFINE_REC_METHODS(aarch64_neonx2);
+#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
+#endif

static boolean_t
raidz_will_aarch64_neonx2_work(void)
{
	return (kfpu_allowed());
}

const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl = {
	.init = NULL,
	.fini = NULL,
	.gen = RAIDZ_GEN_METHODS(aarch64_neonx2),
	.rec = RAIDZ_REC_METHODS(aarch64_neonx2),
	.is_supported = &raidz_will_aarch64_neonx2_work,
	.name = "aarch64_neonx2"
};

#endif /* defined(__aarch64__) */
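/*
 * A standalone sketch of the guard idiom the hunks above introduce
 * (illustrative only; the function name is hypothetical). Clang also
 * defines __GNUC__, so testing !defined(__clang__) is needed to scope
 * GCC-specific diagnostic pragmas to GCC alone; clang may not recognize
 * option spellings such as "-Wframe-larger-than=" with no value and could
 * otherwise emit -Wunknown-warning-option. Pairing push with pop keeps the
 * suppression local to the code between them.
 */
#include <stdio.h>

#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wframe-larger-than="
#endif

static void
demo_big_frame(void)
{
	char scratch[2048];	/* would trip -Wframe-larger-than=1024 */

	scratch[0] = '\0';
	printf("%s\n", scratch);
}

#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
#endif

int
main(void)
{
	demo_big_frame();
	return (0);
}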