diff --git a/contrib/arm-optimized-routines/MAINTAINERS b/contrib/arm-optimized-routines/MAINTAINERS index 6c5823a8dbce..06cceb8f2501 100644 --- a/contrib/arm-optimized-routines/MAINTAINERS +++ b/contrib/arm-optimized-routines/MAINTAINERS @@ -1,12 +1,9 @@ / - Szabolcs Nagy + Tamar Christina math/ - Szabolcs Nagy -networking/ - Szabolcs Nagy -pl/ Pierre Blanchard Joe Ramsay +networking/ + Ola Liljedahl string/ - Szabolcs Nagy Wilco Dijkstra diff --git a/contrib/arm-optimized-routines/Makefile b/contrib/arm-optimized-routines/Makefile index c487896728c2..e7503dbd2f60 100644 --- a/contrib/arm-optimized-routines/Makefile +++ b/contrib/arm-optimized-routines/Makefile @@ -1,92 +1,100 @@ # Makefile - requires GNU make # -# Copyright (c) 2018-2022, Arm Limited. +# Copyright (c) 2018-2024, Arm Limited. # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception srcdir = . prefix = /usr bindir = $(prefix)/bin libdir = $(prefix)/lib includedir = $(prefix)/include # Configure these in config.mk; do not make changes in this file. SUBS = math string networking -PLSUBS = math HOST_CC = cc HOST_CFLAGS = -std=c99 -O2 HOST_LDFLAGS = HOST_LDLIBS = EMULATOR = CPPFLAGS = CFLAGS = -std=c99 -O2 CFLAGS_SHARED = -fPIC CFLAGS_ALL = -Ibuild/include $(CPPFLAGS) $(CFLAGS) -CFLAGS_PL = -Ibuild/pl/include $(CPPFLAGS) $(CFLAGS) -DPL LDFLAGS = LDLIBS = AR = $(CROSS_COMPILE)ar RANLIB = $(CROSS_COMPILE)ranlib INSTALL = install +# Detect OS. +# Assume Unix environment: Linux, Darwin, or Msys. +OS := $(shell uname -s) +OS := $(patsubst MSYS%,Msys,$(OS)) +# The following math dependencies can be adjusted in the +# config file if necessary, e.g. for Msys. +libm-libs = -lm +libc-libs = -lc +mpfr-libs = -lmpfr +gmp-libs = -lgmp +mpc-libs = -lmpc all: -include config.mk $(foreach sub,$(SUBS),$(eval include $(srcdir)/$(sub)/Dir.mk)) # Required targets of subproject foo: # all-foo # check-foo # clean-foo # install-foo # Required make variables of subproject foo: # foo-files: Built files (all in build/). # Make variables used by subproject foo: # foo-...: Variables defined in foo/Dir.mk or by config.mk. all: $(SUBS:%=all-%) ALL_FILES = $(foreach sub,$(SUBS),$($(sub)-files)) DIRS = $(sort $(patsubst %/,%,$(dir $(ALL_FILES)))) $(ALL_FILES): | $(DIRS) $(DIRS): mkdir -p $@ $(filter %.os,$(ALL_FILES)): CFLAGS_ALL += $(CFLAGS_SHARED) -$(filter %.os,$(ALL_FILES)): CFLAGS_PL += $(CFLAGS_SHARED) build/%.o: $(srcdir)/%.S $(CC) $(CFLAGS_ALL) -c -o $@ $< build/%.o: $(srcdir)/%.c $(CC) $(CFLAGS_ALL) -c -o $@ $< build/%.os: $(srcdir)/%.S $(CC) $(CFLAGS_ALL) -c -o $@ $< build/%.os: $(srcdir)/%.c $(CC) $(CFLAGS_ALL) -c -o $@ $< clean: $(SUBS:%=clean-%) rm -rf build distclean: clean rm -f config.mk $(DESTDIR)$(bindir)/%: build/bin/% $(INSTALL) -D $< $@ $(DESTDIR)$(libdir)/%.so: build/lib/%.so $(INSTALL) -D $< $@ $(DESTDIR)$(libdir)/%: build/lib/% $(INSTALL) -m 644 -D $< $@ $(DESTDIR)$(includedir)/%: build/include/% $(INSTALL) -m 644 -D $< $@ install: $(SUBS:%=install-%) check: $(SUBS:%=check-%) .PHONY: all clean distclean install check diff --git a/contrib/arm-optimized-routines/README b/contrib/arm-optimized-routines/README index 651ebdc84bc8..4bbed76d75c8 100644 --- a/contrib/arm-optimized-routines/README +++ b/contrib/arm-optimized-routines/README @@ -1,60 +1,87 @@ Arm Optimized Routines ---------------------- This repository contains implementations of library functions provided by Arm. The outbound license is available under a dual license, at the user’s election, as reflected in the LICENSE file.
Contributions to this project are accepted, but Contributors have to sign an Assignment Agreement; please follow the instructions in contributor-agreement.pdf. This is needed so that code can be upstreamed to projects that require copyright assignment. Further contribution requirements are documented in README.contributors of the appropriate subdirectory. Regular quarterly releases are tagged as vYY.MM; the latest -release is v24.01. +release is v25.01. Source code layout: build/ - build directory (created by make). -math/ - math subproject sources. +math/ - math subproject sources for generic scalar + subroutines and sources shared with + subdirectories of math/. + All math routines should meet the quality + requirements stated in math/README.contributors; + routines that fail to do so are located in an + experimental/ directory. +math/aarch64/ - math subproject AArch64-specific sources + and sources shared with subdirectories. +math/aarch64/advsimd - AdvSIMD-specific math sources. +math/aarch64/experimental - Experimental math sources which do not + meet the quality requirements stated in + math/README.contributors. +math/aarch64/sve - SVE-specific math sources. math/include/ - math library public headers. math/test/ - math test and benchmark related sources. math/tools/ - tools used for designing the algorithms. networking/ - networking subproject sources. networking/include/ - networking library public headers. networking/test/ - networking test and benchmark related sources. string/ - string routines subproject sources. + All string routines should meet the quality + requirements stated in string/README.contributors; + routines that fail to do so are located in an + experimental/ directory. +string/<arch>/ - <arch>-specific string routines sources for + <arch>=aarch64, and arm. +string/aarch64/experimental - Experimental string routines which + may not be fully optimized yet. string/include/ - string library public headers. string/test/ - string test and benchmark related sources. -pl/... - separately maintained performance library code. The steps to build the target libraries and run the tests: cp config.mk.dist config.mk # edit config.mk if necessary ... make make check Or building outside of the source directory: ln -s path/to/src/Makefile Makefile cp path/to/src/config.mk.dist config.mk echo 'srcdir = path/to/src' >> config.mk # further edits to config.mk make make check Or building and testing the math subproject only: make all-math make check-math +Note on compiler compatibility/requirements: + +SVE routines are always built by default - this means that on AArch64 +GCC >= 10 or LLVM >= 5 are always required for SVE ACLE compatibility. +There is no explicit check for a compatible compiler, therefore the SVE +routines will fail to build if CC is too old. + The test system requires libmpfr and libmpc. For example, on Debian Linux they can be installed with: sudo apt-get install libmpfr-dev libmpc-dev For a cross build, CROSS_COMPILE should be set in config.mk and EMULATOR should be set for cross testing (e.g. using qemu-user or remote access to a target machine); see the examples in config.mk.dist. diff --git a/contrib/arm-optimized-routines/config.mk.dist b/contrib/arm-optimized-routines/config.mk.dist index 03fb54db52fa..ae4574e7cdba 100644 --- a/contrib/arm-optimized-routines/config.mk.dist +++ b/contrib/arm-optimized-routines/config.mk.dist @@ -1,94 +1,149 @@ # Example config.mk # -# Copyright (c) 2018-2023, Arm Limited. +# Copyright (c) 2018-2024, Arm Limited.
# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception # Subprojects to build SUBS = math string networking -# Subsubprojects to build if subproject pl is built -PLSUBS = math - # Target architecture: aarch64, arm or x86_64 ARCH = aarch64 # Use for cross compilation with gcc. #CROSS_COMPILE = aarch64-none-linux-gnu- # Compiler for the target CC = $(CROSS_COMPILE)gcc CFLAGS = -std=c99 -pipe -O3 CFLAGS += -Wall -Wno-missing-braces CFLAGS += -Werror=implicit-function-declaration # Used for test case generator that is executed on the host HOST_CC = gcc HOST_CFLAGS = -std=c99 -O2 HOST_CFLAGS += -Wall -Wno-unused-function # Enable debug info. HOST_CFLAGS += -g CFLAGS += -g +ifeq ($(OS),Msys) + # llvm is the only available/valid native compiler + CC = clang + AR = llvm-ar + RANLIB = llvm-ranlib + HOST_CC = clang + SYSROOT = /c/wenv/msys2/msys64/clangarm64 + # Common Windows flags + COMMON_WIN_CFLAGS = -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE + COMMON_WIN_CFLAGS += -Wno-deprecated-declarations -Wno-unused-variable + # For mathtest + HOST_CFLAGS += -I$(SYSROOT)/include + HOST_CFLAGS += $(COMMON_WIN_CFLAGS) -Wno-ignored-attributes + # Clear the default flag -fPIC, as it is not supported on Windows + CFLAGS_SHARED = + # For ulp.h with MPFR + CFLAGS += -I$(SYSROOT)/include + # For clang on Windows + CFLAGS += $(COMMON_WIN_CFLAGS) +endif + # Optimize the shared libraries on aarch64 assuming they fit in 1M. #CFLAGS_SHARED = -fPIC -mcmodel=tiny # Enable MTE support. #CFLAGS += -march=armv8.5-a+memtag -DWANT_MTE_TEST=1 # Use with cross testing. #EMULATOR = qemu-aarch64-static #EMULATOR = sh -c 'scp $$1 user@host:/dir && ssh user@host /dir/"$$@"' -- # Additional flags for subprojects. math-cflags = math-ldlibs = math-ulpflags = math-testflags = -string-cflags = +string-cflags = -falign-functions=64 networking-cflags = -# Use if mpfr is available on the target for ulp error checking. -#math-ldlibs += -lmpfr -lgmp -#math-cflags += -DUSE_MPFR +ifeq ($(OS),Msys) + # Libraries can be installed with pacman + libm-libs = -lmsvcrt -lvcruntime -lucrt + libc-libs = + # The linker will look for .lib, but some systems only have .dll.a, + # so we have to give absolute paths to the libraries. + # This is system dependent and might need adjusting. + mpfr-libs = $(SYSROOT)/lib/libmpfr.dll.a + gmp-libs = $(SYSROOT)/lib/libgmp.dll.a + mpc-libs = $(SYSROOT)/lib/libmpc.dll.a +endif + +# Use if mpfr is available on the target for ulp error checking. If +# enabling this, it is advised to disable fenv checks by uncommenting +# the two lines at the bottom of this block. +USE_MPFR=0 +math-cflags += -DUSE_MPFR=$(USE_MPFR) +ifeq ($(USE_MPFR), 1) + math-ldlibs += $(mpfr-libs) $(gmp-libs) + math-ulpflags += -m +endif +# Disable fenv checks +#math-ulpflags = -q -f +#math-testflags = -nostatus # Use with gcc. math-cflags += -frounding-math -fexcess-precision=standard -fno-stack-protector math-cflags += -ffp-contract=fast -fno-math-errno # Use with clang. #math-cflags += -ffp-contract=fast -# Disable/enable SVE vector math code and tests. -# If WANT_SVE_MATH is enabled, math-sve-cflags is added for SVE -# routines only so that SVE code does not leak into scalar -# routines. It is also necessary to add it for tools (e.g. ulp, -# mathbench) -WANT_SVE_MATH = 0 -ifeq ($(WANT_SVE_MATH), 1) - math-sve-cflags = -march=armv8-a+sve -endif -math-cflags += -DWANT_SVE_MATH=$(WANT_SVE_MATH) - # If defined to 1, set errno in math functions according to ISO C. Many math # libraries do not set errno, so this is 0 by default.
It may need to be # set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. WANT_ERRNO = 0 math-cflags += -DWANT_ERRNO=$(WANT_ERRNO) +# Disable/enable SVE vector math tests/tools. +ifeq ($(ARCH),aarch64) + WANT_SVE_TESTS = 1 +else + WANT_SVE_TESTS = 0 +endif +math-cflags += -DWANT_SVE_TESTS=$(WANT_SVE_TESTS) + # If set to 1, set fenv in vector math routines. WANT_SIMD_EXCEPT = 0 math-cflags += -DWANT_SIMD_EXCEPT=$(WANT_SIMD_EXCEPT) -# Disable fenv checks -#math-ulpflags = -q -f -#math-testflags = -nostatus +# If set to 1, enable tests for exp10. WANT_EXP10_TESTS = 1 +math-cflags += -DWANT_EXP10_TESTS=$(WANT_EXP10_TESTS) + +# If set to 1, enable tests for sinpi and cospi. These functions are +# only supported on aarch64. +ifeq ($(ARCH),aarch64) + WANT_TRIGPI_TESTS = 1 +else + WANT_TRIGPI_TESTS = 0 +endif +math-cflags += -DWANT_TRIGPI_TESTS=$(WANT_TRIGPI_TESTS) # Remove GNU Property Notes from asm files. #string-cflags += -DWANT_GNU_PROPERTY=0 # Enable assertion checks. #networking-cflags += -DWANT_ASSERT # Avoid auto-vectorization of scalar code and unroll loops. networking-cflags += -O2 -fno-tree-vectorize -funroll-loops + +# Provide *_finite symbols and some of the glibc hidden symbols +# so libmathlib can be used with binaries compiled against glibc +# to interpose math functions with both static and dynamic linking. +USE_GLIBC_ABI = 1 +math-cflags += -DUSE_GLIBC_ABI=$(USE_GLIBC_ABI) + +# Enable experimental math routines - non-C23 vector math and low-accuracy scalar. +WANT_EXPERIMENTAL_MATH = 0 +math-cflags += -DWANT_EXPERIMENTAL_MATH=$(WANT_EXPERIMENTAL_MATH) diff --git a/contrib/arm-optimized-routines/math/Dir.mk b/contrib/arm-optimized-routines/math/Dir.mk index 5e9494a7bd3c..6277241ac4de 100644 --- a/contrib/arm-optimized-routines/math/Dir.mk +++ b/contrib/arm-optimized-routines/math/Dir.mk @@ -1,119 +1,290 @@ # Makefile fragment - requires GNU make # -# Copyright (c) 2019-2023, Arm Limited. +# Copyright (c) 2019-2024, Arm Limited.
# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception -S := $(srcdir)/math -B := build/math - -math-lib-srcs := $(wildcard $(S)/*.[cS]) -math-lib-srcs += $(wildcard $(S)/$(ARCH)/*.[cS]) +.SECONDEXPANSION: + +ifneq ($(OS),Linux) + ifeq ($(WANT_SIMD_EXCEPT),1) + $(error WANT_SIMD_EXCEPT is not supported outside Linux) + endif + ifneq ($(USE_MPFR),1) + $(warning WARNING: Double-precision ULP tests will not be usable without MPFR) + endif + ifeq ($(USE_GLIBC_ABI),1) + $(error Can only generate special GLIBC symbols on Linux - please disable USE_GLIBC_ABI) + endif +endif + +ifneq ($(ARCH),aarch64) + ifeq ($(WANT_TRIGPI_TESTS),1) + $(error trigpi functions only supported on aarch64) + endif + ifeq ($(WANT_EXPERIMENTAL_MATH),1) + $(error Experimental math only supported on aarch64) + endif +endif + +math-src-dir := $(srcdir)/math +math-build-dir := build/math + +math-lib-srcs := $(wildcard $(math-src-dir)/*.[cS]) +math-lib-srcs += $(wildcard $(math-src-dir)/$(ARCH)/*.[cS]) +ifeq ($(OS),Linux) +# Vector symbols only supported on Linux +math-lib-srcs += $(wildcard $(math-src-dir)/$(ARCH)/*/*.[cS]) +endif + +ifeq ($(WANT_EXPERIMENTAL_MATH), 1) +ifeq ($(OS),Linux) +# Vector symbols only supported on Linux +math-lib-srcs += $(wildcard $(math-src-dir)/$(ARCH)/experimental/*/*.[cS]) +else +math-lib-srcs += $(wildcard $(math-src-dir)/$(ARCH)/experimental/*.[cS]) +endif +else +# Scalar experimental symbols will have been added by wildcard, so remove them +math-lib-srcs := $(filter-out $(math-src-dir)/aarch64/experimental/%, $(math-lib-srcs)) +endif math-test-srcs := \ - $(S)/test/mathtest.c \ - $(S)/test/mathbench.c \ - $(S)/test/ulp.c \ + $(math-src-dir)/test/mathtest.c \ + $(math-src-dir)/test/mathbench.c \ + $(math-src-dir)/test/ulp.c \ -math-test-host-srcs := $(wildcard $(S)/test/rtest/*.[cS]) +math-test-host-srcs := $(wildcard $(math-src-dir)/test/rtest/*.[cS]) -math-includes := $(patsubst $(S)/%,build/%,$(wildcard $(S)/include/*.h)) -math-test-includes := $(patsubst $(S)/%,build/include/%,$(wildcard $(S)/test/*.h)) +math-includes := $(patsubst $(math-src-dir)/%,build/%,$(wildcard $(math-src-dir)/include/*.h)) math-libs := \ build/lib/libmathlib.so \ build/lib/libmathlib.a \ math-tools := \ build/bin/mathtest \ build/bin/mathbench \ build/bin/mathbench_libc \ build/bin/runulp.sh \ build/bin/ulp \ math-host-tools := \ build/bin/rtest \ -math-lib-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-lib-srcs))) -math-test-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-test-srcs))) -math-host-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-test-host-srcs))) +math-lib-objs := $(patsubst $(math-src-dir)/%,$(math-build-dir)/%.o,$(basename $(math-lib-srcs))) +math-test-objs := $(patsubst $(math-src-dir)/%,$(math-build-dir)/%.o,$(basename $(math-test-srcs))) +math-host-objs := $(patsubst $(math-src-dir)/%,$(math-build-dir)/%.o,$(basename $(math-test-host-srcs))) math-target-objs := $(math-lib-objs) $(math-test-objs) math-objs := $(math-target-objs) $(math-target-objs:%.o=%.os) $(math-host-objs) math-files := \ $(math-objs) \ $(math-libs) \ $(math-tools) \ $(math-host-tools) \ - $(math-includes) \ - $(math-test-includes) \ + $(math-includes) -all-math: $(math-libs) $(math-tools) $(math-includes) $(math-test-includes) +all-math: $(math-libs) $(math-tools) $(math-includes) -$(math-objs): $(math-includes) $(math-test-includes) +$(math-objs): $(math-includes) $(math-objs): CFLAGS_ALL += $(math-cflags) -$(B)/test/mathtest.o: CFLAGS_ALL += -fmath-errno +$(math-build-dir)/test/mathtest.o: 
CFLAGS_ALL += -fmath-errno $(math-host-objs): CC = $(HOST_CC) $(math-host-objs): CFLAGS_ALL = $(HOST_CFLAGS) -$(B)/test/ulp.o: $(S)/test/ulp.h +# Add include path for experimental routines so they can share helpers with non-experimental +$(math-build-dir)/aarch64/experimental/advsimd/%: CFLAGS_ALL += -I$(math-src-dir)/aarch64/advsimd +$(math-build-dir)/aarch64/experimental/sve/%: CFLAGS_ALL += -I$(math-src-dir)/aarch64/sve + +$(math-objs): CFLAGS_ALL += -I$(math-src-dir) + +ulp-funcs-dir = build/test/ulp-funcs/ +ulp-wrappers-dir = build/test/ulp-wrappers/ +mathbench-funcs-dir = build/test/mathbench-funcs/ +test-sig-dirs = $(ulp-funcs-dir) $(ulp-wrappers-dir) $(mathbench-funcs-dir) +build/include/test $(test-sig-dirs) $(addsuffix /$(ARCH),$(test-sig-dirs)) $(addsuffix /aarch64/experimental,$(test-sig-dirs)) \ +$(addsuffix /aarch64/experimental/advsimd,$(test-sig-dirs)) $(addsuffix /aarch64/experimental/sve,$(test-sig-dirs)) \ +$(addsuffix /aarch64/advsimd,$(test-sig-dirs)) $(addsuffix /aarch64/sve,$(test-sig-dirs)): + mkdir -p $@ + +ulp-funcs = $(patsubst $(math-src-dir)/%,$(ulp-funcs-dir)/%,$(basename $(math-lib-srcs))) +ulp-wrappers = $(patsubst $(math-src-dir)/%,$(ulp-wrappers-dir)/%,$(basename $(math-lib-srcs))) +mathbench-funcs = $(patsubst $(math-src-dir)/%,$(mathbench-funcs-dir)/%,$(basename $(math-lib-srcs))) + +ifeq ($(WANT_SVE_TESTS), 0) + # Filter out anything with sve in the path + ulp-funcs := $(foreach a,$(ulp-funcs),$(if $(findstring sve,$a),,$a)) + ulp-wrappers := $(foreach a,$(ulp-wrappers),$(if $(findstring sve,$a),,$a)) + mathbench-funcs := $(foreach a,$(mathbench-funcs),$(if $(findstring sve,$a),,$a)) +endif + +define emit_sig +$1/aarch64/experimental/sve/%.i: EXTRA_INC = -I$(math-src-dir)/aarch64/sve +$1/aarch64/experimental/advsimd/%.i: EXTRA_INC = -I$(math-src-dir)/aarch64/advsimd +$1/%.i: $(math-src-dir)/%.c | $$$$(@D) + $(CC) $$< $(math-cflags) -I$(math-src-dir)/include -I$(math-src-dir) $$(EXTRA_INC) -D$2 -E -o $$@ +$1/%: $1/%.i + { grep TEST_SIG $$< || true; } | cut -f 2- -d ' ' > $$@ +endef + +$(eval $(call emit_sig,$(ulp-funcs-dir),EMIT_ULP_FUNCS)) +$(eval $(call emit_sig,$(ulp-wrappers-dir),EMIT_ULP_WRAPPERS)) +$(eval $(call emit_sig,$(mathbench-funcs-dir),EMIT_MATHBENCH_FUNCS)) + +ulp-funcs-gen = build/include/test/ulp_funcs_gen.h +ulp-wrappers-gen = build/include/test/ulp_wrappers_gen.h +mathbench-funcs-gen = build/include/test/mathbench_funcs_gen.h +math-tools-autogen-headers = $(ulp-funcs-gen) $(ulp-wrappers-gen) $(mathbench-funcs-gen) + +$(ulp-funcs-gen): $(ulp-funcs) | $$(@D) +$(ulp-wrappers-gen): $(ulp-wrappers) | $$(@D) +$(mathbench-funcs-gen): $(mathbench-funcs) | $$(@D) + +$(math-tools-autogen-headers): | $$(@D) + cat $^ | sort -u > $@ + +$(math-build-dir)/test/mathbench.o: $(mathbench-funcs-gen) +$(math-build-dir)/test/ulp.o: $(math-src-dir)/test/ulp.h $(ulp-funcs-gen) $(ulp-wrappers-gen) build/lib/libmathlib.so: $(math-lib-objs:%.o=%.os) $(CC) $(CFLAGS_ALL) $(LDFLAGS) -shared -o $@ $^ build/lib/libmathlib.a: $(math-lib-objs) rm -f $@ $(AR) rc $@ $^ $(RANLIB) $@ -$(math-host-tools): HOST_LDLIBS += -lm -lmpfr -lmpc -$(math-tools): LDLIBS += $(math-ldlibs) -lm -# math-sve-cflags should be empty if WANT_SVE_MATH is not enabled -$(math-tools): CFLAGS_ALL += $(math-sve-cflags) +$(math-host-tools): HOST_LDLIBS += $(libm-libs) $(mpfr-libs) $(mpc-libs) +$(math-tools): LDLIBS += $(math-ldlibs) $(libm-libs) + +ifneq ($(OS),Darwin) + $(math-tools): LDFLAGS += -static +endif build/bin/rtest: $(math-host-objs) $(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ $^ 
$(HOST_LDLIBS) -build/bin/mathtest: $(B)/test/mathtest.o build/lib/libmathlib.a - $(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS) +build/bin/mathtest: $(math-build-dir)/test/mathtest.o build/lib/libmathlib.a + $(CC) $(CFLAGS_ALL) $(LDFLAGS) -o $@ $^ $(libm-libs) -build/bin/mathbench: $(B)/test/mathbench.o build/lib/libmathlib.a - $(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS) +build/bin/mathbench: $(math-build-dir)/test/mathbench.o build/lib/libmathlib.a + $(CC) $(CFLAGS_ALL) $(LDFLAGS) -o $@ $^ $(libm-libs) # This is not ideal, but allows custom symbols in mathbench to get resolved. -build/bin/mathbench_libc: $(B)/test/mathbench.o build/lib/libmathlib.a - $(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $< $(LDLIBS) -lc build/lib/libmathlib.a -lm - -build/bin/ulp: $(B)/test/ulp.o build/lib/libmathlib.a - $(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS) +build/bin/mathbench_libc: $(math-build-dir)/test/mathbench.o build/lib/libmathlib.a + $(CC) $(CFLAGS_ALL) $(LDFLAGS) -o $@ $< $(libm-libs) $(libc-libs) build/lib/libmathlib.a $(libm-libs) -build/include/%.h: $(S)/include/%.h - cp $< $@ +build/bin/ulp: $(math-build-dir)/test/ulp.o build/lib/libmathlib.a + $(CC) $(CFLAGS_ALL) $(LDFLAGS) -o $@ $^ $(LDLIBS) -build/include/test/%.h: $(S)/test/%.h +build/include/%.h: $(math-src-dir)/include/%.h cp $< $@ -build/bin/%.sh: $(S)/test/%.sh +build/bin/%.sh: $(math-src-dir)/test/%.sh cp $< $@ -math-tests := $(wildcard $(S)/test/testcases/directed/*.tst) -math-rtests := $(wildcard $(S)/test/testcases/random/*.tst) +math-tests := $(wildcard $(math-src-dir)/test/testcases/directed/*.tst) +ifneq ($(WANT_EXP10_TESTS),1) +math-tests := $(filter-out %exp10.tst, $(math-tests)) +endif +math-rtests := $(wildcard $(math-src-dir)/test/testcases/random/*.tst) check-math-test: $(math-tools) cat $(math-tests) | $(EMULATOR) build/bin/mathtest $(math-testflags) check-math-rtest: $(math-host-tools) $(math-tools) cat $(math-rtests) | build/bin/rtest | $(EMULATOR) build/bin/mathtest $(math-testflags) +ulp-input-dir = $(math-build-dir)/test/inputs +$(ulp-input-dir) $(ulp-input-dir)/$(ARCH) $(ulp-input-dir)/aarch64/sve $(ulp-input-dir)/aarch64/advsimd \ +$(ulp-input-dir)/aarch64/experimental $(ulp-input-dir)/aarch64/experimental/advsimd $(ulp-input-dir)/aarch64/experimental/sve: + mkdir -p $@ + +math-lib-lims = $(patsubst $(math-src-dir)/%.c,$(ulp-input-dir)/%.ulp,$(math-lib-srcs)) +math-lib-lims-nn = $(patsubst $(math-src-dir)/%.c,$(ulp-input-dir)/%.ulp_nn,$(math-lib-srcs)) +math-lib-fenvs = $(patsubst $(math-src-dir)/%.c,$(ulp-input-dir)/%.fenv,$(math-lib-srcs)) +math-lib-itvs = $(patsubst $(math-src-dir)/%.c,$(ulp-input-dir)/%.itv,$(math-lib-srcs)) +math-lib-cvals = $(patsubst $(math-src-dir)/%.c,$(ulp-input-dir)/%.cval,$(math-lib-srcs)) + +ulp-inputs = $(math-lib-lims) $(math-lib-lims-nn) $(math-lib-fenvs) $(math-lib-itvs) $(math-lib-cvals) +$(ulp-inputs): CFLAGS = -I$(math-src-dir)/test -I$(math-src-dir)/include -I$(math-src-dir) $(math-cflags)\ + -I$(math-src-dir)/aarch64/advsimd -I$(math-src-dir)/aarch64/sve + +$(ulp-input-dir)/%.ulp.i: $(math-src-dir)/%.c | $$(@D) + $(CC) $(CFLAGS) $< -E -o $@ + +$(ulp-input-dir)/%.ulp: $(ulp-input-dir)/%.ulp.i + { grep "TEST_ULP " $< || true; } > $@ + +$(ulp-input-dir)/%.ulp_nn.i: $(math-src-dir)/%.c | $$(@D) + $(CC) $(CFLAGS) $< -E -o $@ + +$(ulp-input-dir)/%.ulp_nn: $(ulp-input-dir)/%.ulp_nn.i + { grep "TEST_ULP_NONNEAREST " $< || true; } > $@ + +$(ulp-input-dir)/%.fenv.i: $(math-src-dir)/%.c | $$(@D) + $(CC) $(CFLAGS) $< -E -o $@ + 
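+# The remaining inputs (fenv, itv, cval) are extracted just like the ulp limits above: each source is run through the preprocessor only (-E), which leaves the TEST_* annotations in the generated .i file, and a grep then collects one kind of directive per input file; the aggregate rules further below strip the directive keyword and deduplicate the collected lines.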
+$(ulp-input-dir)/%.fenv: $(ulp-input-dir)/%.fenv.i + { grep "TEST_DISABLE_FENV " $< || true; } > $@ + +$(ulp-input-dir)/%.itv.i: $(math-src-dir)/%.c | $$(@D) + $(CC) $(CFLAGS) $< -E -o $@ + +$(ulp-input-dir)/%.itv: $(ulp-input-dir)/%.itv.i + { grep "TEST_INTERVAL " $< || true; } | sed "s/ TEST_INTERVAL/\nTEST_INTERVAL/g" > $@ + +$(ulp-input-dir)/%.cval.i: $(math-src-dir)/%.c | $$(@D) + $(CC) $(CFLAGS) $< -E -o $@ + +$(ulp-input-dir)/%.cval: $(ulp-input-dir)/%.cval.i + { grep "TEST_CONTROL_VALUE " $< || true; } > $@ + +ulp-lims = $(ulp-input-dir)/limits +$(ulp-lims): $(math-lib-lims) + +ulp-lims-nn = $(ulp-input-dir)/limits_nn +$(ulp-lims-nn): $(math-lib-lims-nn) + +fenv-exps := $(ulp-input-dir)/fenv +$(fenv-exps): $(math-lib-fenvs) + +generic-itvs = $(ulp-input-dir)/itvs +$(generic-itvs): $(filter-out $(ulp-input-dir)/$(ARCH)/%,$(math-lib-itvs)) + +arch-itvs = $(ulp-input-dir)/$(ARCH)/itvs +$(arch-itvs): $(filter $(ulp-input-dir)/$(ARCH)/%,$(math-lib-itvs)) + +ulp-cvals := $(ulp-input-dir)/cvals +$(ulp-cvals): $(math-lib-cvals) + +# Remove the first word, which will be the TEST directive. +$(ulp-lims) $(ulp-lims-nn) $(fenv-exps) $(arch-itvs) $(generic-itvs) $(ulp-cvals): | $$(@D) + sed "s/TEST_[^ ]* //g" $^ | sort -u > $@ + +check-math-ulp: $(ulp-lims) $(ulp-lims-nn) +check-math-ulp: $(fenv-exps) $(ulp-cvals) +check-math-ulp: $(generic-itvs) $(arch-itvs) check-math-ulp: $(math-tools) - ULPFLAGS="$(math-ulpflags)" WANT_SIMD_EXCEPT="$(WANT_SIMD_EXCEPT)" build/bin/runulp.sh $(EMULATOR) + ULPFLAGS="$(math-ulpflags)" \ + LIMITS=../../$(ulp-lims) \ + ARCH_ITVS=../../$(arch-itvs) \ + GEN_ITVS=../../$(generic-itvs) \ + DISABLE_FENV=../../$(fenv-exps) \ + CVALS=../../$(ulp-cvals) \ + FUNC=$(func) \ + WANT_EXPERIMENTAL_MATH=$(WANT_EXPERIMENTAL_MATH) \ + WANT_SVE_TESTS=$(WANT_SVE_TESTS) \ + USE_MPFR=$(USE_MPFR) \ + build/bin/runulp.sh $(EMULATOR) check-math: check-math-test check-math-rtest check-math-ulp install-math: \ $(math-libs:build/lib/%=$(DESTDIR)$(libdir)/%) \ $(math-includes:build/include/%=$(DESTDIR)$(includedir)/%) clean-math: rm -f $(math-files) .PHONY: all-math check-math-test check-math-rtest check-math-ulp check-math install-math clean-math diff --git a/contrib/arm-optimized-routines/math/README.contributors b/contrib/arm-optimized-routines/math/README.contributors index 33e7ba376e41..58a04fa4759d 100644 --- a/contrib/arm-optimized-routines/math/README.contributors +++ b/contrib/arm-optimized-routines/math/README.contributors @@ -1,78 +1,79 @@ STYLE REQUIREMENTS ================== -1. Most code in this sub-directory is expected to be upstreamed into glibc so - the GNU Coding Standard and glibc specific conventions should be followed +1. With the exception of math/aarch64/experimental/, most code in this + sub-directory is expected to be upstreamed into glibc so the GNU + Coding Standard and glibc-specific conventions should be followed to ease upstreaming. 2. ABI and symbols: the code should be written so it is suitable for inclusion into a libc with minimal changes. This e.g. means that internal symbols should be hidden and in the implementation-reserved namespace according to ISO C and POSIX rules. If possible the built shared libraries and static library archives should be usable to override libc symbols at link time (or at runtime via LD_PRELOAD). This requires the symbols to follow the glibc ABI (other than symbol versioning); this cannot be done reliably for static linking, so this is a best-effort requirement. 3.
API: include headers should be suitable for benchmarking and testing code and should not conflict with libc headers. CONTRIBUTION GUIDELINES FOR math SUB-DIRECTORY ============================================== 1. Math functions have quality and performance requirements. 2. Quality: - Worst-case ULP error should be small in the entire input domain (for the most common double-precision scalar functions the target is < 0.66 ULP error, and < 1 ULP for single precision; even a performance-optimized function variant should not have > 5 ULP error if the goal is to be a drop-in replacement for a standard math function); this should be tested statistically (or on all inputs if possible in a reasonable amount of time). The ulp tool is for this, and runulp.sh should be updated for new functions. - All standard rounding modes need to be supported, but in non-default rounding modes the quality requirement can be relaxed. (Non-nearest rounded computation can be slow and inaccurate but has to be correct for conformance reasons.) - Special cases and error handling need to follow ISO C Annex F requirements, POSIX requirements, IEEE 754-2008 requirements and Glibc requirements: https://www.gnu.org/software/libc/manual/html_mono/libc.html#Errors-in-Math-Functions; this should be tested by direct tests (the glibc test system may be used for it). - Error handling code should be decoupled from the approximation code as much as possible. (There are helper functions; these take care of errno as well as exception raising.) - Vector math code does not need to work in non-nearest rounding mode, and error handling side effects need not happen (fenv exceptions and errno), but the result should be correct (within quality requirements, which are lower for vector code than for scalar code). - Error bounds of the approximation should be clearly documented. - The code should build and pass tests on arm, aarch64 and x86_64 GNU/Linux systems. (Routines and features can be disabled on specific targets, but the build must complete.) On aarch64, both little- and big-endian targets are supported as well as valid combinations of architecture extensions. The configurations that should be tested depend on the contribution. 3. Performance: - Common math code should be benchmarked on modern aarch64 microarchitectures over typical inputs. - Performance improvements should be documented (relative numbers can be published; it is enough to use the mathbench microbenchmark tool, which should be updated for new functions). - Attention should be paid to the compilation flags: for aarch64, fma contraction should be on and math errno turned off so some builtins can be inlined. - The code should be reasonably performant on x86_64 too; e.g. some rounding instructions and fma may not be available on x86_64, and such builtins turn into libc calls with slow code. Such a slowdown is not acceptable; a faster fallback should be present: glibc and bionic use the same code on all targets. (This does not apply to vector math code.) diff --git a/contrib/arm-optimized-routines/pl/math/v_acos_2u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/acos.c similarity index 85% rename from contrib/arm-optimized-routines/pl/math/v_acos_2u.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/acos.c index 581f8506c0d6..7873a07e6f56 100644 --- a/contrib/arm-optimized-routines/pl/math/v_acos_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/acos.c @@ -1,122 +1,122 @@ /* * Double-precision vector acos(x) function. * - * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "poly_advsimd_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "v_poly_f64.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float64x2_t poly[12]; float64x2_t pi, pi_over_2; uint64x2_t abs_mask; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */ .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4), V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6), V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6), V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7), V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6), V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), }, .pi = V2 (0x1.921fb54442d18p+1), .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff), }; #define AllMask v_u64 (0xffffffffffffffff) -#define Oneu (0x3ff0000000000000) -#define Small (0x3e50000000000000) /* 2^-53. */ +#define Oneu 0x3ff0000000000000 +#define Small 0x3e50000000000000 /* 2^-53. */ #if WANT_SIMD_EXCEPT static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t special) { return v_call_f64 (acos, x, y, special); } #endif /* Double-precision implementation of vector acos(x). For |x| < Small, approximate acos(x) by pi/2 - x. Small = 2^-53 for correct rounding. If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the following approximation. For |x| in [Small, 0.5], use an order 11 polynomial P such that the final approximation of asin is an odd polynomial: acos(x) ~ pi/2 - (x + x^3 P(x^2)). The largest observed error in this region is 1.18 ulps, _ZGVnN2v_acos (0x1.fbab0a7c460f6p-2) got 0x1.0d54d1985c068p+0 want 0x1.0d54d1985c069p+0. For |x| in [0.5, 1.0], use same approximation with a change of variable acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). The largest observed error in this region is 1.52 ulps, _ZGVnN2v_acos (0x1.23d362722f591p-1) got 0x1.edbbedf8a7d6ep-1 want 0x1.edbbedf8a7d6cp-1. */ float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x) { const struct data *d = ptr_barrier (&data); float64x2_t ax = vabsq_f64 (x); #if WANT_SIMD_EXCEPT /* A single comparison for One, Small and QNaN. */ uint64x2_t special = vcgtq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (ax), v_u64 (Small)), v_u64 (Oneu - Small)); if (unlikely (v_any_u64 (special))) return special_case (x, x, AllMask); #endif uint64x2_t a_le_half = vcleq_f64 (ax, v_f64 (0.5)); /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with z2 = x ^ 2 and z = |x| , if |x| < 0.5 z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ float64x2_t z2 = vbslq_f64 (a_le_half, vmulq_f64 (x, x), vfmaq_f64 (v_f64 (0.5), v_f64 (-0.5), ax)); float64x2_t z = vbslq_f64 (a_le_half, ax, vsqrtq_f64 (z2)); /* Use a single polynomial approximation P for both intervals. */ float64x2_t z4 = vmulq_f64 (z2, z2); float64x2_t z8 = vmulq_f64 (z4, z4); float64x2_t z16 = vmulq_f64 (z8, z8); float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly); /* Finalize polynomial: z + z * z2 * P(z2). */ p = vfmaq_f64 (z, vmulq_f64 (z, z2), p); /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 = 2 Q(|x|) , for 0.5 < x < 1.0 = pi - 2 Q(|x|) , for -1.0 < x < -0.5. 
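The three cases are selected branchlessly below: y = sign(x) * Q(|x|), off = pi masked by the sign bit of x, mul = -1 if |x| <= 0.5 and 2 otherwise, add = pi/2 if |x| <= 0.5 and off otherwise, so the final fma computes add + mul * y, which reduces to the three expressions above.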
*/ float64x2_t y = vbslq_f64 (d->abs_mask, p, x); uint64x2_t is_neg = vcltzq_f64 (x); float64x2_t off = vreinterpretq_f64_u64 ( vandq_u64 (is_neg, vreinterpretq_u64_f64 (d->pi))); float64x2_t mul = vbslq_f64 (a_le_half, v_f64 (-1.0), v_f64 (2.0)); float64x2_t add = vbslq_f64 (a_le_half, d->pi_over_2, off); return vfmaq_f64 (add, mul, y); } -PL_SIG (V, D, 1, acos, -1.0, 1.0) -PL_TEST_ULP (V_NAME_D1 (acos), 1.02) -PL_TEST_EXPECT_FENV (V_NAME_D1 (acos), WANT_SIMD_EXCEPT) -PL_TEST_INTERVAL (V_NAME_D1 (acos), 0, Small, 5000) -PL_TEST_INTERVAL (V_NAME_D1 (acos), Small, 0.5, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (acos), 0.5, 1.0, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (acos), 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (acos), 0x1p11, inf, 20000) -PL_TEST_INTERVAL (V_NAME_D1 (acos), -0, -inf, 20000) +TEST_SIG (V, D, 1, acos, -1.0, 1.0) +TEST_ULP (V_NAME_D1 (acos), 1.02) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (acos), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_D1 (acos), 0, Small, 5000) +TEST_INTERVAL (V_NAME_D1 (acos), Small, 0.5, 50000) +TEST_INTERVAL (V_NAME_D1 (acos), 0.5, 1.0, 50000) +TEST_INTERVAL (V_NAME_D1 (acos), 1.0, 0x1p11, 50000) +TEST_INTERVAL (V_NAME_D1 (acos), 0x1p11, inf, 20000) +TEST_INTERVAL (V_NAME_D1 (acos), -0, -inf, 20000) diff --git a/contrib/arm-optimized-routines/pl/math/v_acosf_1u4.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/acosf.c similarity index 82% rename from contrib/arm-optimized-routines/pl/math/v_acosf_1u4.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/acosf.c index bb17b1df18f3..e200f792c764 100644 --- a/contrib/arm-optimized-routines/pl/math/v_acosf_1u4.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/acosf.c @@ -1,113 +1,115 @@ /* * Single-precision vector acos(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "poly_advsimd_f32.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "v_poly_f32.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float32x4_t poly[5]; float32x4_t pi_over_2f, pif; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */ .poly = { V4 (0x1.55555ep-3), V4 (0x1.33261ap-4), V4 (0x1.70d7dcp-5), V4 (0x1.b059dp-6), V4 (0x1.3af7d8p-5) }, .pi_over_2f = V4 (0x1.921fb6p+0f), .pif = V4 (0x1.921fb6p+1f), }; #define AbsMask 0x7fffffff #define Half 0x3f000000 #define One 0x3f800000 #define Small 0x32800000 /* 2^-26. */ #if WANT_SIMD_EXCEPT static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t special) { return v_call_f32 (acosf, x, y, special); } #endif /* Single-precision implementation of vector acos(x). For |x| < Small, approximate acos(x) by pi/2 - x. Small = 2^-26 for correct rounding. If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the following approximation. For |x| in [Small, 0.5], use order 4 polynomial P such that the final approximation of asin is an odd polynomial: acos(x) ~ pi/2 - (x + x^3 P(x^2)). The largest observed error in this region is 1.26 ulps, _ZGVnN4v_acosf (0x1.843bfcp-2) got 0x1.2e934cp+0 want 0x1.2e934ap+0. For |x| in [0.5, 1.0], use same approximation with a change of variable acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). The largest observed error in this region is 1.32 ulps, _ZGVnN4v_acosf (0x1.15ba56p-1) got 0x1.feb33p-1 - want 0x1.feb32ep-1. 
*/ -float32x4_t VPCS_ATTR V_NAME_F1 (acos) (float32x4_t x) + want 0x1.feb32ep-1. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (acos) (float32x4_t x) { const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); uint32x4_t ia = vandq_u32 (ix, v_u32 (AbsMask)); #if WANT_SIMD_EXCEPT /* A single comparison for One, Small and QNaN. */ uint32x4_t special = vcgtq_u32 (vsubq_u32 (ia, v_u32 (Small)), v_u32 (One - Small)); if (unlikely (v_any_u32 (special))) return special_case (x, x, v_u32 (0xffffffff)); #endif float32x4_t ax = vreinterpretq_f32_u32 (ia); uint32x4_t a_le_half = vcleq_u32 (ia, v_u32 (Half)); /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with z2 = x ^ 2 and z = |x| , if |x| < 0.5 z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ float32x4_t z2 = vbslq_f32 (a_le_half, vmulq_f32 (x, x), vfmsq_n_f32 (v_f32 (0.5), ax, 0.5)); float32x4_t z = vbslq_f32 (a_le_half, ax, vsqrtq_f32 (z2)); /* Use a single polynomial approximation P for both intervals. */ float32x4_t p = v_horner_4_f32 (z2, d->poly); /* Finalize polynomial: z + z * z2 * P(z2). */ p = vfmaq_f32 (z, vmulq_f32 (z, z2), p); /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 = 2 Q(|x|) , for 0.5 < x < 1.0 = pi - 2 Q(|x|) , for -1.0 < x < -0.5. */ float32x4_t y = vbslq_f32 (v_u32 (AbsMask), p, x); uint32x4_t is_neg = vcltzq_f32 (x); float32x4_t off = vreinterpretq_f32_u32 ( vandq_u32 (vreinterpretq_u32_f32 (d->pif), is_neg)); float32x4_t mul = vbslq_f32 (a_le_half, v_f32 (-1.0), v_f32 (2.0)); float32x4_t add = vbslq_f32 (a_le_half, d->pi_over_2f, off); return vfmaq_f32 (add, mul, y); } -PL_SIG (V, F, 1, acos, -1.0, 1.0) -PL_TEST_ULP (V_NAME_F1 (acos), 0.82) -PL_TEST_EXPECT_FENV (V_NAME_F1 (acos), WANT_SIMD_EXCEPT) -PL_TEST_INTERVAL (V_NAME_F1 (acos), 0, 0x1p-26, 5000) -PL_TEST_INTERVAL (V_NAME_F1 (acos), 0x1p-26, 0.5, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (acos), 0.5, 1.0, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (acos), 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (acos), 0x1p11, inf, 20000) -PL_TEST_INTERVAL (V_NAME_F1 (acos), -0, -inf, 20000) +HALF_WIDTH_ALIAS_F1 (acos) + +TEST_SIG (V, F, 1, acos, -1.0, 1.0) +TEST_ULP (V_NAME_F1 (acos), 0.82) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (acos), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_F1 (acos), 0, 0x1p-26, 5000) +TEST_INTERVAL (V_NAME_F1 (acos), 0x1p-26, 0.5, 50000) +TEST_INTERVAL (V_NAME_F1 (acos), 0.5, 1.0, 50000) +TEST_INTERVAL (V_NAME_F1 (acos), 1.0, 0x1p11, 50000) +TEST_INTERVAL (V_NAME_F1 (acos), 0x1p11, inf, 20000) +TEST_INTERVAL (V_NAME_F1 (acos), -0, -inf, 20000) diff --git a/contrib/arm-optimized-routines/pl/math/v_acosh_3u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/acosh.c similarity index 72% rename from contrib/arm-optimized-routines/pl/math/v_acosh_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/acosh.c index 42fa2616d562..55d8ed5a421e 100644 --- a/contrib/arm-optimized-routines/pl/math/v_acosh_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/acosh.c @@ -1,66 +1,65 @@ /* - * Single-precision vector acosh(x) function. - * Copyright (c) 2023, Arm Limited. + * Double-precision vector acosh(x) function. + * Copyright (c) 2023-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define WANT_V_LOG1P_K0_SHORTCUT 1 #include "v_log1p_inline.h" const static struct data { struct v_log1p_data log1p_consts; uint64x2_t one, thresh; } data = { .log1p_consts = V_LOG1P_CONSTANTS_TABLE, .one = V2 (0x3ff0000000000000), .thresh = V2 (0x1ff0000000000000) /* asuint64(0x1p511) - asuint64(1). */ }; static float64x2_t NOINLINE VPCS_ATTR special_case (float64x2_t x, float64x2_t y, uint64x2_t special, const struct v_log1p_data *d) { return v_call_f64 (acosh, x, log1p_inline (y, d), special); } /* Vector approximation for double-precision acosh, based on log1p. The largest observed error is 3.02 ULP in the region where the argument to log1p falls in the k=0 interval, i.e. x close to 1: _ZGVnN2v_acosh(0x1.00798aaf80739p+0) got 0x1.f2d6d823bc9dfp-5 want 0x1.f2d6d823bc9e2p-5. */ VPCS_ATTR float64x2_t V_NAME_D1 (acosh) (float64x2_t x) { const struct data *d = ptr_barrier (&data); uint64x2_t special = vcgeq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (x), d->one), d->thresh); float64x2_t special_arg = x; #if WANT_SIMD_EXCEPT if (unlikely (v_any_u64 (special))) x = vbslq_f64 (special, vreinterpretq_f64_u64 (d->one), x); #endif - float64x2_t xm1 = vsubq_f64 (x, v_f64 (1)); - float64x2_t y; - y = vaddq_f64 (x, v_f64 (1)); + float64x2_t xm1 = vsubq_f64 (x, v_f64 (1.0)); + float64x2_t y = vaddq_f64 (x, v_f64 (1.0)); y = vmulq_f64 (y, xm1); y = vsqrtq_f64 (y); y = vaddq_f64 (xm1, y); if (unlikely (v_any_u64 (special))) return special_case (special_arg, y, special, &d->log1p_consts); return log1p_inline (y, &d->log1p_consts); } -PL_SIG (V, D, 1, acosh, 1.0, 10.0) -PL_TEST_ULP (V_NAME_D1 (acosh), 2.53) -PL_TEST_EXPECT_FENV (V_NAME_D1 (acosh), WANT_SIMD_EXCEPT) -PL_TEST_INTERVAL (V_NAME_D1 (acosh), 1, 0x1p511, 90000) -PL_TEST_INTERVAL (V_NAME_D1 (acosh), 0x1p511, inf, 10000) -PL_TEST_INTERVAL (V_NAME_D1 (acosh), 0, 1, 1000) -PL_TEST_INTERVAL (V_NAME_D1 (acosh), -0, -inf, 10000) +TEST_SIG (V, D, 1, acosh, 1.0, 10.0) +TEST_ULP (V_NAME_D1 (acosh), 2.53) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (acosh), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_D1 (acosh), 1, 0x1p511, 90000) +TEST_INTERVAL (V_NAME_D1 (acosh), 0x1p511, inf, 10000) +TEST_INTERVAL (V_NAME_D1 (acosh), 0, 1, 1000) +TEST_INTERVAL (V_NAME_D1 (acosh), -0, -inf, 10000) diff --git a/contrib/arm-optimized-routines/pl/math/v_acoshf_3u1.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/acoshf.c similarity index 50% rename from contrib/arm-optimized-routines/pl/math/v_acoshf_3u1.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/acoshf.c index a2ff0f02635b..029d457cfa8a 100644 --- a/contrib/arm-optimized-routines/pl/math/v_acoshf_3u1.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/acoshf.c @@ -1,78 +1,78 @@ /* * Single-precision vector acosh(x) function. - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #include "v_log1pf_inline.h" +#define SquareLim 0x1p64 + const static struct data { struct v_log1pf_data log1pf_consts; uint32x4_t one; - uint16x4_t thresh; -} data = { - .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, - .one = V4 (0x3f800000), - .thresh = V4 (0x2000) /* asuint(0x1p64) - asuint(1). 
*/ -}; +} data = { .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .one = V4 (0x3f800000) }; -#define SignMask 0x80000000 +#define Thresh vdup_n_u16 (0x2000) /* top(asuint(SquareLim) - asuint(1)). */ static float32x4_t NOINLINE VPCS_ATTR special_case (float32x4_t x, float32x4_t y, uint16x4_t special, - const struct v_log1pf_data d) + const struct v_log1pf_data *d) { return v_call_f32 (acoshf, x, log1pf_inline (y, d), vmovl_u16 (special)); } /* Vector approximation for single-precision acosh, based on log1p. Maximum error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it - is 2.78 ULP: - __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 - want 0x1.ef9ea2p-3. + is 3.00 ULP: + _ZGVnN4v_acoshf(0x1.01df3ap+0) got 0x1.ef0a82p-4 + want 0x1.ef0a7cp-4. With exceptions disabled, we can compute u with a shorter dependency chain, - which gives maximum error of 3.07 ULP: - __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4 - want 0x1.fbc7f4p-4. */ + which gives maximum error of 3.22 ULP: + _ZGVnN4v_acoshf(0x1.007ef2p+0) got 0x1.fdcdccp-5 + want 0x1.fdcdd2p-5. */ -VPCS_ATTR float32x4_t V_NAME_F1 (acosh) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (acosh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), d->thresh); + uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), Thresh); #if WANT_SIMD_EXCEPT /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use only xm1 to calculate u, as operating on x will trigger invalid for NaN. Widening sign-extend special predicate in order to mask with it. */ uint32x4_t p = vreinterpretq_u32_s32 (vmovl_s16 (vreinterpret_s16_u16 (special))); float32x4_t xm1 = v_zerofy_f32 (vsubq_f32 (x, v_f32 (1)), p); float32x4_t u = vfmaq_f32 (vaddq_f32 (xm1, xm1), xm1, xm1); #else - float32x4_t xm1 = vsubq_f32 (x, v_f32 (1)); - float32x4_t u = vmulq_f32 (xm1, vaddq_f32 (x, v_f32 (1.0f))); + float32x4_t xm1 = vsubq_f32 (x, vreinterpretq_f32_u32 (d->one)); + float32x4_t u + = vmulq_f32 (xm1, vaddq_f32 (x, vreinterpretq_f32_u32 (d->one))); #endif float32x4_t y = vaddq_f32 (xm1, vsqrtq_f32 (u)); if (unlikely (v_any_u16h (special))) - return special_case (x, y, special, d->log1pf_consts); - return log1pf_inline (y, d->log1pf_consts); + return special_case (x, y, special, &d->log1pf_consts); + return log1pf_inline (y, &d->log1pf_consts); } -PL_SIG (V, F, 1, acosh, 1.0, 10.0) +HALF_WIDTH_ALIAS_F1 (acosh) + +TEST_SIG (V, F, 1, acosh, 1.0, 10.0) #if WANT_SIMD_EXCEPT -PL_TEST_ULP (V_NAME_F1 (acosh), 2.29) +TEST_ULP (V_NAME_F1 (acosh), 2.50) #else -PL_TEST_ULP (V_NAME_F1 (acosh), 2.58) +TEST_ULP (V_NAME_F1 (acosh), 2.78) #endif -PL_TEST_EXPECT_FENV (V_NAME_F1 (acosh), WANT_SIMD_EXCEPT) -PL_TEST_INTERVAL (V_NAME_F1 (acosh), 0, 1, 500) -PL_TEST_INTERVAL (V_NAME_F1 (acosh), 1, SquareLim, 100000) -PL_TEST_INTERVAL (V_NAME_F1 (acosh), SquareLim, inf, 1000) -PL_TEST_INTERVAL (V_NAME_F1 (acosh), -0, -inf, 1000) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (acosh), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_F1 (acosh), 0, 1, 500) +TEST_INTERVAL (V_NAME_F1 (acosh), 1, SquareLim, 100000) +TEST_INTERVAL (V_NAME_F1 (acosh), SquareLim, inf, 1000) +TEST_INTERVAL (V_NAME_F1 (acosh), -0, -inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/v_asin_3u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/asin.c similarity index 56% rename from contrib/arm-optimized-routines/pl/math/v_asin_3u.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/asin.c index 
756443c6b320..c751d9264a12 100644 --- a/contrib/arm-optimized-routines/pl/math/v_asin_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/asin.c @@ -1,113 +1,130 @@ /* * Double-precision vector asin(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "poly_advsimd_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { - float64x2_t poly[12]; + float64x2_t c0, c2, c4, c6, c8, c10; float64x2_t pi_over_2; uint64x2_t abs_mask; + double c1, c3, c5, c7, c9, c11; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */ - .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4), - V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6), - V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6), - V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7), - V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6), - V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), }, - .pi_over_2 = V2 (0x1.921fb54442d18p+0), - .abs_mask = V2 (0x7fffffffffffffff), + .c0 = V2 (0x1.555555555554ep-3), .c1 = 0x1.3333333337233p-4, + .c2 = V2 (0x1.6db6db67f6d9fp-5), .c3 = 0x1.f1c71fbd29fbbp-6, + .c4 = V2 (0x1.6e8b264d467d6p-6), .c5 = 0x1.1c5997c357e9dp-6, + .c6 = V2 (0x1.c86a22cd9389dp-7), .c7 = 0x1.856073c22ebbep-7, + .c8 = V2 (0x1.fd1151acb6bedp-8), .c9 = 0x1.087182f799c1dp-6, + .c10 = V2 (-0x1.6602748120927p-7), .c11 = 0x1.cfa0dd1f9478p-6, + .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff), }; #define AllMask v_u64 (0xffffffffffffffff) -#define One (0x3ff0000000000000) -#define Small (0x3e50000000000000) /* 2^-12. */ +#define One 0x3ff0000000000000 +#define Small 0x3e50000000000000 /* 2^-12. */ #if WANT_SIMD_EXCEPT static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t special) { return v_call_f64 (asin, x, y, special); } #endif /* Double-precision implementation of vector asin(x). For |x| < Small, approximate asin(x) by x. Small = 2^-12 for correct rounding. If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the following approximation. For |x| in [Small, 0.5], use an order 11 polynomial P such that the final approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). The largest observed error in this region is 1.01 ulps, _ZGVnN2v_asin (0x1.da9735b5a9277p-2) got 0x1.ed78525a927efp-2 want 0x1.ed78525a927eep-2. For |x| in [0.5, 1.0], use same approximation with a change of variable asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). The largest observed error in this region is 2.69 ulps, - _ZGVnN2v_asin (0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1 - want 0x1.110d7e85fdd53p-1. */ + _ZGVnN2v_asin (0x1.044e8cefee301p-1) got 0x1.1111dd54ddf96p-1 + want 0x1.1111dd54ddf99p-1. */ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x) { const struct data *d = ptr_barrier (&data); - float64x2_t ax = vabsq_f64 (x); #if WANT_SIMD_EXCEPT /* Special values need to be computed with scalar fallbacks so that appropriate exceptions are raised. 
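The single unsigned compare below catches all of them: subtracting Small from the bit pattern of |x| wraps inputs below Small around to very large values, so the result exceeds One - Small exactly when |x| < Small, |x| > 1, or x is NaN.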
*/ uint64x2_t special = vcgtq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (ax), v_u64 (Small)), v_u64 (One - Small)); if (unlikely (v_any_u64 (special))) return special_case (x, x, AllMask); #endif - uint64x2_t a_lt_half = vcltq_f64 (ax, v_f64 (0.5)); + uint64x2_t a_lt_half = vcaltq_f64 (x, v_f64 (0.5)); /* Evaluate polynomial Q(x) = y + y * z * P(z) with z = x ^ 2 and y = |x| , if |x| < 0.5 z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */ float64x2_t z2 = vbslq_f64 (a_lt_half, vmulq_f64 (x, x), vfmsq_n_f64 (v_f64 (0.5), ax, 0.5)); float64x2_t z = vbslq_f64 (a_lt_half, ax, vsqrtq_f64 (z2)); /* Use a single polynomial approximation P for both intervals. */ float64x2_t z4 = vmulq_f64 (z2, z2); float64x2_t z8 = vmulq_f64 (z4, z4); float64x2_t z16 = vmulq_f64 (z8, z8); - float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly); + + /* order-11 estrin. */ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, z4, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, z4, p67); + + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); + + float64x2_t p07 = vfmaq_f64 (p03, z8, p47); + float64x2_t p = vfmaq_f64 (p07, z16, p811); /* Finalize polynomial: z + z * z2 * P(z2). */ p = vfmaq_f64 (z, vmulq_f64 (z, z2), p); /* asin(|x|) = Q(|x|) , for |x| < 0.5 = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ float64x2_t y = vbslq_f64 (a_lt_half, p, vfmsq_n_f64 (d->pi_over_2, p, 2.0)); /* Copy sign. */ return vbslq_f64 (d->abs_mask, y, x); } -PL_SIG (V, D, 1, asin, -1.0, 1.0) -PL_TEST_ULP (V_NAME_D1 (asin), 2.19) -PL_TEST_EXPECT_FENV (V_NAME_D1 (asin), WANT_SIMD_EXCEPT) -PL_TEST_INTERVAL (V_NAME_D1 (asin), 0, Small, 5000) -PL_TEST_INTERVAL (V_NAME_D1 (asin), Small, 0.5, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (asin), 0.5, 1.0, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (asin), 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (asin), 0x1p11, inf, 20000) -PL_TEST_INTERVAL (V_NAME_D1 (asin), -0, -inf, 20000) +TEST_SIG (V, D, 1, asin, -1.0, 1.0) +TEST_ULP (V_NAME_D1 (asin), 2.20) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (asin), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_D1 (asin), 0, Small, 5000) +TEST_INTERVAL (V_NAME_D1 (asin), Small, 0.5, 50000) +TEST_INTERVAL (V_NAME_D1 (asin), 0.5, 1.0, 50000) +TEST_INTERVAL (V_NAME_D1 (asin), 1.0, 0x1p11, 50000) +TEST_INTERVAL (V_NAME_D1 (asin), 0x1p11, inf, 20000) +TEST_INTERVAL (V_NAME_D1 (asin), -0, -inf, 20000) diff --git a/contrib/arm-optimized-routines/pl/math/v_asinf_2u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/asinf.c similarity index 82% rename from contrib/arm-optimized-routines/pl/math/v_asinf_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/asinf.c index eb978cd956ab..970feb37e1d5 100644 --- a/contrib/arm-optimized-routines/pl/math/v_asinf_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/asinf.c @@ -1,104 +1,106 @@ /* * Single-precision vector asin(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "poly_advsimd_f32.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "v_poly_f32.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float32x4_t poly[5]; float32x4_t pi_over_2f; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */ .poly = { V4 (0x1.55555ep-3), V4 (0x1.33261ap-4), V4 (0x1.70d7dcp-5), V4 (0x1.b059dp-6), V4 (0x1.3af7d8p-5) }, .pi_over_2f = V4 (0x1.921fb6p+0f), }; #define AbsMask 0x7fffffff #define Half 0x3f000000 #define One 0x3f800000 #define Small 0x39800000 /* 2^-12. */ #if WANT_SIMD_EXCEPT static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t special) { return v_call_f32 (asinf, x, y, special); } #endif /* Single-precision implementation of vector asin(x). For |x| < Small, approximate asin(x) by x. Small = 2^-12 for correct rounding. If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the following approximation. For |x| in [Small, 0.5], use order 4 polynomial P such that the final approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). The largest observed error in this region is 0.83 ulps, _ZGVnN4v_asinf (0x1.ea00f4p-2) got 0x1.fef15ep-2 want 0x1.fef15cp-2. For |x| in [0.5, 1.0], use same approximation with a change of variable asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). The largest observed error in this region is 2.41 ulps, _ZGVnN4v_asinf (0x1.00203ep-1) got 0x1.0c3a64p-1 want 0x1.0c3a6p-1. */ -float32x4_t VPCS_ATTR V_NAME_F1 (asin) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asin) (float32x4_t x) { const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); uint32x4_t ia = vandq_u32 (ix, v_u32 (AbsMask)); #if WANT_SIMD_EXCEPT /* Special values need to be computed with scalar fallbacks so that appropriate fp exceptions are raised. */ uint32x4_t special = vcgtq_u32 (vsubq_u32 (ia, v_u32 (Small)), v_u32 (One - Small)); if (unlikely (v_any_u32 (special))) return special_case (x, x, v_u32 (0xffffffff)); #endif float32x4_t ax = vreinterpretq_f32_u32 (ia); uint32x4_t a_lt_half = vcltq_u32 (ia, v_u32 (Half)); /* Evaluate polynomial Q(x) = y + y * z * P(z) with z = x ^ 2 and y = |x| , if |x| < 0.5 z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */ float32x4_t z2 = vbslq_f32 (a_lt_half, vmulq_f32 (x, x), vfmsq_n_f32 (v_f32 (0.5), ax, 0.5)); float32x4_t z = vbslq_f32 (a_lt_half, ax, vsqrtq_f32 (z2)); /* Use a single polynomial approximation P for both intervals. */ float32x4_t p = v_horner_4_f32 (z2, d->poly); /* Finalize polynomial: z + z * z2 * P(z2). */ p = vfmaq_f32 (z, vmulq_f32 (z, z2), p); /* asin(|x|) = Q(|x|) , for |x| < 0.5 = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ float32x4_t y = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0)); /* Copy sign. 
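asin is odd, so the result takes the sign of x: the bit-select below keeps the magnitude bits of y and the sign bit of x.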
*/ return vbslq_f32 (v_u32 (AbsMask), y, x); } -PL_SIG (V, F, 1, asin, -1.0, 1.0) -PL_TEST_ULP (V_NAME_F1 (asin), 1.91) -PL_TEST_EXPECT_FENV (V_NAME_F1 (asin), WANT_SIMD_EXCEPT) -PL_TEST_INTERVAL (V_NAME_F1 (asin), 0, 0x1p-12, 5000) -PL_TEST_INTERVAL (V_NAME_F1 (asin), 0x1p-12, 0.5, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (asin), 0.5, 1.0, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (asin), 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (asin), 0x1p11, inf, 20000) -PL_TEST_INTERVAL (V_NAME_F1 (asin), -0, -inf, 20000) +HALF_WIDTH_ALIAS_F1 (asin) + +TEST_SIG (V, F, 1, asin, -1.0, 1.0) +TEST_ULP (V_NAME_F1 (asin), 1.91) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (asin), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_F1 (asin), 0, 0x1p-12, 5000) +TEST_INTERVAL (V_NAME_F1 (asin), 0x1p-12, 0.5, 50000) +TEST_INTERVAL (V_NAME_F1 (asin), 0.5, 1.0, 50000) +TEST_INTERVAL (V_NAME_F1 (asin), 1.0, 0x1p11, 50000) +TEST_INTERVAL (V_NAME_F1 (asin), 0x1p11, inf, 20000) +TEST_INTERVAL (V_NAME_F1 (asin), -0, -inf, 20000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/asinh.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/asinh.c new file mode 100644 index 000000000000..550302826bd9 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/asinh.c @@ -0,0 +1,242 @@ +/* + * Double-precision vector asinh(x) function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "test_defs.h" +#include "test_sig.h" +#include "v_math.h" + +const static struct data +{ + uint64x2_t huge_bound, abs_mask, off, mask; +#if WANT_SIMD_EXCEPT + float64x2_t tiny_bound; +#endif + float64x2_t lc0, lc2; + double lc1, lc3, ln2, lc4; + + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c17; + double c1, c3, c5, c7, c9, c11, c13, c15; + +} data = { + +#if WANT_SIMD_EXCEPT + .tiny_bound = V2 (0x1p-26), +#endif + /* Even terms of polynomial s.t. asinh(x) is approximated by + asinh(x) ~= x + x^3 * (C0 + C1 * x + C2 * x^2 + C3 * x^3 + ...). + Generated using Remez, f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2). */ + + .c0 = V2 (-0x1.55555555554a7p-3), + .c1 = 0x1.3333333326c7p-4, + .c2 = V2 (-0x1.6db6db68332e6p-5), + .c3 = 0x1.f1c71b26fb40dp-6, + .c4 = V2 (-0x1.6e8b8b654a621p-6), + .c5 = 0x1.1c4daa9e67871p-6, + .c6 = V2 (-0x1.c9871d10885afp-7), + .c7 = 0x1.7a16e8d9d2ecfp-7, + .c8 = V2 (-0x1.3ddca533e9f54p-7), + .c9 = 0x1.0becef748dafcp-7, + .c10 = V2 (-0x1.b90c7099dd397p-8), + .c11 = 0x1.541f2bb1ffe51p-8, + .c12 = V2 (-0x1.d217026a669ecp-9), + .c13 = 0x1.0b5c7977aaf7p-9, + .c14 = V2 (-0x1.e0f37daef9127p-11), + .c15 = 0x1.388b5fe542a6p-12, + .c16 = V2 (-0x1.021a48685e287p-14), + .c17 = V2 (0x1.93d4ba83d34dap-18), + + .lc0 = V2 (-0x1.ffffffffffff7p-2), + .lc1 = 0x1.55555555170d4p-2, + .lc2 = V2 (-0x1.0000000399c27p-2), + .lc3 = 0x1.999b2e90e94cap-3, + .lc4 = -0x1.554e550bd501ep-3, + .ln2 = 0x1.62e42fefa39efp-1, + + .off = V2 (0x3fe6900900000000), + .huge_bound = V2 (0x5fe0000000000000), + .abs_mask = V2 (0x7fffffffffffffff), + .mask = V2 (0xfffULL << 52), +}; + +static float64x2_t NOINLINE VPCS_ATTR +special_case (float64x2_t x, float64x2_t y, uint64x2_t abs_mask, + uint64x2_t special) +{ + /* Copy sign. */ + y = vbslq_f64 (abs_mask, y, x); + return v_call_f64 (asinh, x, y, special); +} + +#define N (1 << V_LOG_TABLE_BITS) +#define IndexMask (N - 1) + +struct entry +{ + float64x2_t invc; + float64x2_t logc; +}; + +static inline struct entry +lookup (uint64x2_t i) +{ + /* Since N is a power of 2, n % N = n & (N - 1). 
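/* Editorial sketch, not part of the upstream patch: the lookup below masks
   the top mantissa bits with (N - 1), using n % N == n & (N - 1) for
   power-of-two N.  A scalar illustration, assuming V_LOG_TABLE_BITS is 7 as
   in this library's v_log_data:  */
#include <stdint.h>
#include <string.h>

#define SKETCH_TABLE_BITS 7
#define SKETCH_TABLE_SIZE (1 << SKETCH_TABLE_BITS)

static inline uint64_t
log_table_index_sketch (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  /* Keep the SKETCH_TABLE_BITS bits just below the exponent field; the AND
     is the cheap power-of-two modulo.  */
  return (bits >> (52 - SKETCH_TABLE_BITS)) & (SKETCH_TABLE_SIZE - 1);
}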
*/ + struct entry e; + uint64_t i0 = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + uint64_t i1 = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); + float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); + e.invc = vuzp1q_f64 (e0, e1); + e.logc = vuzp2q_f64 (e0, e1); + return e; +} + +static inline float64x2_t +log_inline (float64x2_t xm, const struct data *d) +{ + + uint64x2_t u = vreinterpretq_u64_f64 (xm); + uint64x2_t u_off = vsubq_u64 (u, d->off); + + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->mask)); + float64x2_t z = vreinterpretq_f64_u64 (iz); + + struct entry e = lookup (u_off); + + /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ + float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); + float64x2_t kd = vcvtq_f64_s64 (k); + + /* hi = r + log(c) + k*Ln2. */ + float64x2_t ln2_and_lc4 = vld1q_f64 (&d->ln2); + float64x2_t hi = vfmaq_laneq_f64 (vaddq_f64 (e.logc, r), kd, ln2_and_lc4, 0); + + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ + float64x2_t odd_coeffs = vld1q_f64 (&d->lc1); + float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t y = vfmaq_laneq_f64 (d->lc2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->lc0, r, odd_coeffs, 0); + y = vfmaq_laneq_f64 (y, r2, ln2_and_lc4, 1); + y = vfmaq_f64 (p, r2, y); + return vfmaq_f64 (hi, y, r2); +} + +/* Double-precision implementation of vector asinh(x). + asinh is very sensitive around 1, so it is impractical to devise a single + low-cost algorithm which is sufficiently accurate on a wide range of input. + Instead we use two different algorithms: + asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1) if |x| >= 1 + = sign(x) * (|x| + |x|^3 * P(x^2)) otherwise + where log(x) is an optimized log approximation, and P(x) is a polynomial + shared with the scalar routine. The greatest observed error 2.79 ULP, in + |x| >= 1: + _ZGVnN2v_asinh(0x1.2cd9d73ea76a6p+0) got 0x1.ffffd003219dap-1 + want 0x1.ffffd003219ddp-1. */ +VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + float64x2_t ax = vabsq_f64 (x); + + uint64x2_t gt1 = vcgeq_f64 (ax, v_f64 (1)); + +#if WANT_SIMD_EXCEPT + uint64x2_t iax = vreinterpretq_u64_f64 (ax); + uint64x2_t special = vcgeq_u64 (iax, (d->huge_bound)); + uint64x2_t tiny = vcltq_f64 (ax, d->tiny_bound); + special = vorrq_u64 (special, tiny); +#else + uint64x2_t special = vcgeq_f64 (ax, vreinterpretq_f64_u64 (d->huge_bound)); +#endif + + /* Option 1: |x| >= 1. + Compute asinh(x) according by asinh(x) = log(x + sqrt(x^2 + 1)). + If WANT_SIMD_EXCEPT is enabled, sidestep special values, which will + overflow, by setting special lanes to 1. These will be fixed later. */ + float64x2_t option_1 = v_f64 (0); + if (likely (v_any_u64 (gt1))) + { +#if WANT_SIMD_EXCEPT + float64x2_t xm = v_zerofy_f64 (ax, special); +#else + float64x2_t xm = ax; +#endif + option_1 = log_inline ( + vaddq_f64 (xm, vsqrtq_f64 (vfmaq_f64 (v_f64 (1), xm, xm))), d); + } + + /* Option 2: |x| < 1. + Compute asinh(x) using a polynomial. + If WANT_SIMD_EXCEPT is enabled, sidestep special lanes, which will + overflow, and tiny lanes, which will underflow, by setting them to 0. They + will be fixed later, either by selecting x or falling back to the scalar + special-case. The largest observed error in this region is 1.47 ULPs: + _ZGVnN2v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1 + want 0x1.c1d6bf874019cp-1. 
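/* Editorial sketch, not part of the upstream patch: the two options
   selected per lane above, in scalar form.  The polynomial is stubbed with
   its leading term c0 only, so this shows the branch structure rather than
   an accurate asinh.  */
#include <math.h>

static double
asinh_branch_sketch (double x)
{
  double ax = fabs (x);
  double y;
  if (ax >= 1.0)
    /* Option 1: asinh(|x|) = log(|x| + sqrt(x^2 + 1)).  */
    y = log (ax + sqrt (ax * ax + 1.0));
  else
    {
      /* Option 2: asinh(|x|) ~ |x| + |x|^3 * P(x^2); only c0 kept here.  */
      double x2 = ax * ax;
      y = ax + (-0x1.55555555554a7p-3) * (ax * x2);
    }
  return copysign (y, x);
}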
*/ + float64x2_t option_2 = v_f64 (0); + + if (likely (v_any_u64 (vceqzq_u64 (gt1)))) + { + +#if WANT_SIMD_EXCEPT + ax = v_zerofy_f64 (ax, vorrq_u64 (tiny, gt1)); +#endif + float64x2_t x2 = vmulq_f64 (ax, ax), z2 = vmulq_f64 (x2, x2); + /* Order-17 Pairwise Horner scheme. */ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, x2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, x2, c13, 1); + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, x2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, x2, c57, 1); + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, x2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, x2, c911, 1); + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, x2, c1315, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, x2, c1315, 1); + float64x2_t p1617 = vfmaq_f64 (d->c16, x2, d->c17); + + float64x2_t p = vfmaq_f64 (p1415, z2, p1617); + p = vfmaq_f64 (p1213, z2, p); + p = vfmaq_f64 (p1011, z2, p); + p = vfmaq_f64 (p89, z2, p); + + p = vfmaq_f64 (p67, z2, p); + p = vfmaq_f64 (p45, z2, p); + + p = vfmaq_f64 (p23, z2, p); + + p = vfmaq_f64 (p01, z2, p); + option_2 = vfmaq_f64 (ax, p, vmulq_f64 (ax, x2)); +#if WANT_SIMD_EXCEPT + option_2 = vbslq_f64 (tiny, x, option_2); +#endif + } + + /* Choose the right option for each lane. */ + float64x2_t y = vbslq_f64 (gt1, option_1, option_2); + if (unlikely (v_any_u64 (special))) + { + return special_case (x, y, d->abs_mask, special); + } + /* Copy sign. */ + return vbslq_f64 (d->abs_mask, y, x); +} + +TEST_SIG (V, D, 1, asinh, -10.0, 10.0) +TEST_ULP (V_NAME_D1 (asinh), 2.29) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (asinh), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (asinh), 0, 0x1p-26, 50000) +TEST_SYM_INTERVAL (V_NAME_D1 (asinh), 0x1p-26, 1, 50000) +TEST_SYM_INTERVAL (V_NAME_D1 (asinh), 1, 0x1p511, 50000) +TEST_SYM_INTERVAL (V_NAME_D1 (asinh), 0x1p511, inf, 40000) +/* Test vector asinh 3 times, with control lane < 1, > 1 and special. + Ensures the v_sel is choosing the right option in all cases. */ +TEST_CONTROL_VALUE (V_NAME_D1 (asinh), 0.5) +TEST_CONTROL_VALUE (V_NAME_D1 (asinh), 2) +TEST_CONTROL_VALUE (V_NAME_D1 (asinh), 0x1p600) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/asinhf.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/asinhf.c new file mode 100644 index 000000000000..6a96f6ee9f4b --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/asinhf.c @@ -0,0 +1,89 @@ +/* + * Single-precision vector asinh(x) function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "v_log1pf_inline.h" + +const static struct data +{ + struct v_log1pf_data log1pf_consts; + float32x4_t one; + uint32x4_t big_bound; +#if WANT_SIMD_EXCEPT + uint32x4_t tiny_bound; +#endif +} data = { + .one = V4 (1), + .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, + .big_bound = V4 (0x5f800000), /* asuint(0x1p64). */ +#if WANT_SIMD_EXCEPT + .tiny_bound = V4 (0x30800000) /* asuint(0x1p-30). 
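/* Editorial sketch, not part of the upstream patch: bounds such as
   0x5f800000 (asuint(0x1p64)) work because the IEEE bit pattern of a
   non-negative float is monotonic in its value, so magnitude checks reduce
   to one unsigned integer compare (NaNs compare above infinity and are
   caught too):  */
#include <stdint.h>
#include <string.h>

static inline int
ge_big_bound_sketch (float x)
{
  uint32_t ix;
  memcpy (&ix, &x, sizeof ix);
  uint32_t iax = ix & 0x7fffffff; /* |x| bits.  */
  return iax >= 0x5f800000u;      /* asuint (0x1p64).  */
}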
*/ +#endif +}; + +static float32x4_t NOINLINE VPCS_ATTR +special_case (float32x4_t x, uint32x4_t sign, float32x4_t y, + uint32x4_t special, const struct data *d) +{ + return v_call_f32 ( + asinhf, x, + vreinterpretq_f32_u32 (veorq_u32 ( + sign, vreinterpretq_u32_f32 (log1pf_inline (y, &d->log1pf_consts)))), + special); +} + +/* Single-precision implementation of vector asinh(x), using vector log1p. + Worst-case error is 2.59 ULP: + _ZGVnN4v_asinhf(0x1.d86124p-3) got 0x1.d449bep-3 + want 0x1.d449c4p-3. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asinh) (float32x4_t x) +{ + const struct data *dat = ptr_barrier (&data); + float32x4_t ax = vabsq_f32 (x); + uint32x4_t iax = vreinterpretq_u32_f32 (ax); + uint32x4_t special = vcgeq_u32 (iax, dat->big_bound); + uint32x4_t sign = veorq_u32 (vreinterpretq_u32_f32 (x), iax); + float32x4_t special_arg = x; + +#if WANT_SIMD_EXCEPT + /* Sidestep tiny and large values to avoid inadvertently triggering + under/overflow. */ + special = vorrq_u32 (special, vcltq_u32 (iax, dat->tiny_bound)); + if (unlikely (v_any_u32 (special))) + { + ax = v_zerofy_f32 (ax, special); + x = v_zerofy_f32 (x, special); + } +#endif + + /* asinh(x) = log(x + sqrt(x * x + 1)). + For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */ + float32x4_t d + = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (dat->one, ax, ax))); + float32x4_t y = vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)); + + if (unlikely (v_any_u32 (special))) + return special_case (special_arg, sign, y, special, dat); + return vreinterpretq_f32_u32 (veorq_u32 ( + sign, vreinterpretq_u32_f32 (log1pf_inline (y, &dat->log1pf_consts)))); +} + +HALF_WIDTH_ALIAS_F1 (asinh) + +TEST_SIG (V, F, 1, asinh, -10.0, 10.0) +TEST_ULP (V_NAME_F1 (asinh), 2.10) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (asinh), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_F1 (asinh), 0, 0x1p-12, 40000) +TEST_INTERVAL (V_NAME_F1 (asinh), 0x1p-12, 1.0, 40000) +TEST_INTERVAL (V_NAME_F1 (asinh), 1.0, 0x1p11, 40000) +TEST_INTERVAL (V_NAME_F1 (asinh), 0x1p11, inf, 40000) +TEST_INTERVAL (V_NAME_F1 (asinh), -0, -0x1p-12, 20000) +TEST_INTERVAL (V_NAME_F1 (asinh), -0x1p-12, -1.0, 20000) +TEST_INTERVAL (V_NAME_F1 (asinh), -1.0, -0x1p11, 20000) +TEST_INTERVAL (V_NAME_F1 (asinh), -0x1p11, -inf, 20000) diff --git a/contrib/arm-optimized-routines/pl/math/v_atan_2u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/atan.c similarity index 51% rename from contrib/arm-optimized-routines/pl/math/v_atan_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/atan.c index ba68cc3cc720..26d264321068 100644 --- a/contrib/arm-optimized-routines/pl/math/v_atan_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/atan.c @@ -1,104 +1,135 @@ /* * Double-precision vector atan(x) function. * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_advsimd_f64.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; float64x2_t pi_over_2; - float64x2_t poly[20]; + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-1022, 1.0]. 
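/* Editorial sketch, not part of the upstream patch: the reduction used by
   this atan, in scalar form.  atan_on_unit stands in for z + z^3 * P(z^2),
   which only ever sees |z| <= 1.  */
#include <math.h>

static double
atan_reduce_sketch (double x, double (*atan_on_unit) (double))
{
  const double pi_over_2 = 0x1.921fb54442d18p+0;
  /* atan(x) = pi/2 + atan(-1/x) for x > 1, and by oddness
     atan(x) = -pi/2 + atan(-1/x) for x < -1.  */
  if (fabs (x) > 1.0)
    return copysign (pi_over_2, x) + atan_on_unit (-1.0 / x);
  return atan_on_unit (x);
}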
*/ - .poly = { V2 (-0x1.5555555555555p-2), V2 (0x1.99999999996c1p-3), - V2 (-0x1.2492492478f88p-3), V2 (0x1.c71c71bc3951cp-4), - V2 (-0x1.745d160a7e368p-4), V2 (0x1.3b139b6a88ba1p-4), - V2 (-0x1.11100ee084227p-4), V2 (0x1.e1d0f9696f63bp-5), - V2 (-0x1.aebfe7b418581p-5), V2 (0x1.842dbe9b0d916p-5), - V2 (-0x1.5d30140ae5e99p-5), V2 (0x1.338e31eb2fbbcp-5), - V2 (-0x1.00e6eece7de8p-5), V2 (0x1.860897b29e5efp-6), - V2 (-0x1.0051381722a59p-6), V2 (0x1.14e9dc19a4a4ep-7), - V2 (-0x1.d0062b42fe3bfp-9), V2 (0x1.17739e210171ap-10), - V2 (-0x1.ab24da7be7402p-13), V2 (0x1.358851160a528p-16), }, + .c0 = V2 (-0x1.5555555555555p-2), .c1 = 0x1.99999999996c1p-3, + .c2 = V2 (-0x1.2492492478f88p-3), .c3 = 0x1.c71c71bc3951cp-4, + .c4 = V2 (-0x1.745d160a7e368p-4), .c5 = 0x1.3b139b6a88ba1p-4, + .c6 = V2 (-0x1.11100ee084227p-4), .c7 = 0x1.e1d0f9696f63bp-5, + .c8 = V2 (-0x1.aebfe7b418581p-5), .c9 = 0x1.842dbe9b0d916p-5, + .c10 = V2 (-0x1.5d30140ae5e99p-5), .c11 = 0x1.338e31eb2fbbcp-5, + .c12 = V2 (-0x1.00e6eece7de8p-5), .c13 = 0x1.860897b29e5efp-6, + .c14 = V2 (-0x1.0051381722a59p-6), .c15 = 0x1.14e9dc19a4a4ep-7, + .c16 = V2 (-0x1.d0062b42fe3bfp-9), .c17 = 0x1.17739e210171ap-10, + .c18 = V2 (-0x1.ab24da7be7402p-13), .c19 = 0x1.358851160a528p-16, .pi_over_2 = V2 (0x1.921fb54442d18p+0), }; #define SignMask v_u64 (0x8000000000000000) #define TinyBound 0x3e10000000000000 /* asuint64(0x1p-30). */ #define BigBound 0x4340000000000000 /* asuint64(0x1p53). */ /* Fast implementation of vector atan. Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Maximum observed error is 2.27 ulps: _ZGVnN2v_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1 want 0x1.9225645bdd7c3p-1. */ float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x) { const struct data *d = ptr_barrier (&data); + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + float64x2_t c1719 = vld1q_f64 (&d->c17); /* Small cases, infs and nans are supported by our approximation technique, but do not set fenv flags correctly. Only trigger special case if we need fenv. */ uint64x2_t ix = vreinterpretq_u64_f64 (x); uint64x2_t sign = vandq_u64 (ix, SignMask); #if WANT_SIMD_EXCEPT uint64x2_t ia12 = vandq_u64 (ix, v_u64 (0x7ff0000000000000)); uint64x2_t special = vcgtq_u64 (vsubq_u64 (ia12, v_u64 (TinyBound)), v_u64 (BigBound - TinyBound)); /* If any lane is special, fall back to the scalar routine for all lanes. */ if (unlikely (v_any_u64 (special))) return v_call_f64 (atan, x, v_f64 (0), v_u64 (-1)); #endif /* Argument reduction: y := arctan(x) for x < 1 y := pi/2 + arctan(-1/x) for x > 1 Hence, use z=-1/a if x>=1, otherwise z=a. */ uint64x2_t red = vcagtq_f64 (x, v_f64 (1.0)); /* Avoid dependency in abs(x) in division (and comparison). */ float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (1.0), x), x); float64x2_t shift = vreinterpretq_f64_u64 ( vandq_u64 (red, vreinterpretq_u64_f64 (d->pi_over_2))); /* Use absolute value only when needed (odd powers of z). */ float64x2_t az = vbslq_f64 ( SignMask, vreinterpretq_f64_u64 (vandq_u64 (SignMask, red)), z); /* Calculate the polynomial approximation. Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of full scheme to avoid underflow in x^16. The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). 
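/* Editorial sketch, not part of the upstream patch: the split Estrin
   evaluation described above, in scalar form with fmas elided.  Evaluating
   the degree-19 polynomial as estrin_7 + x8 * estrin_11 never forms z2^16
   as a standalone intermediate, which for small z2 would raise a spurious
   underflow.  c[0..19] mirrors c0..c19 above.  */
static double
split_estrin_19_sketch (double z2, const double c[20])
{
  double x2 = z2 * z2, x4 = x2 * x2, x8 = x4 * x4;
  /* Low half: degree-7 Estrin in z2.  */
  double p03 = (c[0] + z2 * c[1]) + x2 * (c[2] + z2 * c[3]);
  double p47 = (c[4] + z2 * c[5]) + x2 * (c[6] + z2 * c[7]);
  double lo = p03 + x4 * p47;
  /* High half: degree-11 Estrin in z2.  */
  double p811 = (c[8] + z2 * c[9]) + x2 * (c[10] + z2 * c[11]);
  double p1215 = (c[12] + z2 * c[13]) + x2 * (c[14] + z2 * c[15]);
  double p1619 = (c[16] + z2 * c[17]) + x2 * (c[18] + z2 * c[19]);
  double hi = (p811 + x4 * p1215) + x8 * p1619;
  /* Combine halves; x8 * hi keeps the high terms in a product chain.  */
  return lo + x8 * hi;
}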
*/ float64x2_t z2 = vmulq_f64 (z, z); float64x2_t x2 = vmulq_f64 (z2, z2); float64x2_t x4 = vmulq_f64 (x2, x2); float64x2_t x8 = vmulq_f64 (x4, x4); - float64x2_t y - = vfmaq_f64 (v_estrin_7_f64 (z2, x2, x4, d->poly), - v_estrin_11_f64 (z2, x2, x4, x8, d->poly + 8), x8); + + /* estrin_7. */ + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, x2, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, x2, p67); + + float64x2_t p07 = vfmaq_f64 (p03, x4, p47); + + /* estrin_11. */ + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, x2, p1011); + + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1); + float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415); + + float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0); + float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1); + float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819); + + float64x2_t p815 = vfmaq_f64 (p811, x4, p1215); + float64x2_t p819 = vfmaq_f64 (p815, x8, p1619); + + float64x2_t y = vfmaq_f64 (p07, p819, x8); /* Finalize. y = shift + z + z^3 * P(z^2). */ y = vfmaq_f64 (az, y, vmulq_f64 (z2, az)); y = vaddq_f64 (y, shift); /* y = atan(x) if x>0, -atan(-x) otherwise. */ y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), sign)); return y; } -PL_SIG (V, D, 1, atan, -10.0, 10.0) -PL_TEST_ULP (V_NAME_D1 (atan), 1.78) -PL_TEST_EXPECT_FENV (V_NAME_D1 (atan), WANT_SIMD_EXCEPT) -PL_TEST_INTERVAL (V_NAME_D1 (atan), 0, 0x1p-30, 10000) -PL_TEST_INTERVAL (V_NAME_D1 (atan), -0, -0x1p-30, 1000) -PL_TEST_INTERVAL (V_NAME_D1 (atan), 0x1p-30, 0x1p53, 900000) -PL_TEST_INTERVAL (V_NAME_D1 (atan), -0x1p-30, -0x1p53, 90000) -PL_TEST_INTERVAL (V_NAME_D1 (atan), 0x1p53, inf, 10000) -PL_TEST_INTERVAL (V_NAME_D1 (atan), -0x1p53, -inf, 1000) +TEST_SIG (V, D, 1, atan, -10.0, 10.0) +TEST_ULP (V_NAME_D1 (atan), 1.78) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (atan), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_D1 (atan), 0, 0x1p-30, 10000) +TEST_INTERVAL (V_NAME_D1 (atan), -0, -0x1p-30, 1000) +TEST_INTERVAL (V_NAME_D1 (atan), 0x1p-30, 0x1p53, 900000) +TEST_INTERVAL (V_NAME_D1 (atan), -0x1p-30, -0x1p53, 90000) +TEST_INTERVAL (V_NAME_D1 (atan), 0x1p53, inf, 10000) +TEST_INTERVAL (V_NAME_D1 (atan), -0x1p53, -inf, 1000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/atan2.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/atan2.c new file mode 100644 index 000000000000..18c4b70b92f6 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/atan2.c @@ -0,0 +1,171 @@ +/* + * Double-precision vector atan2(x) function. + * + * Copyright (c) 2021-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" + +static const struct data +{ + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; + float64x2_t pi_over_2; + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; + uint64x2_t zeroinfnan, minustwo; +} data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2**-1022, 1.0]. 
*/ + .c0 = V2 (-0x1.5555555555555p-2), + .c1 = 0x1.99999999996c1p-3, + .c2 = V2 (-0x1.2492492478f88p-3), + .c3 = 0x1.c71c71bc3951cp-4, + .c4 = V2 (-0x1.745d160a7e368p-4), + .c5 = 0x1.3b139b6a88ba1p-4, + .c6 = V2 (-0x1.11100ee084227p-4), + .c7 = 0x1.e1d0f9696f63bp-5, + .c8 = V2 (-0x1.aebfe7b418581p-5), + .c9 = 0x1.842dbe9b0d916p-5, + .c10 = V2 (-0x1.5d30140ae5e99p-5), + .c11 = 0x1.338e31eb2fbbcp-5, + .c12 = V2 (-0x1.00e6eece7de8p-5), + .c13 = 0x1.860897b29e5efp-6, + .c14 = V2 (-0x1.0051381722a59p-6), + .c15 = 0x1.14e9dc19a4a4ep-7, + .c16 = V2 (-0x1.d0062b42fe3bfp-9), + .c17 = 0x1.17739e210171ap-10, + .c18 = V2 (-0x1.ab24da7be7402p-13), + .c19 = 0x1.358851160a528p-16, + .pi_over_2 = V2 (0x1.921fb54442d18p+0), + .zeroinfnan = V2 (2 * 0x7ff0000000000000ul - 1), + .minustwo = V2 (0xc000000000000000), +}; + +#define SignMask v_u64 (0x8000000000000000) + +/* Special cases i.e. 0, infinity, NaN (fall back to scalar calls). */ +static float64x2_t VPCS_ATTR NOINLINE +special_case (float64x2_t y, float64x2_t x, float64x2_t ret, + uint64x2_t sign_xy, uint64x2_t cmp) +{ + /* Account for the sign of x and y. */ + ret = vreinterpretq_f64_u64 ( + veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); + return v_call2_f64 (atan2, y, x, ret, cmp); +} + +/* Returns 1 if input is the bit representation of 0, infinity or nan. */ +static inline uint64x2_t +zeroinfnan (uint64x2_t i, const struct data *d) +{ + /* (2 * i - 1) >= (2 * asuint64 (INFINITY) - 1). */ + return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)), d->zeroinfnan); +} + +/* Fast implementation of vector atan2. + Maximum observed error is 2.8 ulps: + _ZGVnN2vv_atan2 (0x1.9651a429a859ap+5, 0x1.953075f4ee26p+5) + got 0x1.92d628ab678ccp-1 + want 0x1.92d628ab678cfp-1. */ +float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + + uint64x2_t ix = vreinterpretq_u64_f64 (x); + uint64x2_t iy = vreinterpretq_u64_f64 (y); + + uint64x2_t special_cases + = vorrq_u64 (zeroinfnan (ix, d), zeroinfnan (iy, d)); + + uint64x2_t sign_x = vandq_u64 (ix, SignMask); + uint64x2_t sign_y = vandq_u64 (iy, SignMask); + uint64x2_t sign_xy = veorq_u64 (sign_x, sign_y); + + float64x2_t ax = vabsq_f64 (x); + float64x2_t ay = vabsq_f64 (y); + + uint64x2_t pred_xlt0 = vcltzq_f64 (x); + uint64x2_t pred_aygtax = vcagtq_f64 (y, x); + + /* Set up z for call to atan. */ + float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay); + float64x2_t q = vbslq_f64 (pred_aygtax, ay, ax); + float64x2_t z = vdivq_f64 (n, q); + + /* Work out the correct shift. */ + float64x2_t shift + = vreinterpretq_f64_u64 (vandq_u64 (pred_xlt0, d->minustwo)); + shift = vbslq_f64 (pred_aygtax, vaddq_f64 (shift, v_f64 (1.0)), shift); + shift = vmulq_f64 (shift, d->pi_over_2); + + /* Calculate the polynomial approximation. + Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of + full scheme to avoid underflow in x^16. + The order 19 polynomial P approximates + (atan(sqrt(x))-sqrt(x))/x^(3/2). */ + float64x2_t z2 = vmulq_f64 (z, z); + float64x2_t x2 = vmulq_f64 (z2, z2); + float64x2_t x4 = vmulq_f64 (x2, x2); + float64x2_t x8 = vmulq_f64 (x4, x4); + + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + float64x2_t c1719 = vld1q_f64 (&d->c17); + + /* estrin_7. 
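/* Editorial sketch, not part of the upstream patch: the quadrant handling
   of this atan2, in scalar form.  atan here stands in for the shared
   polynomial; zeros, infinities and NaNs are excluded (they go to the
   scalar special case).  */
#include <math.h>

static double
atan2_quadrant_sketch (double y, double x)
{
  double ay = fabs (y), ax = fabs (x);
  int swap = ay > ax;                     /* pred_aygtax.  */
  double n = swap ? -ax : ay;
  double q = swap ? ay : ax;
  double z = n / q;                       /* |z| <= 1.  */
  /* Shift is a multiple of pi/2: -2 when x < 0, +1 more when swapped.  */
  double shift = (x < 0.0) ? -2.0 : 0.0;
  if (swap)
    shift += 1.0;
  double ret = shift * 0x1.921fb54442d18p+0 + atan (z);
  /* Apply sign(x) ^ sign(y), as the vector code does with veorq.  */
  return ((!!signbit (x)) ^ (!!signbit (y))) ? -ret : ret;
}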
*/ + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, x2, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, x2, p67); + + float64x2_t p07 = vfmaq_f64 (p03, x4, p47); + + /* estrin_11. */ + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, x2, p1011); + + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1); + float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415); + + float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0); + float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1); + float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819); + + float64x2_t p815 = vfmaq_f64 (p811, x4, p1215); + float64x2_t p819 = vfmaq_f64 (p815, x8, p1619); + + float64x2_t ret = vfmaq_f64 (p07, p819, x8); + + /* Finalize. y = shift + z + z^3 * P(z^2). */ + ret = vfmaq_f64 (z, ret, vmulq_f64 (z2, z)); + ret = vaddq_f64 (ret, shift); + + if (unlikely (v_any_u64 (special_cases))) + return special_case (y, x, ret, sign_xy, special_cases); + + /* Account for the sign of x and y. */ + ret = vreinterpretq_f64_u64 ( + veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); + + return ret; +} + +/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */ +TEST_SIG (V, D, 2, atan2) +// TODO tighten this once __v_atan2 is fixed +TEST_ULP (V_NAME_D2 (atan2), 2.9) +TEST_DISABLE_FENV (V_NAME_D2 (atan2)) +TEST_INTERVAL (V_NAME_D2 (atan2), -10.0, 10.0, 50000) +TEST_INTERVAL (V_NAME_D2 (atan2), -1.0, 1.0, 40000) +TEST_INTERVAL (V_NAME_D2 (atan2), 0.0, 1.0, 40000) +TEST_INTERVAL (V_NAME_D2 (atan2), 1.0, 100.0, 40000) +TEST_INTERVAL (V_NAME_D2 (atan2), 1e6, 1e32, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/v_atan2f_3u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/atan2f.c similarity index 54% rename from contrib/arm-optimized-routines/pl/math/v_atan2f_3u.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/atan2f.c index bbfc3cb552f6..632014249ab0 100644 --- a/contrib/arm-optimized-routines/pl/math/v_atan2f_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/atan2f.c @@ -1,115 +1,127 @@ /* * Single-precision vector atan2(x) function. * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_advsimd_f32.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { - float32x4_t poly[8]; - float32x4_t pi_over_2; + float32x4_t c0, pi_over_2, c4, c6, c2; + float c1, c3, c5, c7; + uint32x4_t comp_const; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. Generated using fpminimax between FLT_MIN and 1. 
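/* Editorial sketch, not part of the upstream patch: the zeroinfnan test
   used here, in scalar form.  Doubling the bit pattern discards the sign
   bit, and the unsigned wrap of 2*0 - 1 to 0xffffffff folds +/-0 into the
   same "too large" range as infinities and NaNs:  */
#include <stdint.h>

static inline int
zeroinfnan_sketch (uint32_t i)
{
  /* True for i encoding +/-0, +/-inf or any NaN; false for every other
     finite float.  */
  return (uint32_t) (2 * i - 1) >= 2u * 0x7f800000u - 1;
}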
*/ - .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f), - V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f), - V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) }, - .pi_over_2 = V4 (0x1.921fb6p+0f), + .c0 = V4 (-0x1.55555p-2f), .c1 = 0x1.99935ep-3f, + .c2 = V4 (-0x1.24051ep-3f), .c3 = 0x1.bd7368p-4f, + .c4 = V4 (-0x1.491f0ep-4f), .c5 = 0x1.93a2c0p-5f, + .c6 = V4 (-0x1.4c3c60p-6f), .c7 = 0x1.01fd88p-8f, + .pi_over_2 = V4 (0x1.921fb6p+0f), .comp_const = V4 (2 * 0x7f800000lu - 1), }; #define SignMask v_u32 (0x80000000) /* Special cases i.e. 0, infinity and nan (fall back to scalar calls). */ static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t y, float32x4_t x, float32x4_t ret, uint32x4_t cmp) +special_case (float32x4_t y, float32x4_t x, float32x4_t ret, + uint32x4_t sign_xy, uint32x4_t cmp) { + /* Account for the sign of y. */ + ret = vreinterpretq_f32_u32 ( + veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy)); return v_call2_f32 (atan2f, y, x, ret, cmp); } /* Returns 1 if input is the bit representation of 0, infinity or nan. */ static inline uint32x4_t -zeroinfnan (uint32x4_t i) +zeroinfnan (uint32x4_t i, const struct data *d) { /* 2 * i - 1 >= 2 * 0x7f800000lu - 1. */ - return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)), - v_u32 (2 * 0x7f800000lu - 1)); + return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)), d->comp_const); } /* Fast implementation of vector atan2f. Maximum observed error is 2.95 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]: _ZGVnN4vv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1 want 0x1.967f00p-1. */ -float32x4_t VPCS_ATTR V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) { - const struct data *data_ptr = ptr_barrier (&data); + const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); uint32x4_t iy = vreinterpretq_u32_f32 (y); - uint32x4_t special_cases = vorrq_u32 (zeroinfnan (ix), zeroinfnan (iy)); + uint32x4_t special_cases + = vorrq_u32 (zeroinfnan (ix, d), zeroinfnan (iy, d)); uint32x4_t sign_x = vandq_u32 (ix, SignMask); uint32x4_t sign_y = vandq_u32 (iy, SignMask); uint32x4_t sign_xy = veorq_u32 (sign_x, sign_y); float32x4_t ax = vabsq_f32 (x); float32x4_t ay = vabsq_f32 (y); uint32x4_t pred_xlt0 = vcltzq_f32 (x); uint32x4_t pred_aygtax = vcgtq_f32 (ay, ax); /* Set up z for call to atanf. */ float32x4_t n = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay); - float32x4_t d = vbslq_f32 (pred_aygtax, ay, ax); - float32x4_t z = vdivq_f32 (n, d); + float32x4_t q = vbslq_f32 (pred_aygtax, ay, ax); + float32x4_t z = vdivq_f32 (n, q); /* Work out the correct shift. */ float32x4_t shift = vreinterpretq_f32_u32 ( vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-2.0f)))); shift = vbslq_f32 (pred_aygtax, vaddq_f32 (shift, v_f32 (1.0f)), shift); - shift = vmulq_f32 (shift, data_ptr->pi_over_2); + shift = vmulq_f32 (shift, d->pi_over_2); /* Calculate the polynomial approximation. Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However, a standard implementation using z8 creates spurious underflow in the very last fma (when z^8 is small enough). Therefore, we split the last fma into a mul and an fma. Horner and single-level Estrin have higher errors that exceed threshold. 
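/* Editorial sketch, not part of the upstream patch: the "split the last
   fma" trick described above.  Forming z8 = z4 * z4 raises underflow as
   soon as z^8 is subnormal, even though the polynomial value is fine;
   reassociating keeps the first tiny product at z^4 scale, below which
   inputs are handled elsewhere anyway.  */
static float
last_fma_split_sketch (float z4, float p03, float p47)
{
  /* Instead of: float z8 = z4 * z4; return p03 + z8 * p47;  */
  return p03 + z4 * (z4 * p47);
}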
*/ float32x4_t z2 = vmulq_f32 (z, z); float32x4_t z4 = vmulq_f32 (z2, z2); - float32x4_t ret = vfmaq_f32 ( - v_pairwise_poly_3_f32 (z2, z4, data_ptr->poly), z4, - vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, data_ptr->poly + 4))); + float32x4_t c1357 = vld1q_f32 (&d->c1); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c1357, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c1357, 1); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c1357, 2); + float32x4_t p67 = vfmaq_laneq_f32 (d->c6, z2, c1357, 3); + float32x4_t p03 = vfmaq_f32 (p01, z4, p23); + float32x4_t p47 = vfmaq_f32 (p45, z4, p67); + + float32x4_t ret = vfmaq_f32 (p03, z4, vmulq_f32 (z4, p47)); /* y = shift + z * P(z^2). */ ret = vaddq_f32 (vfmaq_f32 (z, ret, vmulq_f32 (z2, z)), shift); - /* Account for the sign of y. */ - ret = vreinterpretq_f32_u32 ( - veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy)); - if (unlikely (v_any_u32 (special_cases))) { - return special_case (y, x, ret, special_cases); + return special_case (y, x, ret, sign_xy, special_cases); } - return ret; + /* Account for the sign of y. */ + return vreinterpretq_f32_u32 ( + veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy)); } +HALF_WIDTH_ALIAS_F2 (atan2) + /* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */ -PL_SIG (V, F, 2, atan2) -PL_TEST_ULP (V_NAME_F2 (atan2), 2.46) -PL_TEST_INTERVAL (V_NAME_F2 (atan2), -10.0, 10.0, 50000) -PL_TEST_INTERVAL (V_NAME_F2 (atan2), -1.0, 1.0, 40000) -PL_TEST_INTERVAL (V_NAME_F2 (atan2), 0.0, 1.0, 40000) -PL_TEST_INTERVAL (V_NAME_F2 (atan2), 1.0, 100.0, 40000) -PL_TEST_INTERVAL (V_NAME_F2 (atan2), 1e6, 1e32, 40000) +TEST_SIG (V, F, 2, atan2) +TEST_DISABLE_FENV (V_NAME_F2 (atan2)) +TEST_ULP (V_NAME_F2 (atan2), 2.46) +TEST_INTERVAL (V_NAME_F2 (atan2), -10.0, 10.0, 50000) +TEST_INTERVAL (V_NAME_F2 (atan2), -1.0, 1.0, 40000) +TEST_INTERVAL (V_NAME_F2 (atan2), 0.0, 1.0, 40000) +TEST_INTERVAL (V_NAME_F2 (atan2), 1.0, 100.0, 40000) +TEST_INTERVAL (V_NAME_F2 (atan2), 1e6, 1e32, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/v_atanf_3u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/atanf.c similarity index 85% rename from contrib/arm-optimized-routines/pl/math/v_atanf_3u.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/atanf.c index f522d957c1cc..61927c9b261a 100644 --- a/contrib/arm-optimized-routines/pl/math/v_atanf_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/atanf.c @@ -1,107 +1,109 @@ /* * Single-precision vector atan(x) function. * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_advsimd_f32.h" +#include "test_sig.h" +#include "test_defs.h" +#include "v_poly_f32.h" static const struct data { float32x4_t poly[8]; float32x4_t pi_over_2; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. Generated using fpminimax between FLT_MIN and 1. */ .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f), V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f), V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) }, .pi_over_2 = V4 (0x1.921fb6p+0f), }; #define SignMask v_u32 (0x80000000) #define P(i) d->poly[i] #define TinyBound 0x30800000 /* asuint(0x1p-30). */ #define BigBound 0x4e800000 /* asuint(0x1p30). 
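/* Editorial sketch, not part of the upstream patch: TinyBound and BigBound
   are applied below as one unsigned compare.  Subtracting TinyBound makes
   values below it wrap to huge unsigned numbers, so a single '>' catches
   both tails:  */
#include <stdint.h>

static inline int
out_of_range_sketch (uint32_t ia)
{
  const uint32_t Tiny = 0x30800000; /* asuint (0x1p-30).  */
  const uint32_t Big = 0x4e800000;  /* asuint (0x1p30).  */
  /* True iff ia < Tiny or ia > Big.  */
  return (uint32_t) (ia - Tiny) > (Big - Tiny);
}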
*/ #if WANT_SIMD_EXCEPT static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t special) { return v_call_f32 (atanf, x, y, special); } #endif /* Fast implementation of vector atanf based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=-1/x and shift = pi/2. Maximum observed error is 2.9ulps: _ZGVnN4v_atanf (0x1.0468f6p+0) got 0x1.967f06p-1 want 0x1.967fp-1. */ -float32x4_t VPCS_ATTR V_NAME_F1 (atan) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x) { const struct data *d = ptr_barrier (&data); /* Small cases, infs and nans are supported by our approximation technique, but do not set fenv flags correctly. Only trigger special case if we need fenv. */ uint32x4_t ix = vreinterpretq_u32_f32 (x); uint32x4_t sign = vandq_u32 (ix, SignMask); #if WANT_SIMD_EXCEPT uint32x4_t ia = vandq_u32 (ix, v_u32 (0x7ff00000)); uint32x4_t special = vcgtq_u32 (vsubq_u32 (ia, v_u32 (TinyBound)), v_u32 (BigBound - TinyBound)); /* If any lane is special, fall back to the scalar routine for all lanes. */ if (unlikely (v_any_u32 (special))) return special_case (x, x, v_u32 (-1)); #endif /* Argument reduction: y := arctan(x) for x < 1 y := pi/2 + arctan(-1/x) for x > 1 Hence, use z=-1/a if x>=1, otherwise z=a. */ uint32x4_t red = vcagtq_f32 (x, v_f32 (1.0)); /* Avoid dependency in abs(x) in division (and comparison). */ float32x4_t z = vbslq_f32 (red, vdivq_f32 (v_f32 (1.0f), x), x); float32x4_t shift = vreinterpretq_f32_u32 ( vandq_u32 (red, vreinterpretq_u32_f32 (d->pi_over_2))); /* Use absolute value only when needed (odd powers of z). */ float32x4_t az = vbslq_f32 ( SignMask, vreinterpretq_f32_u32 (vandq_u32 (SignMask, red)), z); /* Calculate the polynomial approximation. Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However, a standard implementation using z8 creates spurious underflow in the very last fma (when z^8 is small enough). Therefore, we split the last fma into a mul and an fma. Horner and single-level Estrin have higher errors that exceed threshold. */ float32x4_t z2 = vmulq_f32 (z, z); float32x4_t z4 = vmulq_f32 (z2, z2); float32x4_t y = vfmaq_f32 ( v_pairwise_poly_3_f32 (z2, z4, d->poly), z4, vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, d->poly + 4))); /* y = shift + z * P(z^2). */ y = vaddq_f32 (vfmaq_f32 (az, y, vmulq_f32 (z2, az)), shift); /* y = atan(x) if x>0, -atan(-x) otherwise. 
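/* Editorial sketch, not part of the upstream patch: the final sign copy
   below XORs atan(|x|) with the argument's sign bit, exploiting the
   oddness of atan; one veorq instead of a select or a multiply by +/-1.
   Scalar equivalent:  */
#include <stdint.h>
#include <string.h>

static inline float
sign_via_xor_sketch (float y, uint32_t sign_bit /* ix & 0x80000000 */)
{
  uint32_t iy;
  memcpy (&iy, &y, sizeof iy);
  iy ^= sign_bit;
  memcpy (&y, &iy, sizeof iy);
  return y;
}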
*/ y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), sign)); return y; } -PL_SIG (V, F, 1, atan, -10.0, 10.0) -PL_TEST_ULP (V_NAME_F1 (atan), 2.5) -PL_TEST_EXPECT_FENV (V_NAME_F1 (atan), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0, 0x1p-30, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0x1p-30, 1, 40000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 1, 0x1p30, 40000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0x1p30, inf, 1000) +HALF_WIDTH_ALIAS_F1 (atan) + +TEST_SIG (V, F, 1, atan, -10.0, 10.0) +TEST_ULP (V_NAME_F1 (atan), 2.5) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (atan), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0, 0x1p-30, 5000) +TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0x1p-30, 1, 40000) +TEST_SYM_INTERVAL (V_NAME_F1 (atan), 1, 0x1p30, 40000) +TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0x1p30, inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/v_atanh_3u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/atanh.c similarity index 55% rename from contrib/arm-optimized-routines/pl/math/v_atanh_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/atanh.c index f282826a3f32..c2f9585dd29b 100644 --- a/contrib/arm-optimized-routines/pl/math/v_atanh_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/atanh.c @@ -1,66 +1,75 @@ /* * Double-precision vector atanh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define WANT_V_LOG1P_K0_SHORTCUT 0 #include "v_log1p_inline.h" const static struct data { struct v_log1p_data log1p_consts; - uint64x2_t one, half; + uint64x2_t one; + uint64x2_t sign_mask; } data = { .log1p_consts = V_LOG1P_CONSTANTS_TABLE, .one = V2 (0x3ff0000000000000), - .half = V2 (0x3fe0000000000000) }; + .sign_mask = V2 (0x8000000000000000) }; static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, uint64x2_t special) +special_case (float64x2_t x, float64x2_t halfsign, float64x2_t y, + uint64x2_t special, const struct data *d) { - return v_call_f64 (atanh, x, y, special); + y = log1p_inline (y, &d->log1p_consts); + return v_call_f64 (atanh, vbslq_f64 (d->sign_mask, halfsign, x), + vmulq_f64 (halfsign, y), special); } /* Approximation for vector double-precision atanh(x) using modified log1p. The greatest observed error is 3.31 ULP: _ZGVnN2v_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6 want 0x1.ffd8ff31b501cp-6. 
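/* Editorial sketch, not part of the upstream patch: the "modified log1p"
   identity behind both atanh implementations.  For |x| < 1,
     atanh(x) = 0.5 * log((1 + x) / (1 - x)) = 0.5 * log1p(2x / (1 - x)),
   with the 0.5 and the sign of x folded into one halfsign factor:  */
#include <math.h>

static double
atanh_identity_sketch (double x)
{
  double ax = fabs (x);
  double halfsign = copysign (0.5, x);
  return halfsign * log1p (2 * ax / (1 - ax));
}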
*/ VPCS_ATTR float64x2_t V_NAME_D1 (atanh) (float64x2_t x) { const struct data *d = ptr_barrier (&data); + float64x2_t halfsign = vbslq_f64 (d->sign_mask, x, v_f64 (0.5)); float64x2_t ax = vabsq_f64 (x); uint64x2_t ia = vreinterpretq_u64_f64 (ax); - uint64x2_t sign = veorq_u64 (vreinterpretq_u64_f64 (x), ia); uint64x2_t special = vcgeq_u64 (ia, d->one); - float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->half)); #if WANT_SIMD_EXCEPT ax = v_zerofy_f64 (ax, special); #endif float64x2_t y; y = vaddq_f64 (ax, ax); - y = vdivq_f64 (y, vsubq_f64 (v_f64 (1), ax)); - y = log1p_inline (y, &d->log1p_consts); + y = vdivq_f64 (y, vsubq_f64 (vreinterpretq_f64_u64 (d->one), ax)); if (unlikely (v_any_u64 (special))) - return special_case (x, vmulq_f64 (y, halfsign), special); +#if WANT_SIMD_EXCEPT + return special_case (x, halfsign, y, special, d); +#else + return special_case (ax, halfsign, y, special, d); +#endif + + y = log1p_inline (y, &d->log1p_consts); return vmulq_f64 (y, halfsign); } -PL_SIG (V, D, 1, atanh, -1.0, 1.0) -PL_TEST_EXPECT_FENV (V_NAME_D1 (atanh), WANT_SIMD_EXCEPT) -PL_TEST_ULP (V_NAME_D1 (atanh), 3.32) +TEST_SIG (V, D, 1, atanh, -1.0, 1.0) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (atanh), WANT_SIMD_EXCEPT) +TEST_ULP (V_NAME_D1 (atanh), 3.32) +TEST_SYM_INTERVAL (V_NAME_D1 (atanh), 0, 0x1p-23, 10000) +TEST_SYM_INTERVAL (V_NAME_D1 (atanh), 0x1p-23, 1, 90000) +TEST_SYM_INTERVAL (V_NAME_D1 (atanh), 1, inf, 100) /* atanh is asymptotic at 1, which is the default control value - have to set -c 0 specially to ensure fp exceptions are triggered correctly (choice of control lane is irrelevant if fp exceptions are disabled). */ -PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (atanh), 0, 0x1p-23, 10000, 0) -PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (atanh), 0x1p-23, 1, 90000, 0) -PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (atanh), 1, inf, 100, 0) +TEST_CONTROL_VALUE (V_NAME_D1 (atanh), 0) diff --git a/contrib/arm-optimized-routines/pl/math/v_atanhf_3u1.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/atanhf.c similarity index 54% rename from contrib/arm-optimized-routines/pl/math/v_atanhf_3u1.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/atanhf.c index f6a5f25eca9a..313d15ca6391 100644 --- a/contrib/arm-optimized-routines/pl/math/v_atanhf_3u1.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/atanhf.c @@ -1,77 +1,90 @@ /* * Single-precision vector atanh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #include "v_log1pf_inline.h" const static struct data { struct v_log1pf_data log1pf_consts; uint32x4_t one; #if WANT_SIMD_EXCEPT uint32x4_t tiny_bound; #endif } data = { .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .one = V4 (0x3f800000), #if WANT_SIMD_EXCEPT /* 0x1p-12, below which atanhf(x) rounds to x. */ .tiny_bound = V4 (0x39800000), #endif }; #define AbsMask v_u32 (0x7fffffff) #define Half v_u32 (0x3f000000) static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t halfsign, float32x4_t y, + uint32x4_t special) { - return v_call_f32 (atanhf, x, y, special); + return v_call_f32 (atanhf, vbslq_f32 (AbsMask, x, halfsign), + vmulq_f32 (halfsign, y), special); } /* Approximation for vector single-precision atanh(x) using modified log1p. 
- The maximum error is 3.08 ULP: - __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5 - want 0x1.ffcb82p-5. */ -VPCS_ATTR float32x4_t V_NAME_F1 (atanh) (float32x4_t x) + The maximum error is 2.93 ULP: + _ZGVnN4v_atanhf(0x1.f43d7p-5) got 0x1.f4dcfep-5 + want 0x1.f4dcf8p-5. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atanh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); float32x4_t halfsign = vbslq_f32 (AbsMask, v_f32 (0.5), x); float32x4_t ax = vabsq_f32 (x); uint32x4_t iax = vreinterpretq_u32_f32 (ax); #if WANT_SIMD_EXCEPT uint32x4_t special = vorrq_u32 (vcgeq_u32 (iax, d->one), vcltq_u32 (iax, d->tiny_bound)); /* Side-step special cases by setting those lanes to 0, which will trigger no exceptions. These will be fixed up later. */ if (unlikely (v_any_u32 (special))) ax = v_zerofy_f32 (ax, special); #else uint32x4_t special = vcgeq_u32 (iax, d->one); #endif - float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax)); - y = log1pf_inline (y, d->log1pf_consts); + float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), + vsubq_f32 (vreinterpretq_f32_u32 (d->one), ax)); + y = log1pf_inline (y, &d->log1pf_consts); + /* If exceptions not required, pass ax to special-case for shorter dependency + chain. If exceptions are required ax will have been zerofied, so have to + pass x. */ if (unlikely (v_any_u32 (special))) - return special_case (x, vmulq_f32 (halfsign, y), special); +#if WANT_SIMD_EXCEPT + return special_case (x, halfsign, y, special); +#else + return special_case (ax, halfsign, y, special); +#endif return vmulq_f32 (halfsign, y); } -PL_SIG (V, F, 1, atanh, -1.0, 1.0) -PL_TEST_ULP (V_NAME_F1 (atanh), 2.59) -PL_TEST_EXPECT_FENV (V_NAME_F1 (atanh), WANT_SIMD_EXCEPT) +HALF_WIDTH_ALIAS_F1 (atanh) + +TEST_SIG (V, F, 1, atanh, -1.0, 1.0) +TEST_ULP (V_NAME_F1 (atanh), 2.44) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (atanh), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (atanh), 0, 0x1p-12, 500) +TEST_SYM_INTERVAL (V_NAME_F1 (atanh), 0x1p-12, 1, 200000) +TEST_SYM_INTERVAL (V_NAME_F1 (atanh), 1, inf, 1000) /* atanh is asymptotic at 1, which is the default control value - have to set -c 0 specially to ensure fp exceptions are triggered correctly (choice of control lane is irrelevant if fp exceptions are disabled). */ -PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (atanh), 0, 0x1p-12, 500, 0) -PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (atanh), 0x1p-12, 1, 200000, 0) -PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (atanh), 1, inf, 1000, 0) +TEST_CONTROL_VALUE (V_NAME_F1 (atanh), 0) diff --git a/contrib/arm-optimized-routines/pl/math/v_cbrt_2u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/cbrt.c similarity index 76% rename from contrib/arm-optimized-routines/pl/math/v_cbrt_2u.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/cbrt.c index cc7cff15dc0f..8e72e5b566fc 100644 --- a/contrib/arm-optimized-routines/pl/math/v_cbrt_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/cbrt.c @@ -1,116 +1,127 @@ /* * Double-precision vector cbrt(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_advsimd_f64.h" +#include "test_sig.h" +#include "test_defs.h" +#include "v_poly_f64.h" const static struct data { float64x2_t poly[4], one_third, shift; int64x2_t exp_bias; uint64x2_t abs_mask, tiny_bound; uint32x4_t thresh; double table[5]; } data = { .shift = V2 (0x1.8p52), .poly = { /* Generated with fpminimax in [0.5, 1]. */ V2 (0x1.c14e8ee44767p-2), V2 (0x1.dd2d3f99e4c0ep-1), V2 (-0x1.08e83026b7e74p-1), V2 (0x1.2c74eaa3ba428p-3) }, .exp_bias = V2 (1022), .abs_mask = V2(0x7fffffffffffffff), .tiny_bound = V2(0x0010000000000000), /* Smallest normal. */ .thresh = V4(0x7fe00000), /* asuint64 (infinity) - tiny_bound. */ .one_third = V2(0x1.5555555555555p-2), .table = { /* table[i] = 2^((i - 2) / 3). */ 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0 } }; #define MantissaMask v_u64 (0x000fffffffffffff) static float64x2_t NOINLINE VPCS_ATTR special_case (float64x2_t x, float64x2_t y, uint32x2_t special) { return v_call_f64 (cbrt, x, y, vmovl_u32 (special)); } -/* Approximation for double-precision vector cbrt(x), using low-order polynomial - and two Newton iterations. Greatest observed error is 1.79 ULP. Errors repeat +/* Approximation for double-precision vector cbrt(x), using low-order + polynomial and two Newton iterations. + + The vector version of frexp does not handle subnormals + correctly. As a result these need to be handled by the scalar + fallback, where accuracy may be worse than that of the vector code + path. + + Greatest observed error in the normal range is 1.79 ULP. Errors repeat according to the exponent, for instance an error observed for double value m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an integer. - __v_cbrt(0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0 - want 0x1.965fe72821e99p+0. */ + _ZGVnN2v_cbrt (0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0 + want 0x1.965fe72821e99p+0. */ VPCS_ATTR float64x2_t V_NAME_D1 (cbrt) (float64x2_t x) { const struct data *d = ptr_barrier (&data); uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); /* Subnormal, +/-0 and special values. */ uint32x2_t special = vcge_u32 (vsubhn_u64 (iax, d->tiny_bound), vget_low_u32 (d->thresh)); /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector version of frexp, which gets subnormal values wrong - these have to be special-cased as a result. */ float64x2_t m = vbslq_f64 (MantissaMask, x, v_f64 (0.5)); int64x2_t exp_bias = d->exp_bias; uint64x2_t ia12 = vshrq_n_u64 (iax, 52); int64x2_t e = vsubq_s64 (vreinterpretq_s64_u64 (ia12), exp_bias); - /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point for - Newton iterations. */ + /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point + for Newton iterations. */ float64x2_t p = v_pairwise_poly_3_f64 (m, vmulq_f64 (m, m), d->poly); float64x2_t one_third = d->one_third; /* Two iterations of Newton's method for iteratively approximating cbrt. */ float64x2_t m_by_3 = vmulq_f64 (m, one_third); float64x2_t two_thirds = vaddq_f64 (one_third, one_third); float64x2_t a = vfmaq_f64 (vdivq_f64 (m_by_3, vmulq_f64 (p, p)), two_thirds, p); a = vfmaq_f64 (vdivq_f64 (m_by_3, vmulq_f64 (a, a)), two_thirds, a); /* Assemble the result by the following: cbrt(x) = cbrt(m) * 2 ^ (e / 3). 
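/* Editorial sketch, not part of the upstream patch: the scaling step
   continued below, in scalar form.  e is split as e = 3*round(e/3) + i
   with i in [-2, 2], so 2^(e/3) = 2^round(e/3) * table[i + 2], with the
   same table of 2^((i - 2)/3) as above.  e/3 is never a tie, so any
   round-to-nearest matches the vector vrndn.  */
#include <math.h>

static double
cbrt_scale_sketch (double cbrt_m, int e)
{
  static const double table[5]
      = { 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0,
          0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0 };
  int ey = (int) lround (e / 3.0); /* round(e / 3).  */
  int i = e - 3 * ey;              /* remainder, in [-2, 2].  */
  return cbrt_m * table[i + 2] * ldexp (1.0, ey);
}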
We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is not necessarily a multiple of 3 we lose some information. Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q. - Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which is - an integer in [-2, 2], and can be looked up in the table T. Hence the + Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which + is an integer in [-2, 2], and can be looked up in the table T. Hence the result is assembled as: cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */ float64x2_t ef = vcvtq_f64_s64 (e); float64x2_t eb3f = vrndnq_f64 (vmulq_f64 (ef, one_third)); int64x2_t em3 = vcvtq_s64_f64 (vfmsq_f64 (ef, eb3f, v_f64 (3))); int64x2_t ey = vcvtq_s64_f64 (eb3f); float64x2_t my = (float64x2_t){ d->table[em3[0] + 2], d->table[em3[1] + 2] }; my = vmulq_f64 (my, a); /* Vector version of ldexp. */ float64x2_t y = vreinterpretq_f64_s64 ( vshlq_n_s64 (vaddq_s64 (ey, vaddq_s64 (exp_bias, v_s64 (1))), 52)); y = vmulq_f64 (y, my); if (unlikely (v_any_u32h (special))) return special_case (x, vbslq_f64 (d->abs_mask, y, x), special); /* Copy sign. */ return vbslq_f64 (d->abs_mask, y, x); } -PL_TEST_ULP (V_NAME_D1 (cbrt), 1.30) -PL_SIG (V, D, 1, cbrt, -10.0, 10.0) -PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (cbrt)) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (cbrt), 0, inf, 1000000) +/* Worse-case ULP error assumes that scalar fallback is GLIBC 2.40 cbrt, which + has ULP error of 3.67 at 0x1.7a337e1ba1ec2p-257 [1]. Largest observed error + in the vector path is 1.79 ULP. + [1] Innocente, V., & Zimmermann, P. (2024). Accuracy of Mathematical + Functions in Single, Double, Double Extended, and Quadruple Precision. */ +TEST_ULP (V_NAME_D1 (cbrt), 3.17) +TEST_SIG (V, D, 1, cbrt, -10.0, 10.0) +TEST_SYM_INTERVAL (V_NAME_D1 (cbrt), 0, inf, 1000000) diff --git a/contrib/arm-optimized-routines/pl/math/v_cbrtf_1u7.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/cbrtf.c similarity index 91% rename from contrib/arm-optimized-routines/pl/math/v_cbrtf_1u7.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/cbrtf.c index 74918765209f..4e76feb2dd8b 100644 --- a/contrib/arm-optimized-routines/pl/math/v_cbrtf_1u7.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/cbrtf.c @@ -1,116 +1,117 @@ /* * Single-precision vector cbrt(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_advsimd_f32.h" +#include "test_sig.h" +#include "test_defs.h" +#include "v_poly_f32.h" const static struct data { float32x4_t poly[4], one_third; float table[5]; } data = { .poly = { /* Very rough approximation of cbrt(x) in [0.5, 1], generated with FPMinimax. */ V4 (0x1.c14e96p-2), V4 (0x1.dd2d3p-1), V4 (-0x1.08e81ap-1), V4 (0x1.2c74c2p-3) }, .table = { /* table[i] = 2^((i - 2) / 3). */ 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0 }, .one_third = V4 (0x1.555556p-2f), }; #define SignMask v_u32 (0x80000000) #define SmallestNormal v_u32 (0x00800000) #define Thresh vdup_n_u16 (0x7f00) /* asuint(INFINITY) - SmallestNormal. 
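/* Editorial sketch, not part of the upstream patch: the Newton update used
   by both cbrt and cbrtf.  Solving a^3 = m, Newton's method gives
     a' = a - (a^3 - m) / (3 a^2) = (2/3) a + (m/3) / a^2,
   which is exactly the fma-plus-divide pattern in the vector code:  */
static double
cbrt_newton_step_sketch (double a, double m)
{
  double m_by_3 = m * (1.0 / 3.0);
  double two_thirds = 2.0 / 3.0;
  return two_thirds * a + m_by_3 / (a * a);
}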
*/ #define MantissaMask v_u32 (0x007fffff) #define HalfExp v_u32 (0x3f000000) static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint16x4_t special) { return v_call_f32 (cbrtf, x, y, vmovl_u16 (special)); } static inline float32x4_t shifted_lookup (const float *table, int32x4_t i) { return (float32x4_t){ table[i[0] + 2], table[i[1] + 2], table[i[2] + 2], table[i[3] + 2] }; } /* Approximation for vector single-precision cbrt(x) using Newton iteration with initial guess obtained by a low-order polynomial. Greatest error is 1.64 ULP. This is observed for every value where the mantissa is 0x1.85a2aa and the exponent is a multiple of 3, for example: _ZGVnN4v_cbrtf(0x1.85a2aap+3) got 0x1.267936p+1 want 0x1.267932p+1. */ -VPCS_ATTR float32x4_t V_NAME_F1 (cbrt) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cbrt) (float32x4_t x) { const struct data *d = ptr_barrier (&data); uint32x4_t iax = vreinterpretq_u32_f32 (vabsq_f32 (x)); /* Subnormal, +/-0 and special values. */ uint16x4_t special = vcge_u16 (vsubhn_u32 (iax, SmallestNormal), Thresh); /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector version of frexpf, which gets subnormal values wrong - these have to be special-cased as a result. */ float32x4_t m = vbslq_f32 (MantissaMask, x, v_f32 (0.5)); int32x4_t e = vsubq_s32 (vreinterpretq_s32_u32 (vshrq_n_u32 (iax, 23)), v_s32 (126)); /* p is a rough approximation for cbrt(m) in [0.5, 1.0]. The better this is, the less accurate the next stage of the algorithm needs to be. An order-4 polynomial is enough for one Newton iteration. */ float32x4_t p = v_pairwise_poly_3_f32 (m, vmulq_f32 (m, m), d->poly); float32x4_t one_third = d->one_third; float32x4_t two_thirds = vaddq_f32 (one_third, one_third); /* One iteration of Newton's method for iteratively approximating cbrt. */ float32x4_t m_by_3 = vmulq_f32 (m, one_third); float32x4_t a = vfmaq_f32 (vdivq_f32 (m_by_3, vmulq_f32 (p, p)), two_thirds, p); /* Assemble the result by the following: cbrt(x) = cbrt(m) * 2 ^ (e / 3). We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is not necessarily a multiple of 3 we lose some information. Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q. Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which is an integer in [-2, 2], and can be looked up in the table T. Hence the result is assembled as: cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */ float32x4_t ef = vmulq_f32 (vcvtq_f32_s32 (e), one_third); int32x4_t ey = vcvtq_s32_f32 (ef); int32x4_t em3 = vsubq_s32 (e, vmulq_s32 (ey, v_s32 (3))); float32x4_t my = shifted_lookup (d->table, em3); my = vmulq_f32 (my, a); /* Vector version of ldexpf. */ float32x4_t y = vreinterpretq_f32_s32 (vshlq_n_s32 (vaddq_s32 (ey, v_s32 (127)), 23)); y = vmulq_f32 (y, my); if (unlikely (v_any_u16h (special))) return special_case (x, vbslq_f32 (SignMask, x, y), special); /* Copy sign. 
*/ return vbslq_f32 (SignMask, x, y); } -PL_SIG (V, F, 1, cbrt, -10.0, 10.0) -PL_TEST_ULP (V_NAME_F1 (cbrt), 1.15) -PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_F1 (cbrt)) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (cbrt), 0, inf, 1000000) +HALF_WIDTH_ALIAS_F1 (cbrt) + +TEST_SIG (V, F, 1, cbrt, -10.0, 10.0) +TEST_ULP (V_NAME_F1 (cbrt), 1.15) +TEST_SYM_INTERVAL (V_NAME_F1 (cbrt), 0, inf, 1000000) diff --git a/contrib/arm-optimized-routines/pl/math/v_cexpi_3u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/cexpi.c similarity index 79% rename from contrib/arm-optimized-routines/pl/math/v_cexpi_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/cexpi.c index 5163b15926b8..40ba5ff31f20 100644 --- a/contrib/arm-optimized-routines/pl/math/v_cexpi_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/cexpi.c @@ -1,45 +1,47 @@ /* * Double-precision vector sincos function - return-by-value interface. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_sincos_common.h" #include "v_math.h" -#include "pl_test.h" +#include "test_defs.h" static float64x2x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, uint64x2_t special, float64x2x2_t y) { return (float64x2x2_t){ v_call_f64 (sin, x, y.val[0], special), v_call_f64 (cos, x, y.val[1], special) }; } /* Double-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate polynomials. Largest observed error is for sin, 3.22 ULP: v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3 want -0x1.ffe9537d5dbb4p-3. */ VPCS_ATTR float64x2x2_t _ZGVnN2v_cexpi (float64x2_t x) { const struct v_sincos_data *d = ptr_barrier (&v_sincos_data); uint64x2_t special = check_ge_rangeval (x, d); float64x2x2_t sc = v_sincos_inline (x, d); if (unlikely (v_any_u64 (special))) return special_case (x, special, sc); return sc; } -PL_TEST_ULP (_ZGVnN2v_cexpi_sin, 2.73) -PL_TEST_ULP (_ZGVnN2v_cexpi_cos, 2.73) +TEST_DISABLE_FENV (_ZGVnN2v_cexpi_cos) +TEST_DISABLE_FENV (_ZGVnN2v_cexpi_sin) +TEST_ULP (_ZGVnN2v_cexpi_sin, 2.73) +TEST_ULP (_ZGVnN2v_cexpi_cos, 2.73) #define V_CEXPI_INTERVAL(lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVnN2v_cexpi_sin, lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVnN2v_cexpi_cos, lo, hi, n) + TEST_INTERVAL (_ZGVnN2v_cexpi_sin, lo, hi, n) \ + TEST_INTERVAL (_ZGVnN2v_cexpi_cos, lo, hi, n) V_CEXPI_INTERVAL (0, 0x1p23, 500000) V_CEXPI_INTERVAL (-0, -0x1p23, 500000) V_CEXPI_INTERVAL (0x1p23, inf, 10000) V_CEXPI_INTERVAL (-0x1p23, -inf, 10000) diff --git a/contrib/arm-optimized-routines/pl/math/v_cexpif_1u8.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/cexpif.c similarity index 80% rename from contrib/arm-optimized-routines/pl/math/v_cexpif_1u8.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/cexpif.c index 4897018d3090..e55d99653a66 100644 --- a/contrib/arm-optimized-routines/pl/math/v_cexpif_1u8.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/cexpif.c @@ -1,47 +1,49 @@ /* * Single-precision vector cexpi function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. 
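/* Editorial sketch, not part of the upstream patch: _ZGVnN2v_cexpi above
   returns sin and cos of the same argument in one float64x2x2_t, sharing a
   single argument reduction.  A hypothetical caller (AArch64; the real
   declaration lives in the library headers and carries the vector PCS
   attribute):  */
#include <arm_neon.h>

__attribute__ ((aarch64_vector_pcs)) float64x2x2_t
_ZGVnN2v_cexpi (float64x2_t x);

static void
unit_phasor_sketch (float64x2_t theta, float64x2_t *re, float64x2_t *im)
{
  float64x2x2_t sc = _ZGVnN2v_cexpi (theta);
  *im = sc.val[0]; /* sin(theta).  */
  *re = sc.val[1]; /* cos(theta).  */
}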
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_sincosf_common.h" #include "v_math.h" -#include "pl_test.h" +#include "test_defs.h" static float32x4x2_t VPCS_ATTR NOINLINE special_case (float32x4_t x, uint32x4_t special, float32x4x2_t y) { return (float32x4x2_t){ v_call_f32 (sinf, x, y.val[0], special), v_call_f32 (cosf, x, y.val[1], special) }; } /* Single-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate low-order polynomials. Worst-case error for sin is 1.67 ULP: v_cexpif_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5 Worst-case error for cos is 1.81 ULP: v_cexpif_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */ VPCS_ATTR float32x4x2_t _ZGVnN4v_cexpif (float32x4_t x) { const struct v_sincosf_data *d = ptr_barrier (&v_sincosf_data); uint32x4_t special = check_ge_rangeval (x, d); float32x4x2_t sc = v_sincosf_inline (x, d); if (unlikely (v_any_u32 (special))) return special_case (x, special, sc); return sc; } -PL_TEST_ULP (_ZGVnN4v_cexpif_sin, 1.17) -PL_TEST_ULP (_ZGVnN4v_cexpif_cos, 1.31) +TEST_DISABLE_FENV (_ZGVnN4v_cexpif_sin) +TEST_DISABLE_FENV (_ZGVnN4v_cexpif_cos) +TEST_ULP (_ZGVnN4v_cexpif_sin, 1.17) +TEST_ULP (_ZGVnN4v_cexpif_cos, 1.31) #define V_CEXPIF_INTERVAL(lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVnN4v_cexpif_sin, lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVnN4v_cexpif_cos, lo, hi, n) + TEST_INTERVAL (_ZGVnN4v_cexpif_sin, lo, hi, n) \ + TEST_INTERVAL (_ZGVnN4v_cexpif_cos, lo, hi, n) V_CEXPIF_INTERVAL (0, 0x1p20, 500000) V_CEXPIF_INTERVAL (-0, -0x1p20, 500000) V_CEXPIF_INTERVAL (0x1p20, inf, 10000) V_CEXPIF_INTERVAL (-0x1p20, -inf, 10000) diff --git a/contrib/arm-optimized-routines/math/aarch64/v_cos.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/cos.c similarity index 80% rename from contrib/arm-optimized-routines/math/aarch64/v_cos.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/cos.c index 9a73575bce89..9f3de4dd5c36 100644 --- a/contrib/arm-optimized-routines/math/aarch64/v_cos.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/cos.c @@ -1,87 +1,92 @@ /* * Double-precision vector cos function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #include "v_math.h" +#include "test_defs.h" +#include "test_sig.h" static const struct data { float64x2_t poly[7]; - float64x2_t range_val, shift, inv_pi, half_pi, pi_1, pi_2, pi_3; + float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* Worst-case error is 3.3 ulp in [-pi/2, pi/2]. 
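Both sincos-style entry points above return the pair by value, so one call yields sin and cos lanes from a single argument reduction. A usage sketch, assuming the AdvSIMD symbol is available to link against and that val[0]/val[1] carry the sin/cos lanes as in special_case above; the real declaration in mathlib.h also carries the vector PCS attribute, elided here:

#include <arm_neon.h>

float32x4x2_t _ZGVnN4v_cexpif (float32x4_t);

/* Fill s[0..3] and c[0..3] with sin/cos of four angles in one call.  */
static void
unit_vectors (const float theta[4], float s[4], float c[4])
{
  float32x4x2_t sc = _ZGVnN4v_cexpif (vld1q_f32 (theta));
  vst1q_f32 (s, sc.val[0]); /* sin lanes.  */
  vst1q_f32 (c, sc.val[1]); /* cos lanes.  */
}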
*/ .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19), V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33), V2 (-0x1.9e9540300a1p-41) }, .inv_pi = V2 (0x1.45f306dc9c883p-2), - .half_pi = V2 (0x1.921fb54442d18p+0), .pi_1 = V2 (0x1.921fb54442d18p+1), .pi_2 = V2 (0x1.1a62633145c06p-53), .pi_3 = V2 (0x1.c1cd129024e09p-106), - .shift = V2 (0x1.8p52), .range_val = V2 (0x1p23) }; #define C(i) d->poly[i] static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp) { y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); return v_call_f64 (cos, x, y, cmp); } float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x) { const struct data *d = ptr_barrier (&data); float64x2_t n, r, r2, r3, r4, t1, t2, t3, y; uint64x2_t odd, cmp; #if WANT_SIMD_EXCEPT r = vabsq_f64 (x); cmp = vcgeq_u64 (vreinterpretq_u64_f64 (r), vreinterpretq_u64_f64 (d->range_val)); if (unlikely (v_any_u64 (cmp))) /* If fenv exceptions are to be triggered correctly, set any special lanes to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by special-case handler later. */ r = vbslq_f64 (cmp, v_f64 (1.0), r); #else cmp = vcageq_f64 (x, d->range_val); r = x; #endif /* n = rint((|x|+pi/2)/pi) - 0.5. */ - n = vfmaq_f64 (d->shift, d->inv_pi, vaddq_f64 (r, d->half_pi)); - odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); - n = vsubq_f64 (n, d->shift); - n = vsubq_f64 (n, v_f64 (0.5)); + n = vrndaq_f64 (vfmaq_f64 (v_f64 (0.5), r, d->inv_pi)); + odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63); + n = vsubq_f64 (n, v_f64 (0.5f)); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f64 (r, d->pi_1, n); r = vfmsq_f64 (r, d->pi_2, n); r = vfmsq_f64 (r, d->pi_3, n); /* sin(r) poly approx. */ r2 = vmulq_f64 (r, r); r3 = vmulq_f64 (r2, r); r4 = vmulq_f64 (r2, r2); t1 = vfmaq_f64 (C (4), C (5), r2); t2 = vfmaq_f64 (C (2), C (3), r2); t3 = vfmaq_f64 (C (0), C (1), r2); y = vfmaq_f64 (t1, C (6), r4); y = vfmaq_f64 (t2, y, r4); y = vfmaq_f64 (t3, y, r4); y = vfmaq_f64 (r, y, r3); if (unlikely (v_any_u64 (cmp))) return special_case (x, y, odd, cmp); return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); } + +TEST_SIG (V, D, 1, cos, -3.1, 3.1) +TEST_ULP (V_NAME_D1 (cos), 3.0) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (cos), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (cos), 0, 0x1p23, 500000) +TEST_SYM_INTERVAL (V_NAME_D1 (cos), 0x1p23, inf, 10000) diff --git a/contrib/arm-optimized-routines/math/aarch64/v_cosf.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/cosf.c similarity index 76% rename from contrib/arm-optimized-routines/math/aarch64/v_cosf.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/cosf.c index b9890b2998ad..d2844e44e196 100644 --- a/contrib/arm-optimized-routines/math/aarch64/v_cosf.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/cosf.c @@ -1,82 +1,89 @@ /* * Single-precision vector cos function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #include "v_math.h" +#include "test_defs.h" +#include "test_sig.h" static const struct data { float32x4_t poly[4]; - float32x4_t range_val, inv_pi, half_pi, shift, pi_1, pi_2, pi_3; + float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* 1.886 ulp error. 
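The substantive change in cos/cosf above is the quadrant selection: the old add-0x1.8p52 rounding shift is gone, replaced by vrndaq plus a float-to-int convert, while the reduction itself is unchanged. A scalar model built from the constants in the data table; round() mirrors vrndaq's round-to-nearest, ties-away behaviour, and the sketch ignores the |x| >= 0x1p23 fallback:

#include <math.h>

static double
cos_model (double x)
{
  static const double c[7]
      = { -0x1.555555555547bp-3,  0x1.1111111108a4dp-7,
          -0x1.a01a019936f27p-13, 0x1.71de37a97d93ep-19,
          -0x1.ae633919987c6p-26, 0x1.60e277ae07cecp-33,
          -0x1.9e9540300a1p-41 };
  const double inv_pi = 0x1.45f306dc9c883p-2;
  const double pi_1 = 0x1.921fb54442d18p+1, pi_2 = 0x1.1a62633145c06p-53,
               pi_3 = 0x1.c1cd129024e09p-106;
  double r = fabs (x);
  /* n - 0.5 counts half-integer multiples of pi; bit 0 of n decides
     whether sin(r) needs its sign flipped.  */
  double n = round (fma (r, inv_pi, 0.5));
  int odd = (long long) n & 1;
  n -= 0.5;
  /* r = |x| - n*pi with pi in three parts, so the reduced argument
     stays accurate near multiples of pi.  */
  r = fma (-pi_1, n, r);
  r = fma (-pi_2, n, r);
  r = fma (-pi_3, n, r);
  /* sin(r) = r + r^3 * poly(r^2), coefficients from the table above.  */
  double r2 = r * r, p = c[6];
  for (int i = 5; i >= 0; i--)
    p = fma (p, r2, c[i]);
  double y = fma (p * r2, r, r);
  return odd ? -y : y;
}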
*/ .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), V4 (0x1.5b2e76p-19f) }, .pi_1 = V4 (0x1.921fb6p+1f), .pi_2 = V4 (-0x1.777a5cp-24f), .pi_3 = V4 (-0x1.ee59dap-49f), .inv_pi = V4 (0x1.45f306p-2f), - .shift = V4 (0x1.8p+23f), - .half_pi = V4 (0x1.921fb6p0f), .range_val = V4 (0x1p20f) }; #define C(i) d->poly[i] static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp) { /* Fall back to scalar code. */ y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); return v_call_f32 (cosf, x, y, cmp); } -float32x4_t VPCS_ATTR V_NAME_F1 (cos) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x) { const struct data *d = ptr_barrier (&data); float32x4_t n, r, r2, r3, y; uint32x4_t odd, cmp; #if WANT_SIMD_EXCEPT r = vabsq_f32 (x); cmp = vcgeq_u32 (vreinterpretq_u32_f32 (r), vreinterpretq_u32_f32 (d->range_val)); if (unlikely (v_any_u32 (cmp))) /* If fenv exceptions are to be triggered correctly, set any special lanes to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by special-case handler later. */ r = vbslq_f32 (cmp, v_f32 (1.0f), r); #else cmp = vcageq_f32 (x, d->range_val); r = x; #endif /* n = rint((|x|+pi/2)/pi) - 0.5. */ - n = vfmaq_f32 (d->shift, d->inv_pi, vaddq_f32 (r, d->half_pi)); - odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); - n = vsubq_f32 (n, d->shift); + n = vrndaq_f32 (vfmaq_f32 (v_f32 (0.5), r, d->inv_pi)); + odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31); n = vsubq_f32 (n, v_f32 (0.5f)); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f32 (r, d->pi_1, n); r = vfmsq_f32 (r, d->pi_2, n); r = vfmsq_f32 (r, d->pi_3, n); /* y = sin(r). */ r2 = vmulq_f32 (r, r); r3 = vmulq_f32 (r2, r); y = vfmaq_f32 (C (2), C (3), r2); y = vfmaq_f32 (C (1), y, r2); y = vfmaq_f32 (C (0), y, r2); y = vfmaq_f32 (r, y, r3); if (unlikely (v_any_u32 (cmp))) return special_case (x, y, odd, cmp); return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); } + +HALF_WIDTH_ALIAS_F1 (cos) + +TEST_SIG (V, F, 1, cos, -3.1, 3.1) +TEST_ULP (V_NAME_F1 (cos), 1.4) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (cos), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (cos), 0, 0x1p20, 500000) +TEST_SYM_INTERVAL (V_NAME_F1 (cos), 0x1p20, inf, 10000) diff --git a/contrib/arm-optimized-routines/pl/math/v_cosh_2u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/cosh.c similarity index 84% rename from contrib/arm-optimized-routines/pl/math/v_cosh_2u.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/cosh.c index 649c390f4622..54407b23aa9d 100644 --- a/contrib/arm-optimized-routines/pl/math/v_cosh_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/cosh.c @@ -1,104 +1,107 @@ /* * Double-precision vector cosh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float64x2_t poly[3]; - float64x2_t inv_ln2, ln2, shift, thres; + float64x2_t inv_ln2; + double ln2[2]; + float64x2_t shift, thres; uint64x2_t index_mask, special_bound; } data = { .poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6b68cp-3), V2 (0x1.5555576a59599p-5), }, .inv_ln2 = V2 (0x1.71547652b82fep8), /* N/ln2. */ /* -ln2/N. 
*/ .ln2 = {-0x1.62e42fefa39efp-9, -0x1.abc9e3b39803f3p-64}, .shift = V2 (0x1.8p+52), .thres = V2 (704.0), .index_mask = V2 (0xff), /* 0x1.6p9, above which exp overflows. */ .special_bound = V2 (0x4086000000000000), }; static float64x2_t NOINLINE VPCS_ATTR special_case (float64x2_t x, float64x2_t y, uint64x2_t special) { return v_call_f64 (cosh, x, y, special); } /* Helper for approximating exp(x). Copied from v_exp_tail, with no special-case handling or tail. */ static inline float64x2_t exp_inline (float64x2_t x) { const struct data *d = ptr_barrier (&data); /* n = round(x/(ln2/N)). */ float64x2_t z = vfmaq_f64 (d->shift, x, d->inv_ln2); uint64x2_t u = vreinterpretq_u64_f64 (z); float64x2_t n = vsubq_f64 (z, d->shift); /* r = x - n*ln2/N. */ - float64x2_t r = vfmaq_laneq_f64 (x, n, d->ln2, 0); - r = vfmaq_laneq_f64 (r, n, d->ln2, 1); + float64x2_t ln2 = vld1q_f64 (d->ln2); + float64x2_t r = vfmaq_laneq_f64 (x, n, ln2, 0); + r = vfmaq_laneq_f64 (r, n, ln2, 1); uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TAIL_TABLE_BITS); uint64x2_t i = vandq_u64 (u, d->index_mask); /* y = tail + exp(r) - 1 ~= r + C1 r^2 + C2 r^3 + C3 r^4. */ float64x2_t y = vfmaq_f64 (d->poly[1], d->poly[2], r); y = vfmaq_f64 (d->poly[0], y, r); y = vmulq_f64 (vfmaq_f64 (v_f64 (1), y, r), r); /* s = 2^(n/N). */ u = v_lookup_u64 (__v_exp_tail_data, i); float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e)); return vfmaq_f64 (s, y, s); } /* Approximation for vector double-precision cosh(x) using exp_inline. cosh(x) = (exp(x) + exp(-x)) / 2. The greatest observed error is in the scalar fall-back region, so is the same as the scalar routine, 1.93 ULP: _ZGVnN2v_cosh (0x1.628af341989dap+9) got 0x1.fdf28623ef921p+1021 want 0x1.fdf28623ef923p+1021. The greatest observed error in the non-special region is 1.54 ULP: _ZGVnN2v_cosh (0x1.8e205b6ecacf7p+2) got 0x1.f711dcb0c77afp+7 want 0x1.f711dcb0c77b1p+7. */ float64x2_t VPCS_ATTR V_NAME_D1 (cosh) (float64x2_t x) { const struct data *d = ptr_barrier (&data); float64x2_t ax = vabsq_f64 (x); uint64x2_t special = vcgtq_u64 (vreinterpretq_u64_f64 (ax), d->special_bound); /* Up to the point that exp overflows, we can use it to calculate cosh by exp(|x|) / 2 + 1 / (2 * exp(|x|)). */ float64x2_t t = exp_inline (ax); float64x2_t half_t = vmulq_n_f64 (t, 0.5); float64x2_t half_over_t = vdivq_f64 (v_f64 (0.5), t); /* Fall back to scalar for any special cases. */ if (unlikely (v_any_u64 (special))) return special_case (x, vaddq_f64 (half_t, half_over_t), special); return vaddq_f64 (half_t, half_over_t); } -PL_SIG (V, D, 1, cosh, -10.0, 10.0) -PL_TEST_ULP (V_NAME_D1 (cosh), 1.43) -PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (cosh)) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (cosh), 0, 0x1.6p9, 100000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (cosh), 0x1.6p9, inf, 1000) +TEST_SIG (V, D, 1, cosh, -10.0, 10.0) +TEST_ULP (V_NAME_D1 (cosh), 1.43) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (cosh), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (cosh), 0, 0x1.6p9, 100000) +TEST_SYM_INTERVAL (V_NAME_D1 (cosh), 0x1.6p9, inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/v_coshf_2u4.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/coshf.c similarity index 64% rename from contrib/arm-optimized-routines/pl/math/v_coshf_2u4.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/coshf.c index c622b0b183f1..f1ed3e5161fd 100644 --- a/contrib/arm-optimized-routines/pl/math/v_coshf_2u4.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/coshf.c @@ -1,80 +1,92 @@ /* * Single-precision vector cosh(x) function. 
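The cosh body above costs a single exp evaluation: cosh is even, exp(|x|) never underflows, and a reciprocal supplies the exp(-|x|) term. A minimal scalar sketch of that identity, valid while |x| stays below the 0x1.6p9 overflow bound guarded by special_bound:

#include <math.h>

static double
cosh_model (double x)
{
  double t = exp (fabs (x)); /* exp_inline in the vector code.  */
  return 0.5 * t + 0.5 / t;  /* (e^|x| + e^-|x|) / 2.  */
}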
* - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_expf_inline.h" #include "v_math.h" -#include "mathlib.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { struct v_expf_data expf_consts; - uint32x4_t tiny_bound, special_bound; + uint32x4_t tiny_bound; + float32x4_t bound; +#if WANT_SIMD_EXCEPT + uint32x4_t special_bound; +#endif } data = { .expf_consts = V_EXPF_DATA, .tiny_bound = V4 (0x20000000), /* 0x1p-63: Round to 1 below this. */ /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */ + .bound = V4 (0x1.5a92d8p+6), +#if WANT_SIMD_EXCEPT .special_bound = V4 (0x42ad496c), +#endif }; #if !WANT_SIMD_EXCEPT static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t half_t, float32x4_t half_over_t, + uint32x4_t special) { - return v_call_f32 (coshf, x, y, special); + return v_call_f32 (coshf, x, vaddq_f32 (half_t, half_over_t), special); } #endif /* Single-precision vector cosh, using vector expf. Maximum error is 2.38 ULP: _ZGVnN4v_coshf (0x1.e8001ep+1) got 0x1.6a491ep+4 want 0x1.6a4922p+4. */ -float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cosh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t ax = vabsq_f32 (x); - uint32x4_t iax = vreinterpretq_u32_f32 (ax); - uint32x4_t special = vcgeq_u32 (iax, d->special_bound); - #if WANT_SIMD_EXCEPT /* If fp exceptions are to be triggered correctly, fall back to the scalar variant for all inputs if any input is a special value or above the bound at which expf overflows. */ + float32x4_t ax = vabsq_f32 (x); + uint32x4_t iax = vreinterpretq_u32_f32 (ax); + uint32x4_t special = vcgeq_u32 (iax, d->special_bound); if (unlikely (v_any_u32 (special))) return v_call_f32 (coshf, x, x, v_u32 (-1)); uint32x4_t tiny = vcleq_u32 (iax, d->tiny_bound); /* If any input is tiny, avoid underflow exception by fixing tiny lanes of input to 0, which will generate no exceptions. */ if (unlikely (v_any_u32 (tiny))) ax = v_zerofy_f32 (ax, tiny); + float32x4_t t = v_expf_inline (ax, &d->expf_consts); +#else + uint32x4_t special = vcageq_f32 (x, d->bound); + float32x4_t t = v_expf_inline (x, &d->expf_consts); #endif /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. 
*/ - float32x4_t t = v_expf_inline (ax, &d->expf_consts); float32x4_t half_t = vmulq_n_f32 (t, 0.5); float32x4_t half_over_t = vdivq_f32 (v_f32 (0.5), t); #if WANT_SIMD_EXCEPT if (unlikely (v_any_u32 (tiny))) return vbslq_f32 (tiny, v_f32 (1), vaddq_f32 (half_t, half_over_t)); #else if (unlikely (v_any_u32 (special))) - return special_case (x, vaddq_f32 (half_t, half_over_t), special); + return special_case (x, half_t, half_over_t, special); #endif return vaddq_f32 (half_t, half_over_t); } -PL_SIG (V, F, 1, cosh, -10.0, 10.0) -PL_TEST_ULP (V_NAME_F1 (cosh), 1.89) -PL_TEST_EXPECT_FENV (V_NAME_F1 (cosh), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0, 0x1p-63, 100) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0, 0x1.5a92d8p+6, 80000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0x1.5a92d8p+6, inf, 2000) +HALF_WIDTH_ALIAS_F1 (cosh) + +TEST_SIG (V, F, 1, cosh, -10.0, 10.0) +TEST_ULP (V_NAME_F1 (cosh), 1.89) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (cosh), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0, 0x1p-63, 100) +TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0x1p-63, 1, 1000) +TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 1, 0x1.5a92d8p+6, 80000) +TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0x1.5a92d8p+6, inf, 2000) diff --git a/contrib/arm-optimized-routines/pl/math/v_cospi_3u1.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/cospi.c similarity index 81% rename from contrib/arm-optimized-routines/pl/math/v_cospi_3u1.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/cospi.c index 3c2ee0b74c8e..e63201a55786 100644 --- a/contrib/arm-optimized-routines/pl/math/v_cospi_3u1.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/cospi.c @@ -1,86 +1,87 @@ /* * Double-precision vector cospi function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #include "v_math.h" -#include "poly_advsimd_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "v_poly_f64.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float64x2_t poly[10]; float64x2_t range_val; } data = { /* Polynomial coefficients generated using Remez algorithm, see sinpi.sollya for details. */ .poly = { V2 (0x1.921fb54442d184p1), V2 (-0x1.4abbce625be53p2), V2 (0x1.466bc6775ab16p1), V2 (-0x1.32d2cce62dc33p-1), V2 (0x1.507834891188ep-4), V2 (-0x1.e30750a28c88ep-8), V2 (0x1.e8f48308acda4p-12), V2 (-0x1.6fc0032b3c29fp-16), V2 (0x1.af86ae521260bp-21), V2 (-0x1.012a9870eeb7dp-25) }, .range_val = V2 (0x1p63), }; static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp) { /* Fall back to scalar code. */ y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); - return v_call_f64 (cospi, x, y, cmp); + return v_call_f64 (arm_math_cospi, x, y, cmp); } /* Approximation for vector double-precision cospi(x). Maximum Error 3.06 ULP: _ZGVnN2v_cospi(0x1.7dd4c0b03cc66p-5) got 0x1.fa854babfb6bep-1 want 0x1.fa854babfb6c1p-1. */ float64x2_t VPCS_ATTR V_NAME_D1 (cospi) (float64x2_t x) { const struct data *d = ptr_barrier (&data); #if WANT_SIMD_EXCEPT float64x2_t r = vabsq_f64 (x); uint64x2_t cmp = vcaleq_f64 (v_f64 (0x1p64), x); /* When WANT_SIMD_EXCEPT = 1, special lanes should be zero'd to avoid them overflowing and throwing exceptions. 
*/ r = v_zerofy_f64 (r, cmp); uint64x2_t odd = vshlq_n_u64 (vcvtnq_u64_f64 (r), 63); #else float64x2_t r = x; uint64x2_t cmp = vcageq_f64 (r, d->range_val); uint64x2_t odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtaq_s64_f64 (r)), 63); #endif r = vsubq_f64 (r, vrndaq_f64 (r)); /* cospi(x) = sinpi(0.5 - abs(x)) for values -1/2 .. 1/2. */ r = vsubq_f64 (v_f64 (0.5), vabsq_f64 (r)); /* y = sin(r). */ float64x2_t r2 = vmulq_f64 (r, r); float64x2_t r4 = vmulq_f64 (r2, r2); float64x2_t y = vmulq_f64 (v_pw_horner_9_f64 (r2, r4, d->poly), r); /* Fallback to scalar. */ if (unlikely (v_any_u64 (cmp))) return special_case (x, y, odd, cmp); /* Reintroduce the sign bit for inputs which round to odd. */ return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); } -PL_SIG (V, D, 1, cospi, -0.9, 0.9) -PL_TEST_ULP (V_NAME_D1 (cospi), 2.56) -PL_TEST_EXPECT_FENV (V_NAME_D1 (cospi), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0, 0x1p-63, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0x1p-63, 0.5, 10000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0.5, 0x1p51, 10000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0x1p51, inf, 10000) +#if WANT_TRIGPI_TESTS +TEST_ULP (V_NAME_D1 (cospi), 2.56) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (cospi), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0, 0x1p-63, 5000) +TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0x1p-63, 0.5, 10000) +TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0.5, 0x1p51, 10000) +TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0x1p51, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/v_cospif_3u2.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/cospif.c similarity index 76% rename from contrib/arm-optimized-routines/pl/math/v_cospif_3u2.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/cospif.c index d88aa828439d..62f4b8122b2c 100644 --- a/contrib/arm-optimized-routines/pl/math/v_cospif_3u2.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/cospif.c @@ -1,83 +1,86 @@ /* * Single-precision vector cospi function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #include "v_math.h" -#include "poly_advsimd_f32.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "v_poly_f32.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float32x4_t poly[6]; float32x4_t range_val; } data = { /* Taylor series coefficents for sin(pi * x). */ .poly = { V4 (0x1.921fb6p1f), V4 (-0x1.4abbcep2f), V4 (0x1.466bc6p1f), V4 (-0x1.32d2ccp-1f), V4 (0x1.50783p-4f), V4 (-0x1.e30750p-8f) }, .range_val = V4 (0x1p31f), }; static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp) { y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); - return v_call_f32 (cospif, x, y, cmp); + return v_call_f32 (arm_math_cospif, x, y, cmp); } /* Approximation for vector single-precision cospi(x) Maximum Error: 3.17 ULP: _ZGVnN4v_cospif(0x1.d341a8p-5) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1. */ -float32x4_t VPCS_ATTR V_NAME_F1 (cospi) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cospi) (float32x4_t x) { const struct data *d = ptr_barrier (&data); #if WANT_SIMD_EXCEPT float32x4_t r = vabsq_f32 (x); uint32x4_t cmp = vcaleq_f32 (v_f32 (0x1p32f), x); /* When WANT_SIMD_EXCEPT = 1, special lanes should be zero'd to avoid them overflowing and throwing exceptions. 
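The one-line reduction in the cospi comment above does a lot of work. A scalar model using the sinpi coefficients from the data table; it assumes |x| < 2^63 (the routine's range_val) and skips the WANT_SIMD_EXCEPT zeroing:

#include <math.h>

static double
cospi_model (double x)
{
  static const double c[10]
      = { 0x1.921fb54442d184p1,  -0x1.4abbce625be53p2,
          0x1.466bc6775ab16p1,   -0x1.32d2cce62dc33p-1,
          0x1.507834891188ep-4,  -0x1.e30750a28c88ep-8,
          0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16,
          0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 };
  double n = round (x);        /* ties away, like vcvtaq_s64_f64.  */
  int odd = (long long) n & 1; /* inputs rounding to odd flip the sign.  */
  double r = x - n;            /* r in [-1/2, 1/2].  */
  r = 0.5 - fabs (r);          /* cospi(x) = +/- sinpi(0.5 - |r|).  */
  /* sinpi(r) = r * poly(r^2) with the Remez coefficients above.  */
  double r2 = r * r, p = c[9];
  for (int i = 8; i >= 0; i--)
    p = fma (p, r2, c[i]);
  return odd ? -(p * r) : p * r;
}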
*/ r = v_zerofy_f32 (r, cmp); uint32x4_t odd = vshlq_n_u32 (vcvtnq_u32_f32 (r), 31); #else float32x4_t r = x; uint32x4_t cmp = vcageq_f32 (r, d->range_val); uint32x4_t odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (r)), 31); #endif /* r = x - rint(x). */ r = vsubq_f32 (r, vrndaq_f32 (r)); /* cospi(x) = sinpi(0.5 - abs(x)) for values -1/2 .. 1/2. */ r = vsubq_f32 (v_f32 (0.5f), vabsq_f32 (r)); /* Pairwise Horner approximation for y = sin(r * pi). */ float32x4_t r2 = vmulq_f32 (r, r); float32x4_t r4 = vmulq_f32 (r2, r2); float32x4_t y = vmulq_f32 (v_pw_horner_5_f32 (r2, r4, d->poly), r); /* Fallback to scalar. */ if (unlikely (v_any_u32 (cmp))) return special_case (x, y, odd, cmp); /* Reintroduce the sign bit for inputs which round to odd. */ return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); } -PL_SIG (V, F, 1, cospi, -0.9, 0.9) -PL_TEST_ULP (V_NAME_F1 (cospi), 2.67) -PL_TEST_EXPECT_FENV (V_NAME_F1 (cospi), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0, 0x1p-31, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0x1p-31, 0.5, 10000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0.5, 0x1p32f, 10000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0x1p32f, inf, 10000) +HALF_WIDTH_ALIAS_F1 (cospi) + +#if WANT_TRIGPI_TESTS +TEST_ULP (V_NAME_F1 (cospi), 2.67) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (cospi), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0, 0x1p-31, 5000) +TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0x1p-31, 0.5, 10000) +TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0.5, 0x1p32f, 10000) +TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0x1p32f, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/v_erf_2u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/erf.c similarity index 77% rename from contrib/arm-optimized-routines/pl/math/v_erf_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/erf.c index e581ec5bb8a7..40717a660ce2 100644 --- a/contrib/arm-optimized-routines/pl/math/v_erf_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/erf.c @@ -1,158 +1,166 @@ /* * Double-precision vector erf(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float64x2_t third; - float64x2_t tenth, two_over_five, two_over_fifteen; - float64x2_t two_over_nine, two_over_fortyfive; + float64x2_t tenth, two_over_five, two_over_nine; + double two_over_fifteen, two_over_fortyfive; float64x2_t max, shift; + uint64x2_t max_idx; #if WANT_SIMD_EXCEPT float64x2_t tiny_bound, huge_bound, scale_minus_one; #endif } data = { + .max_idx = V2 (768), .third = V2 (0x1.5555555555556p-2), /* used to compute 2/3 and 1/6 too. */ - .two_over_fifteen = V2 (0x1.1111111111111p-3), + .two_over_fifteen = 0x1.1111111111111p-3, .tenth = V2 (-0x1.999999999999ap-4), .two_over_five = V2 (-0x1.999999999999ap-2), .two_over_nine = V2 (-0x1.c71c71c71c71cp-3), - .two_over_fortyfive = V2 (0x1.6c16c16c16c17p-5), + .two_over_fortyfive = 0x1.6c16c16c16c17p-5, .max = V2 (5.9921875), /* 6 - 1/128. */ .shift = V2 (0x1p45), #if WANT_SIMD_EXCEPT .huge_bound = V2 (0x1p205), .tiny_bound = V2 (0x1p-226), .scale_minus_one = V2 (0x1.06eba8214db69p-3), /* 2/sqrt(pi) - 1.0. 
*/ #endif }; #define AbsMask 0x7fffffffffffffff struct entry { float64x2_t erf; float64x2_t scale; }; static inline struct entry lookup (uint64x2_t i) { struct entry e; - float64x2_t e1 = vld1q_f64 ((float64_t *) (__erf_data.tab + i[0])), - e2 = vld1q_f64 ((float64_t *) (__erf_data.tab + i[1])); + float64x2_t e1 = vld1q_f64 (&__v_erf_data.tab[vgetq_lane_u64 (i, 0)].erf), + e2 = vld1q_f64 (&__v_erf_data.tab[vgetq_lane_u64 (i, 1)].erf); e.erf = vuzp1q_f64 (e1, e2); e.scale = vuzp2q_f64 (e1, e2); return e; } /* Double-precision implementation of vector erf(x). Approximation based on series expansion near x rounded to nearest multiple of 1/128. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erf(x) ~ erf(r) + scale * d * [ + 1 - r d + 1/3 (2 r^2 - 1) d^2 - 1/6 (r (2 r^2 - 3)) d^3 + 1/30 (4 r^4 - 12 r^2 + 3) d^4 - 1/90 (4 r^4 - 20 r^2 + 15) d^5 ] Maximum measure error: 2.29 ULP V_NAME_D1 (erf)(-0x1.00003c924e5d1p-8) got -0x1.20dd59132ebadp-8 want -0x1.20dd59132ebafp-8. */ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x) { const struct data *dat = ptr_barrier (&data); float64x2_t a = vabsq_f64 (x); /* Reciprocal conditions that do not catch NaNs so they can be used in BSLs to return expected results. */ - uint64x2_t a_le_max = vcleq_f64 (a, dat->max); - uint64x2_t a_gt_max = vcgtq_f64 (a, dat->max); + uint64x2_t a_le_max = vcaleq_f64 (x, dat->max); + uint64x2_t a_gt_max = vcagtq_f64 (x, dat->max); #if WANT_SIMD_EXCEPT /* |x| huge or tiny. */ uint64x2_t cmp1 = vcgtq_f64 (a, dat->huge_bound); uint64x2_t cmp2 = vcltq_f64 (a, dat->tiny_bound); uint64x2_t cmp = vorrq_u64 (cmp1, cmp2); /* If any lanes are special, mask them with 1 for small x or 8 for large values and retain a copy of a to allow special case handler to fix special lanes later. This is only necessary if fenv exceptions are to be triggered correctly. */ if (unlikely (v_any_u64 (cmp))) { a = vbslq_f64 (cmp1, v_f64 (8.0), a); a = vbslq_f64 (cmp2, v_f64 (1.0), a); } #endif /* Set r to multiple of 1/128 nearest to |x|. */ float64x2_t shift = dat->shift; float64x2_t z = vaddq_f64 (a, shift); /* Lookup erf(r) and scale(r) in table, without shortcut for small values, but with saturated indices for large values and NaNs in order to avoid segfault. */ uint64x2_t i = vsubq_u64 (vreinterpretq_u64_f64 (z), vreinterpretq_u64_f64 (shift)); - i = vbslq_u64 (a_le_max, i, v_u64 (768)); + i = vbslq_u64 (a_le_max, i, dat->max_idx); struct entry e = lookup (i); float64x2_t r = vsubq_f64 (z, shift); /* erf(x) ~ erf(r) + scale * d * poly (r, d). */ float64x2_t d = vsubq_f64 (a, r); float64x2_t d2 = vmulq_f64 (d, d); float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t two_over_fifteen_and_fortyfive + = vld1q_f64 (&dat->two_over_fifteen); + /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p5(r) * d^5. 
*/ float64x2_t p1 = r; float64x2_t p2 = vfmsq_f64 (dat->third, r2, vaddq_f64 (dat->third, dat->third)); float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->third)); - float64x2_t p4 = vfmaq_f64 (dat->two_over_five, r2, dat->two_over_fifteen); + float64x2_t p4 = vfmaq_laneq_f64 (dat->two_over_five, r2, + two_over_fifteen_and_fortyfive, 0); p4 = vfmsq_f64 (dat->tenth, r2, p4); - float64x2_t p5 = vfmaq_f64 (dat->two_over_nine, r2, dat->two_over_fortyfive); + float64x2_t p5 = vfmaq_laneq_f64 (dat->two_over_nine, r2, + two_over_fifteen_and_fortyfive, 1); p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->third), r2, p5)); float64x2_t p34 = vfmaq_f64 (p3, d, p4); float64x2_t p12 = vfmaq_f64 (p1, d, p2); float64x2_t y = vfmaq_f64 (p34, d2, p5); y = vfmaq_f64 (p12, d2, y); y = vfmaq_f64 (e.erf, e.scale, vfmsq_f64 (d, d2, y)); /* Solves the |x| = inf and NaN cases. */ y = vbslq_f64 (a_gt_max, v_f64 (1.0), y); /* Copy sign. */ y = vbslq_f64 (v_u64 (AbsMask), y, x); #if WANT_SIMD_EXCEPT if (unlikely (v_any_u64 (cmp2))) { /* Neutralise huge values of x before fixing small values. */ x = vbslq_f64 (cmp1, v_f64 (1.0), x); /* Fix tiny values that trigger spurious underflow. */ return vbslq_f64 (cmp2, vfmaq_f64 (x, dat->scale_minus_one, x), y); } #endif return y; } -PL_SIG (V, D, 1, erf, -6.0, 6.0) -PL_TEST_ULP (V_NAME_D1 (erf), 1.79) -PL_TEST_EXPECT_FENV (V_NAME_D1 (erf), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (erf), 0, 5.9921875, 40000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (erf), 5.9921875, inf, 40000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (erf), 0, inf, 40000) +TEST_SIG (V, D, 1, erf, -6.0, 6.0) +TEST_ULP (V_NAME_D1 (erf), 1.79) +/* WANT_SIMD_EXCEPT blocks miss some cases. */ +TEST_DISABLE_FENV (V_NAME_D1 (erf)) +TEST_SYM_INTERVAL (V_NAME_D1 (erf), 0, 5.9921875, 40000) +TEST_SYM_INTERVAL (V_NAME_D1 (erf), 5.9921875, inf, 40000) +TEST_SYM_INTERVAL (V_NAME_D1 (erf), 0, inf, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/v_erfc_1u8.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/erfc.c similarity index 77% rename from contrib/arm-optimized-routines/pl/math/v_erfc_1u8.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/erfc.c index 10ef7e6a3c34..97ef09ecc113 100644 --- a/contrib/arm-optimized-routines/pl/math/v_erfc_1u8.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/erfc.c @@ -1,198 +1,205 @@ /* * Double-precision vector erfc(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { uint64x2_t offset, table_scale; float64x2_t max, shift; - float64x2_t p20, p40, p41, p42; - float64x2_t p51, p52; - float64x2_t qr5, qr6, qr7, qr8, qr9; + float64x2_t p20, p40, p41, p51; + double p42, p52; + double qr5[2], qr6[2], qr7[2], qr8[2], qr9[2]; #if WANT_SIMD_EXCEPT float64x2_t uflow_bound; #endif } data = { /* Set an offset so the range of the index used for lookup is 3487, and it can be clamped using a saturated add on an offset index. Index offset is 0xffffffffffffffff - asuint64(shift) - 3487. */ .offset = V2 (0xbd3ffffffffff260), .table_scale = V2 (0x37f0000000000000 << 1), /* asuint64 (2^-128) << 1. */ .max = V2 (0x1.b3ep+4), /* 3487/128. */ .shift = V2 (0x1p45), .p20 = V2 (0x1.5555555555555p-2), /* 1/3, used to compute 2/3 and 1/6. */ .p40 = V2 (-0x1.999999999999ap-4), /* 1/10. 
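Once r is within 1/256 of |x|, only the first terms of the expansion quoted above matter. A scalar model of the lookup-plus-expansion idea, truncated after the d^2 term; libm's erf() and exp() stand in for the precomputed __v_erf_data pairs, and 2/sqrt(pi) is written out as a literal:

#include <math.h>

static double
erf_model (double x)
{
  double a = fabs (x);
  if (a > 5.9921875) /* erf(|x|) rounds to 1 beyond 6 - 1/128.  */
    return copysign (1.0, x);
  /* r = |x| rounded to the nearest multiple of 1/128 (done with the
     0x1p45 shift in the vector code).  */
  double r = round (a * 128.0) / 128.0;
  double erf_r = erf (r);                           /* table: e.erf.  */
  double scale = 1.1283791670955126 * exp (-r * r); /* table: e.scale.  */
  double d = a - r;
  /* erf(x) ~ erf(r) + scale * d * (1 - r d + 1/3 (2 r^2 - 1) d^2).  */
  double p = 1.0 - r * d + (2.0 * r * r - 1.0) * d * d / 3.0;
  return copysign (fma (scale, d * p, erf_r), x);
}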
*/ .p41 = V2 (-0x1.999999999999ap-2), /* 2/5. */ - .p42 = V2 (0x1.1111111111111p-3), /* 2/15. */ + .p42 = 0x1.1111111111111p-3, /* 2/15. */ .p51 = V2 (-0x1.c71c71c71c71cp-3), /* 2/9. */ - .p52 = V2 (0x1.6c16c16c16c17p-5), /* 2/45. */ + .p52 = 0x1.6c16c16c16c17p-5, /* 2/45. */ /* Qi = (i+1) / i, Ri = -2 * i / ((i+1)*(i+2)), for i = 5, ..., 9. */ .qr5 = { 0x1.3333333333333p0, -0x1.e79e79e79e79ep-3 }, .qr6 = { 0x1.2aaaaaaaaaaabp0, -0x1.b6db6db6db6dbp-3 }, .qr7 = { 0x1.2492492492492p0, -0x1.8e38e38e38e39p-3 }, .qr8 = { 0x1.2p0, -0x1.6c16c16c16c17p-3 }, .qr9 = { 0x1.1c71c71c71c72p0, -0x1.4f2094f2094f2p-3 }, #if WANT_SIMD_EXCEPT .uflow_bound = V2 (0x1.a8b12fc6e4892p+4), #endif }; #define TinyBound 0x4000000000000000 /* 0x1p-511 << 1. */ #define Off 0xfffffffffffff260 /* 0xffffffffffffffff - 3487. */ struct entry { float64x2_t erfc; float64x2_t scale; }; static inline struct entry lookup (uint64x2_t i) { struct entry e; - float64x2_t e1 = vld1q_f64 ((float64_t *) (__erfc_data.tab - Off + i[0])), - e2 = vld1q_f64 ((float64_t *) (__erfc_data.tab - Off + i[1])); + float64x2_t e1 + = vld1q_f64 (&__v_erfc_data.tab[vgetq_lane_u64 (i, 0) - Off].erfc); + float64x2_t e2 + = vld1q_f64 (&__v_erfc_data.tab[vgetq_lane_u64 (i, 1) - Off].erfc); e.erfc = vuzp1q_f64 (e1, e2); e.scale = vuzp2q_f64 (e1, e2); return e; } #if WANT_SIMD_EXCEPT static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) { return v_call_f64 (erfc, x, y, cmp); } #endif /* Optimized double-precision vector erfc(x). Approximation based on series expansion near x rounded to nearest multiple of 1/128. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erfc(x) ~ erfc(r) - scale * d * poly(r, d), with poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3 + (2/15 r^4 - 2/5 r^2 + 1/10) d^4 - r * (2/45 r^4 - 2/9 r^2 + 1/6) d^5 + p6(r) d^6 + ... + p10(r) d^10 Polynomials p6(r) to p10(r) are computed using recurrence relation 2(i+1)p_i + 2r(i+2)p_{i+1} + (i+2)(i+3)p_{i+2} = 0, with p0 = 1, and p1(r) = -r. Values of erfc(r) and scale are read from lookup tables. Stored values are scaled to avoid hitting the subnormal range. Note that for x < 0, erfc(x) = 2.0 - erfc(-x). Maximum measured error: 1.71 ULP V_NAME_D1 (erfc)(0x1.46cfe976733p+4) got 0x1.e15fcbea3e7afp-608 want 0x1.e15fcbea3e7adp-608. */ VPCS_ATTR float64x2_t V_NAME_D1 (erfc) (float64x2_t x) { const struct data *dat = ptr_barrier (&data); #if WANT_SIMD_EXCEPT /* |x| < 2^-511. Avoid fabs by left-shifting by 1. */ uint64x2_t ix = vreinterpretq_u64_f64 (x); uint64x2_t cmp = vcltq_u64 (vaddq_u64 (ix, ix), v_u64 (TinyBound)); /* x >= ~26.54 (into subnormal case and uflow case). Comparison is done in integer domain to avoid raising exceptions in presence of nans. */ uint64x2_t uflow = vcgeq_s64 (vreinterpretq_s64_f64 (x), vreinterpretq_s64_f64 (dat->uflow_bound)); cmp = vorrq_u64 (cmp, uflow); float64x2_t xm = x; /* If any lanes are special, mask them with 0 and retain a copy of x to allow special case handler to fix special lanes later. This is only necessary if fenv exceptions are to be triggered correctly. */ if (unlikely (v_any_u64 (cmp))) x = v_zerofy_f64 (x, cmp); #endif float64x2_t a = vabsq_f64 (x); a = vminq_f64 (a, dat->max); /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 0 and scale to 2/sqrt(pi), when x reduced to r = 0. */ float64x2_t shift = dat->shift; float64x2_t z = vaddq_f64 (a, shift); /* Clamp index to a range of 3487. A naive approach would use a subtract and min. 
Instead we offset the table address and the index, then use a saturating add. */ uint64x2_t i = vqaddq_u64 (vreinterpretq_u64_f64 (z), dat->offset); struct entry e = lookup (i); /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). */ float64x2_t r = vsubq_f64 (z, shift); float64x2_t d = vsubq_f64 (a, r); float64x2_t d2 = vmulq_f64 (d, d); float64x2_t r2 = vmulq_f64 (r, r); float64x2_t p1 = r; float64x2_t p2 = vfmsq_f64 (dat->p20, r2, vaddq_f64 (dat->p20, dat->p20)); float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->p20)); - float64x2_t p4 = vfmaq_f64 (dat->p41, r2, dat->p42); + float64x2_t p42_p52 = vld1q_f64 (&dat->p42); + float64x2_t p4 = vfmaq_laneq_f64 (dat->p41, r2, p42_p52, 0); p4 = vfmsq_f64 (dat->p40, r2, p4); - float64x2_t p5 = vfmaq_f64 (dat->p51, r2, dat->p52); + float64x2_t p5 = vfmaq_laneq_f64 (dat->p51, r2, p42_p52, 1); p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->p20), r2, p5)); /* Compute p_i using recurrence relation: p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */ - float64x2_t p6 = vfmaq_f64 (p4, p5, vmulq_laneq_f64 (r, dat->qr5, 0)); - p6 = vmulq_laneq_f64 (p6, dat->qr5, 1); - float64x2_t p7 = vfmaq_f64 (p5, p6, vmulq_laneq_f64 (r, dat->qr6, 0)); - p7 = vmulq_laneq_f64 (p7, dat->qr6, 1); - float64x2_t p8 = vfmaq_f64 (p6, p7, vmulq_laneq_f64 (r, dat->qr7, 0)); - p8 = vmulq_laneq_f64 (p8, dat->qr7, 1); - float64x2_t p9 = vfmaq_f64 (p7, p8, vmulq_laneq_f64 (r, dat->qr8, 0)); - p9 = vmulq_laneq_f64 (p9, dat->qr8, 1); - float64x2_t p10 = vfmaq_f64 (p8, p9, vmulq_laneq_f64 (r, dat->qr9, 0)); - p10 = vmulq_laneq_f64 (p10, dat->qr9, 1); + float64x2_t qr5 = vld1q_f64 (dat->qr5), qr6 = vld1q_f64 (dat->qr6), + qr7 = vld1q_f64 (dat->qr7), qr8 = vld1q_f64 (dat->qr8), + qr9 = vld1q_f64 (dat->qr9); + float64x2_t p6 = vfmaq_f64 (p4, p5, vmulq_laneq_f64 (r, qr5, 0)); + p6 = vmulq_laneq_f64 (p6, qr5, 1); + float64x2_t p7 = vfmaq_f64 (p5, p6, vmulq_laneq_f64 (r, qr6, 0)); + p7 = vmulq_laneq_f64 (p7, qr6, 1); + float64x2_t p8 = vfmaq_f64 (p6, p7, vmulq_laneq_f64 (r, qr7, 0)); + p8 = vmulq_laneq_f64 (p8, qr7, 1); + float64x2_t p9 = vfmaq_f64 (p7, p8, vmulq_laneq_f64 (r, qr8, 0)); + p9 = vmulq_laneq_f64 (p9, qr8, 1); + float64x2_t p10 = vfmaq_f64 (p8, p9, vmulq_laneq_f64 (r, qr9, 0)); + p10 = vmulq_laneq_f64 (p10, qr9, 1); /* Compute polynomial in d using pairwise Horner scheme. */ float64x2_t p90 = vfmaq_f64 (p9, d, p10); float64x2_t p78 = vfmaq_f64 (p7, d, p8); float64x2_t p56 = vfmaq_f64 (p5, d, p6); float64x2_t p34 = vfmaq_f64 (p3, d, p4); float64x2_t p12 = vfmaq_f64 (p1, d, p2); float64x2_t y = vfmaq_f64 (p78, d2, p90); y = vfmaq_f64 (p56, d2, y); y = vfmaq_f64 (p34, d2, y); y = vfmaq_f64 (p12, d2, y); y = vfmsq_f64 (e.erfc, e.scale, vfmsq_f64 (d, d2, y)); /* Offset equals 2.0 if sign, else 0.0. */ uint64x2_t sign = vshrq_n_u64 (vreinterpretq_u64_f64 (x), 63); float64x2_t off = vreinterpretq_f64_u64 (vshlq_n_u64 (sign, 62)); /* Copy sign and scale back in a single fma. Since the bit patterns do not overlap, then logical or and addition are equivalent here. 
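The recurrence quoted in the erfc comment is what lets p6..p10 come out of five fma/multiply pairs instead of five stored polynomials. A scalar sketch showing that the qr5..qr9 pairs are exactly Q_i = (i+1)/i and R_i = -2i/((i+1)(i+2)); the helper name and output layout are illustrative:

/* p_{i+1} = (p_{i-1} + r * Q_i * p_i) * R_i for i = 5..9, yielding
   p6..p10 in p[0..4].  */
static void
erfc_tail_model (double r, double p4, double p5, double p[5])
{
  double prev = p4, cur = p5;
  for (int i = 5; i <= 9; i++)
    {
      double q = (i + 1.0) / i;                      /* Q_i.  */
      double s = -2.0 * i / ((i + 1.0) * (i + 2.0)); /* R_i.  */
      p[i - 5] = (prev + r * q * cur) * s;
      prev = cur;
      cur = p[i - 5];
    }
}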
*/ float64x2_t fac = vreinterpretq_f64_u64 ( vsraq_n_u64 (vshlq_n_u64 (sign, 63), dat->table_scale, 1)); #if WANT_SIMD_EXCEPT if (unlikely (v_any_u64 (cmp))) return special_case (xm, vfmaq_f64 (off, fac, y), cmp); #endif return vfmaq_f64 (off, fac, y); } -PL_SIG (V, D, 1, erfc, -6.0, 28.0) -PL_TEST_ULP (V_NAME_D1 (erfc), 1.21) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (erfc), 0, 0x1p-26, 40000) -PL_TEST_INTERVAL (V_NAME_D1 (erfc), 0x1p-26, 28.0, 40000) -PL_TEST_INTERVAL (V_NAME_D1 (erfc), -0x1p-26, -6.0, 40000) -PL_TEST_INTERVAL (V_NAME_D1 (erfc), 28.0, inf, 40000) -PL_TEST_INTERVAL (V_NAME_D1 (erfc), -6.0, -inf, 40000) +TEST_SIG (V, D, 1, erfc, -6.0, 28.0) +TEST_ULP (V_NAME_D1 (erfc), 1.21) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (erfc), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (erfc), 0, 0x1p-26, 40000) +TEST_INTERVAL (V_NAME_D1 (erfc), 0x1p-26, 28.0, 40000) +TEST_INTERVAL (V_NAME_D1 (erfc), -0x1p-26, -6.0, 40000) +TEST_INTERVAL (V_NAME_D1 (erfc), 28.0, inf, 40000) +TEST_INTERVAL (V_NAME_D1 (erfc), -6.0, -inf, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/v_erfcf_1u7.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/erfcf.c similarity index 76% rename from contrib/arm-optimized-routines/pl/math/v_erfcf_1u7.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/erfcf.c index c361d0704438..f420439ef8a3 100644 --- a/contrib/arm-optimized-routines/pl/math/v_erfcf_1u7.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/erfcf.c @@ -1,166 +1,174 @@ /* * Single-precision vector erfc(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { uint32x4_t offset, table_scale; float32x4_t max, shift; - float32x4_t coeffs, third, two_over_five, tenth; + float coeffs[4]; + float32x4_t third, two_over_five, tenth; #if WANT_SIMD_EXCEPT float32x4_t uflow_bound; #endif } data = { /* Set an offset so the range of the index used for lookup is 644, and it can be clamped using a saturated add. */ .offset = V4 (0xb7fffd7b), /* 0xffffffff - asuint(shift) - 644. */ .table_scale = V4 (0x28000000 << 1), /* asuint (2^-47) << 1. */ .max = V4 (10.0625f), /* 10 + 1/16 = 644/64. */ .shift = V4 (0x1p17f), /* Store 1/3, 2/3 and 2/15 in a single register for use with indexed muls and fmas. */ - .coeffs = (float32x4_t){ 0x1.555556p-2f, 0x1.555556p-1f, 0x1.111112p-3f, 0 }, + .coeffs = { 0x1.555556p-2f, 0x1.555556p-1f, 0x1.111112p-3f, 0 }, .third = V4 (0x1.555556p-2f), .two_over_five = V4 (-0x1.99999ap-2f), .tenth = V4 (-0x1.99999ap-4f), #if WANT_SIMD_EXCEPT .uflow_bound = V4 (0x1.2639cp+3f), #endif }; #define TinyBound 0x41000000 /* 0x1p-62f << 1. */ #define Thres 0xbe000000 /* asuint(infinity) << 1 - TinyBound. */ #define Off 0xfffffd7b /* 0xffffffff - 644. 
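The closing lines of erfc above fold three operations into one fma: undoing the 2^128 table scaling, applying the sign, and adding the offset from erfc(-x) = 2 - erfc(x). A scalar model with the same constants, selecting by comparison rather than sign-bit shifts and ignoring signed zero:

#include <math.h>

/* y_scaled approximates erfc(|x|) * 2^128, as read from the table.  */
static double
erfc_finish_model (double x, double y_scaled)
{
  double off = x < 0 ? 2.0 : 0.0;            /* vshlq_n_u64 (sign, 62).  */
  double fac = x < 0 ? -0x1p-128 : 0x1p-128; /* sign plus descale.  */
  return fma (fac, y_scaled, off);
}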
*/ struct entry { float32x4_t erfc; float32x4_t scale; }; static inline struct entry lookup (uint32x4_t i) { struct entry e; - float64_t t0 = *((float64_t *) (__erfcf_data.tab - Off + i[0])); - float64_t t1 = *((float64_t *) (__erfcf_data.tab - Off + i[1])); - float64_t t2 = *((float64_t *) (__erfcf_data.tab - Off + i[2])); - float64_t t3 = *((float64_t *) (__erfcf_data.tab - Off + i[3])); - float32x4_t e1 = vreinterpretq_f32_f64 ((float64x2_t){ t0, t1 }); - float32x4_t e2 = vreinterpretq_f32_f64 ((float64x2_t){ t2, t3 }); + float32x2_t t0 + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 0) - Off].erfc); + float32x2_t t1 + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 1) - Off].erfc); + float32x2_t t2 + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 2) - Off].erfc); + float32x2_t t3 + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 3) - Off].erfc); + float32x4_t e1 = vcombine_f32 (t0, t1); + float32x4_t e2 = vcombine_f32 (t2, t3); e.erfc = vuzp1q_f32 (e1, e2); e.scale = vuzp2q_f32 (e1, e2); return e; } #if WANT_SIMD_EXCEPT static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) { return v_call_f32 (erfcf, x, y, cmp); } #endif /* Optimized single-precision vector erfcf(x). Approximation based on series expansion near x rounded to nearest multiple of 1/64. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erfc(x) ~ erfc(r) - scale * d * poly(r, d), with poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3 + (2/15 r^4 - 2/5 r^2 + 1/10) d^4 Values of erfc(r) and scale are read from lookup tables. Stored values are scaled to avoid hitting the subnormal range. Note that for x < 0, erfc(x) = 2.0 - erfc(-x). Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0). _ZGVnN4v_erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120 want 0x1.f51216p-120. */ -VPCS_ATTR -float32x4_t V_NAME_F1 (erfc) (float32x4_t x) +NOINLINE VPCS_ATTR float32x4_t V_NAME_F1 (erfc) (float32x4_t x) { const struct data *dat = ptr_barrier (&data); #if WANT_SIMD_EXCEPT /* |x| < 2^-62. Avoid fabs by left-shifting by 1. */ uint32x4_t ix = vreinterpretq_u32_f32 (x); uint32x4_t cmp = vcltq_u32 (vaddq_u32 (ix, ix), v_u32 (TinyBound)); /* x >= ~9.19 (into subnormal case and uflow case). Comparison is done in integer domain to avoid raising exceptions in presence of nans. */ uint32x4_t uflow = vcgeq_s32 (vreinterpretq_s32_f32 (x), vreinterpretq_s32_f32 (dat->uflow_bound)); cmp = vorrq_u32 (cmp, uflow); float32x4_t xm = x; /* If any lanes are special, mask them with 0 and retain a copy of x to allow special case handler to fix special lanes later. This is only necessary if fenv exceptions are to be triggered correctly. */ if (unlikely (v_any_u32 (cmp))) x = v_zerofy_f32 (x, cmp); #endif float32x4_t a = vabsq_f32 (x); a = vminq_f32 (a, dat->max); /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 0 and scale to 2/sqrt(pi), when x reduced to r = 0. */ float32x4_t shift = dat->shift; float32x4_t z = vaddq_f32 (a, shift); /* Clamp index to a range of 644. A naive approach would use a subtract and min. Instead we offset the table address and the index, then use a saturating add. */ uint32x4_t i = vqaddq_u32 (vreinterpretq_u32_f32 (z), dat->offset); struct entry e = lookup (i); /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). 
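The clamp comment above is terse, so concretely for the single-precision table: z = a + 0x1p17f leaves the index k in the low bits of asuint(z), and offset = 0xffffffff - asuint(0x1p17f) - 644 = 0xb7fffd7b, so the saturating add pins any k > 644 at 0xffffffff and the later "- Off" subtraction always lands inside the table. A sketch of just that step:

#include <arm_neon.h>

static inline uint32x4_t
clamped_index (float32x4_t z)
{
  /* In range, asuint(shift) + k maps to Off + k; out of range the add
     saturates, a branch-free equivalent of min(k, 644).  */
  const uint32x4_t offset = vdupq_n_u32 (0xb7fffd7b);
  return vqaddq_u32 (vreinterpretq_u32_f32 (z), offset);
}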
*/ float32x4_t r = vsubq_f32 (z, shift); float32x4_t d = vsubq_f32 (a, r); float32x4_t d2 = vmulq_f32 (d, d); float32x4_t r2 = vmulq_f32 (r, r); float32x4_t p1 = r; - float32x4_t p2 = vfmsq_laneq_f32 (dat->third, r2, dat->coeffs, 1); + float32x4_t coeffs = vld1q_f32 (dat->coeffs); + float32x4_t p2 = vfmsq_laneq_f32 (dat->third, r2, coeffs, 1); float32x4_t p3 - = vmulq_f32 (r, vfmaq_laneq_f32 (v_f32 (-0.5), r2, dat->coeffs, 0)); - float32x4_t p4 = vfmaq_laneq_f32 (dat->two_over_five, r2, dat->coeffs, 2); + = vmulq_f32 (r, vfmaq_laneq_f32 (v_f32 (-0.5), r2, coeffs, 0)); + float32x4_t p4 = vfmaq_laneq_f32 (dat->two_over_five, r2, coeffs, 2); p4 = vfmsq_f32 (dat->tenth, r2, p4); float32x4_t y = vfmaq_f32 (p3, d, p4); y = vfmaq_f32 (p2, d, y); y = vfmaq_f32 (p1, d, y); y = vfmsq_f32 (e.erfc, e.scale, vfmsq_f32 (d, d2, y)); /* Offset equals 2.0f if sign, else 0.0f. */ uint32x4_t sign = vshrq_n_u32 (vreinterpretq_u32_f32 (x), 31); float32x4_t off = vreinterpretq_f32_u32 (vshlq_n_u32 (sign, 30)); /* Copy sign and scale back in a single fma. Since the bit patterns do not overlap, then logical or and addition are equivalent here. */ float32x4_t fac = vreinterpretq_f32_u32 ( vsraq_n_u32 (vshlq_n_u32 (sign, 31), dat->table_scale, 1)); #if WANT_SIMD_EXCEPT if (unlikely (v_any_u32 (cmp))) return special_case (xm, vfmaq_f32 (off, fac, y), cmp); #endif return vfmaq_f32 (off, fac, y); } -PL_SIG (V, F, 1, erfc, -4.0, 10.0) -PL_TEST_ULP (V_NAME_F1 (erfc), 1.14) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (erfc), 0, 0x1p-26, 40000) -PL_TEST_INTERVAL (V_NAME_F1 (erfc), 0x1p-26, 10.0625, 40000) -PL_TEST_INTERVAL (V_NAME_F1 (erfc), -0x1p-26, -4.0, 40000) -PL_TEST_INTERVAL (V_NAME_F1 (erfc), 10.0625, inf, 40000) -PL_TEST_INTERVAL (V_NAME_F1 (erfc), -4.0, -inf, 40000) +HALF_WIDTH_ALIAS_F1 (erfc) + +TEST_SIG (V, F, 1, erfc, -4.0, 10.0) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (erfc), WANT_SIMD_EXCEPT) +TEST_ULP (V_NAME_F1 (erfc), 1.14) +TEST_SYM_INTERVAL (V_NAME_F1 (erfc), 0, 0x1p-26, 40000) +TEST_INTERVAL (V_NAME_F1 (erfc), 0x1p-26, 10.0625, 40000) +TEST_INTERVAL (V_NAME_F1 (erfc), -0x1p-26, -4.0, 40000) +TEST_INTERVAL (V_NAME_F1 (erfc), 10.0625, inf, 40000) +TEST_INTERVAL (V_NAME_F1 (erfc), -4.0, -inf, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/v_erff_2u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/erff.c similarity index 76% rename from contrib/arm-optimized-routines/pl/math/v_erff_2u.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/erff.c index 502526407df2..508bc4c2f5e2 100644 --- a/contrib/arm-optimized-routines/pl/math/v_erff_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/erff.c @@ -1,118 +1,120 @@ /* * Single-precision vector erf(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float32x4_t max, shift, third; #if WANT_SIMD_EXCEPT float32x4_t tiny_bound, scale_minus_one; #endif } data = { .max = V4 (3.9375), /* 4 - 8/128. */ .shift = V4 (0x1p16f), .third = V4 (0x1.555556p-2f), /* 1/3. */ #if WANT_SIMD_EXCEPT .tiny_bound = V4 (0x1p-62f), .scale_minus_one = V4 (0x1.06eba8p-3f), /* scale - 1.0. 
*/ #endif }; #define AbsMask 0x7fffffff struct entry { float32x4_t erf; float32x4_t scale; }; static inline struct entry lookup (uint32x4_t i) { struct entry e; - float64_t t0 = *((float64_t *) (__erff_data.tab + i[0])); - float64_t t1 = *((float64_t *) (__erff_data.tab + i[1])); - float64_t t2 = *((float64_t *) (__erff_data.tab + i[2])); - float64_t t3 = *((float64_t *) (__erff_data.tab + i[3])); - float32x4_t e1 = vreinterpretq_f32_f64 ((float64x2_t){ t0, t1 }); - float32x4_t e2 = vreinterpretq_f32_f64 ((float64x2_t){ t2, t3 }); + float32x2_t t0 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 0)].erf); + float32x2_t t1 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 1)].erf); + float32x2_t t2 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 2)].erf); + float32x2_t t3 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 3)].erf); + float32x4_t e1 = vcombine_f32 (t0, t1); + float32x4_t e2 = vcombine_f32 (t2, t3); e.erf = vuzp1q_f32 (e1, e2); e.scale = vuzp2q_f32 (e1, e2); return e; } /* Single-precision implementation of vector erf(x). Approximation based on series expansion near x rounded to nearest multiple of 1/128. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erf(x) ~ erf(r) + scale * d * [1 - r * d - 1/3 * d^2] Values of erf(r) and scale are read from lookup tables. For |x| > 3.9375, erf(|x|) rounds to 1.0f. Maximum error: 1.93 ULP _ZGVnN4v_erff(0x1.c373e6p-9) got 0x1.fd686cp-9 want 0x1.fd6868p-9. */ -float32x4_t VPCS_ATTR V_NAME_F1 (erf) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (erf) (float32x4_t x) { const struct data *dat = ptr_barrier (&data); #if WANT_SIMD_EXCEPT /* |x| < 2^-62. */ uint32x4_t cmp = vcaltq_f32 (x, dat->tiny_bound); float32x4_t xm = x; /* If any lanes are special, mask them with 1 and retain a copy of x to allow special case handler to fix special lanes later. This is only necessary if fenv exceptions are to be triggered correctly. */ if (unlikely (v_any_u32 (cmp))) x = vbslq_f32 (cmp, v_f32 (1), x); #endif float32x4_t a = vabsq_f32 (x); uint32x4_t a_gt_max = vcgtq_f32 (a, dat->max); /* Lookup erf(r) and scale(r) in tables, e.g. set erf(r) to 0 and scale to 2/sqrt(pi), when x reduced to r = 0. */ float32x4_t shift = dat->shift; float32x4_t z = vaddq_f32 (a, shift); uint32x4_t i = vsubq_u32 (vreinterpretq_u32_f32 (z), vreinterpretq_u32_f32 (shift)); i = vminq_u32 (i, v_u32 (512)); struct entry e = lookup (i); float32x4_t r = vsubq_f32 (z, shift); /* erf(x) ~ erf(r) + scale * d * (1 - r * d - 1/3 * d^2). */ float32x4_t d = vsubq_f32 (a, r); float32x4_t d2 = vmulq_f32 (d, d); float32x4_t y = vfmaq_f32 (r, dat->third, d); y = vfmaq_f32 (e.erf, e.scale, vfmsq_f32 (d, d2, y)); /* Solves the |x| = inf case. */ y = vbslq_f32 (a_gt_max, v_f32 (1.0f), y); /* Copy sign. 
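The rewritten lookups above load (erf, scale) pairs with plain vld1_f32 instead of type-punning through float64_t, then separate them with unzip instructions. A sketch of just the de-interleave step; the struct name is illustrative:

#include <arm_neon.h>

typedef struct { float32x4_t erf, scale; } pair_t;

static inline pair_t
deinterleave (float32x2_t t0, float32x2_t t1, float32x2_t t2,
              float32x2_t t3)
{
  float32x4_t e1 = vcombine_f32 (t0, t1); /* erf0 s0 erf1 s1.  */
  float32x4_t e2 = vcombine_f32 (t2, t3); /* erf2 s2 erf3 s3.  */
  pair_t e = { vuzp1q_f32 (e1, e2),       /* even lanes: erf.  */
               vuzp2q_f32 (e1, e2) };     /* odd lanes: scale.  */
  return e;
}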
*/ y = vbslq_f32 (v_u32 (AbsMask), y, x); #if WANT_SIMD_EXCEPT if (unlikely (v_any_u32 (cmp))) return vbslq_f32 (cmp, vfmaq_f32 (xm, dat->scale_minus_one, xm), y); #endif return y; } -PL_SIG (V, F, 1, erf, -4.0, 4.0) -PL_TEST_ULP (V_NAME_F1 (erf), 1.43) -PL_TEST_EXPECT_FENV (V_NAME_F1 (erf), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (erf), 0, 3.9375, 40000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (erf), 3.9375, inf, 40000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (erf), 0, inf, 40000) +HALF_WIDTH_ALIAS_F1 (erf) + +TEST_SIG (V, F, 1, erf, -4.0, 4.0) +TEST_ULP (V_NAME_F1 (erf), 1.43) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (erf), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (erf), 0, 3.9375, 40000) +TEST_SYM_INTERVAL (V_NAME_F1 (erf), 3.9375, inf, 40000) +TEST_SYM_INTERVAL (V_NAME_F1 (erf), 0, inf, 40000) diff --git a/contrib/arm-optimized-routines/math/aarch64/v_exp.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp.c similarity index 90% rename from contrib/arm-optimized-routines/math/aarch64/v_exp.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/exp.c index bc5609faf4fc..a928c35c9418 100644 --- a/contrib/arm-optimized-routines/math/aarch64/v_exp.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp.c @@ -1,125 +1,134 @@ /* * Double-precision vector e^x function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #include "v_math.h" +#include "test_defs.h" +#include "test_sig.h" #define N (1 << V_EXP_TABLE_BITS) #define IndexMask (N - 1) const static volatile struct { float64x2_t poly[3]; float64x2_t inv_ln2, ln2_hi, ln2_lo, shift; #if !WANT_SIMD_EXCEPT float64x2_t special_bound, scale_thresh; #endif } data = { /* maxerr: 1.88 +0.5 ulp rel error: 1.4337*2^-53 abs error: 1.4299*2^-53 in [ -ln2/256, ln2/256 ]. */ .poly = { V2 (0x1.ffffffffffd43p-2), V2 (0x1.55555c75adbb2p-3), V2 (0x1.55555da646206p-5) }, #if !WANT_SIMD_EXCEPT .scale_thresh = V2 (163840.0), /* 1280.0 * N. */ .special_bound = V2 (704.0), #endif .inv_ln2 = V2 (0x1.71547652b82fep7), /* N/ln2. */ .ln2_hi = V2 (0x1.62e42fefa39efp-8), /* ln2/N. */ .ln2_lo = V2 (0x1.abc9e3b39803f3p-63), .shift = V2 (0x1.8p+52) }; #define C(i) data.poly[i] #define Tab __v_exp_data #if WANT_SIMD_EXCEPT # define TinyBound v_u64 (0x2000000000000000) /* asuint64 (0x1p-511). */ # define BigBound v_u64 (0x4080000000000000) /* asuint64 (0x1p9). */ # define SpecialBound v_u64 (0x2080000000000000) /* BigBound - TinyBound. */ static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) { /* If fenv exceptions are to be triggered correctly, fall back to the scalar routine to special lanes. */ return v_call_f64 (exp, x, y, cmp); } #else # define SpecialOffset v_u64 (0x6000000000000000) /* 0x1p513. */ /* SpecialBias1 + SpecialBias1 = asuint(1.0). */ # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */ # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */ static inline float64x2_t VPCS_ATTR special_case (float64x2_t s, float64x2_t y, float64x2_t n) { /* 2^(n/N) may overflow, break it up into s1*s2. 
*/ uint64x2_t b = vandq_u64 (vcltzq_f64 (n), SpecialOffset); float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (SpecialBias1, b)); float64x2_t s2 = vreinterpretq_f64_u64 ( vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), SpecialBias2), b)); uint64x2_t cmp = vcagtq_f64 (n, data.scale_thresh); float64x2_t r1 = vmulq_f64 (s1, s1); float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, y, s2), s1); return vbslq_f64 (cmp, r1, r0); } #endif float64x2_t VPCS_ATTR V_NAME_D1 (exp) (float64x2_t x) { float64x2_t n, r, r2, s, y, z; uint64x2_t cmp, u, e; #if WANT_SIMD_EXCEPT /* If any lanes are special, mask them with 1 and retain a copy of x to allow special_case to fix special lanes later. This is only necessary if fenv exceptions are to be triggered correctly. */ float64x2_t xm = x; uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); cmp = vcgeq_u64 (vsubq_u64 (iax, TinyBound), SpecialBound); if (unlikely (v_any_u64 (cmp))) x = vbslq_f64 (cmp, v_f64 (1), x); #else cmp = vcagtq_f64 (x, data.special_bound); #endif /* n = round(x/(ln2/N)). */ z = vfmaq_f64 (data.shift, x, data.inv_ln2); u = vreinterpretq_u64_f64 (z); n = vsubq_f64 (z, data.shift); /* r = x - n*ln2/N. */ r = x; r = vfmsq_f64 (r, data.ln2_hi, n); r = vfmsq_f64 (r, data.ln2_lo, n); e = vshlq_n_u64 (u, 52 - V_EXP_TABLE_BITS); /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4. */ r2 = vmulq_f64 (r, r); y = vfmaq_f64 (C (0), C (1), r); y = vfmaq_f64 (y, C (2), r2); y = vfmaq_f64 (r, y, r2); /* s = 2^(n/N). */ u = (uint64x2_t){ Tab[u[0] & IndexMask], Tab[u[1] & IndexMask] }; s = vreinterpretq_f64_u64 (vaddq_u64 (u, e)); if (unlikely (v_any_u64 (cmp))) #if WANT_SIMD_EXCEPT return special_case (xm, vfmaq_f64 (s, y, s), cmp); #else return special_case (s, y, n); #endif return vfmaq_f64 (s, y, s); } + +TEST_SIG (V, D, 1, exp, -9.9, 9.9) +TEST_ULP (V_NAME_D1 (exp), 1.9) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (exp), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_D1 (exp), 0, 0xffff000000000000, 10000) +TEST_SYM_INTERVAL (V_NAME_D1 (exp), 0x1p-6, 0x1p6, 400000) +TEST_SYM_INTERVAL (V_NAME_D1 (exp), 633.3, 733.3, 10000) diff --git a/contrib/arm-optimized-routines/pl/math/v_exp10_2u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp10.c similarity index 89% rename from contrib/arm-optimized-routines/pl/math/v_exp10_2u.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/exp10.c index 29072a60fb3a..24fdd1c7d257 100644 --- a/contrib/arm-optimized-routines/pl/math/v_exp10_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp10.c @@ -1,144 +1,147 @@ /* * Double-precision vector 10^x function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +#define _GNU_SOURCE #include "mathlib.h" #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" /* Value of |x| above which scale overflows without special treatment. */ #define SpecialBound 306.0 /* floor (log10 (2^1023)) - 1. */ /* Value of n above which scale overflows even with special treatment. */ #define ScaleBound 163840.0 /* 1280.0 * N. */ const static struct data { float64x2_t poly[4]; float64x2_t log10_2, log2_10_hi, log2_10_lo, shift; #if !WANT_SIMD_EXCEPT float64x2_t special_bound, scale_thresh; #endif } data = { /* Coefficients generated using Remez algorithm. rel error: 0x1.5ddf8f28p-54 abs error: 0x1.5ed266c8p-54 in [ -log10(2)/256, log10(2)/256 ] maxerr: 1.14432 +0.5 ulp. 
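The "s = 2^(n/N)" step above relies on a bit trick: after the 0x1.8p52 shift, the integer n sits in the low bits of u, n mod N picks the table entry, and n << (52 - V_EXP_TABLE_BITS) adds the remaining exponent in integer arithmetic (the index part of that shift is pre-compensated in the table entries). A scalar sketch, assuming a __v_exp_data-style table of uint64_t entries:

#include <stdint.h>
#include <string.h>

static double
scale_from_table (const uint64_t *tab, int table_bits, int64_t n)
{
  /* The scalar equivalent of vshlq_n_u64 (u, 52 - bits) plus the
     table add.  */
  uint64_t e = (uint64_t) n << (52 - table_bits);
  uint64_t u = tab[n & ((1 << table_bits) - 1)] + e;
  double s;
  memcpy (&s, &u, sizeof s); /* scalar vreinterpretq_f64_u64.  */
  return s;
}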
*/ .poly = { V2 (0x1.26bb1bbb5524p1), V2 (0x1.53524c73cecdap1), V2 (0x1.047060efb781cp1), V2 (0x1.2bd76040f0d16p0) }, .log10_2 = V2 (0x1.a934f0979a371p8), /* N/log10(2). */ .log2_10_hi = V2 (0x1.34413509f79ffp-9), /* log10(2)/N. */ .log2_10_lo = V2 (-0x1.9dc1da994fd21p-66), .shift = V2 (0x1.8p+52), #if !WANT_SIMD_EXCEPT .scale_thresh = V2 (ScaleBound), .special_bound = V2 (SpecialBound), #endif }; #define N (1 << V_EXP_TABLE_BITS) #define IndexMask v_u64 (N - 1) #if WANT_SIMD_EXCEPT # define TinyBound v_u64 (0x2000000000000000) /* asuint64 (0x1p-511). */ # define BigBound v_u64 (0x4070000000000000) /* asuint64 (0x1p8). */ # define Thres v_u64 (0x2070000000000000) /* BigBound - TinyBound. */ static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) { /* If fenv exceptions are to be triggered correctly, fall back to the scalar routine for special lanes. */ return v_call_f64 (exp10, x, y, cmp); } #else # define SpecialOffset v_u64 (0x6000000000000000) /* 0x1p513. */ /* SpecialBias1 - SpecialBias2 = asuint(1.0). */ # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */ # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */ static inline float64x2_t VPCS_ATTR special_case (float64x2_t s, float64x2_t y, float64x2_t n, const struct data *d) { /* 2^(n/N) may overflow, break it up into s1*s2. */ uint64x2_t b = vandq_u64 (vcltzq_f64 (n), SpecialOffset); float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (SpecialBias1, b)); float64x2_t s2 = vreinterpretq_f64_u64 ( vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), SpecialBias2), b)); uint64x2_t cmp = vcagtq_f64 (n, d->scale_thresh); float64x2_t r1 = vmulq_f64 (s1, s1); float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, y, s2), s1); return vbslq_f64 (cmp, r1, r0); } #endif /* Fast vector implementation of exp10. Maximum measured error is 1.64 ulp. _ZGVnN2v_exp10(0x1.ccd1c9d82cc8cp+0) got 0x1.f8dab6d7fed0cp+5 want 0x1.f8dab6d7fed0ap+5. */ float64x2_t VPCS_ATTR V_NAME_D1 (exp10) (float64x2_t x) { const struct data *d = ptr_barrier (&data); uint64x2_t cmp; #if WANT_SIMD_EXCEPT /* If any lanes are special, mask them with 1 and retain a copy of x to allow special_case to fix special lanes later. This is only necessary if fenv exceptions are to be triggered correctly. */ float64x2_t xm = x; uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); cmp = vcgeq_u64 (vsubq_u64 (iax, TinyBound), Thres); if (unlikely (v_any_u64 (cmp))) x = vbslq_f64 (cmp, v_f64 (1), x); #else cmp = vcageq_f64 (x, d->special_bound); #endif /* n = round(x/(log10(2)/N)). */ float64x2_t z = vfmaq_f64 (d->shift, x, d->log10_2); uint64x2_t u = vreinterpretq_u64_f64 (z); float64x2_t n = vsubq_f64 (z, d->shift); /* r = x - n*log10(2)/N. */ float64x2_t r = x; r = vfmsq_f64 (r, d->log2_10_hi, n); r = vfmsq_f64 (r, d->log2_10_lo, n); uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TABLE_BITS); uint64x2_t i = vandq_u64 (u, IndexMask); /* y = exp10(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4. */ float64x2_t r2 = vmulq_f64 (r, r); float64x2_t p = vfmaq_f64 (d->poly[0], r, d->poly[1]); float64x2_t y = vfmaq_f64 (d->poly[2], r, d->poly[3]); p = vfmaq_f64 (p, y, r2); y = vmulq_f64 (r, p); /* s = 2^(n/N).
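   Writing the scaled integer held in u as n = k*N + i with 0 <= i < N gives
   2^(n/N) = 2^k * 2^(i/N): the low V_EXP_TABLE_BITS bits select the table
   entry and the bits above them, shifted into the exponent field, supply
   2^k. A scalar rendering of the lookup below (a sketch; it assumes, as the
   single add implies, that the table entries carry a matching bias):

     uint64_t e = u << (52 - V_EXP_TABLE_BITS);
     uint64_t s_bits = __v_exp_data[u & (N - 1)] + e;
     double s = asdouble (s_bits);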
*/ u = v_lookup_u64 (__v_exp_data, i); float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e)); if (unlikely (v_any_u64 (cmp))) #if WANT_SIMD_EXCEPT return special_case (xm, vfmaq_f64 (s, y, s), cmp); #else return special_case (s, y, n, d); #endif return vfmaq_f64 (s, y, s); } -PL_SIG (S, D, 1, exp10, -9.9, 9.9) -PL_SIG (V, D, 1, exp10, -9.9, 9.9) -PL_TEST_ULP (V_NAME_D1 (exp10), 1.15) -PL_TEST_EXPECT_FENV (V_NAME_D1 (exp10), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp10), 0, SpecialBound, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp10), SpecialBound, ScaleBound, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp10), ScaleBound, inf, 10000) +#if WANT_EXP10_TESTS +TEST_SIG (S, D, 1, exp10, -9.9, 9.9) +TEST_SIG (V, D, 1, exp10, -9.9, 9.9) +TEST_ULP (V_NAME_D1 (exp10), 1.15) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (exp10), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (exp10), 0, SpecialBound, 5000) +TEST_SYM_INTERVAL (V_NAME_D1 (exp10), SpecialBound, ScaleBound, 5000) +TEST_SYM_INTERVAL (V_NAME_D1 (exp10), ScaleBound, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/v_exp10f_2u4.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp10f.c similarity index 58% rename from contrib/arm-optimized-routines/pl/math/v_exp10f_2u4.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/exp10f.c index 0e91becfa612..eb0d5dd0d57c 100644 --- a/contrib/arm-optimized-routines/pl/math/v_exp10f_2u4.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp10f.c @@ -1,138 +1,147 @@ /* * Single-precision vector 10^x function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "mathlib.h" +#define _GNU_SOURCE #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_advsimd_f32.h" +#include "test_sig.h" +#include "test_defs.h" +#include "v_poly_f32.h" #define ScaleBound 192.0f static const struct data { - float32x4_t poly[5]; - float32x4_t log10_2_and_inv, shift; - + float32x4_t c0, c1, c3; + float log10_2_high, log10_2_low, c2, c4; + float32x4_t inv_log10_2, special_bound; + uint32x4_t exponent_bias, special_offset, special_bias; #if !WANT_SIMD_EXCEPT float32x4_t scale_thresh; #endif } data = { /* Coefficients generated using Remez algorithm with minimisation of relative error. rel error: 0x1.89dafa3p-24 abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2] maxerr: 1.85943 +0.5 ulp. */ - .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f), - V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) }, - .shift = V4 (0x1.8p23f), - - /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0. */ - .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 }, + .c0 = V4 (0x1.26bb16p+1f), + .c1 = V4 (0x1.5350d2p+1f), + .c2 = 0x1.04744ap+1f, + .c3 = V4 (0x1.2d8176p+0f), + .c4 = 0x1.12b41ap-1f, + .inv_log10_2 = V4 (0x1.a934fp+1), + .log10_2_high = 0x1.344136p-2, + .log10_2_low = 0x1.ec10cp-27, + /* rint (log2 (2^127 / (1 + sqrt (2)))). */ + .special_bound = V4 (126.0f), + .exponent_bias = V4 (0x3f800000), + .special_offset = V4 (0x82000000), + .special_bias = V4 (0x7f000000), #if !WANT_SIMD_EXCEPT .scale_thresh = V4 (ScaleBound) #endif }; -#define ExponentBias v_u32 (0x3f800000) - #if WANT_SIMD_EXCEPT # define SpecialBound 38.0f /* rint(log10(2^127)). */ # define TinyBound v_u32 (0x20000000) /* asuint (0x1p-63). */ # define BigBound v_u32 (0x42180000) /* asuint (SpecialBound). 
*/ # define Thres v_u32 (0x22180000) /* BigBound - TinyBound. */ static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) { /* If fenv exceptions are to be triggered correctly, fall back to the scalar routine for special lanes. */ return v_call_f32 (exp10f, x, y, cmp); } #else -# define SpecialBound 126.0f /* rint (log2 (2^127 / (1 + sqrt (2)))). */ -# define SpecialOffset v_u32 (0x82000000) -# define SpecialBias v_u32 (0x7f000000) +# define SpecialBound 126.0f static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t scale, const struct data *d) { /* 2^n may overflow, break it up into s1*s2. */ - uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset); - float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias)); + uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh); float32x4_t r2 = vmulq_f32 (s1, s1); float32x4_t r1 = vmulq_f32 (vfmaq_f32 (s2, poly, s2), s1); /* Similar to r1 but avoids double rounding in the subnormal range. */ float32x4_t r0 = vfmaq_f32 (scale, poly, scale); float32x4_t r = vbslq_f32 (cmp1, r1, r0); return vbslq_f32 (cmp2, r2, r); } #endif /* Fast vector implementation of single-precision exp10. Algorithm is accurate to 2.36 ULP. _ZGVnN4v_exp10f(0x1.be2b36p+1) got 0x1.7e79c4p+11 want 0x1.7e79cp+11. */ -float32x4_t VPCS_ATTR V_NAME_F1 (exp10) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x) { const struct data *d = ptr_barrier (&data); #if WANT_SIMD_EXCEPT /* asuint(x) - TinyBound >= BigBound - TinyBound. */ uint32x4_t cmp = vcgeq_u32 ( vsubq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (x)), TinyBound), Thres); float32x4_t xm = x; /* If any lanes are special, mask them with 1 and retain a copy of x to allow special case handler to fix special lanes later. This is only necessary if fenv exceptions are to be triggered correctly. */ if (unlikely (v_any_u32 (cmp))) x = v_zerofy_f32 (x, cmp); #endif /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)), with poly(r) in [1/sqrt(2), sqrt(2)] and x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2].
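   In scalar terms the reduction below is, approximately (a sketch; the
   constants are those in the data struct above):

     float n = roundf (x * 0x1.a934fp+1f);
     float r = x - n * 0x1.344136p-2f;
     r = r + n * 0x1.ec10cp-27f;

   a two-constant Cody-Waite scheme: the stored high part slightly exceeds
   log10(2), and adding n times the small difference back in keeps r
   accurate even when n is large.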
*/ - float32x4_t z = vfmaq_laneq_f32 (d->shift, x, d->log10_2_and_inv, 0); - float32x4_t n = vsubq_f32 (z, d->shift); - float32x4_t r = vfmsq_laneq_f32 (x, n, d->log10_2_and_inv, 1); - r = vfmsq_laneq_f32 (r, n, d->log10_2_and_inv, 2); - uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); + float32x4_t log10_2_c24 = vld1q_f32 (&d->log10_2_high); + float32x4_t n = vrndaq_f32 (vmulq_f32 (x, d->inv_log10_2)); + float32x4_t r = vfmsq_laneq_f32 (x, n, log10_2_c24, 0); + r = vfmaq_laneq_f32 (r, n, log10_2_c24, 1); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (n)), 23); - float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias)); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT - uint32x4_t cmp = vcagtq_f32 (n, v_f32 (SpecialBound)); + uint32x4_t cmp = vcagtq_f32 (n, d->special_bound); #endif float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t poly - = vfmaq_f32 (vmulq_f32 (r, d->poly[0]), - v_pairwise_poly_3_f32 (r, r2, d->poly + 1), r2); + float32x4_t p12 = vfmaq_laneq_f32 (d->c1, r, log10_2_c24, 2); + float32x4_t p34 = vfmaq_laneq_f32 (d->c3, r, log10_2_c24, 3); + float32x4_t p14 = vfmaq_f32 (p12, r2, p34); + float32x4_t poly = vfmaq_f32 (vmulq_f32 (r, d->c0), p14, r2); if (unlikely (v_any_u32 (cmp))) #if WANT_SIMD_EXCEPT return special_case (xm, vfmaq_f32 (scale, poly, scale), cmp); #else return special_case (poly, n, e, cmp, scale, d); #endif return vfmaq_f32 (scale, poly, scale); } -PL_SIG (S, F, 1, exp10, -9.9, 9.9) -PL_SIG (V, F, 1, exp10, -9.9, 9.9) -PL_TEST_ULP (V_NAME_F1 (exp10), 1.86) -PL_TEST_EXPECT_FENV (V_NAME_F1 (exp10), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (exp10), 0, SpecialBound, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (exp10), SpecialBound, ScaleBound, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (exp10), ScaleBound, inf, 10000) +HALF_WIDTH_ALIAS_F1 (exp10) + +#if WANT_EXP10_TESTS +TEST_SIG (S, F, 1, exp10, -9.9, 9.9) +TEST_SIG (V, F, 1, exp10, -9.9, 9.9) +TEST_ULP (V_NAME_F1 (exp10), 1.86) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (exp10), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (exp10), 0, SpecialBound, 5000) +TEST_SYM_INTERVAL (V_NAME_F1 (exp10), SpecialBound, ScaleBound, 5000) +TEST_SYM_INTERVAL (V_NAME_F1 (exp10), ScaleBound, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/v_exp2_2u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp2.c similarity index 82% rename from contrib/arm-optimized-routines/pl/math/v_exp2_2u.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/exp2.c index de59779689f5..63448d806b82 100644 --- a/contrib/arm-optimized-routines/pl/math/v_exp2_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp2.c @@ -1,128 +1,128 @@ /* * Double-precision vector 2^x function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "poly_advsimd_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "v_poly_f64.h" +#include "test_sig.h" +#include "test_defs.h" #define N (1 << V_EXP_TABLE_BITS) #define IndexMask (N - 1) #define BigBound 1022.0 #define UOFlowBound 1280.0 +#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ static const struct data { float64x2_t poly[4]; float64x2_t shift, scale_big_bound, scale_uoflow_bound; } data = { /* Coefficients are computed using Remez algorithm with minimisation of the absolute error. 
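   The shift constant stored below doubles as the rounding trick: with
   shift = 0x1.8p52 / N, the sum z = x + shift has ulp exactly 1/N for the
   |x| range accepted here, so subtracting shift again returns x rounded to
   the nearest multiple of 1/N, while round(x*N) sits in the low mantissa
   bits of z. In scalar form (sketch):

     double z = x + 0x1.8p52 / N;
     double n = z - 0x1.8p52 / N;
     uint64_t u = asuint64 (z);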
*/ .poly = { V2 (0x1.62e42fefa3686p-1), V2 (0x1.ebfbdff82c241p-3), V2 (0x1.c6b09b16de99ap-5), V2 (0x1.3b2abf5571ad8p-7) }, .shift = V2 (0x1.8p52 / N), .scale_big_bound = V2 (BigBound), .scale_uoflow_bound = V2 (UOFlowBound), }; static inline uint64x2_t lookup_sbits (uint64x2_t i) { return (uint64x2_t){ __v_exp_data[i[0] & IndexMask], __v_exp_data[i[1] & IndexMask] }; } #if WANT_SIMD_EXCEPT -# define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ # define Thres 0x2080000000000000 /* asuint64(512.0) - TinyBound. */ /* Call scalar exp2 as a fallback. */ static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special) { return v_call_f64 (exp2, x, y, is_special); } #else # define SpecialOffset 0x6000000000000000 /* 0x1p513. */ /* SpecialBias1 - SpecialBias2 = asuint(1.0). */ # define SpecialBias1 0x7000000000000000 /* 0x1p769. */ # define SpecialBias2 0x3010000000000000 /* 0x1p-254. */ static inline float64x2_t VPCS_ATTR special_case (float64x2_t s, float64x2_t y, float64x2_t n, const struct data *d) { /* 2^n may overflow, break it up into s1*s2. */ uint64x2_t b = vandq_u64 (vclezq_f64 (n), v_u64 (SpecialOffset)); float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (v_u64 (SpecialBias1), b)); - float64x2_t s2 = vreinterpretq_f64_u64 ( - vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), v_u64 (SpecialBias2)), b)); + float64x2_t s2 = vreinterpretq_f64_u64 (vaddq_u64 ( + vsubq_u64 (vreinterpretq_u64_f64 (s), v_u64 (SpecialBias2)), b)); uint64x2_t cmp = vcagtq_f64 (n, d->scale_uoflow_bound); float64x2_t r1 = vmulq_f64 (s1, s1); float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, s2, y), s1); return vbslq_f64 (cmp, r1, r0); } #endif /* Fast vector implementation of exp2. Maximum measured error is 1.65 ulp. _ZGVnN2v_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1 want 0x1.f8db0d4df721dp-1. */ VPCS_ATTR float64x2_t V_NAME_D1 (exp2) (float64x2_t x) { const struct data *d = ptr_barrier (&data); uint64x2_t cmp; #if WANT_SIMD_EXCEPT uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres)); /* Mask special lanes and retain a copy of x for passing to special-case handler. */ float64x2_t xc = x; x = v_zerofy_f64 (x, cmp); #else cmp = vcagtq_f64 (x, d->scale_big_bound); #endif /* n = round(x*N)/N. */ float64x2_t z = vaddq_f64 (d->shift, x); uint64x2_t u = vreinterpretq_u64_f64 (z); float64x2_t n = vsubq_f64 (z, d->shift); /* r = x - n. */ float64x2_t r = vsubq_f64 (x, n); /* s = 2^n. */ uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TABLE_BITS); u = lookup_sbits (u); float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e)); /* y ~ exp2(r) - 1.
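   v_pairwise_poly_3_f64 evaluates the cubic in independent pairs so the
   fmas at the same depth can execute in parallel; a scalar equivalent
   (sketch):

     double p01 = fma (r, poly[1], poly[0]);
     double p23 = fma (r, poly[3], poly[2]);
     double p = fma (r2, p23, p01);

   after which y = r * p approximates exp2(r) - 1.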
*/ float64x2_t r2 = vmulq_f64 (r, r); float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly); y = vmulq_f64 (r, y); if (unlikely (v_any_u64 (cmp))) #if !WANT_SIMD_EXCEPT return special_case (s, y, n, d); #else return special_case (xc, vfmaq_f64 (s, s, y), cmp); #endif return vfmaq_f64 (s, s, y); } -PL_SIG (V, D, 1, exp2, -9.9, 9.9) -PL_TEST_ULP (V_NAME_D1 (exp2), 1.15) -PL_TEST_EXPECT_FENV (V_NAME_D1 (exp2), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), 0, TinyBound, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), TinyBound, BigBound, 10000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), BigBound, UOFlowBound, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), UOFlowBound, inf, 10000) +TEST_SIG (V, D, 1, exp2, -9.9, 9.9) +TEST_ULP (V_NAME_D1 (exp2), 1.15) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (exp2), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (exp2), 0, TinyBound, 5000) +TEST_SYM_INTERVAL (V_NAME_D1 (exp2), TinyBound, BigBound, 10000) +TEST_SYM_INTERVAL (V_NAME_D1 (exp2), BigBound, UOFlowBound, 5000) +TEST_SYM_INTERVAL (V_NAME_D1 (exp2), UOFlowBound, inf, 10000) diff --git a/contrib/arm-optimized-routines/math/aarch64/v_exp2f.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp2f.c similarity index 58% rename from contrib/arm-optimized-routines/math/aarch64/v_exp2f.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/exp2f.c index e402205e98e6..40f6170d3702 100644 --- a/contrib/arm-optimized-routines/math/aarch64/v_exp2f.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp2f.c @@ -1,113 +1,122 @@ /* * Single-precision vector 2^x function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "mathlib.h" #include "v_math.h" +#include "test_defs.h" +#include "test_sig.h" static const struct data { - float32x4_t poly[5]; - uint32x4_t exponent_bias; + float32x4_t c1, c3; + uint32x4_t exponent_bias, special_offset, special_bias; #if !WANT_SIMD_EXCEPT - float32x4_t special_bound, scale_thresh; + float32x4_t scale_thresh, special_bound; #endif + float c0, c2, c4, zero; } data = { /* maxerr: 1.962 ulp. */ - .poly = { V4 (0x1.59977ap-10f), V4 (0x1.3ce9e4p-7f), V4 (0x1.c6bd32p-5f), - V4 (0x1.ebf9bcp-3f), V4 (0x1.62e422p-1f) }, + .c0 = 0x1.59977ap-10f, + .c1 = V4 (0x1.3ce9e4p-7f), + .c2 = 0x1.c6bd32p-5f, + .c3 = V4 (0x1.ebf9bcp-3f), + .c4 = 0x1.62e422p-1f, .exponent_bias = V4 (0x3f800000), + .special_offset = V4 (0x82000000), + .special_bias = V4 (0x7f000000), #if !WANT_SIMD_EXCEPT .special_bound = V4 (126.0f), .scale_thresh = V4 (192.0f), #endif }; -#define C(i) d->poly[i] - #if WANT_SIMD_EXCEPT # define TinyBound v_u32 (0x20000000) /* asuint (0x1p-63). */ # define BigBound v_u32 (0x42800000) /* asuint (0x1p6). */ # define SpecialBound v_u32 (0x22800000) /* BigBound - TinyBound. */ static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) { /* If fenv exceptions are to be triggered correctly, fall back to the scalar routine for special lanes. */ return v_call_f32 (exp2f, x, y, cmp); } #else -# define SpecialOffset v_u32 (0x82000000) -# define SpecialBias v_u32 (0x7f000000) - static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t scale, const struct data *d) { /* 2^n may overflow, break it up into s1*s2. 
*/ - uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset); - float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias)); + uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh); float32x4_t r2 = vmulq_f32 (s1, s1); float32x4_t r1 = vmulq_f32 (vfmaq_f32 (s2, poly, s2), s1); /* Similar to r1 but avoids double rounding in the subnormal range. */ float32x4_t r0 = vfmaq_f32 (scale, poly, scale); float32x4_t r = vbslq_f32 (cmp1, r1, r0); return vbslq_f32 (cmp2, r2, r); } #endif -float32x4_t VPCS_ATTR V_NAME_F1 (exp2) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp2) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, r, r2, scale, p, q, poly; - uint32x4_t cmp, e; #if WANT_SIMD_EXCEPT /* asuint(|x|) - TinyBound >= BigBound - TinyBound. */ uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x)); - cmp = vcgeq_u32 (vsubq_u32 (ia, TinyBound), SpecialBound); + uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (ia, TinyBound), SpecialBound); float32x4_t xm = x; /* If any lanes are special, mask them with 1 and retain a copy of x to allow special_case to fix special lanes later. This is only necessary if fenv exceptions are to be triggered correctly. */ if (unlikely (v_any_u32 (cmp))) x = vbslq_f32 (cmp, v_f32 (1), x); #endif - /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = n + r, with r in [-1/2, 1/2]. */ - n = vrndaq_f32 (x); - r = vsubq_f32 (x, n); - e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), 23); - scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); + /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + x = n + r, with r in [-1/2, 1/2]. */ + float32x4_t n = vrndaq_f32 (x); + float32x4_t r = vsubq_f32 (x, n); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), 23); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT - cmp = vcagtq_f32 (n, d->special_bound); + uint32x4_t cmp = vcagtq_f32 (n, d->special_bound); #endif - r2 = vmulq_f32 (r, r); - p = vfmaq_f32 (C (1), C (0), r); - q = vfmaq_f32 (C (3), C (2), r); + float32x4_t c024 = vld1q_f32 (&d->c0); + float32x4_t r2 = vmulq_f32 (r, r); + float32x4_t p = vfmaq_laneq_f32 (d->c1, r, c024, 0); + float32x4_t q = vfmaq_laneq_f32 (d->c3, r, c024, 1); q = vfmaq_f32 (q, p, r2); - p = vmulq_f32 (C (4), r); - poly = vfmaq_f32 (p, q, r2); + p = vmulq_laneq_f32 (r, c024, 2); + float32x4_t poly = vfmaq_f32 (p, q, r2); if (unlikely (v_any_u32 (cmp))) #if WANT_SIMD_EXCEPT return special_case (xm, vfmaq_f32 (scale, poly, scale), cmp); #else return special_case (poly, n, e, cmp, scale, d); #endif return vfmaq_f32 (scale, poly, scale); } + +HALF_WIDTH_ALIAS_F1 (exp2) + +TEST_SIG (V, F, 1, exp2, -9.9, 9.9) +TEST_ULP (V_NAME_F1 (exp2), 1.49) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (exp2), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_F1 (exp2), 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (V_NAME_F1 (exp2), 0x1p-14, 0x1p8, 500000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/exp2f_1u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp2f_1u.c new file mode 100644 index 000000000000..1f8e89ab658f --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/exp2f_1u.c @@ -0,0 +1,73 @@ +/* + * Single-precision vector 2^x function. 
+ * + * Copyright (c) 2019-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_defs.h" + +static const struct data +{ + float32x4_t c0, c1, c2, c3, c4, c5, shift; + uint32x4_t exponent_bias; + float32x4_t special_bound, scale_thresh; + uint32x4_t special_offset, special_bias; +} data = { + .shift = V4 (0x1.8p23f), + .exponent_bias = V4 (0x3f800000), + .special_bound = V4 (126.0f), + .scale_thresh = V4 (192.0f), + .special_offset = V4 (0x82000000), + .special_bias = V4 (0x7f000000), + /* maxerr: 0.878 ulp. */ + .c0 = V4 (0x1.416b5ep-13f), + .c1 = V4 (0x1.5f082ep-10f), + .c2 = V4 (0x1.3b2dep-7f), + .c3 = V4 (0x1.c6af7cp-5f), + .c4 = V4 (0x1.ebfbdcp-3f), + .c5 = V4 (0x1.62e43p-1f), +}; + +static float32x4_t VPCS_ATTR NOINLINE +specialcase (float32x4_t p, float32x4_t n, uint32x4_t e, const struct data *d) +{ + /* 2^n may overflow, break it up into s1*s2. */ + uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); + float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); + uint32x4_t cmp = vcagtq_f32 (n, d->scale_thresh); + float32x4_t r1 = vmulq_f32 (s1, s1); + float32x4_t r0 = vmulq_f32 (vmulq_f32 (p, s1), s2); + return vreinterpretq_f32_u32 ((cmp & vreinterpretq_u32_f32 (r1)) + | (~cmp & vreinterpretq_u32_f32 (r0))); +} + +float32x4_t VPCS_ATTR +_ZGVnN4v_exp2f_1u (float32x4_t x) +{ + /* exp2(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] + x = n + r, with r in [-1/2, 1/2]. */ + const struct data *d = ptr_barrier (&data); + float32x4_t n = vrndaq_f32 (x); + float32x4_t r = x - n; + uint32x4_t e = vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)) << 23; + float32x4_t scale = vreinterpretq_f32_u32 (e + d->exponent_bias); + uint32x4_t cmp = vcagtq_f32 (n, d->special_bound); + + float32x4_t p = vfmaq_f32 (d->c1, d->c0, r); + p = vfmaq_f32 (d->c2, p, r); + p = vfmaq_f32 (d->c3, p, r); + p = vfmaq_f32 (d->c4, p, r); + p = vfmaq_f32 (d->c5, p, r); + p = vfmaq_f32 (v_f32 (1.0f), p, r); + if (unlikely (v_any_u32 (cmp))) + return specialcase (p, n, e, d); + return scale * p; +} + +TEST_ULP (_ZGVnN4v_exp2f_1u, 0.4) +TEST_DISABLE_FENV (_ZGVnN4v_exp2f_1u) +TEST_INTERVAL (_ZGVnN4v_exp2f_1u, 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (_ZGVnN4v_exp2f_1u, 0x1p-14, 0x1p8, 500000) diff --git a/contrib/arm-optimized-routines/math/aarch64/v_expf.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/expf.c similarity index 61% rename from contrib/arm-optimized-routines/math/aarch64/v_expf.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/expf.c index 34e8b6081bcd..e5b1f020d1a0 100644 --- a/contrib/arm-optimized-routines/math/aarch64/v_expf.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/expf.c @@ -1,122 +1,130 @@ /* * Single-precision vector e^x function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ - -#include "mathlib.h" #include "v_math.h" +#include "test_defs.h" +#include "test_sig.h" static const struct data { - float32x4_t poly[5]; - float32x4_t shift, inv_ln2, ln2_hi, ln2_lo; - uint32x4_t exponent_bias; + float32x4_t c1, c3, c4, inv_ln2; + float ln2_hi, ln2_lo, c0, c2; + uint32x4_t exponent_bias, special_offset, special_bias; #if !WANT_SIMD_EXCEPT float32x4_t special_bound, scale_thresh; #endif } data = { /* maxerr: 1.45358 +0.5 ulp. 
*/ - .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), - V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, - .shift = V4 (0x1.8p23f), + .c0 = 0x1.0e4020p-7f, + .c1 = V4 (0x1.573e2ep-5f), + .c2 = 0x1.555e66p-3f, + .c3 = V4 (0x1.fffdb6p-2f), + .c4 = V4 (0x1.ffffecp-1f), .inv_ln2 = V4 (0x1.715476p+0f), - .ln2_hi = V4 (0x1.62e4p-1f), - .ln2_lo = V4 (0x1.7f7d1cp-20f), + .ln2_hi = 0x1.62e4p-1f, + .ln2_lo = 0x1.7f7d1cp-20f, .exponent_bias = V4 (0x3f800000), + .special_offset = V4 (0x82000000), + .special_bias = V4 (0x7f000000), #if !WANT_SIMD_EXCEPT .special_bound = V4 (126.0f), .scale_thresh = V4 (192.0f), #endif }; #define C(i) d->poly[i] #if WANT_SIMD_EXCEPT # define TinyBound v_u32 (0x20000000) /* asuint (0x1p-63). */ # define BigBound v_u32 (0x42800000) /* asuint (0x1p6). */ # define SpecialBound v_u32 (0x22800000) /* BigBound - TinyBound. */ static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) { /* If fenv exceptions are to be triggered correctly, fall back to the scalar routine for special lanes. */ return v_call_f32 (expf, x, y, cmp); } #else -# define SpecialOffset v_u32 (0x82000000) -# define SpecialBias v_u32 (0x7f000000) - static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t scale, const struct data *d) { /* 2^n may overflow, break it up into s1*s2. */ - uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset); - float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias)); + uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh); float32x4_t r2 = vmulq_f32 (s1, s1); + /* (s2 + p*s2)*s1 = s2*(p+1)*s1. */ float32x4_t r1 = vmulq_f32 (vfmaq_f32 (s2, poly, s2), s1); /* Similar to r1 but avoids double rounding in the subnormal range. */ float32x4_t r0 = vfmaq_f32 (scale, poly, scale); float32x4_t r = vbslq_f32 (cmp1, r1, r0); return vbslq_f32 (cmp2, r2, r); } #endif -float32x4_t VPCS_ATTR V_NAME_F1 (exp) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, r, r2, scale, p, q, poly, z; - uint32x4_t cmp, e; + float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi); #if WANT_SIMD_EXCEPT /* asuint(x) - TinyBound >= BigBound - TinyBound. */ - cmp = vcgeq_u32 ( + uint32x4_t cmp = vcgeq_u32 ( vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)), TinyBound), SpecialBound); float32x4_t xm = x; /* If any lanes are special, mask them with 1 and retain a copy of x to allow special case handler to fix special lanes later. This is only necessary if fenv exceptions are to be triggered correctly. */ if (unlikely (v_any_u32 (cmp))) x = vbslq_f32 (cmp, v_f32 (1), x); #endif /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2].
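   The reduction below is the usual two-constant Cody-Waite split of ln2,
   in scalar form (a sketch; constants from the data struct above):

     float n = roundf (x * 0x1.715476p+0f);
     float r = x - n * 0x1.62e4p-1f;
     r = r - n * 0x1.7f7d1cp-20f;

   ln2_hi carries the leading bits of ln2 and ln2_lo the discarded tail, so
   r = x - n*ln2 is computed to well beyond single precision.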
*/ - z = vfmaq_f32 (d->shift, x, d->inv_ln2); - n = vsubq_f32 (z, d->shift); - r = vfmsq_f32 (x, n, d->ln2_hi); - r = vfmsq_f32 (r, n, d->ln2_lo); - e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); - scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); + float32x4_t n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2)); + float32x4_t r = vfmsq_laneq_f32 (x, n, ln2_c02, 0); + r = vfmsq_laneq_f32 (r, n, ln2_c02, 1); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT - cmp = vcagtq_f32 (n, d->special_bound); + uint32x4_t cmp = vcagtq_f32 (n, d->special_bound); #endif - r2 = vmulq_f32 (r, r); - p = vfmaq_f32 (C (1), C (0), r); - q = vfmaq_f32 (C (3), C (2), r); + float32x4_t r2 = vmulq_f32 (r, r); + float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2); + float32x4_t q = vfmaq_laneq_f32 (d->c3, r, ln2_c02, 3); q = vfmaq_f32 (q, p, r2); - p = vmulq_f32 (C (4), r); - poly = vfmaq_f32 (p, q, r2); + p = vmulq_f32 (d->c4, r); + float32x4_t poly = vfmaq_f32 (p, q, r2); if (unlikely (v_any_u32 (cmp))) #if WANT_SIMD_EXCEPT return special_case (xm, vfmaq_f32 (scale, poly, scale), cmp); #else return special_case (poly, n, e, cmp, scale, d); #endif return vfmaq_f32 (scale, poly, scale); } + +HALF_WIDTH_ALIAS_F1 (exp) + +TEST_SIG (V, F, 1, exp, -9.9, 9.9) +TEST_ULP (V_NAME_F1 (exp), 1.49) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (exp), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_F1 (exp), 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (V_NAME_F1 (exp), 0x1p-14, 0x1p8, 500000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/expf_1u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/expf_1u.c new file mode 100644 index 000000000000..4e114d810e08 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/expf_1u.c @@ -0,0 +1,79 @@ +/* + * Single-precision vector e^x function. + * + * Copyright (c) 2019-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "v_math.h" +#include "test_defs.h" + +static const struct data +{ + float32x4_t shift, inv_ln2; + uint32x4_t exponent_bias; + float32x4_t c1, c2, c3, c4; + float32x4_t special_bound, scale_thresh; + uint32x4_t special_offset, special_bias; + float ln2_hi, ln2_lo, c0, nothing; +} data = { + .ln2_hi = 0x1.62e4p-1f, + .ln2_lo = 0x1.7f7d1cp-20f, + .shift = V4 (0x1.8p23f), + .inv_ln2 = V4 (0x1.715476p+0f), + .exponent_bias = V4 (0x3f800000), + .special_bound = V4 (126.0f), + .scale_thresh = V4 (192.0f), + .special_offset = V4 (0x83000000), + .special_bias = V4 (0x7f000000), + /* maxerr: 0.36565 +0.5 ulp. */ + .c0 = 0x1.6a6000p-10f, + .c1 = V4 (0x1.12718ep-7f), + .c2 = V4 (0x1.555af0p-5f), + .c3 = V4 (0x1.555430p-3f), + .c4 = V4 (0x1.fffff4p-2f), +}; + +static float32x4_t VPCS_ATTR NOINLINE +specialcase (float32x4_t p, float32x4_t n, uint32x4_t e, const struct data *d) +{ + /* 2^n may overflow, break it up into s1*s2. 
*/ + uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); + float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); + uint32x4_t cmp = vcagtq_f32 (n, d->scale_thresh); + float32x4_t r1 = vmulq_f32 (s1, s1); + float32x4_t r0 = vmulq_f32 (vmulq_f32 (p, s1), s2); + return vreinterpretq_f32_u32 ((cmp & vreinterpretq_u32_f32 (r1)) + | (~cmp & vreinterpretq_u32_f32 (r0))); +} + +float32x4_t VPCS_ATTR +_ZGVnN4v_expf_1u (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + float32x4_t ln2_c0 = vld1q_f32 (&d->ln2_hi); + + /* exp(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] + x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ + float32x4_t z = vmulq_f32 (x, d->inv_ln2); + float32x4_t n = vrndaq_f32 (z); + float32x4_t r = vfmsq_laneq_f32 (x, n, ln2_c0, 0); + r = vfmsq_laneq_f32 (r, n, ln2_c0, 1); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (z)), 23); + float32x4_t scale = vreinterpretq_f32_u32 (e + d->exponent_bias); + uint32x4_t cmp = vcagtq_f32 (n, d->special_bound); + float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c0, 2); + p = vfmaq_f32 (d->c2, p, r); + p = vfmaq_f32 (d->c3, p, r); + p = vfmaq_f32 (d->c4, p, r); + p = vfmaq_f32 (v_f32 (1.0f), p, r); + p = vfmaq_f32 (v_f32 (1.0f), p, r); + if (unlikely (v_any_u32 (cmp))) + return specialcase (p, n, e, d); + return scale * p; +} + +TEST_ULP (_ZGVnN4v_expf_1u, 0.4) +TEST_DISABLE_FENV (_ZGVnN4v_expf_1u) +TEST_INTERVAL (_ZGVnN4v_expf_1u, 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (_ZGVnN4v_expf_1u, 0x1p-14, 0x1p8, 500000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/expm1.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/expm1.c new file mode 100644 index 000000000000..7535a1830427 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/expm1.c @@ -0,0 +1,77 @@ +/* + * Double-precision vector exp(x) - 1 function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "v_expm1_inline.h" + +static const struct data +{ + struct v_expm1_data d; +#if WANT_SIMD_EXCEPT + uint64x2_t thresh, tiny_bound; +#else + float64x2_t oflow_bound; +#endif +} data = { + .d = V_EXPM1_DATA, +#if WANT_SIMD_EXCEPT + /* asuint64(oflow_bound) - asuint64(0x1p-51), shifted left by 1 for abs + compare. */ + .thresh = V2 (0x78c56fa6d34b552), + /* asuint64(0x1p-51) << 1. */ + .tiny_bound = V2 (0x3cc0000000000000 << 1), +#else + /* Value above which expm1(x) should overflow. Absolute value of the + underflow bound is greater than this, so it catches both cases - there is + a small window where fallbacks are triggered unnecessarily. */ + .oflow_bound = V2 (0x1.62b7d369a5aa9p+9), +#endif +}; + +static float64x2_t VPCS_ATTR NOINLINE +special_case (float64x2_t x, uint64x2_t special, const struct data *d) +{ + return v_call_f64 (expm1, x, expm1_inline (v_zerofy_f64 (x, special), &d->d), + special); +} + +/* Double-precision vector exp(x) - 1 function. + The maximum observed error is 2.05 ULP: + _ZGVnN2v_expm1(0x1.6329669eb8c87p-2) got 0x1.a8897eef87b34p-2 + want 0x1.a8897eef87b32p-2.
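   The WANT_SIMD_EXCEPT lane test below strips the sign by doubling the bit
   pattern: ix + ix shifts out the sign bit, so one unsigned compare against
   the pre-doubled threshold covers tiny inputs, the overflow and underflow
   bounds, infinities and NaNs at once. A scalar sketch of the same test:

     uint64_t ix2 = 2 * asuint64 (x);
     int special = ix2 - (0x3cc0000000000000ull << 1) >= 0x78c56fa6d34b552ull;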
*/ +float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + +#if WANT_SIMD_EXCEPT + uint64x2_t ix = vreinterpretq_u64_f64 (x); + /* If fp exceptions are to be triggered correctly, fall back to scalar for + |x| < 2^-51, |x| > oflow_bound, Inf & NaN. Add ix to itself for + shift-left by 1, and compare with thresh which was left-shifted offline - + this is effectively an absolute compare. */ + uint64x2_t special + = vcgeq_u64 (vsubq_u64 (vaddq_u64 (ix, ix), d->tiny_bound), d->thresh); +#else + /* Large input, NaNs and Infs. */ + uint64x2_t special = vcageq_f64 (x, d->oflow_bound); +#endif + + if (unlikely (v_any_u64 (special))) + return special_case (x, special, d); + + /* expm1(x) ~= p * t + (t - 1). */ + return expm1_inline (x, &d->d); +} + +TEST_SIG (V, D, 1, expm1, -9.9, 9.9) +TEST_ULP (V_NAME_D1 (expm1), 1.56) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (expm1), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0, 0x1p-51, 1000) +TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0x1p-51, 0x1.62b7d369a5aa9p+9, 100000) +TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0x1.62b7d369a5aa9p+9, inf, 100) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/expm1f.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/expm1f.c new file mode 100644 index 000000000000..6d4431dcd8a5 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/expm1f.c @@ -0,0 +1,82 @@ +/* + * Single-precision vector exp(x) - 1 function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "v_expm1f_inline.h" + +static const struct data +{ + struct v_expm1f_data d; +#if WANT_SIMD_EXCEPT + uint32x4_t thresh; +#else + float32x4_t oflow_bound; +#endif +} data = { + .d = V_EXPM1F_DATA, +#if !WANT_SIMD_EXCEPT + /* Value above which expm1f(x) should overflow. Absolute value of the + underflow bound is greater than this, so it catches both cases - there is + a small window where fallbacks are triggered unnecessarily. */ + .oflow_bound = V4 (0x1.5ebc4p+6), +#else + /* asuint(oflow_bound) - asuint(0x1p-23), shifted left by 1 for absolute + compare. */ + .thresh = V4 (0x1d5ebc40), +#endif +}; + +/* asuint(0x1p-23), shifted by 1 for abs compare. */ +#define TinyBound v_u32 (0x34000000 << 1) + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t x, uint32x4_t special, const struct data *d) +{ + return v_call_f32 ( + expm1f, x, expm1f_inline (v_zerofy_f32 (x, special), &d->d), special); +} + +/* Single-precision vector exp(x) - 1 function. + The maximum error is 1.62 ULP: + _ZGVnN4v_expm1f(0x1.85f83p-2) got 0x1.da9f4p-2 + want 0x1.da9f44p-2. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + +#if WANT_SIMD_EXCEPT + uint32x4_t ix = vreinterpretq_u32_f32 (x); + /* If fp exceptions are to be triggered correctly, fall back to scalar for + |x| < 2^-23, |x| > oflow_bound, Inf & NaN. Add ix to itself for + shift-left by 1, and compare with thresh which was left-shifted offline - + this is effectively an absolute compare. */ + uint32x4_t special + = vcgeq_u32 (vsubq_u32 (vaddq_u32 (ix, ix), TinyBound), d->thresh); +#else + /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */ + uint32x4_t special = vcagtq_f32 (x, d->oflow_bound); +#endif + + if (unlikely (v_any_u32 (special))) + return special_case (x, special, d); + + /* expm1(x) ~= p * t + (t - 1). 
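   (With x reduced as x = i*ln2 + r, t = 2^i and exp(r) ~= 1 + p, this is
   exp(x) - 1 = t*(1 + p) - 1 = p*t + (t - 1); forming t - 1 as its own term
   preserves accuracy when t is near 1 and the result nearly cancels.)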
*/ + return expm1f_inline (x, &d->d); +} + +HALF_WIDTH_ALIAS_F1 (expm1) + +TEST_SIG (V, F, 1, expm1, -9.9, 9.9) +TEST_ULP (V_NAME_F1 (expm1), 1.13) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (expm1), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (expm1), 0, 0x1p-23, 1000) +TEST_INTERVAL (V_NAME_F1 (expm1), -0x1p-23, 0x1.5ebc4p+6, 1000000) +TEST_INTERVAL (V_NAME_F1 (expm1), -0x1p-23, -0x1.9bbabcp+6, 1000000) +TEST_INTERVAL (V_NAME_F1 (expm1), 0x1.5ebc4p+6, inf, 1000) +TEST_INTERVAL (V_NAME_F1 (expm1), -0x1.9bbabcp+6, -inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/finite_pow.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/finite_pow.h similarity index 94% rename from contrib/arm-optimized-routines/pl/math/finite_pow.h rename to contrib/arm-optimized-routines/math/aarch64/advsimd/finite_pow.h index 8944d4fae625..0c8350a1a77b 100644 --- a/contrib/arm-optimized-routines/pl/math/finite_pow.h +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/finite_pow.h @@ -1,365 +1,361 @@ /* * Double-precision x^y function. * - * Copyright (c) 2018-2023, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" /* Scalar version of pow used for fallbacks in vector implementations. */ /* Data is defined in v_pow_log_data.c. */ #define N_LOG (1 << V_POW_LOG_TABLE_BITS) #define Off 0x3fe6955500000000 #define As __v_pow_log_data.poly /* Data is defined in v_pow_exp_data.c. */ #define N_EXP (1 << V_POW_EXP_TABLE_BITS) #define SignBias (0x800 << V_POW_EXP_TABLE_BITS) #define SmallExp 0x3c9 /* top12(0x1p-54). */ #define BigExp 0x408 /* top12(512.0). */ #define ThresExp 0x03f /* BigExp - SmallExp. */ #define InvLn2N __v_pow_exp_data.n_over_ln2 #define Ln2HiN __v_pow_exp_data.ln2_over_n_hi #define Ln2LoN __v_pow_exp_data.ln2_over_n_lo #define SBits __v_pow_exp_data.sbits #define Cs __v_pow_exp_data.poly /* Constants associated with pow. */ #define SmallPowX 0x001 /* top12(0x1p-1022). */ #define BigPowX 0x7ff /* top12(INFINITY). */ #define ThresPowX 0x7fe /* BigPowX - SmallPowX. */ #define SmallPowY 0x3be /* top12(0x1.e7b6p-65). */ #define BigPowY 0x43e /* top12(0x1.749p62). */ #define ThresPowY 0x080 /* BigPowY - SmallPowY. */ /* Top 12 bits of a double (sign and exponent bits). */ static inline uint32_t top12 (double x) { return asuint64 (x) >> 52; } /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about 15 bits of additional precision. IX is the bit representation of x, but normalized in the subnormal range using the sign bit for the exponent. */ static inline double log_inline (uint64_t ix, double *tail) { /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ uint64_t tmp = ix - Off; int i = (tmp >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1); int k = (int64_t) tmp >> 52; /* arithmetic shift. */ uint64_t iz = ix - (tmp & 0xfffULL << 52); double z = asdouble (iz); double kd = (double) k; /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ double invc = __v_pow_log_data.invc[i]; double logc = __v_pow_log_data.logc[i]; double logctail = __v_pow_log_data.logctail[i]; /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */ double r = fma (z, invc, -1.0); /* k*Ln2 + log(c) + r.
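   The hi/lo pairs computed below follow the fast two-sum pattern: for
   |a| >= |b|,

     double hi = a + b;
     double lo = a - hi + b;

   recovers in lo the exact rounding error of the addition; lo2 = t1 - t2 + r
   below is this identity, and it is how TAIL keeps roughly 15 extra bits.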
*/ double t1 = kd * __v_pow_log_data.ln2_hi + logc; double t2 = t1 + r; double lo1 = kd * __v_pow_log_data.ln2_lo + logctail; double lo2 = t1 - t2 + r; /* Evaluation is optimized assuming superscalar pipelined execution. */ double ar = As[0] * r; double ar2 = r * ar; double ar3 = r * ar2; /* k*Ln2 + log(c) + r + A[0]*r*r. */ double hi = t2 + ar2; double lo3 = fma (ar, r, -ar2); double lo4 = t2 - hi + ar2; /* p = log1p(r) - r - A[0]*r*r. */ double p = (ar3 * (As[1] + r * As[2] + ar2 * (As[3] + r * As[4] + ar2 * (As[5] + r * As[6])))); double lo = lo1 + lo2 + lo3 + lo4 + p; double y = hi + lo; *tail = hi - y + lo; return y; } /* Handle cases that may overflow or underflow when computing the result that is scale*(1+TMP) without intermediate rounding. The bit representation of scale is in SBITS, however it has a computed exponent that may have overflown into the sign bit so that needs to be adjusted before using it as a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ static inline double special_case (double tmp, uint64_t sbits, uint64_t ki) { double scale, y; if ((ki & 0x80000000) == 0) { /* k > 0, the exponent of scale might have overflowed by <= 460. */ sbits -= 1009ull << 52; scale = asdouble (sbits); y = 0x1p1009 * (scale + scale * tmp); - return check_oflow (eval_as_double (y)); + return y; } /* k < 0, need special care in the subnormal range. */ sbits += 1022ull << 52; /* Note: sbits is signed scale. */ scale = asdouble (sbits); y = scale + scale * tmp; #if WANT_SIMD_EXCEPT if (fabs (y) < 1.0) { /* Round y to the right precision before scaling it into the subnormal range to avoid double rounding that can cause 0.5+E/2 ulp error where E is the worst-case ulp error outside the subnormal range. So this is only useful if the goal is better than 1 ulp worst-case error. */ double hi, lo, one = 1.0; if (y < 0.0) one = -1.0; lo = scale - y + scale * tmp; hi = one + y; lo = one - hi + y + lo; - y = eval_as_double (hi + lo) - one; + y = (hi + lo) - one; /* Fix the sign of 0. */ if (y == 0.0) y = asdouble (sbits & 0x8000000000000000); /* The underflow exception needs to be signaled explicitly. */ force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); } #endif y = 0x1p-1022 * y; - return check_uflow (eval_as_double (y)); + return y; } /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */ static inline double exp_inline (double x, double xtail, uint32_t sign_bias) { uint32_t abstop = top12 (x) & 0x7ff; if (unlikely (abstop - SmallExp >= ThresExp)) { if (abstop - SmallExp >= 0x80000000) { /* Avoid spurious underflow for tiny x. */ /* Note: 0 is common input. */ return sign_bias ? -1.0 : 1.0; } if (abstop >= top12 (1024.0)) { /* Note: inf and nan are already handled. */ /* Skip errno handling. */ #if WANT_SIMD_EXCEPT return asuint64 (x) >> 63 ? __math_uflow (sign_bias) : __math_oflow (sign_bias); #else double res_uoflow = asuint64 (x) >> 63 ? 0.0 : INFINITY; return sign_bias ? -res_uoflow : res_uoflow; #endif } /* Large x is special cased below. */ abstop = 0; } /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ double z = InvLn2N * x; double kd = round (z); uint64_t ki = lround (z); double r = x - kd * Ln2HiN - kd * Ln2LoN; /* The code assumes 2^-200 < |xtail| < 2^-8/N. 
*/ r += xtail; /* 2^(k/N) ~= scale. */ uint64_t idx = ki & (N_EXP - 1); uint64_t top = (ki + sign_bias) << (52 - V_POW_EXP_TABLE_BITS); /* This is only a valid scale when -1023*N < k < 1024*N. */ uint64_t sbits = SBits[idx] + top; /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ /* Evaluation is optimized assuming superscalar pipelined execution. */ double r2 = r * r; double tmp = r + r2 * Cs[0] + r * r2 * (Cs[1] + r * Cs[2]); if (unlikely (abstop == 0)) return special_case (tmp, sbits, ki); double scale = asdouble (sbits); /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there is no spurious underflow here even without fma. */ - return eval_as_double (scale + scale * tmp); + return scale + scale * tmp; } /* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. A version of exp_inline that is not inlined and for which sign_bias is equal to 0. */ static double NOINLINE exp_nosignbias (double x, double xtail) { uint32_t abstop = top12 (x) & 0x7ff; if (unlikely (abstop - SmallExp >= ThresExp)) { /* Avoid spurious underflow for tiny x. */ if (abstop - SmallExp >= 0x80000000) return 1.0; /* Note: inf and nan are already handled. */ if (abstop >= top12 (1024.0)) #if WANT_SIMD_EXCEPT return asuint64 (x) >> 63 ? __math_uflow (0) : __math_oflow (0); #else return asuint64 (x) >> 63 ? 0.0 : INFINITY; #endif /* Large x is special cased below. */ abstop = 0; } /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */ double z = InvLn2N * x; double kd = round (z); uint64_t ki = lround (z); double r = x - kd * Ln2HiN - kd * Ln2LoN; /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ r += xtail; /* 2^(k/N) ~= scale. */ uint64_t idx = ki & (N_EXP - 1); uint64_t top = ki << (52 - V_POW_EXP_TABLE_BITS); /* This is only a valid scale when -1023*N < k < 1024*N. */ uint64_t sbits = SBits[idx] + top; /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ double r2 = r * r; double tmp = r + r2 * Cs[0] + r * r2 * (Cs[1] + r * Cs[2]); if (unlikely (abstop == 0)) return special_case (tmp, sbits, ki); double scale = asdouble (sbits); /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there is no spurious underflow here even without fma. */ - return eval_as_double (scale + scale * tmp); + return scale + scale * tmp; } /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is the bit representation of a non-zero finite floating-point value. */ static inline int checkint (uint64_t iy) { int e = iy >> 52 & 0x7ff; if (e < 0x3ff) return 0; if (e > 0x3ff + 52) return 2; if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) return 0; if (iy & (1ULL << (0x3ff + 52 - e))) return 1; return 2; } /* Returns 1 if input is the bit representation of 0, infinity or nan. */ static inline int zeroinfnan (uint64_t i) { return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1; } static double NOINLINE -__pl_finite_pow (double x, double y) +pow_scalar_special_case (double x, double y) { uint32_t sign_bias = 0; uint64_t ix, iy; uint32_t topx, topy; ix = asuint64 (x); iy = asuint64 (y); topx = top12 (x); topy = top12 (y); if (unlikely (topx - SmallPowX >= ThresPowX || (topy & 0x7ff) - SmallPowY >= ThresPowY)) { /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0 and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */ /* Special cases: (x < 0x1p-1022 or inf or nan) or (|y| < 0x1p-65 or |y| >= 0x1p63 or nan).
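   The zeroinfnan test defined above relies on unsigned wraparound: doubling
   drops the sign bit and the subsequent -1 wraps i == 0 round to
   UINT64_MAX, so +-0, +-inf and all NaNs satisfy
   2*i - 1 >= 2*asuint64 (INFINITY) - 1 while every finite non-zero input
   stays below the bound. For example, asuint64 (0.0) maps to
   0xffffffffffffffff and the largest finite double to 0xffdffffffffffffd,
   just under the bound 0xffdfffffffffffff.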
*/ if (unlikely (zeroinfnan (iy))) { if (2 * iy == 0) return issignaling_inline (x) ? x + y : 1.0; if (ix == asuint64 (1.0)) return issignaling_inline (y) ? x + y : 1.0; if (2 * ix > 2 * asuint64 (INFINITY) || 2 * iy > 2 * asuint64 (INFINITY)) return x + y; if (2 * ix == 2 * asuint64 (1.0)) return 1.0; if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63)) return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ return y * y; } if (unlikely (zeroinfnan (ix))) { double x2 = x * x; if (ix >> 63 && checkint (iy) == 1) { x2 = -x2; sign_bias = 1; } #if WANT_SIMD_EXCEPT if (2 * ix == 0 && iy >> 63) return __math_divzero (sign_bias); #endif - /* Without the barrier some versions of clang hoist the 1/x2 and - thus division by zero exception can be signaled spuriously. */ - return iy >> 63 ? opt_barrier_double (1 / x2) : x2; + return iy >> 63 ? 1 / x2 : x2; } /* Here x and y are non-zero finite. */ if (ix >> 63) { /* Finite x < 0. */ int yint = checkint (iy); if (yint == 0) #if WANT_SIMD_EXCEPT return __math_invalid (x); #else return __builtin_nan (""); #endif if (yint == 1) sign_bias = SignBias; ix &= 0x7fffffffffffffff; topx &= 0x7ff; } if ((topy & 0x7ff) - SmallPowY >= ThresPowY) { /* Note: sign_bias == 0 here because y is not odd. */ if (ix == asuint64 (1.0)) return 1.0; /* |y| < 2^-65, x^y ~= 1 + y*log(x). */ if ((topy & 0x7ff) < SmallPowY) return 1.0; #if WANT_SIMD_EXCEPT return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0) : __math_uflow (0); #else return (ix > asuint64 (1.0)) == (topy < 0x800) ? INFINITY : 0; #endif } if (topx == 0) { /* Normalize subnormal x so exponent becomes negative. */ - /* Without the barrier some versions of clang evalutate the mul - unconditionally causing spurious overflow exceptions. */ - ix = asuint64 (opt_barrier_double (x) * 0x1p52); + ix = asuint64 (x * 0x1p52); ix &= 0x7fffffffffffffff; ix -= 52ULL << 52; } } double lo; double hi = log_inline (ix, &lo); double ehi = y * hi; double elo = y * lo + fma (y, hi, -ehi); return exp_inline (ehi, elo, sign_bias); } diff --git a/contrib/arm-optimized-routines/pl/math/v_hypot_1u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/hypot.c similarity index 74% rename from contrib/arm-optimized-routines/pl/math/v_hypot_1u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/hypot.c index d4ff7be89a8f..dc01ed5bac93 100644 --- a/contrib/arm-optimized-routines/pl/math/v_hypot_1u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/hypot.c @@ -1,95 +1,95 @@ /* * Double-precision vector hypot(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #if WANT_SIMD_EXCEPT static const struct data { uint64x2_t tiny_bound, thres; } data = { .tiny_bound = V2 (0x2000000000000000), /* asuint (0x1p-511). */ - .thres = V2 (0x3fe0000000000000), /* asuint (0x1p511) - tiny_bound. */ + .thres = V2 (0x3fe0000000000000), /* asuint (0x1p511) - tiny_bound. */ }; #else static const struct data { uint64x2_t tiny_bound; uint32x4_t thres; } data = { .tiny_bound = V2 (0x0360000000000000), /* asuint (0x1p-969). */ - .thres = V4 (0x7c900000), /* asuint (inf) - tiny_bound. */ + .thres = V4 (0x7c900000), /* asuint (inf) - tiny_bound. 
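   Only the top 32 bits of each double matter for this range test, which is
   why the non-FENV path narrows with vsubhn/vaddhn rather than doing full
   64-bit compares. The check below in scalar form (sketch):

     uint64_t t = asuint64 (sqsum) - 0x0360000000000000;
     int special = (uint32_t) (t >> 32) >= 0x7c900000;

   flagging square sums that may underflow, as well as infinities and NaNs,
   for the scalar fallback.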
*/ }; #endif static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, float64x2_t sqsum, uint32x2_t special) { return v_call2_f64 (hypot, x, y, vsqrtq_f64 (sqsum), vmovl_u32 (special)); } /* Vector implementation of double-precision hypot. Maximum error observed is 1.21 ULP: _ZGVnN2vv_hypot (0x1.6a1b193ff85b5p-204, 0x1.bc50676c2a447p-222) got 0x1.6a1b19400964ep-204 want 0x1.6a1b19400964dp-204. */ #if WANT_SIMD_EXCEPT float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y) { const struct data *d = ptr_barrier (&data); float64x2_t ax = vabsq_f64 (x); float64x2_t ay = vabsq_f64 (y); uint64x2_t ix = vreinterpretq_u64_f64 (ax); uint64x2_t iy = vreinterpretq_u64_f64 (ay); /* Extreme values, NaNs, and infinities should be handled by the scalar fallback for correct flag handling. */ uint64x2_t specialx = vcgeq_u64 (vsubq_u64 (ix, d->tiny_bound), d->thres); uint64x2_t specialy = vcgeq_u64 (vsubq_u64 (iy, d->tiny_bound), d->thres); ax = v_zerofy_f64 (ax, specialx); ay = v_zerofy_f64 (ay, specialy); uint32x2_t special = vaddhn_u64 (specialx, specialy); float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (ax, ax), ay, ay); if (unlikely (v_any_u32h (special))) return special_case (x, y, sqsum, special); return vsqrtq_f64 (sqsum); } #else float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y) { const struct data *d = ptr_barrier (&data); float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (x, x), y, y); - uint32x2_t special = vcge_u32 ( - vsubhn_u64 (vreinterpretq_u64_f64 (sqsum), d->tiny_bound), - vget_low_u32 (d->thres)); + uint32x2_t special + = vcge_u32 (vsubhn_u64 (vreinterpretq_u64_f64 (sqsum), d->tiny_bound), + vget_low_u32 (d->thres)); if (unlikely (v_any_u32h (special))) return special_case (x, y, sqsum, special); return vsqrtq_f64 (sqsum); } #endif -PL_SIG (V, D, 2, hypot, -10.0, 10.0) -PL_TEST_ULP (V_NAME_D2 (hypot), 1.21) -PL_TEST_EXPECT_FENV (V_NAME_D2 (hypot), WANT_SIMD_EXCEPT) -PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, 0, inf, 10000) -PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, -0, -inf, 10000) -PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, 0, inf, 10000) -PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000) +TEST_SIG (V, D, 2, hypot, -10.0, 10.0) +TEST_ULP (V_NAME_D2 (hypot), 1.21) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D2 (hypot), WANT_SIMD_EXCEPT) +TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, 0, inf, 10000) +TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, -0, -inf, 10000) +TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, 0, inf, 10000) +TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000) diff --git a/contrib/arm-optimized-routines/pl/math/v_hypotf_1u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/hypotf.c similarity index 68% rename from contrib/arm-optimized-routines/pl/math/v_hypotf_1u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/hypotf.c index 3227b0a3fd8b..69634875be5a 100644 --- a/contrib/arm-optimized-routines/pl/math/v_hypotf_1u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/hypotf.c @@ -1,94 +1,96 @@ /* * Single-precision vector hypot(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #if WANT_SIMD_EXCEPT static const struct data { uint32x4_t tiny_bound, thres; } data = { .tiny_bound = V4 (0x20000000), /* asuint (0x1p-63). 
*/ - .thres = V4 (0x3f000000), /* asuint (0x1p63) - tiny_bound. */ + .thres = V4 (0x3f000000), /* asuint (0x1p63) - tiny_bound. */ }; #else static const struct data { uint32x4_t tiny_bound; uint16x8_t thres; } data = { .tiny_bound = V4 (0x0C800000), /* asuint (0x1p-102). */ - .thres = V8 (0x7300), /* asuint (inf) - tiny_bound. */ + .thres = V8 (0x7300), /* asuint (inf) - tiny_bound. */ }; #endif static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, float32x4_t sqsum, uint16x4_t special) { return v_call2_f32 (hypotf, x, y, vsqrtq_f32 (sqsum), vmovl_u16 (special)); } /* Vector implementation of single-precision hypot. Maximum error observed is 1.21 ULP: _ZGVnN4vv_hypotf (0x1.6a419cp-13, 0x1.82a852p-22) got 0x1.6a41d2p-13 want 0x1.6a41dp-13. */ #if WANT_SIMD_EXCEPT -float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y) { const struct data *d = ptr_barrier (&data); float32x4_t ax = vabsq_f32 (x); float32x4_t ay = vabsq_f32 (y); uint32x4_t ix = vreinterpretq_u32_f32 (ax); uint32x4_t iy = vreinterpretq_u32_f32 (ay); /* Extreme values, NaNs, and infinities should be handled by the scalar fallback for correct flag handling. */ uint32x4_t specialx = vcgeq_u32 (vsubq_u32 (ix, d->tiny_bound), d->thres); uint32x4_t specialy = vcgeq_u32 (vsubq_u32 (iy, d->tiny_bound), d->thres); ax = v_zerofy_f32 (ax, specialx); ay = v_zerofy_f32 (ay, specialy); uint16x4_t special = vaddhn_u32 (specialx, specialy); float32x4_t sqsum = vfmaq_f32 (vmulq_f32 (ax, ax), ay, ay); if (unlikely (v_any_u16h (special))) return special_case (x, y, sqsum, special); return vsqrtq_f32 (sqsum); } #else -float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y) { const struct data *d = ptr_barrier (&data); float32x4_t sqsum = vfmaq_f32 (vmulq_f32 (x, x), y, y); - uint16x4_t special = vcge_u16 ( - vsubhn_u32 (vreinterpretq_u32_f32 (sqsum), d->tiny_bound), - vget_low_u16 (d->thres)); + uint16x4_t special + = vcge_u16 (vsubhn_u32 (vreinterpretq_u32_f32 (sqsum), d->tiny_bound), + vget_low_u16 (d->thres)); if (unlikely (v_any_u16h (special))) return special_case (x, y, sqsum, special); return vsqrtq_f32 (sqsum); } #endif -PL_SIG (V, F, 2, hypot, -10.0, 10.0) -PL_TEST_ULP (V_NAME_F2 (hypot), 1.21) -PL_TEST_EXPECT_FENV (V_NAME_F2 (hypot), WANT_SIMD_EXCEPT) -PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), 0, inf, 0, inf, 10000) -PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), 0, inf, -0, -inf, 10000) -PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), -0, -inf, 0, inf, 10000) -PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000) +HALF_WIDTH_ALIAS_F2 (hypot) + +TEST_SIG (V, F, 2, hypot, -10.0, 10.0) +TEST_ULP (V_NAME_F2 (hypot), 1.21) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F2 (hypot), WANT_SIMD_EXCEPT) +TEST_INTERVAL2 (V_NAME_F2 (hypot), 0, inf, 0, inf, 10000) +TEST_INTERVAL2 (V_NAME_F2 (hypot), 0, inf, -0, -inf, 10000) +TEST_INTERVAL2 (V_NAME_F2 (hypot), -0, -inf, 0, inf, 10000) +TEST_INTERVAL2 (V_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/log.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/log.c new file mode 100644 index 000000000000..94e3f4482079 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/log.c @@ -0,0 +1,118 @@ +/* + * Double-precision vector log(x) function. + * + * Copyright (c) 2019-2024, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_defs.h" +#include "test_sig.h" + +static const struct data +{ + uint64x2_t off, sign_exp_mask, offset_lower_bound; + uint32x4_t special_bound; + float64x2_t c0, c2; + double c1, c3, ln2, c4; +} data = { + /* Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */ + .c0 = V2 (-0x1.ffffffffffff7p-2), + .c1 = 0x1.55555555170d4p-2, + .c2 = V2 (-0x1.0000000399c27p-2), + .c3 = 0x1.999b2e90e94cap-3, + .c4 = -0x1.554e550bd501ep-3, + .ln2 = 0x1.62e42fefa39efp-1, + .sign_exp_mask = V2 (0xfff0000000000000), + .off = V2 (0x3fe6900900000000), + /* Lower bound is 0x0010000000000000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound - offset (which wraps around). */ + .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000), + .special_bound = V4 (0x7fe00000), /* asuint64(inf) - asuint64(0x1p-1022). */ +}; + +#define N (1 << V_LOG_TABLE_BITS) +#define IndexMask (N - 1) + +struct entry +{ + float64x2_t invc; + float64x2_t logc; +}; + +static inline struct entry +lookup (uint64x2_t i) +{ + /* Since N is a power of 2, n % N = n & (N - 1). */ + struct entry e; + uint64_t i0 = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + uint64_t i1 = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); + float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); + e.invc = vuzp1q_f64 (e0, e1); + e.logc = vuzp2q_f64 (e0, e1); + return e; +} + +static float64x2_t VPCS_ATTR NOINLINE +special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, + uint32x2_t special, const struct data *d) +{ + float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); + return v_call_f64 (log, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special)); +} + +/* Double-precision vector log routine. + The maximum observed error is 2.17 ULP: + _ZGVnN2v_log(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2 + want 0x1.ffffff1cca045p-2. */ +float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint64x2_t u = vreinterpretq_u64_f64 (x); + uint64x2_t u_off = vsubq_u64 (u, d->off); + + /* x = 2^k z; where z is in range [Off,2*Off) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask)); + float64x2_t z = vreinterpretq_f64_u64 (iz); + + struct entry e = lookup (u_off); + + uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound), + vget_low_u32 (d->special_bound)); + + /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ + float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); + float64x2_t kd = vcvtq_f64_s64 (k); + + /* hi = r + log(c) + k*Ln2. */ + float64x2_t ln2_and_c4 = vld1q_f64 (&d->ln2); + float64x2_t hi = vfmaq_laneq_f64 (vaddq_f64 (e.logc, r), kd, ln2_and_c4, 0); + + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.
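The scheme just stated evaluates the coefficient pairs first and then combines them in r2 (pairwise Horner). A scalar sketch of the same ordering, with the reduction and table lookup elided and coefficients copied from the data block above (hypothetical helper; illustrative only):

static double
v_log_tail_sketch (double r, double logc, double kd)
{
  const double ln2 = 0x1.62e42fefa39efp-1;
  double hi = (logc + r) + kd * ln2;   // r + log(c) + k*ln2
  double r2 = r * r;
  double p01 = -0x1.ffffffffffff7p-2 + r * 0x1.55555555170d4p-2;   // c0 + r*c1
  double p23 = -0x1.0000000399c27p-2 + r * 0x1.999b2e90e94cap-3;   // c2 + r*c3
  double y = p01 + r2 * (p23 + r2 * -0x1.554e550bd501ep-3);        // pairwise in r2
  return hi + y * r2;
}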
*/ + float64x2_t odd_coeffs = vld1q_f64 (&d->c1); + float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0); + y = vfmaq_laneq_f64 (y, r2, ln2_and_c4, 1); + y = vfmaq_f64 (p, r2, y); + + if (unlikely (v_any_u32h (special))) + return special_case (hi, u_off, y, r2, special, d); + return vfmaq_f64 (hi, y, r2); +} + +TEST_SIG (V, D, 1, log, 0.01, 11.1) +TEST_ULP (V_NAME_D1 (log), 1.67) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (log), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_D1 (log), 0, 0xffff000000000000, 10000) +TEST_INTERVAL (V_NAME_D1 (log), 0x1p-4, 0x1p4, 400000) +TEST_INTERVAL (V_NAME_D1 (log), 0, inf, 400000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/log10.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/log10.c new file mode 100644 index 000000000000..c2b8f1c54f0e --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/log10.c @@ -0,0 +1,132 @@ +/* + * Double-precision vector log10(x) function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" + +static const struct data +{ + uint64x2_t off, sign_exp_mask, offset_lower_bound; + uint32x4_t special_bound; + double invln10, log10_2; + double c1, c3; + float64x2_t c0, c2, c4; +} data = { + /* Computed from log coefficients divided by log(10) then rounded to double + precision. */ + .c0 = V2 (-0x1.bcb7b1526e506p-3), + .c1 = 0x1.287a7636be1d1p-3, + .c2 = V2 (-0x1.bcb7b158af938p-4), + .c3 = 0x1.63c78734e6d07p-4, + .c4 = V2 (-0x1.287461742fee4p-4), + .invln10 = 0x1.bcb7b1526e50ep-2, + .log10_2 = 0x1.34413509f79ffp-2, + .off = V2 (0x3fe6900900000000), + .sign_exp_mask = V2 (0xfff0000000000000), + /* Lower bound is 0x0010000000000000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound - offset (which wraps around). */ + .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000), + .special_bound = V4 (0x7fe00000), /* asuint64(inf) - 0x0010000000000000. */ +}; + +#define N (1 << V_LOG10_TABLE_BITS) +#define IndexMask (N - 1) + +struct entry +{ + float64x2_t invc; + float64x2_t log10c; +}; + +static inline struct entry +lookup (uint64x2_t i) +{ + struct entry e; + uint64_t i0 + = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG10_TABLE_BITS)) & IndexMask; + uint64_t i1 + = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG10_TABLE_BITS)) & IndexMask; + float64x2_t e0 = vld1q_f64 (&__v_log10_data.table[i0].invc); + float64x2_t e1 = vld1q_f64 (&__v_log10_data.table[i1].invc); + e.invc = vuzp1q_f64 (e0, e1); + e.log10c = vuzp2q_f64 (e0, e1); + return e; +} + +static float64x2_t VPCS_ATTR NOINLINE +special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, + uint32x2_t special, const struct data *d) +{ + float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); + return v_call_f64 (log10, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special)); +} + +/* Fast implementation of double-precision vector log10 + is a slight modification of double-precision vector log. + Max ULP error: < 2.5 ulp (nearest rounding.) + Maximum measured at 2.46 ulp for x in [0.96, 0.97] + _ZGVnN2v_log10(0x1.13192407fcb46p+0) got 0x1.fff6be3cae4bbp-6 + want 0x1.fff6be3cae4b9p-6. 
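In other words, the whole expansion is the log expansion rescaled by 1/ln(10): log10(x) = (r + poly(r))/ln(10) + log10(c) + k*log10(2). A trivial scalar identity check (illustrative only; hypothetical helper, C99 <math.h> assumed):

#include <math.h>

static double
log10_identity_sketch (double x)
{
  const double invln10 = 0x1.bcb7b1526e50ep-2;   // 1/ln(10), as in the data above
  return log (x) * invln10;                      // agrees with log10 (x) up to rounding
}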
*/ +float64x2_t VPCS_ATTR V_NAME_D1 (log10) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint64x2_t u = vreinterpretq_u64_f64 (x); + uint64x2_t u_off = vsubq_u64 (u, d->off); + + /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask)); + float64x2_t z = vreinterpretq_f64_u64 (iz); + + struct entry e = lookup (u_off); + + uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound), + vget_low_u32 (d->special_bound)); + + /* log10(x) = log1p(z/c-1)/log(10) + log10(c) + k*log10(2). */ + float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); + float64x2_t kd = vcvtq_f64_s64 (k); + + /* hi = r / log(10) + log10(c) + k*log10(2). + Constants in v_log10_data.c are computed (in extended precision) as + e.log10c := e.logc * invln10. */ + float64x2_t cte = vld1q_f64 (&d->invln10); + float64x2_t hi = vfmaq_laneq_f64 (e.log10c, r, cte, 0); + + /* y = log10(1+r) + n * log10(2). */ + hi = vfmaq_laneq_f64 (hi, kd, cte, 1); + + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ + float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t odd_coeffs = vld1q_f64 (&d->c1); + float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0); + y = vfmaq_f64 (y, d->c4, r2); + y = vfmaq_f64 (p, y, r2); + + if (unlikely (v_any_u32h (special))) + return special_case (hi, u_off, y, r2, special, d); + return vfmaq_f64 (hi, y, r2); +} + +TEST_SIG (V, D, 1, log10, 0.01, 11.1) +TEST_ULP (V_NAME_D1 (log10), 1.97) +TEST_INTERVAL (V_NAME_D1 (log10), -0.0, -inf, 1000) +TEST_INTERVAL (V_NAME_D1 (log10), 0, 0x1p-149, 1000) +TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-149, 0x1p-126, 4000) +TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-126, 0x1p-23, 50000) +TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-23, 1.0, 50000) +TEST_INTERVAL (V_NAME_D1 (log10), 1.0, 100, 50000) +TEST_INTERVAL (V_NAME_D1 (log10), 100, inf, 50000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/log10f.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/log10f.c new file mode 100644 index 000000000000..907c1051e086 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/log10f.c @@ -0,0 +1,106 @@ +/* + * Single-precision vector log10 function. + * + * Copyright (c) 2020-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" + +static const struct data +{ + float32x4_t c0, c2, c4, c6, inv_ln10, ln2; + uint32x4_t off, offset_lower_bound; + uint16x8_t special_bound; + uint32x4_t mantissa_mask; + float c1, c3, c5, c7; +} data = { + /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in + [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25. */ + .c0 = V4 (-0x1.bcb79cp-3f), + .c1 = 0x1.2879c8p-3f, + .c2 = V4 (-0x1.bcd472p-4f), + .c3 = 0x1.6408f8p-4f, + .c4 = V4 (-0x1.246f8p-4f), + .c5 = 0x1.f0e514p-5f, + .c6 = V4 (-0x1.0fc92cp-4f), + .c7 = 0x1.f5f76ap-5f, + .ln2 = V4 (0x1.62e43p-1f), + .inv_ln10 = V4 (0x1.bcb7b2p-2f), + /* Lower bound is the smallest positive normal float 0x00800000. 
For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ + .off = V4 (0x3f2aaaab), /* 0.666667. */ + .mantissa_mask = V4 (0x007fffff), +}; + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t y, uint32x4_t u_off, float32x4_t p, float32x4_t r2, + uint16x4_t cmp, const struct data *d) +{ + /* Fall back to scalar code. */ + return v_call_f32 (log10f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (y, p, r2), vmovl_u16 (cmp)); +} + +/* Fast implementation of AdvSIMD log10f, + uses a similar approach to AdvSIMD logf with the same offset (i.e., 2/3) and + an order 9 polynomial. + Maximum error: 3.305 ulps (nearest rounding). + _ZGVnN4v_log10f(0x1.555c16p+0) got 0x1.ffe2fap-4 + want 0x1.ffe2f4p-4. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + float32x4_t c1357 = vld1q_f32 (&d->c1); + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vreinterpretq_u32_f32 (x); + + /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ + u_off = vsubq_u32 (u_off, d->off); + float32x4_t n = vcvtq_f32_s32 ( + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + + uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); + float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); + + /* y = log10(1+r) + n * log10(2). */ + float32x4_t r2 = vmulq_f32 (r, r); + + float32x4_t c01 = vfmaq_laneq_f32 (d->c0, r, c1357, 0); + float32x4_t c23 = vfmaq_laneq_f32 (d->c2, r, c1357, 1); + float32x4_t c45 = vfmaq_laneq_f32 (d->c4, r, c1357, 2); + float32x4_t c67 = vfmaq_laneq_f32 (d->c6, r, c1357, 3); + + float32x4_t p47 = vfmaq_f32 (c45, r2, c67); + float32x4_t p27 = vfmaq_f32 (c23, r2, p47); + float32x4_t poly = vfmaq_f32 (c01, r2, p27); + + /* y = Log10(2) * n + poly * InvLn(10). */ + float32x4_t y = vfmaq_f32 (r, d->ln2, n); + y = vmulq_f32 (y, d->inv_ln10); + + if (unlikely (v_any_u16h (special))) + return special_case (y, u_off, poly, r2, special, d); + return vfmaq_f32 (y, poly, r2); +} + +HALF_WIDTH_ALIAS_F1 (log10) + +TEST_SIG (V, F, 1, log10, 0.01, 11.1) +TEST_ULP (V_NAME_F1 (log10), 2.81) +TEST_INTERVAL (V_NAME_F1 (log10), -0.0, -inf, 100) +TEST_INTERVAL (V_NAME_F1 (log10), 0, 0x1p-126, 100) +TEST_INTERVAL (V_NAME_F1 (log10), 0x1p-126, 0x1p-23, 50000) +TEST_INTERVAL (V_NAME_F1 (log10), 0x1p-23, 1.0, 50000) +TEST_INTERVAL (V_NAME_F1 (log10), 1.0, 100, 50000) +TEST_INTERVAL (V_NAME_F1 (log10), 100, inf, 50000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/log1p.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/log1p.c new file mode 100644 index 000000000000..42a0c5793920 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/log1p.c @@ -0,0 +1,61 @@ +/* + * Double-precision vector log(1+x) function. + * + * Copyright (c) 2022-2024, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" + +#define WANT_V_LOG1P_K0_SHORTCUT 0 +#include "v_log1p_inline.h" + +const static struct data +{ + struct v_log1p_data d; + uint64x2_t inf, minus_one; +} data = { .d = V_LOG1P_CONSTANTS_TABLE, + .inf = V2 (0x7ff0000000000000), + .minus_one = V2 (0xbff0000000000000) }; + +#define BottomMask v_u64 (0xffffffff) + +static float64x2_t NOINLINE VPCS_ATTR +special_case (float64x2_t x, uint64x2_t cmp, const struct data *d) +{ + /* Side-step special lanes so fenv exceptions are not triggered + inadvertently. */ + float64x2_t x_nospecial = v_zerofy_f64 (x, cmp); + return v_call_f64 (log1p, x, log1p_inline (x_nospecial, &d->d), cmp); +} + +/* Vector log1p approximation using polynomial on reduced interval. Routine is + a modification of the algorithm used in scalar log1p, with no shortcut for + k=0 and no narrowing for f and k. Maximum observed error is 2.45 ULP: + _ZGVnN2v_log1p(0x1.658f7035c4014p+11) got 0x1.fd61d0727429dp+2 + want 0x1.fd61d0727429fp+2 . */ +VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + uint64x2_t ix = vreinterpretq_u64_f64 (x); + uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); + + uint64x2_t special_cases + = vorrq_u64 (vcgeq_u64 (ia, d->inf), vcgeq_u64 (ix, d->minus_one)); + + if (unlikely (v_any_u64 (special_cases))) + return special_case (x, special_cases, d); + + return log1p_inline (x, &d->d); +} + +TEST_SIG (V, D, 1, log1p, -0.9, 10.0) +TEST_ULP (V_NAME_D1 (log1p), 1.95) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (log1p), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0.0, 0x1p-23, 50000) +TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0x1p-23, 0.001, 50000) +TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0.001, 1.0, 50000) +TEST_INTERVAL (V_NAME_D1 (log1p), 1, inf, 40000) +TEST_INTERVAL (V_NAME_D1 (log1p), -1.0, -inf, 500) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/log1pf.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/log1pf.c new file mode 100644 index 000000000000..94b90249128f --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/log1pf.c @@ -0,0 +1,92 @@ +/* + * Single-precision vector log(1+x) function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "v_log1pf_inline.h" + +#if WANT_SIMD_EXCEPT + +const static struct data +{ + uint32x4_t minus_one, thresh; + struct v_log1pf_data d; +} data = { + .d = V_LOG1PF_CONSTANTS_TABLE, + .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - TinyBound. */ + .minus_one = V4 (0xbf800000), +}; + +/* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. */ +# define TinyBound v_u32 (0x34000000) + +static float32x4_t NOINLINE VPCS_ATTR +special_case (float32x4_t x, uint32x4_t cmp, const struct data *d) +{ + /* Side-step special lanes so fenv exceptions are not triggered + inadvertently. */ + float32x4_t x_nospecial = v_zerofy_f32 (x, cmp); + return v_call_f32 (log1pf, x, log1pf_inline (x_nospecial, &d->d), cmp); +} + +/* Vector log1pf approximation using polynomial on reduced interval. Worst-case + error is 1.69 ULP: + _ZGVnN4v_log1pf(0x1.04418ap-2) got 0x1.cfcbd8p-3 + want 0x1.cfcbdcp-3. 
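The reason a dedicated log1p entry point is worth the code: 1.0f + x discards the low bits of a tiny x before the logarithm is ever taken. A scalar illustration of the failure mode (hypothetical helper; C99 <math.h> assumed):

#include <math.h>

static float
log1pf_naive (float x)
{
  // For x = 1e-10f, 1.0f + x rounds to exactly 1.0f, so this returns
  // 0.0f, while log1pf (1e-10f) correctly returns about 1e-10f.
  return logf (1.0f + x);
}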
*/ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log1p) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + uint32x4_t ix = vreinterpretq_u32_f32 (x); + uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x)); + + uint32x4_t special_cases + = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, TinyBound), d->thresh), + vcgeq_u32 (ix, d->minus_one)); + + if (unlikely (v_any_u32 (special_cases))) + return special_case (x, special_cases, d); + + return log1pf_inline (x, &d->d); +} + +#else + +const static struct v_log1pf_data data = V_LOG1PF_CONSTANTS_TABLE; + +static float32x4_t NOINLINE VPCS_ATTR +special_case (float32x4_t x, uint32x4_t cmp) +{ + return v_call_f32 (log1pf, x, log1pf_inline (x, ptr_barrier (&data)), cmp); +} + +/* Vector log1pf approximation using polynomial on reduced interval. Worst-case + error is 1.63 ULP: + _ZGVnN4v_log1pf(0x1.216d12p-2) got 0x1.fdcb12p-3 + want 0x1.fdcb16p-3. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log1p) (float32x4_t x) +{ + uint32x4_t special_cases = vornq_u32 (vcleq_f32 (x, v_f32 (-1)), + vcaleq_f32 (x, v_f32 (0x1p127f))); + + if (unlikely (v_any_u32 (special_cases))) + return special_case (x, special_cases); + + return log1pf_inline (x, ptr_barrier (&data)); +} + +#endif + +HALF_WIDTH_ALIAS_F1 (log1p) + +TEST_SIG (V, F, 1, log1p, -0.9, 10.0) +TEST_ULP (V_NAME_F1 (log1p), 1.20) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (log1p), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (log1p), 0.0, 0x1p-23, 30000) +TEST_SYM_INTERVAL (V_NAME_F1 (log1p), 0x1p-23, 1, 50000) +TEST_INTERVAL (V_NAME_F1 (log1p), 1, inf, 50000) +TEST_INTERVAL (V_NAME_F1 (log1p), -1.0, -inf, 1000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/log2.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/log2.c new file mode 100644 index 000000000000..7d2e44dad2c9 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/log2.c @@ -0,0 +1,123 @@ +/* + * Double-precision vector log2 function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" + +static const struct data +{ + uint64x2_t off, sign_exp_mask, offset_lower_bound; + uint32x4_t special_bound; + float64x2_t c0, c2; + double c1, c3, invln2, c4; +} data = { + /* Each coefficient was generated to approximate log(r) for |r| < 0x1.fp-9 + and N = 128, then scaled by log2(e) in extended precision and rounded back + to double precision. */ + .c0 = V2 (-0x1.71547652b8300p-1), + .c1 = 0x1.ec709dc340953p-2, + .c2 = V2 (-0x1.71547651c8f35p-2), + .c3 = 0x1.2777ebe12dda5p-2, + .c4 = -0x1.ec738d616fe26p-3, + .invln2 = 0x1.71547652b82fep0, + .off = V2 (0x3fe6900900000000), + .sign_exp_mask = V2 (0xfff0000000000000), + /* Lower bound is 0x0010000000000000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound - offset (which wraps around). */ + .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000), + .special_bound = V4 (0x7fe00000), /* asuint64(inf) - asuint64(0x1p-1022). 
*/ +}; + +#define N (1 << V_LOG2_TABLE_BITS) +#define IndexMask (N - 1) + +struct entry +{ + float64x2_t invc; + float64x2_t log2c; +}; + +static inline struct entry +lookup (uint64x2_t i) +{ + struct entry e; + uint64_t i0 + = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG2_TABLE_BITS)) & IndexMask; + uint64_t i1 + = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG2_TABLE_BITS)) & IndexMask; + float64x2_t e0 = vld1q_f64 (&__v_log2_data.table[i0].invc); + float64x2_t e1 = vld1q_f64 (&__v_log2_data.table[i1].invc); + e.invc = vuzp1q_f64 (e0, e1); + e.log2c = vuzp2q_f64 (e0, e1); + return e; +} + +static float64x2_t VPCS_ATTR NOINLINE +special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, + uint32x2_t special, const struct data *d) +{ + float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); + return v_call_f64 (log2, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special)); +} + +/* Double-precision vector log2 routine. Implements the same algorithm as + vector log10, with coefficients and table entries scaled in extended + precision. The maximum observed error is 2.58 ULP: + _ZGVnN2v_log2(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5 + want 0x1.fffb34198d9ddp-5. */ +float64x2_t VPCS_ATTR V_NAME_D1 (log2) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint64x2_t u = vreinterpretq_u64_f64 (x); + uint64x2_t u_off = vsubq_u64 (u, d->off); + + /* x = 2^k z; where z is in range [Off,2*Off) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask)); + float64x2_t z = vreinterpretq_f64_u64 (iz); + + struct entry e = lookup (u_off); + + uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound), + vget_low_u32 (d->special_bound)); + + /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. */ + float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); + float64x2_t kd = vcvtq_f64_s64 (k); + + float64x2_t invln2_and_c4 = vld1q_f64 (&d->invln2); + float64x2_t hi + = vfmaq_laneq_f64 (vaddq_f64 (e.log2c, kd), r, invln2_and_c4, 0); + + float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t odd_coeffs = vld1q_f64 (&d->c1); + float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0); + y = vfmaq_laneq_f64 (y, r2, invln2_and_c4, 1); + y = vfmaq_f64 (p, r2, y); + + if (unlikely (v_any_u32h (special))) + return special_case (hi, u_off, y, r2, special, d); + return vfmaq_f64 (hi, y, r2); +} + +TEST_SIG (V, D, 1, log2, 0.01, 11.1) +TEST_ULP (V_NAME_D1 (log2), 2.09) +TEST_INTERVAL (V_NAME_D1 (log2), -0.0, -0x1p126, 100) +TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-149, 0x1p-126, 4000) +TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-126, 0x1p-23, 50000) +TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-23, 1.0, 50000) +TEST_INTERVAL (V_NAME_D1 (log2), 1.0, 100, 50000) +TEST_INTERVAL (V_NAME_D1 (log2), 100, inf, 50000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/log2f.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/log2f.c new file mode 100644 index 000000000000..3053c64bc552 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/log2f.c @@ -0,0 +1,102 @@ +/* + * Single-precision vector log2 function. + * + * Copyright (c) 2022-2024, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" + +static const struct data +{ + float32x4_t c0, c2, c4, c6, c8; + uint32x4_t off, offset_lower_bound; + uint16x8_t special_bound; + uint32x4_t mantissa_mask; + float c1, c3, c5, c7; +} data = { + /* Coefficients generated using the Remez algorithm to approximate + log2(1+r)/r for r in [ -1/3, 1/3 ]. + rel error: 0x1.c4c4b0cp-26. */ + .c0 = V4 (0x1.715476p0f), /* (float)(1 / ln(2)). */ + .c1 = -0x1.715458p-1f, + .c2 = V4 (0x1.ec701cp-2f), + .c3 = -0x1.7171a4p-2f, + .c4 = V4 (0x1.27a0b8p-2f), + .c5 = -0x1.e5143ep-3f, + .c6 = V4 (0x1.9d8ecap-3f), + .c7 = -0x1.c675bp-3f, + .c8 = V4 (0x1.9e495p-3f), + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ + .off = V4 (0x3f2aaaab), /* 0.666667. */ + .mantissa_mask = V4 (0x007fffff), +}; + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t n, uint32x4_t u_off, float32x4_t p, float32x4_t r, + uint16x4_t cmp, const struct data *d) +{ + /* Fall back to scalar code. */ + return v_call_f32 (log2f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (n, p, r), vmovl_u16 (cmp)); +} + +/* Fast implementation for single precision AdvSIMD log2, + relies on the same argument reduction as AdvSIMD logf. + Maximum error: 2.48 ULPs + _ZGVnN4v_log2f(0x1.558174p+0) got 0x1.a9be84p-2 + want 0x1.a9be8p-2. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vreinterpretq_u32_f32 (x); + + /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ + u_off = vsubq_u32 (u_off, d->off); + float32x4_t n = vcvtq_f32_s32 ( + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + + uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); + float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); + + /* y = log2(1+r) + n.
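Since n falls straight out of the exponent bits, only log2(1+r) needs a polynomial and no final multiply by a log constant is required. The reduction in scalar terms (illustrative only; frexpf stands in for the offset and mask tricks used here):

#include <math.h>

static float
log2f_reduction_sketch (float x)
{
  int n;
  float m = frexpf (x, &n);        // x = m * 2^n with m in [0.5, 1)
  return (float) n + log2f (m);    // log2 (x) = n + log2 (m)
}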
*/ + float32x4_t r2 = vmulq_f32 (r, r); + + float32x4_t c1357 = vld1q_f32 (&d->c1); + float32x4_t c01 = vfmaq_laneq_f32 (d->c0, r, c1357, 0); + float32x4_t c23 = vfmaq_laneq_f32 (d->c2, r, c1357, 1); + float32x4_t c45 = vfmaq_laneq_f32 (d->c4, r, c1357, 2); + float32x4_t c67 = vfmaq_laneq_f32 (d->c6, r, c1357, 3); + float32x4_t p68 = vfmaq_f32 (c67, r2, d->c8); + float32x4_t p48 = vfmaq_f32 (c45, r2, p68); + float32x4_t p28 = vfmaq_f32 (c23, r2, p48); + float32x4_t p = vfmaq_f32 (c01, r2, p28); + + if (unlikely (v_any_u16h (special))) + return special_case (n, u_off, p, r, special, d); + return vfmaq_f32 (n, p, r); +} + +HALF_WIDTH_ALIAS_F1 (log2) + +TEST_SIG (V, F, 1, log2, 0.01, 11.1) +TEST_ULP (V_NAME_F1 (log2), 1.99) +TEST_INTERVAL (V_NAME_F1 (log2), -0.0, -0x1p126, 100) +TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-149, 0x1p-126, 4000) +TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-126, 0x1p-23, 50000) +TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-23, 1.0, 50000) +TEST_INTERVAL (V_NAME_F1 (log2), 1.0, 100, 50000) +TEST_INTERVAL (V_NAME_F1 (log2), 100, inf, 50000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/logf.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/logf.c new file mode 100644 index 000000000000..84705fad05ee --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/logf.c @@ -0,0 +1,88 @@ +/* + * Single-precision vector log function. + * + * Copyright (c) 2019-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "v_math.h" +#include "test_defs.h" +#include "test_sig.h" + +static const struct data +{ + float32x4_t c2, c4, c6, ln2; + uint32x4_t off, offset_lower_bound, mantissa_mask; + uint16x8_t special_bound; + float c1, c3, c5, c0; +} data = { + /* 3.34 ulp error. */ + .c0 = -0x1.3e737cp-3f, + .c1 = 0x1.5a9aa2p-3f, + .c2 = V4 (-0x1.4f9934p-3f), + .c3 = 0x1.961348p-3f, + .c4 = V4 (-0x1.00187cp-2f), + .c5 = 0x1.555d7cp-2f, + .c6 = V4 (-0x1.ffffc8p-2f), + .ln2 = V4 (0x1.62e43p-1f), + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ + .off = V4 (0x3f2aaaab), /* 0.666667. */ + .mantissa_mask = V4 (0x007fffff) +}; + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t p, uint32x4_t u_off, float32x4_t y, float32x4_t r2, + uint16x4_t cmp, const struct data *d) +{ + /* Fall back to scalar code. */ + return v_call_f32 (logf, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (p, y, r2), vmovl_u16 (cmp)); +} + +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + float32x4_t c1350 = vld1q_f32 (&d->c1); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vsubq_u32 (vreinterpretq_u32_f32 (x), d->off); + + /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ + float32x4_t n = vcvtq_f32_s32 ( + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend.
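The shift works because subtracting the 2/3 offset turns the biased exponent field into a signed one: inputs below 2/3 have a bit pattern that is negative as a signed integer, so one arithmetic shift yields n directly. Scalar equivalent (illustrative; a sign-propagating right shift of a negative value is implementation-defined in C but is what AArch64 compilers produce):

#include <stdint.h>

static int32_t
exponent_after_offset (uint32_t u_off)
{
  return (int32_t) u_off >> 23;   // arithmetic shift sign-extends n
}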
*/ + uint16x4_t cmp = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); + float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); + + /* y = log(1+r) + n*ln2. */ + float32x4_t r2 = vmulq_f32 (r, r); + /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). */ + float32x4_t p = vfmaq_laneq_f32 (d->c2, r, c1350, 0); + float32x4_t q = vfmaq_laneq_f32 (d->c4, r, c1350, 1); + float32x4_t y = vfmaq_laneq_f32 (d->c6, r, c1350, 2); + p = vfmaq_laneq_f32 (p, r2, c1350, 3); + + q = vfmaq_f32 (q, p, r2); + y = vfmaq_f32 (y, q, r2); + p = vfmaq_f32 (r, d->ln2, n); + + if (unlikely (v_any_u16h (cmp))) + return special_case (p, u_off, y, r2, cmp, d); + return vfmaq_f32 (p, y, r2); +} + +HALF_WIDTH_ALIAS_F1 (log) + +TEST_SIG (V, F, 1, log, 0.01, 11.1) +TEST_ULP (V_NAME_F1 (log), 2.9) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (log), WANT_SIMD_EXCEPT) +TEST_INTERVAL (V_NAME_F1 (log), 0, 0xffff0000, 10000) +TEST_INTERVAL (V_NAME_F1 (log), 0x1p-4, 0x1p4, 500000) +TEST_INTERVAL (V_NAME_F1 (log), 0, inf, 50000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/modf.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/modf.c new file mode 100644 index 000000000000..da2fcbff8514 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/modf.c @@ -0,0 +1,33 @@ +/* + * Double-precision vector modf(x, *y) function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" + +/* Modf algorithm. Produces exact values in all rounding modes. */ +float64x2_t VPCS_ATTR V_NAME_D1_L1 (modf) (float64x2_t x, double *out_int) +{ + /* Get integer component of x. */ + float64x2_t rounded = vrndq_f64 (x); + vst1q_f64 (out_int, rounded); + + /* Subtract integer component from input. */ + uint64x2_t remaining = vreinterpretq_u64_f64 (vsubq_f64 (x, rounded)); + + /* Return +0 for integer x. */ + uint64x2_t is_integer = vceqq_f64 (x, rounded); + return vreinterpretq_f64_u64 (vbicq_u64 (remaining, is_integer)); +} + +TEST_ULP (_ZGVnN2vl8_modf_frac, 0.0) +TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_frac, 0, 1, 20000) +TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_frac, 1, inf, 20000) + +TEST_ULP (_ZGVnN2vl8_modf_int, 0.0) +TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_int, 0, 1, 20000) +TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_int, 1, inf, 20000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/modff.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/modff.c new file mode 100644 index 000000000000..0a646b24cb1a --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/modff.c @@ -0,0 +1,34 @@ +/* + * Single-precision vector modf(x, *y) function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" + +/* Modff algorithm. Produces exact values in all rounding modes. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1_L1 (modf) (float32x4_t x, + float *out_int) +{ + /* Get integer component of x. */ + float32x4_t rounded = vrndq_f32 (x); + vst1q_f32 (out_int, rounded); + + /* Subtract integer component from input. */ + uint32x4_t remaining = vreinterpretq_u32_f32 (vsubq_f32 (x, rounded)); + + /* Return +0 for integer x. 
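Without that masking, x - rounded would be -0 for integer x under FE_DOWNWARD and NaN for infinite x. The same contract in scalar form (illustrative only; hypothetical helper):

#include <math.h>
#include <stdint.h>
#include <string.h>

static float
modff_frac_sketch (float x, float *out_int)
{
  float rounded = truncf (x);   // vrndq_f32 also rounds toward zero
  *out_int = rounded;
  float frac = x - rounded;
  if (x == rounded)             // integer (or infinite) input
    {
      uint32_t zero = 0;        // force +0, as the vbicq_u32 below does
      memcpy (&frac, &zero, sizeof frac);
    }
  return frac;
}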
*/ + uint32x4_t is_integer = vceqq_f32 (x, rounded); + return vreinterpretq_f32_u32 (vbicq_u32 (remaining, is_integer)); +} + +TEST_ULP (_ZGVnN4vl4_modff_frac, 0.0) +TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_frac, 0, 1, 20000) +TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_frac, 1, inf, 20000) + +TEST_ULP (_ZGVnN4vl4_modff_int, 0.0) +TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_int, 0, 1, 20000) +TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_int, 1, inf, 20000) diff --git a/contrib/arm-optimized-routines/pl/math/v_pow_1u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/pow.c similarity index 60% rename from contrib/arm-optimized-routines/pl/math/v_pow_1u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/pow.c index 9053347d4e35..db9d6e9ba14b 100644 --- a/contrib/arm-optimized-routines/pl/math/v_pow_1u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/pow.c @@ -1,259 +1,284 @@ /* * Double-precision vector pow function. * - * Copyright (c) 2020-2023, Arm Limited. + * Copyright (c) 2020-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" /* Defines parameters of the approximation and scalar fallback. */ #include "finite_pow.h" -#define VecSmallExp v_u64 (SmallExp) -#define VecThresExp v_u64 (ThresExp) - #define VecSmallPowX v_u64 (SmallPowX) #define VecThresPowX v_u64 (ThresPowX) #define VecSmallPowY v_u64 (SmallPowY) #define VecThresPowY v_u64 (ThresPowY) static const struct data { - float64x2_t log_poly[7]; - float64x2_t exp_poly[3]; - float64x2_t ln2_hi, ln2_lo; - float64x2_t shift, inv_ln2_n, ln2_hi_n, ln2_lo_n; + uint64x2_t inf; + float64x2_t small_powx; + uint64x2_t offset, mask; + uint64x2_t mask_sub_0, mask_sub_1; + float64x2_t log_c0, log_c2, log_c4, log_c5; + double log_c1, log_c3; + double ln2_lo, ln2_hi; + uint64x2_t small_exp, thres_exp; + double ln2_lo_n, ln2_hi_n; + double inv_ln2_n, exp_c2; + float64x2_t exp_c0, exp_c1; } data = { + /* Power threshold. */ + .inf = V2 (0x7ff0000000000000), + .small_powx = V2 (0x1p-126), + .offset = V2 (Off), + .mask = V2 (0xfffULL << 52), + .mask_sub_0 = V2 (1ULL << 52), + .mask_sub_1 = V2 (52ULL << 52), /* Coefficients copied from v_pow_log_data.c relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8] Coefficients are scaled to match the scaling during evaluation. */ - .log_poly = { V2 (-0x1p-1), V2 (0x1.555555555556p-2 * -2), - V2 (-0x1.0000000000006p-2 * -2), V2 (0x1.999999959554ep-3 * 4), - V2 (-0x1.555555529a47ap-3 * 4), V2 (0x1.2495b9b4845e9p-3 * -8), - V2 (-0x1.0002b8b263fc3p-3 * -8) }, - .ln2_hi = V2 (0x1.62e42fefa3800p-1), - .ln2_lo = V2 (0x1.ef35793c76730p-45), + .log_c0 = V2 (0x1.555555555556p-2 * -2), + .log_c1 = -0x1.0000000000006p-2 * -2, + .log_c2 = V2 (0x1.999999959554ep-3 * 4), + .log_c3 = -0x1.555555529a47ap-3 * 4, + .log_c4 = V2 (0x1.2495b9b4845e9p-3 * -8), + .log_c5 = V2 (-0x1.0002b8b263fc3p-3 * -8), + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, /* Polynomial coefficients: abs error: 1.43*2^-58, ulp error: 0.549 (0.550 without fma) if |x| < ln2/512. */ - .exp_poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6ef9p-3), - V2 (0x1.5555576a5adcep-5) }, - .shift = V2 (0x1.8p52), /* round to nearest int. without intrinsics. */ - .inv_ln2_n = V2 (0x1.71547652b82fep8), /* N/ln2. */ - .ln2_hi_n = V2 (0x1.62e42fefc0000p-9), /* ln2/N. 
*/ - .ln2_lo_n = V2 (-0x1.c610ca86c3899p-45), + .exp_c0 = V2 (0x1.fffffffffffd4p-2), + .exp_c1 = V2 (0x1.5555571d6ef9p-3), + .exp_c2 = 0x1.5555576a5adcep-5, + .small_exp = V2 (0x3c90000000000000), + .thres_exp = V2 (0x03f0000000000000), + .inv_ln2_n = 0x1.71547652b82fep8, /* N/ln2. */ + .ln2_hi_n = 0x1.62e42fefc0000p-9, /* ln2/N. */ + .ln2_lo_n = -0x1.c610ca86c3899p-45, }; -#define A(i) data.log_poly[i] -#define C(i) data.exp_poly[i] - -/* This version implements an algorithm close to AOR scalar pow but +/* This version implements an algorithm close to scalar pow but - does not implement the trick in the exp's specialcase subroutine to avoid double-rounding, - does not use a tail in the exponential core computation, - and pow's exp polynomial order and table bits might differ. Maximum measured error is 1.04 ULPs: _ZGVnN2vv_pow(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13) got 0x1.f71162f473251p-1 want 0x1.f71162f473252p-1. */ static inline float64x2_t v_masked_lookup_f64 (const double *table, uint64x2_t i) { return (float64x2_t){ table[(i[0] >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1)], table[(i[1] >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1)] }; } /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about additional 15 bits precision. IX is the bit representation of x, but normalized in the subnormal range using the sign bit for the exponent. */ static inline float64x2_t v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d) { /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - uint64x2_t tmp = vsubq_u64 (ix, v_u64 (Off)); - int64x2_t k - = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. */ - uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, v_u64 (0xfffULL << 52))); + uint64x2_t tmp = vsubq_u64 (ix, d->offset); + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); + uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->mask)); float64x2_t z = vreinterpretq_f64_u64 (iz); float64x2_t kd = vcvtq_f64_s64 (k); /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ float64x2_t invc = v_masked_lookup_f64 (__v_pow_log_data.invc, tmp); float64x2_t logc = v_masked_lookup_f64 (__v_pow_log_data.logc, tmp); float64x2_t logctail = v_masked_lookup_f64 (__v_pow_log_data.logctail, tmp); /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, invc); /* k*Ln2 + log(c) + r. */ - float64x2_t t1 = vfmaq_f64 (logc, kd, d->ln2_hi); + float64x2_t ln2 = vld1q_f64 (&d->ln2_lo); + float64x2_t t1 = vfmaq_laneq_f64 (logc, kd, ln2, 1); float64x2_t t2 = vaddq_f64 (t1, r); - float64x2_t lo1 = vfmaq_f64 (logctail, kd, d->ln2_lo); + float64x2_t lo1 = vfmaq_laneq_f64 (logctail, kd, ln2, 0); float64x2_t lo2 = vaddq_f64 (vsubq_f64 (t1, t2), r); /* Evaluation is optimized assuming superscalar pipelined execution. */ - float64x2_t ar = vmulq_f64 (A (0), r); + float64x2_t ar = vmulq_f64 (v_f64 (-0.5), r); float64x2_t ar2 = vmulq_f64 (r, ar); float64x2_t ar3 = vmulq_f64 (r, ar2); /* k*Ln2 + log(c) + r + A[0]*r*r. */ float64x2_t hi = vaddq_f64 (t2, ar2); float64x2_t lo3 = vfmaq_f64 (vnegq_f64 (ar2), ar, r); float64x2_t lo4 = vaddq_f64 (vsubq_f64 (t2, hi), ar2); /* p = log1p(r) - r - A[0]*r*r. 
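The t1/t2/lo1/lo2 steps above are Fast2Sum-style compensation, kept because pow needs log(x) to noticeably more than double precision. The building block in scalar form (illustrative; assumes |a| >= |b|):

static double
fast2sum_sketch (double a, double b, double *err)
{
  double s = a + b;     // rounded sum, as t2 = t1 + r above
  *err = (a - s) + b;   // exact rounding error, as lo2 above
  return s;
}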
*/ - float64x2_t a56 = vfmaq_f64 (A (5), r, A (6)); - float64x2_t a34 = vfmaq_f64 (A (3), r, A (4)); - float64x2_t a12 = vfmaq_f64 (A (1), r, A (2)); + float64x2_t odd_coeffs = vld1q_f64 (&d->log_c1); + float64x2_t a56 = vfmaq_f64 (d->log_c4, r, d->log_c5); + float64x2_t a34 = vfmaq_laneq_f64 (d->log_c2, r, odd_coeffs, 1); + float64x2_t a12 = vfmaq_laneq_f64 (d->log_c0, r, odd_coeffs, 0); float64x2_t p = vfmaq_f64 (a34, ar2, a56); p = vfmaq_f64 (a12, ar2, p); p = vmulq_f64 (ar3, p); float64x2_t lo = vaddq_f64 (vaddq_f64 (vaddq_f64 (vaddq_f64 (lo1, lo2), lo3), lo4), p); float64x2_t y = vaddq_f64 (hi, lo); *tail = vaddq_f64 (vsubq_f64 (hi, y), lo); return y; } +static float64x2_t VPCS_ATTR NOINLINE +exp_special_case (float64x2_t x, float64x2_t xtail) +{ + return (float64x2_t){ exp_nosignbias (x[0], xtail[0]), + exp_nosignbias (x[1], xtail[1]) }; +} + /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. */ static inline float64x2_t -v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d) +v_exp_inline (float64x2_t x, float64x2_t neg_xtail, const struct data *d) { /* Fallback to scalar exp_inline for all lanes if any lane contains value of x s.t. |x| <= 2^-54 or >= 512. */ - uint64x2_t abstop - = vandq_u64 (vshrq_n_u64 (vreinterpretq_u64_f64 (x), 52), v_u64 (0x7ff)); - uint64x2_t uoflowx - = vcgeq_u64 (vsubq_u64 (abstop, VecSmallExp), VecThresExp); + uint64x2_t uoflowx = vcgeq_u64 ( + vsubq_u64 (vreinterpretq_u64_f64 (vabsq_f64 (x)), d->small_exp), + d->thres_exp); if (unlikely (v_any_u64 (uoflowx))) - return v_call2_f64 (exp_nosignbias, x, xtail, x, v_u64 (-1)); + return exp_special_case (x, vnegq_f64 (neg_xtail)); + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */ - float64x2_t z = vmulq_f64 (d->inv_ln2_n, x); /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - float64x2_t kd = vaddq_f64 (z, d->shift); - uint64x2_t ki = vreinterpretq_u64_f64 (kd); - kd = vsubq_f64 (kd, d->shift); - float64x2_t r = vfmsq_f64 (x, kd, d->ln2_hi_n); - r = vfmsq_f64 (r, kd, d->ln2_lo_n); + float64x2_t exp_consts = vld1q_f64 (&d->inv_ln2_n); + float64x2_t z = vmulq_laneq_f64 (x, exp_consts, 0); + float64x2_t kd = vrndnq_f64 (z); + uint64x2_t ki = vreinterpretq_u64_s64 (vcvtaq_s64_f64 (z)); + float64x2_t ln2_n = vld1q_f64 (&d->ln2_lo_n); + float64x2_t r = vfmsq_laneq_f64 (x, kd, ln2_n, 1); + r = vfmsq_laneq_f64 (r, kd, ln2_n, 0); /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ - r = vaddq_f64 (r, xtail); + r = vsubq_f64 (r, neg_xtail); /* 2^(k/N) ~= scale. */ uint64x2_t idx = vandq_u64 (ki, v_u64 (N_EXP - 1)); uint64x2_t top = vshlq_n_u64 (ki, 52 - V_POW_EXP_TABLE_BITS); /* This is only a valid scale when -1023*N < k < 1024*N. */ uint64x2_t sbits = v_lookup_u64 (SBits, idx); sbits = vaddq_u64 (sbits, top); /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t tmp = vfmaq_f64 (C (1), r, C (2)); - tmp = vfmaq_f64 (C (0), r, tmp); + float64x2_t tmp = vfmaq_laneq_f64 (d->exp_c1, r, exp_consts, 1); + tmp = vfmaq_f64 (d->exp_c0, r, tmp); tmp = vfmaq_f64 (r, r2, tmp); float64x2_t scale = vreinterpretq_f64_u64 (sbits); /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there is no spurious underflow here even without fma. 
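Stripped of the table and bit manipulation, the exponential core above follows the classic reduction x = k*(ln2/N) + r. A scalar sketch (illustrative only; N = 256 is inferred from inv_ln2_n = N/ln2 above, and exp2/expm1 stand in for the table):

#include <math.h>

static double
exp_core_sketch (double x)
{
  const int N = 256;
  const double ln2 = 0x1.62e42fefa39efp-1;
  double kd = round (x * (N / ln2));   // k = round (N*x/ln2)
  double r = x - kd * (ln2 / N);       // |r| <= ln2/2N, up to rounding
  double scale = exp2 (kd / N);        // table plus exponent bits in the real code
  return scale + scale * expm1 (r);    // scale * exp (r)
}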
*/ return vfmaq_f64 (scale, scale, tmp); } +static float64x2_t NOINLINE VPCS_ATTR +scalar_fallback (float64x2_t x, float64x2_t y) +{ + return (float64x2_t){ pow_scalar_special_case (x[0], y[0]), + pow_scalar_special_case (x[1], y[1]) }; +} + float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y) { const struct data *d = ptr_barrier (&data); /* Case of x <= 0 is too complicated to be vectorised efficiently here, fallback to scalar pow for all lanes if any x < 0 detected. */ if (v_any_u64 (vclezq_s64 (vreinterpretq_s64_f64 (x)))) - return v_call2_f64 (__pl_finite_pow, x, y, x, v_u64 (-1)); + return scalar_fallback (x, y); uint64x2_t vix = vreinterpretq_u64_f64 (x); uint64x2_t viy = vreinterpretq_u64_f64 (y); - uint64x2_t vtopx = vshrq_n_u64 (vix, 52); - uint64x2_t vtopy = vshrq_n_u64 (viy, 52); - uint64x2_t vabstopx = vandq_u64 (vtopx, v_u64 (0x7ff)); - uint64x2_t vabstopy = vandq_u64 (vtopy, v_u64 (0x7ff)); + uint64x2_t iay = vandq_u64 (viy, d->inf); /* Special cases of x or y. */ #if WANT_SIMD_EXCEPT /* Small or large. */ + uint64x2_t vtopx = vshrq_n_u64 (vix, 52); + uint64x2_t vabstopy = vshrq_n_u64 (iay, 52); uint64x2_t specialx = vcgeq_u64 (vsubq_u64 (vtopx, VecSmallPowX), VecThresPowX); uint64x2_t specialy = vcgeq_u64 (vsubq_u64 (vabstopy, VecSmallPowY), VecThresPowY); #else - /* Inf or nan. */ - uint64x2_t specialx = vcgeq_u64 (vabstopx, v_u64 (0x7ff)); - uint64x2_t specialy = vcgeq_u64 (vabstopy, v_u64 (0x7ff)); /* The case y==0 does not trigger a special case, since in this case it is necessary to fix the result only if x is a signalling nan, which already triggers a special case. We test y==0 directly in the scalar fallback. */ + uint64x2_t iax = vandq_u64 (vix, d->inf); + uint64x2_t specialx = vcgeq_u64 (iax, d->inf); + uint64x2_t specialy = vcgeq_u64 (iay, d->inf); #endif uint64x2_t special = vorrq_u64 (specialx, specialy); /* Fallback to scalar on all lanes if any lane is inf or nan. */ if (unlikely (v_any_u64 (special))) - return v_call2_f64 (__pl_finite_pow, x, y, x, v_u64 (-1)); + return scalar_fallback (x, y); /* Small cases of x: |x| < 0x1p-126. */ - uint64x2_t smallx = vcltq_u64 (vabstopx, VecSmallPowX); + uint64x2_t smallx = vcaltq_f64 (x, d->small_powx); if (unlikely (v_any_u64 (smallx))) { /* Update ix if top 12 bits of x are 0. */ - uint64x2_t sub_x = vceqzq_u64 (vtopx); + uint64x2_t sub_x = vceqzq_u64 (vshrq_n_u64 (vix, 52)); if (unlikely (v_any_u64 (sub_x))) { /* Normalize subnormal x so exponent becomes negative. */ - uint64x2_t vix_norm - = vreinterpretq_u64_f64 (vmulq_f64 (x, v_f64 (0x1p52))); - vix_norm = vandq_u64 (vix_norm, v_u64 (0x7fffffffffffffff)); - vix_norm = vsubq_u64 (vix_norm, v_u64 (52ULL << 52)); + uint64x2_t vix_norm = vreinterpretq_u64_f64 ( + vabsq_f64 (vmulq_f64 (x, vcvtq_f64_u64 (d->mask_sub_0)))); + vix_norm = vsubq_u64 (vix_norm, d->mask_sub_1); vix = vbslq_u64 (sub_x, vix_norm, vix); } } /* Vector Log(ix, &lo). */ float64x2_t vlo; float64x2_t vhi = v_log_inline (vix, &vlo, d); /* Vector Exp(y_loghi, y_loglo). 
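The three products below form a compensated multiply, so the exp core receives y*log(x) with an extra low-order word. Scalar form (illustrative; the vector code keeps the tail negated to save an instruction):

#include <math.h>

static double
ylogx_sketch (double y, double hi, double lo, double *tail)
{
  double ehi = y * hi;              // rounded head, as vehi
  double err = fma (y, hi, -ehi);   // exact error of y*hi, i.e. -vemi
  *tail = fma (y, lo, err);         // low-order word, i.e. -neg_velo
  return ehi;
}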
*/ float64x2_t vehi = vmulq_f64 (y, vhi); - float64x2_t velo = vmulq_f64 (y, vlo); float64x2_t vemi = vfmsq_f64 (vehi, y, vhi); - velo = vsubq_f64 (velo, vemi); - return v_exp_inline (vehi, velo, d); + float64x2_t neg_velo = vfmsq_f64 (vemi, y, vlo); + return v_exp_inline (vehi, neg_velo, d); } -PL_SIG (V, D, 2, pow) -PL_TEST_ULP (V_NAME_D2 (pow), 0.55) -PL_TEST_EXPECT_FENV (V_NAME_D2 (pow), WANT_SIMD_EXCEPT) +TEST_SIG (V, D, 2, pow) +TEST_ULP (V_NAME_D2 (pow), 0.55) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D2 (pow), WANT_SIMD_EXCEPT) /* Wide intervals spanning the whole domain but shared between x and y. */ -#define V_POW_INTERVAL2(xlo, xhi, ylo, yhi, n) \ - PL_TEST_INTERVAL2 (V_NAME_D2 (pow), xlo, xhi, ylo, yhi, n) \ - PL_TEST_INTERVAL2 (V_NAME_D2 (pow), xlo, xhi, -ylo, -yhi, n) \ - PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -xlo, -xhi, ylo, yhi, n) \ - PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -xlo, -xhi, -ylo, -yhi, n) +#define V_POW_INTERVAL2(xlo, xhi, ylo, yhi, n) \ + TEST_INTERVAL2 (V_NAME_D2 (pow), xlo, xhi, ylo, yhi, n) \ + TEST_INTERVAL2 (V_NAME_D2 (pow), xlo, xhi, -ylo, -yhi, n) \ + TEST_INTERVAL2 (V_NAME_D2 (pow), -xlo, -xhi, ylo, yhi, n) \ + TEST_INTERVAL2 (V_NAME_D2 (pow), -xlo, -xhi, -ylo, -yhi, n) #define EXPAND(str) str##000000000 #define SHL52(str) EXPAND (str) V_POW_INTERVAL2 (0, SHL52 (SmallPowX), 0, inf, 40000) V_POW_INTERVAL2 (SHL52 (SmallPowX), SHL52 (BigPowX), 0, inf, 40000) V_POW_INTERVAL2 (SHL52 (BigPowX), inf, 0, inf, 40000) V_POW_INTERVAL2 (0, inf, 0, SHL52 (SmallPowY), 40000) V_POW_INTERVAL2 (0, inf, SHL52 (SmallPowY), SHL52 (BigPowY), 40000) V_POW_INTERVAL2 (0, inf, SHL52 (BigPowY), inf, 40000) V_POW_INTERVAL2 (0, inf, 0, inf, 1000) /* x~1 or y~1. */ V_POW_INTERVAL2 (0x1p-1, 0x1p1, 0x1p-10, 0x1p10, 10000) V_POW_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000) V_POW_INTERVAL2 (0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p16, 10000) /* around argmaxs of ULP error. */ V_POW_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000) V_POW_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000) /* x is negative, y is odd or even integer, or y is real not integer. */ -PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 3.0, 3.0, 10000) -PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 4.0, 4.0, 10000) -PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 0.0, 10.0, 10000) -PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 0.0, 10.0, -0.0, -10.0, 10000) +TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 3.0, 3.0, 10000) +TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 4.0, 4.0, 10000) +TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 0.0, 10.0, 10000) +TEST_INTERVAL2 (V_NAME_D2 (pow), 0.0, 10.0, -0.0, -10.0, 10000) /* 1.0^y. */ -PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000) -PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000) -PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000) -PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000) +TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000) +TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000) +TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000) +TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/powf.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/powf.c new file mode 100644 index 000000000000..47f74cf38ab0 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/powf.c @@ -0,0 +1,209 @@ +/* + * Single-precision vector powf function. + * + * Copyright (c) 2019-2024, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_defs.h" +#include "test_sig.h" + +#define Min v_u32 (0x00800000) +#define Max v_u32 (0x7f800000) +#define Thresh v_u32 (0x7f000000) /* Max - Min. */ +#define MantissaMask v_u32 (0x007fffff) + +#define A d->log2_poly +#define C d->exp2f_poly + +/* 2.6 ulp ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2). */ +#define Off v_u32 (0x3f35d000) + +#define V_POWF_LOG2_TABLE_BITS 5 +#define V_EXP2F_TABLE_BITS 5 +#define Log2IdxMask ((1 << V_POWF_LOG2_TABLE_BITS) - 1) +#define Scale ((double) (1 << V_EXP2F_TABLE_BITS)) + +static const struct data +{ + struct + { + double invc, logc; + } log2_tab[1 << V_POWF_LOG2_TABLE_BITS]; + float64x2_t log2_poly[4]; + uint64_t exp2f_tab[1 << V_EXP2F_TABLE_BITS]; + float64x2_t exp2f_poly[3]; +} data = { + .log2_tab = {{0x1.6489890582816p+0, -0x1.e960f97b22702p-2 * Scale}, + {0x1.5cf19b35e3472p+0, -0x1.c993406cd4db6p-2 * Scale}, + {0x1.55aac0e956d65p+0, -0x1.aa711d9a7d0f3p-2 * Scale}, + {0x1.4eb0022977e01p+0, -0x1.8bf37bacdce9bp-2 * Scale}, + {0x1.47fcccda1dd1fp+0, -0x1.6e13b3519946ep-2 * Scale}, + {0x1.418ceabab68c1p+0, -0x1.50cb8281e4089p-2 * Scale}, + {0x1.3b5c788f1edb3p+0, -0x1.341504a237e2bp-2 * Scale}, + {0x1.3567de48e9c9ap+0, -0x1.17eaab624ffbbp-2 * Scale}, + {0x1.2fabc80fd19bap+0, -0x1.f88e708f8c853p-3 * Scale}, + {0x1.2a25200ce536bp+0, -0x1.c24b6da113914p-3 * Scale}, + {0x1.24d108e0152e3p+0, -0x1.8d02ee397cb1dp-3 * Scale}, + {0x1.1facd8ab2fbe1p+0, -0x1.58ac1223408b3p-3 * Scale}, + {0x1.1ab614a03efdfp+0, -0x1.253e6fd190e89p-3 * Scale}, + {0x1.15ea6d03af9ffp+0, -0x1.e5641882c12ffp-4 * Scale}, + {0x1.1147b994bb776p+0, -0x1.81fea712926f7p-4 * Scale}, + {0x1.0ccbf650593aap+0, -0x1.203e240de64a3p-4 * Scale}, + {0x1.0875408477302p+0, -0x1.8029b86a78281p-5 * Scale}, + {0x1.0441d42a93328p+0, -0x1.85d713190fb9p-6 * Scale}, + {0x1p+0, 0x0p+0 * Scale}, + {0x1.f1d006c855e86p-1, 0x1.4c1cc07312997p-5 * Scale}, + {0x1.e28c3341aa301p-1, 0x1.5e1848ccec948p-4 * Scale}, + {0x1.d4bdf9aa64747p-1, 0x1.04cfcb7f1196fp-3 * Scale}, + {0x1.c7b45a24e5803p-1, 0x1.582813d463c21p-3 * Scale}, + {0x1.bb5f5eb2ed60ap-1, 0x1.a936fa68760ccp-3 * Scale}, + {0x1.afb0bff8fe6b4p-1, 0x1.f81bc31d6cc4ep-3 * Scale}, + {0x1.a49badf7ab1f5p-1, 0x1.2279a09fae6b1p-2 * Scale}, + {0x1.9a14a111fc4c9p-1, 0x1.47ec0b6df5526p-2 * Scale}, + {0x1.901131f5b2fdcp-1, 0x1.6c71762280f1p-2 * Scale}, + {0x1.8687f73f6d865p-1, 0x1.90155070798dap-2 * Scale}, + {0x1.7d7067eb77986p-1, 0x1.b2e23b1d3068cp-2 * Scale}, + {0x1.74c2c1cf97b65p-1, 0x1.d4e21b0daa86ap-2 * Scale}, + {0x1.6c77f37cff2a1p-1, 0x1.f61e2a2f67f3fp-2 * Scale},}, + .log2_poly = { /* rel err: 1.5 * 2^-30. 
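A useful way to read the whole routine: powf is evaluated as 2^(y*log2(x)) with every intermediate carried in double precision and a single narrowing at the end, which is what makes the 2.6 ulp budget above attainable. Scalar analogue (illustrative only; valid for finite x > 0):

#include <math.h>

static float
powf_via_double_sketch (float x, float y)
{
  double l = log2 ((double) x);            // table plus cubic in the real code
  return (float) exp2 ((double) y * l);    // table plus cubic in the real code
}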
*/ + V2 (-0x1.6ff5daa3b3d7cp-2 * Scale), + V2 (0x1.ec81d03c01aebp-2 * Scale), + V2 (-0x1.71547bb43f101p-1 * Scale), + V2 (0x1.7154764a815cbp0 * Scale)}, + .exp2f_tab = {0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, + 0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa, + 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715, + 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, + 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, + 0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, + 0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db, + 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, + 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, + 0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, + 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,}, + .exp2f_poly = { /* rel err: 1.69 * 2^-34. */ + V2 (0x1.c6af84b912394p-5 / Scale / Scale / Scale), + V2 (0x1.ebfce50fac4f3p-3 / Scale / Scale), + V2 (0x1.62e42ff0c52d6p-1 / Scale)}}; + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t x, float32x4_t y, float32x4_t ret, uint32x4_t cmp) +{ + return v_call2_f32 (powf, x, y, ret, cmp); +} + +static inline float64x2_t +ylogx_core (const struct data *d, float64x2_t iz, float64x2_t k, + float64x2_t invc, float64x2_t logc, float64x2_t y) +{ + + /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */ + float64x2_t r = vfmaq_f64 (v_f64 (-1.0), iz, invc); + float64x2_t y0 = vaddq_f64 (logc, k); + + /* Polynomial to approximate log1p(r)/ln2. */ + float64x2_t logx = vfmaq_f64 (A[1], r, A[0]); + logx = vfmaq_f64 (A[2], logx, r); + logx = vfmaq_f64 (A[3], logx, r); + logx = vfmaq_f64 (y0, logx, r); + + return vmulq_f64 (logx, y); +} + +static inline float64x2_t +log2_lookup (const struct data *d, uint32_t i) +{ + return vld1q_f64 ( + &d->log2_tab[(i >> (23 - V_POWF_LOG2_TABLE_BITS)) & Log2IdxMask].invc); +} + +static inline uint64x1_t +exp2f_lookup (const struct data *d, uint64_t i) +{ + return vld1_u64 (&d->exp2f_tab[i % (1 << V_EXP2F_TABLE_BITS)]); +} + +static inline float32x2_t +powf_core (const struct data *d, float64x2_t ylogx) +{ + /* N*x = k + r with r in [-1/2, 1/2]. */ + float64x2_t kd = vrndnq_f64 (ylogx); + int64x2_t ki = vcvtaq_s64_f64 (ylogx); + float64x2_t r = vsubq_f64 (ylogx, kd); + + /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */ + uint64x2_t t = vcombine_u64 (exp2f_lookup (d, vgetq_lane_s64 (ki, 0)), + exp2f_lookup (d, vgetq_lane_s64 (ki, 1))); + t = vaddq_u64 ( + t, vreinterpretq_u64_s64 (vshlq_n_s64 (ki, 52 - V_EXP2F_TABLE_BITS))); + float64x2_t s = vreinterpretq_f64_u64 (t); + float64x2_t p = vfmaq_f64 (C[1], r, C[0]); + p = vfmaq_f64 (C[2], r, p); + p = vfmaq_f64 (s, p, vmulq_f64 (s, r)); + return vcvt_f32_f64 (p); +} + +float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (pow) (float32x4_t x, float32x4_t y) +{ + const struct data *d = ptr_barrier (&data); + uint32x4_t u = vreinterpretq_u32_f32 (x); + uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (u, Min), Thresh); + uint32x4_t tmp = vsubq_u32 (u, Off); + uint32x4_t top = vbicq_u32 (tmp, MantissaMask); + float32x4_t iz = vreinterpretq_f32_u32 (vsubq_u32 (u, top)); + int32x4_t k = vshrq_n_s32 (vreinterpretq_s32_u32 (top), + 23 - V_EXP2F_TABLE_BITS); /* arithmetic shift. */ + + /* Use double precision for each lane: split input vectors into lo and hi + halves and promote. 
*/ + float64x2_t tab0 = log2_lookup (d, vgetq_lane_u32 (tmp, 0)), + tab1 = log2_lookup (d, vgetq_lane_u32 (tmp, 1)), + tab2 = log2_lookup (d, vgetq_lane_u32 (tmp, 2)), + tab3 = log2_lookup (d, vgetq_lane_u32 (tmp, 3)); + + float64x2_t iz_lo = vcvt_f64_f32 (vget_low_f32 (iz)), + iz_hi = vcvt_high_f64_f32 (iz); + + float64x2_t k_lo = vcvtq_f64_s64 (vmovl_s32 (vget_low_s32 (k))), + k_hi = vcvtq_f64_s64 (vmovl_high_s32 (k)); + + float64x2_t invc_lo = vzip1q_f64 (tab0, tab1), + invc_hi = vzip1q_f64 (tab2, tab3), + logc_lo = vzip2q_f64 (tab0, tab1), + logc_hi = vzip2q_f64 (tab2, tab3); + + float64x2_t y_lo = vcvt_f64_f32 (vget_low_f32 (y)), + y_hi = vcvt_high_f64_f32 (y); + + float64x2_t ylogx_lo = ylogx_core (d, iz_lo, k_lo, invc_lo, logc_lo, y_lo); + float64x2_t ylogx_hi = ylogx_core (d, iz_hi, k_hi, invc_hi, logc_hi, y_hi); + + uint32x4_t ylogx_top = vuzp2q_u32 (vreinterpretq_u32_f64 (ylogx_lo), + vreinterpretq_u32_f64 (ylogx_hi)); + + cmp = vorrq_u32 ( + cmp, vcgeq_u32 (vandq_u32 (vshrq_n_u32 (ylogx_top, 15), v_u32 (0xffff)), + vdupq_n_u32 (asuint64 (126.0 * (1 << V_EXP2F_TABLE_BITS)) + >> 47))); + + float32x2_t p_lo = powf_core (d, ylogx_lo); + float32x2_t p_hi = powf_core (d, ylogx_hi); + + if (unlikely (v_any_u32 (cmp))) + return special_case (x, y, vcombine_f32 (p_lo, p_hi), cmp); + return vcombine_f32 (p_lo, p_hi); +} + +HALF_WIDTH_ALIAS_F2 (pow) + +TEST_SIG (V, F, 2, pow) +TEST_ULP (V_NAME_F2 (pow), 2.1) +TEST_DISABLE_FENV (V_NAME_F2 (pow)) +TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1p-1, 0x1p1, 0x1p-7, 0x1p7, 50000) +TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1p-1, 0x1p1, -0x1p-7, -0x1p7, 50000) +TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1p-70, 0x1p70, 0x1p-1, 0x1p1, 50000) +TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1p-70, 0x1p70, -0x1p-1, -0x1p1, 50000) +TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p14, 50000) +TEST_INTERVAL2 (V_NAME_F2 (pow), 0x1.ep-1, 0x1.1p0, -0x1p8, -0x1p14, 50000) diff --git a/contrib/arm-optimized-routines/math/aarch64/v_sin.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/sin.c similarity index 77% rename from contrib/arm-optimized-routines/math/aarch64/v_sin.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/sin.c index 04129c31133d..0461bbb99405 100644 --- a/contrib/arm-optimized-routines/math/aarch64/v_sin.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/sin.c @@ -1,97 +1,105 @@ /* * Double-precision vector sin function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +#include "test_defs.h" +#include "test_sig.h" #include "mathlib.h" #include "v_math.h" static const struct data { float64x2_t poly[7]; - float64x2_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; + float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19), V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33), V2 (-0x1.9e9540300a1p-41) }, .range_val = V2 (0x1p23), .inv_pi = V2 (0x1.45f306dc9c883p-2), .pi_1 = V2 (0x1.921fb54442d18p+1), .pi_2 = V2 (0x1.1a62633145c06p-53), .pi_3 = V2 (0x1.c1cd129024e09p-106), - .shift = V2 (0x1.8p52), }; #if WANT_SIMD_EXCEPT -# define TinyBound v_u64 (0x3000000000000000) /* asuint64 (0x1p-255). */ -# define Thresh v_u64 (0x1160000000000000) /* RangeVal - TinyBound. */ +/* asuint64(0x1p-253)), below which multiply by inv_pi underflows. */ +# define TinyBound v_u64 (0x3020000000000000) +/* RangeVal - TinyBound. 
*/ +# define Thresh v_u64 (0x1160000000000000) #endif #define C(i) d->poly[i] static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp) { y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); return v_call_f64 (sin, x, y, cmp); } /* Vector (AdvSIMD) sin approximation. Maximum observed error in [-pi/2, pi/2], where argument is not reduced, is 2.87 ULP: _ZGVnN2v_sin (0x1.921d5c6a07142p+0) got 0x1.fffffffa7dc02p-1 want 0x1.fffffffa7dc05p-1 Maximum observed error in the entire non-special domain ([-2^23, 2^23]) is 3.22 ULP: _ZGVnN2v_sin (0x1.5702447b6f17bp+22) got 0x1.ffdcd125c84fbp-3 want 0x1.ffdcd125c84f8p-3. */ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x) { const struct data *d = ptr_barrier (&data); float64x2_t n, r, r2, r3, r4, y, t1, t2, t3; uint64x2_t odd, cmp; #if WANT_SIMD_EXCEPT /* Detect |x| <= TinyBound or |x| >= RangeVal. If fenv exceptions are to be triggered correctly, set any special lanes to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by special-case handler later. */ uint64x2_t ir = vreinterpretq_u64_f64 (vabsq_f64 (x)); cmp = vcgeq_u64 (vsubq_u64 (ir, TinyBound), Thresh); - r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x); + r = vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), cmp)); #else r = x; cmp = vcageq_f64 (x, d->range_val); #endif /* n = rint(|x|/pi). */ - n = vfmaq_f64 (d->shift, d->inv_pi, r); - odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); - n = vsubq_f64 (n, d->shift); + n = vrndaq_f64 (vmulq_f64 (r, d->inv_pi)); + odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f64 (r, d->pi_1, n); r = vfmsq_f64 (r, d->pi_2, n); r = vfmsq_f64 (r, d->pi_3, n); /* sin(r) poly approx. */ r2 = vmulq_f64 (r, r); r3 = vmulq_f64 (r2, r); r4 = vmulq_f64 (r2, r2); t1 = vfmaq_f64 (C (4), C (5), r2); t2 = vfmaq_f64 (C (2), C (3), r2); t3 = vfmaq_f64 (C (0), C (1), r2); y = vfmaq_f64 (t1, C (6), r4); y = vfmaq_f64 (t2, y, r4); y = vfmaq_f64 (t3, y, r4); y = vfmaq_f64 (r, y, r3); if (unlikely (v_any_u64 (cmp))) return special_case (x, y, odd, cmp); return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); } + +TEST_SIG (V, D, 1, sin, -3.1, 3.1) +TEST_ULP (V_NAME_D1 (sin), 3.0) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (sin), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (sin), 0, 0x1p23, 500000) +TEST_SYM_INTERVAL (V_NAME_D1 (sin), 0x1p23, inf, 10000) diff --git a/contrib/arm-optimized-routines/pl/math/v_sincos_3u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/sincos.c similarity index 70% rename from contrib/arm-optimized-routines/pl/math/v_sincos_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/sincos.c index 6fc014c120b8..83bfa45efa98 100644 --- a/contrib/arm-optimized-routines/pl/math/v_sincos_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/sincos.c @@ -1,57 +1,67 @@ /* * Double-precision vector sincos function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Define _GNU_SOURCE in order to include sincos declaration. If building pre-GLIBC 2.1, or on a non-GNU conforming system, this routine will need to be linked against the scalar sincosf from math/. 
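   (For this double-precision routine the scalar fallback is sincos; a
   shim is provided below where libm lacks it.) Expected call shape, as
   an illustrative usage sketch only:

     double in[2] = { 0.25, 1.5 };
     double s[2], c[2];
     _ZGVnN2vl8l8_sincos (vld1q_f64 (in), s, c);
     /* Now s[i] ~= sin(in[i]) and c[i] ~= cos(in[i]).  */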
*/ #define _GNU_SOURCE #include <math.h> -#undef _GNU_SOURCE #include "v_math.h" -#include "pl_test.h" +#include "test_defs.h" +#include "v_sincos_common.h" +/* sincos not available for all scalar libm implementations. */ +#if defined(_MSC_VER) || !defined(__GLIBC__) +static void +sincos (double x, double *out_sin, double *out_cos) +{ + *out_sin = sin (x); + *out_cos = cos (x); +} +#endif + static void VPCS_ATTR NOINLINE special_case (float64x2_t x, uint64x2_t special, double *out_sin, double *out_cos) { if (special[0]) sincos (x[0], out_sin, out_cos); if (special[1]) sincos (x[1], out_sin + 1, out_cos + 1); } /* Double-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate polynomials. Largest observed error is for sin, 3.22 ULP: v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3 want -0x1.ffe9537d5dbb4p-3. */ VPCS_ATTR void _ZGVnN2vl8l8_sincos (float64x2_t x, double *out_sin, double *out_cos) { const struct v_sincos_data *d = ptr_barrier (&v_sincos_data); uint64x2_t special = check_ge_rangeval (x, d); float64x2x2_t sc = v_sincos_inline (x, d); vst1q_f64 (out_sin, sc.val[0]); vst1q_f64 (out_cos, sc.val[1]); if (unlikely (v_any_u64 (special))) special_case (x, special, out_sin, out_cos); } -PL_TEST_ULP (_ZGVnN2v_sincos_sin, 2.73) -PL_TEST_ULP (_ZGVnN2v_sincos_cos, 2.73) +TEST_DISABLE_FENV (_ZGVnN2v_sincos_cos) +TEST_DISABLE_FENV (_ZGVnN2v_sincos_sin) +TEST_ULP (_ZGVnN2v_sincos_sin, 2.73) +TEST_ULP (_ZGVnN2v_sincos_cos, 2.73) #define V_SINCOS_INTERVAL(lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVnN2v_sincos_sin, lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVnN2v_sincos_cos, lo, hi, n) -V_SINCOS_INTERVAL (0, 0x1p23, 500000) -V_SINCOS_INTERVAL (-0, -0x1p23, 500000) + TEST_INTERVAL (_ZGVnN2v_sincos_sin, lo, hi, n) \ + TEST_INTERVAL (_ZGVnN2v_sincos_cos, lo, hi, n) V_SINCOS_INTERVAL (0, 0x1p-31, 50000) V_SINCOS_INTERVAL (0x1p-31, 0x1p23, 500000) V_SINCOS_INTERVAL (0x1p23, inf, 10000) -V_SINCOS_INTERVAL (-0x1p23, -inf, 10000) diff --git a/contrib/arm-optimized-routines/pl/math/v_sincosf_1u8.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/sincosf.c similarity index 70% rename from contrib/arm-optimized-routines/pl/math/v_sincosf_1u8.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/sincosf.c index bf77afaa14db..cd482f38d5f6 100644 --- a/contrib/arm-optimized-routines/pl/math/v_sincosf_1u8.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/sincosf.c @@ -1,58 +1,68 @@ /* * Single-precision vector sincos function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Define _GNU_SOURCE in order to include sincosf declaration. If building pre-GLIBC 2.1, or on a non-GNU conforming system, this routine will need to be linked against the scalar sincosf from math/. */ #define _GNU_SOURCE #include <math.h> -#undef _GNU_SOURCE #include "v_sincosf_common.h" #include "v_math.h" -#include "pl_test.h" +#include "test_defs.h" + +/* sincos not available for all scalar libm implementations. 
*/ +#if defined(_MSC_VER) || !defined(__GLIBC__) +static void +sincosf (float x, float *out_sin, float *out_cos) +{ + *out_sin = sinf (x); + *out_cos = cosf (x); +} +#endif static void VPCS_ATTR NOINLINE special_case (float32x4_t x, uint32x4_t special, float *out_sin, float *out_cos) { for (int i = 0; i < 4; i++) if (special[i]) sincosf (x[i], out_sin + i, out_cos + i); } /* Single-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate low-order polynomials. Worst-case error for sin is 1.67 ULP: v_sincosf_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5 Worst-case error for cos is 1.81 ULP: v_sincosf_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */ VPCS_ATTR void _ZGVnN4vl4l4_sincosf (float32x4_t x, float *out_sin, float *out_cos) { const struct v_sincosf_data *d = ptr_barrier (&v_sincosf_data); uint32x4_t special = check_ge_rangeval (x, d); float32x4x2_t sc = v_sincosf_inline (x, d); vst1q_f32 (out_sin, sc.val[0]); vst1q_f32 (out_cos, sc.val[1]); if (unlikely (v_any_u32 (special))) special_case (x, special, out_sin, out_cos); } -PL_TEST_ULP (_ZGVnN4v_sincosf_sin, 1.17) -PL_TEST_ULP (_ZGVnN4v_sincosf_cos, 1.31) +TEST_DISABLE_FENV (_ZGVnN4v_sincosf_sin) +TEST_DISABLE_FENV (_ZGVnN4v_sincosf_cos) +TEST_ULP (_ZGVnN4v_sincosf_sin, 1.17) +TEST_ULP (_ZGVnN4v_sincosf_cos, 1.31) #define V_SINCOSF_INTERVAL(lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVnN4v_sincosf_sin, lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVnN4v_sincosf_cos, lo, hi, n) -V_SINCOSF_INTERVAL (0, 0x1p20, 500000) -V_SINCOSF_INTERVAL (-0, -0x1p20, 500000) + TEST_INTERVAL (_ZGVnN4v_sincosf_sin, lo, hi, n) \ + TEST_INTERVAL (_ZGVnN4v_sincosf_cos, lo, hi, n) +V_SINCOSF_INTERVAL (0, 0x1p-31, 50000) +V_SINCOSF_INTERVAL (0x1p-31, 0x1p20, 500000) V_SINCOSF_INTERVAL (0x1p20, inf, 10000) -V_SINCOSF_INTERVAL (-0x1p20, -inf, 10000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/sincospi.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/sincospi.c new file mode 100644 index 000000000000..fd425202ce67 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/sincospi.c @@ -0,0 +1,44 @@ +/* + * Double-precision vector sincospi function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "v_sincospi_common.h" +#include "v_math.h" +#include "test_defs.h" + +/* Double-precision vector function allowing calculation of both sin and cos in + one function call, using separate argument reduction and shared low-order + polynomials. + Approximation for vector double-precision sincospi(x). + Maximum Error 3.09 ULP: + _ZGVnN2v_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1 + want 0x1.fd54d0b327cf4p-1 + Maximum Error 3.16 ULP: + _ZGVnN2v_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1 + want 0x1.fd2da484ff402p-1. 
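   sincospi(x) computes sin(pi*x) and cos(pi*x) with the reduction done
   on x itself, so no accuracy is lost to a rounded pi*x product. What
   each lane computes, as a naive scalar reference (illustration only,
   not how this routine works):

     #include <math.h>
     static void sincospi_ref (double x, double *s, double *c)
     {
       *s = sin (M_PI * x); /* reference semantics; the rounded     */
       *c = cos (M_PI * x); /* pi*x here is exactly the error this  */
     }                      /* routine is designed to avoid.        */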
*/ +VPCS_ATTR void +_ZGVnN2vl8l8_sincospi (float64x2_t x, double *out_sin, double *out_cos) +{ + const struct v_sincospi_data *d = ptr_barrier (&v_sincospi_data); + + float64x2x2_t sc = v_sincospi_inline (x, d); + + vst1q_f64 (out_sin, sc.val[0]); + vst1q_f64 (out_cos, sc.val[1]); +} + +#if WANT_TRIGPI_TESTS +TEST_DISABLE_FENV (_ZGVnN2v_sincospi_cos) +TEST_DISABLE_FENV (_ZGVnN2v_sincospi_sin) +TEST_ULP (_ZGVnN2v_sincospi_sin, 2.59) +TEST_ULP (_ZGVnN2v_sincospi_cos, 2.66) +# define V_SINCOSPI_INTERVAL(lo, hi, n) \ + TEST_SYM_INTERVAL (_ZGVnN2v_sincospi_sin, lo, hi, n) \ + TEST_SYM_INTERVAL (_ZGVnN2v_sincospi_cos, lo, hi, n) +V_SINCOSPI_INTERVAL (0, 0x1p-63, 10000) +V_SINCOSPI_INTERVAL (0x1p-63, 0.5, 50000) +V_SINCOSPI_INTERVAL (0.5, 0x1p63, 50000) +V_SINCOSPI_INTERVAL (0x1p63, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/sincospif.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/sincospif.c new file mode 100644 index 000000000000..760ea3d4f5e1 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/sincospif.c @@ -0,0 +1,43 @@ +/* + * Single-precision vector sincospi function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_sincospif_common.h" +#include "v_math.h" +#include "test_defs.h" +#include "mathlib.h" + +/* Single-precision vector function allowing calculation of both sinpi and + cospi in one function call, using shared argument reduction and polynomials. + Worst-case error for sin is 3.04 ULP: + _ZGVnN4v_sincospif_sin(0x1.1d341ap-1) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1. + Worst-case error for cos is 3.18 ULP: + _ZGVnN4v_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1. + */ +VPCS_ATTR void +_ZGVnN4vl4l4_sincospif (float32x4_t x, float *out_sin, float *out_cos) +{ + const struct v_sincospif_data *d = ptr_barrier (&v_sincospif_data); + + float32x4x2_t sc = v_sincospif_inline (x, d); + + vst1q_f32 (out_sin, sc.val[0]); + vst1q_f32 (out_cos, sc.val[1]); +} + +#if WANT_TRIGPI_TESTS +TEST_DISABLE_FENV (_ZGVnN4v_sincospif_sin) +TEST_DISABLE_FENV (_ZGVnN4v_sincospif_cos) +TEST_ULP (_ZGVnN4v_sincospif_sin, 2.54) +TEST_ULP (_ZGVnN4v_sincospif_cos, 2.68) +# define V_SINCOSPIF_INTERVAL(lo, hi, n) \ + TEST_SYM_INTERVAL (_ZGVnN4v_sincospif_sin, lo, hi, n) \ + TEST_SYM_INTERVAL (_ZGVnN4v_sincospif_cos, lo, hi, n) +V_SINCOSPIF_INTERVAL (0, 0x1p-63, 10000) +V_SINCOSPIF_INTERVAL (0x1p-63, 0.5, 50000) +V_SINCOSPIF_INTERVAL (0.5, 0x1p31, 50000) +V_SINCOSPIF_INTERVAL (0x1p31, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/math/aarch64/v_sinf.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/sinf.c similarity index 65% rename from contrib/arm-optimized-routines/math/aarch64/v_sinf.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/sinf.c index 336879844459..0764434039a0 100644 --- a/contrib/arm-optimized-routines/math/aarch64/v_sinf.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/sinf.c @@ -1,82 +1,92 @@ /* * Single-precision vector sin function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #include "v_math.h" +#include "test_defs.h" +#include "test_sig.h" static const struct data { float32x4_t poly[4]; - float32x4_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; + float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* 1.886 ulp error. 
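   (the quoted error is for the polynomial approximation of sin on the
   reduced interval). In scalar Horner form the evaluation below is
   (illustrative):

     /* sin(r) ~= r + r^3 * (C0 + C1 r^2 + C2 r^4 + C3 r^6).  */
     static float sinf_poly (float r)
     {
       float r2 = r * r;
       float y = -0x1.9f42eap-13f + r2 * 0x1.5b2e76p-19f; /* C2 + r2 C3 */
       y = 0x1.110df4p-7f + r2 * y;                       /* C1 + r2 y  */
       y = -0x1.555548p-3f + r2 * y;                      /* C0 + r2 y  */
       return r + (y * r2) * r;
     }
   */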
*/ .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), V4 (0x1.5b2e76p-19f) }, .pi_1 = V4 (0x1.921fb6p+1f), .pi_2 = V4 (-0x1.777a5cp-24f), .pi_3 = V4 (-0x1.ee59dap-49f), .inv_pi = V4 (0x1.45f306p-2f), - .shift = V4 (0x1.8p+23f), .range_val = V4 (0x1p20f) }; #if WANT_SIMD_EXCEPT -# define TinyBound v_u32 (0x21000000) /* asuint32(0x1p-61f). */ -# define Thresh v_u32 (0x28800000) /* RangeVal - TinyBound. */ +/* asuint32(0x1p-59f), below which multiply by inv_pi underflows. */ +# define TinyBound v_u32 (0x22000000) +/* RangeVal - TinyBound. */ +# define Thresh v_u32 (0x27800000) #endif #define C(i) d->poly[i] static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp) { /* Fall back to scalar code. */ y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); return v_call_f32 (sinf, x, y, cmp); } -float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x) { const struct data *d = ptr_barrier (&data); float32x4_t n, r, r2, y; uint32x4_t odd, cmp; #if WANT_SIMD_EXCEPT uint32x4_t ir = vreinterpretq_u32_f32 (vabsq_f32 (x)); cmp = vcgeq_u32 (vsubq_u32 (ir, TinyBound), Thresh); /* If fenv exceptions are to be triggered correctly, set any special lanes to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by special-case handler later. */ - r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x); + r = vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), cmp)); #else r = x; cmp = vcageq_f32 (x, d->range_val); #endif - /* n = rint(|x|/pi) */ - n = vfmaq_f32 (d->shift, d->inv_pi, r); - odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); - n = vsubq_f32 (n, d->shift); + /* n = rint(|x|/pi). */ + n = vrndaq_f32 (vmulq_f32 (r, d->inv_pi)); + odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31); - /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */ + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f32 (r, d->pi_1, n); r = vfmsq_f32 (r, d->pi_2, n); r = vfmsq_f32 (r, d->pi_3, n); - /* y = sin(r) */ + /* y = sin(r). */ r2 = vmulq_f32 (r, r); y = vfmaq_f32 (C (2), C (3), r2); y = vfmaq_f32 (C (1), y, r2); y = vfmaq_f32 (C (0), y, r2); y = vfmaq_f32 (r, vmulq_f32 (y, r2), r); if (unlikely (v_any_u32 (cmp))) return special_case (x, y, odd, cmp); return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); } + +HALF_WIDTH_ALIAS_F1 (sin) + +TEST_SIG (V, F, 1, sin, -3.1, 3.1) +TEST_ULP (V_NAME_F1 (sin), 1.4) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (sin), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (sin), 0, 0x1p20, 500000) +TEST_SYM_INTERVAL (V_NAME_F1 (sin), 0x1p20, inf, 10000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/sinh.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/sinh.c new file mode 100644 index 000000000000..f65ccd0c6270 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/sinh.c @@ -0,0 +1,80 @@ +/* + * Double-precision vector sinh(x) function. + * + * Copyright (c) 2022-2024, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "v_expm1_inline.h" + +static const struct data +{ + struct v_expm1_data d; + uint64x2_t halff; +#if WANT_SIMD_EXCEPT + uint64x2_t tiny_bound, thresh; +#else + float64x2_t large_bound; +#endif +} data = { + .d = V_EXPM1_DATA, + .halff = V2 (0x3fe0000000000000), +#if WANT_SIMD_EXCEPT + /* 2^-26, below which sinh(x) rounds to x. */ + .tiny_bound = V2 (0x3e50000000000000), + /* asuint(large_bound) - asuint(tiny_bound). */ + .thresh = V2 (0x0230000000000000), +#else + /* 2^9. expm1 helper overflows for large input. */ + .large_bound = V2 (0x1p+9), +#endif +}; + +static float64x2_t NOINLINE VPCS_ATTR +special_case (float64x2_t x) +{ + return v_call_f64 (sinh, x, x, v_u64 (-1)); +} + +/* Approximation for vector double-precision sinh(x) using expm1. + sinh(x) = (exp(x) - exp(-x)) / 2. + The greatest observed error is 2.52 ULP: + _ZGVnN2v_sinh(-0x1.a098a2177a2b9p-2) got -0x1.ac2f05bb66fccp-2 + want -0x1.ac2f05bb66fc9p-2. */ +float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + + float64x2_t ax = vabsq_f64 (x); + uint64x2_t ix = vreinterpretq_u64_f64 (x); + float64x2_t halfsign = vreinterpretq_f64_u64 ( + vbslq_u64 (v_u64 (0x8000000000000000), ix, d->halff)); + +#if WANT_SIMD_EXCEPT + uint64x2_t special = vcgeq_u64 ( + vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh); +#else + uint64x2_t special = vcageq_f64 (x, d->large_bound); +#endif + + /* Fall back to scalar variant for all lanes if any of them are special. */ + if (unlikely (v_any_u64 (special))) + return special_case (x); + + /* Up to the point that expm1 overflows, we can use it to calculate sinh + using a slight rearrangement of the definition of sinh. This allows us to + retain acceptable accuracy for very small inputs. */ + float64x2_t t = expm1_inline (ax, &d->d); + t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0)))); + return vmulq_f64 (t, halfsign); +} + +TEST_SIG (V, D, 1, sinh, -10.0, 10.0) +TEST_ULP (V_NAME_D1 (sinh), 2.02) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (sinh), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0, 0x1p-26, 1000) +TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000) +TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p9, inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/v_sinhf_2u3.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/sinhf.c similarity index 59% rename from contrib/arm-optimized-routines/pl/math/v_sinhf_2u3.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/sinhf.c index cd8c0f08f784..12dbe26b425b 100644 --- a/contrib/arm-optimized-routines/pl/math/v_sinhf_2u3.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/sinhf.c @@ -1,84 +1,84 @@ /* * Single-precision vector sinh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" - +#include "test_sig.h" +#include "test_defs.h" #include "v_expm1f_inline.h" static const struct data { struct v_expm1f_data expm1f_consts; - uint32x4_t halff; #if WANT_SIMD_EXCEPT uint32x4_t tiny_bound, thresh; #else - uint32x4_t oflow_bound; + float32x4_t oflow_bound; #endif } data = { .expm1f_consts = V_EXPM1F_DATA, - .halff = V4 (0x3f000000), #if WANT_SIMD_EXCEPT /* 0x1.6a09e8p-32, below which expm1f underflows. 
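   For reference, the identity this file builds on, as a scalar sketch
   (hypothetical helper, not part of the patch):

     #include <math.h>
     static float sinhf_via_expm1 (float x)
     {
       /* With t = e^|x| - 1: sinh(|x|) = (t + t / (t + 1)) / 2;
          multiplying by +-0.5 restores the sign, since sinh is odd.  */
       float t = expm1f (fabsf (x));
       return (t + t / (t + 1.0f)) * copysignf (0.5f, x);
     }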
*/ .tiny_bound = V4 (0x2fb504f4), /* asuint(oflow_bound) - asuint(tiny_bound). */ .thresh = V4 (0x12fbbbb3), #else /* 0x1.61814ep+6, above which expm1f helper overflows. */ - .oflow_bound = V4 (0x42b0c0a7), + .oflow_bound = V4 (0x1.61814ep+6), #endif }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t t, float32x4_t halfsign, + uint32x4_t special) { - return v_call_f32 (sinhf, x, y, special); + return v_call_f32 (sinhf, x, vmulq_f32 (t, halfsign), special); } /* Approximation for vector single-precision sinh(x) using expm1. sinh(x) = (exp(x) - exp(-x)) / 2. The maximum error is 2.26 ULP: _ZGVnN4v_sinhf (0x1.e34a9ep-4) got 0x1.e469ep-4 want 0x1.e469e4p-4. */ -float32x4_t VPCS_ATTR V_NAME_F1 (sinh) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); float32x4_t ax = vabsq_f32 (x); - uint32x4_t iax = vreinterpretq_u32_f32 (ax); - uint32x4_t sign = veorq_u32 (ix, iax); - float32x4_t halfsign = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->halff)); + float32x4_t halfsign = vreinterpretq_f32_u32 ( + vbslq_u32 (v_u32 (0x80000000), ix, vreinterpretq_u32_f32 (v_f32 (0.5)))); #if WANT_SIMD_EXCEPT - uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, d->tiny_bound), d->thresh); + uint32x4_t special = vcgeq_u32 ( + vsubq_u32 (vreinterpretq_u32_f32 (ax), d->tiny_bound), d->thresh); ax = v_zerofy_f32 (ax, special); #else - uint32x4_t special = vcgeq_u32 (iax, d->oflow_bound); + uint32x4_t special = vcageq_f32 (x, d->oflow_bound); #endif /* Up to the point that expm1f overflows, we can use it to calculate sinhf using a slight rearrangement of the definition of asinh. This allows us to retain acceptable accuracy for very small inputs. */ float32x4_t t = expm1f_inline (ax, &d->expm1f_consts); t = vaddq_f32 (t, vdivq_f32 (t, vaddq_f32 (t, v_f32 (1.0)))); /* Fall back to the scalar variant for any lanes that should trigger an exception. */ if (unlikely (v_any_u32 (special))) - return special_case (x, vmulq_f32 (t, halfsign), special); + return special_case (x, t, halfsign, special); return vmulq_f32 (t, halfsign); } -PL_SIG (V, F, 1, sinh, -10.0, 10.0) -PL_TEST_ULP (V_NAME_F1 (sinh), 1.76) -PL_TEST_EXPECT_FENV (V_NAME_F1 (sinh), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0, 0x2fb504f4, 1000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0x2fb504f4, 0x42b0c0a7, 100000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0x42b0c0a7, inf, 1000) +HALF_WIDTH_ALIAS_F1 (sinh) + +TEST_SIG (V, F, 1, sinh, -10.0, 10.0) +TEST_ULP (V_NAME_F1 (sinh), 1.76) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (sinh), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0, 0x2fb504f4, 1000) +TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0x2fb504f4, 0x42b0c0a7, 100000) +TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0x42b0c0a7, inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/v_sinpi_3u1.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/sinpi.c similarity index 81% rename from contrib/arm-optimized-routines/pl/math/v_sinpi_3u1.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/sinpi.c index 8d2917ff8ecd..f86d167a2ac3 100644 --- a/contrib/arm-optimized-routines/pl/math/v_sinpi_3u1.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/sinpi.c @@ -1,86 +1,87 @@ /* * Double-precision vector sinpi function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #include "v_math.h" -#include "poly_advsimd_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "v_poly_f64.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float64x2_t poly[10]; } data = { /* Polynomial coefficients generated using Remez algorithm, see sinpi.sollya for details. */ .poly = { V2 (0x1.921fb54442d184p1), V2 (-0x1.4abbce625be53p2), V2 (0x1.466bc6775ab16p1), V2 (-0x1.32d2cce62dc33p-1), V2 (0x1.507834891188ep-4), V2 (-0x1.e30750a28c88ep-8), V2 (0x1.e8f48308acda4p-12), V2 (-0x1.6fc0032b3c29fp-16), V2 (0x1.af86ae521260bp-21), V2 (-0x1.012a9870eeb7dp-25) }, }; #if WANT_SIMD_EXCEPT # define TinyBound v_u64 (0x3bf0000000000000) /* asuint64(0x1p-64). */ /* asuint64(0x1p64) - TinyBound. */ # define Thresh v_u64 (0x07f0000000000000) static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp) { /* Fall back to scalar code. */ y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); - return v_call_f64 (sinpi, x, y, cmp); + return v_call_f64 (arm_math_sinpi, x, y, cmp); } #endif /* Approximation for vector double-precision sinpi(x). Maximum Error 3.05 ULP: _ZGVnN2v_sinpi(0x1.d32750db30b4ap-2) got 0x1.fb295878301c7p-1 want 0x1.fb295878301cap-1. */ float64x2_t VPCS_ATTR V_NAME_D1 (sinpi) (float64x2_t x) { const struct data *d = ptr_barrier (&data); #if WANT_SIMD_EXCEPT uint64x2_t ir = vreinterpretq_u64_f64 (vabsq_f64 (x)); uint64x2_t cmp = vcgeq_u64 (vsubq_u64 (ir, TinyBound), Thresh); /* When WANT_SIMD_EXCEPT = 1, special lanes should be set to 0 to avoid them under/overflowing and throwing exceptions. */ float64x2_t r = v_zerofy_f64 (x, cmp); #else float64x2_t r = x; #endif /* If r is odd, the sign of the result should be inverted. */ uint64x2_t odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtaq_s64_f64 (r)), 63); /* r = x - rint(x). Range reduction to -1/2 .. 1/2. */ r = vsubq_f64 (r, vrndaq_f64 (r)); /* y = sin(r). 
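   (more precisely, y approximates sin(pi * r) via the odd polynomial).
   The overall scalar shape of the scheme, for illustration only:

     #include <math.h>
     static double sinpi_shape (double x)
     {
       double n = round (x);   /* nearest integer, ties away.       */
       double r = x - n;       /* r in [-1/2, 1/2].                 */
       double y = poly (r);    /* poly() stands in for d->poly.     */
       return ((long long) n & 1) ? -y : y; /* odd n flips the sign. */
     }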
*/ float64x2_t r2 = vmulq_f64 (r, r); float64x2_t r4 = vmulq_f64 (r2, r2); float64x2_t y = vmulq_f64 (v_pw_horner_9_f64 (r2, r4, d->poly), r); #if WANT_SIMD_EXCEPT if (unlikely (v_any_u64 (cmp))) return special_case (x, y, odd, cmp); #endif return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); } -PL_SIG (V, D, 1, sinpi, -0.9, 0.9) -PL_TEST_ULP (V_NAME_D1 (sinpi), 3.06) -PL_TEST_EXPECT_FENV (V_NAME_D1 (sinpi), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0, 0x1p-63, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0x1p-63, 0.5, 10000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0.5, 0x1p51, 10000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0x1p51, inf, 10000) +#if WANT_TRIGPI_TESTS +TEST_ULP (V_NAME_D1 (sinpi), 2.56) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (sinpi), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0, 0x1p-63, 5000) +TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0x1p-63, 0.5, 10000) +TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0.5, 0x1p51, 10000) +TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0x1p51, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/v_sinpif_3u.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/sinpif.c similarity index 76% rename from contrib/arm-optimized-routines/pl/math/v_sinpif_3u.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/sinpif.c index 3d6eeff333f7..98ba9d84d2fb 100644 --- a/contrib/arm-optimized-routines/pl/math/v_sinpif_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/sinpif.c @@ -1,81 +1,84 @@ /* * Single-precision vector sinpi function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #include "v_math.h" -#include "poly_advsimd_f32.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "v_poly_f32.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float32x4_t poly[6]; } data = { /* Taylor series coefficents for sin(pi * x). */ .poly = { V4 (0x1.921fb6p1f), V4 (-0x1.4abbcep2f), V4 (0x1.466bc6p1f), V4 (-0x1.32d2ccp-1f), V4 (0x1.50783p-4f), V4 (-0x1.e30750p-8f) }, }; #if WANT_SIMD_EXCEPT # define TinyBound v_u32 (0x30000000) /* asuint32(0x1p-31f). */ # define Thresh v_u32 (0x1f000000) /* asuint32(0x1p31f) - TinyBound. */ static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp) { /* Fall back to scalar code. */ y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); - return v_call_f32 (sinpif, x, y, cmp); + return v_call_f32 (arm_math_sinpif, x, y, cmp); } #endif /* Approximation for vector single-precision sinpi(x) Maximum Error 3.03 ULP: _ZGVnN4v_sinpif(0x1.c597ccp-2) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1. */ -float32x4_t VPCS_ATTR V_NAME_F1 (sinpi) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinpi) (float32x4_t x) { const struct data *d = ptr_barrier (&data); #if WANT_SIMD_EXCEPT uint32x4_t ir = vreinterpretq_u32_f32 (vabsq_f32 (x)); uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (ir, TinyBound), Thresh); /* When WANT_SIMD_EXCEPT = 1, special lanes should be set to 0 to avoid them under/overflowing and throwing exceptions. */ float32x4_t r = v_zerofy_f32 (x, cmp); #else float32x4_t r = x; #endif /* If r is odd, the sign of the result should be inverted. */ uint32x4_t odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (r)), 31); /* r = x - rint(x). Range reduction to -1/2 .. 1/2. 
*/ r = vsubq_f32 (r, vrndaq_f32 (r)); /* Pairwise Horner approximation for y = sin(r * pi). */ float32x4_t r2 = vmulq_f32 (r, r); float32x4_t r4 = vmulq_f32 (r2, r2); float32x4_t y = vmulq_f32 (v_pw_horner_5_f32 (r2, r4, d->poly), r); #if WANT_SIMD_EXCEPT if (unlikely (v_any_u32 (cmp))) return special_case (x, y, odd, cmp); #endif return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); } -PL_SIG (V, F, 1, sinpi, -0.9, 0.9) -PL_TEST_ULP (V_NAME_F1 (sinpi), 2.54) -PL_TEST_EXPECT_FENV (V_NAME_F1 (sinpi), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0, 0x1p-31, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0x1p-31, 0.5, 10000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0.5, 0x1p31f, 10000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0x1p31f, inf, 10000) +HALF_WIDTH_ALIAS_F1 (sinpi) + +#if WANT_TRIGPI_TESTS +TEST_ULP (V_NAME_F1 (sinpi), 2.54) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (sinpi), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0, 0x1p-31, 5000) +TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0x1p-31, 0.5, 10000) +TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0.5, 0x1p31f, 10000) +TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0x1p31f, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/v_tan_3u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/tan.c similarity index 86% rename from contrib/arm-optimized-routines/pl/math/v_tan_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/tan.c index c431c8c4889e..957f9aba3a1e 100644 --- a/contrib/arm-optimized-routines/pl/math/v_tan_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/tan.c @@ -1,120 +1,122 @@ /* * Double-precision vector tan(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "poly_advsimd_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "v_poly_f64.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float64x2_t poly[9]; - float64x2_t half_pi, two_over_pi, shift; + double half_pi[2]; + float64x2_t two_over_pi, shift; #if !WANT_SIMD_EXCEPT float64x2_t range_val; #endif } data = { /* Coefficients generated using FPMinimax. */ .poly = { V2 (0x1.5555555555556p-2), V2 (0x1.1111111110a63p-3), V2 (0x1.ba1ba1bb46414p-5), V2 (0x1.664f47e5b5445p-6), V2 (0x1.226e5e5ecdfa3p-7), V2 (0x1.d6c7ddbf87047p-9), V2 (0x1.7ea75d05b583ep-10), V2 (0x1.289f22964a03cp-11), V2 (0x1.4e4fd14147622p-12) }, .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 }, .two_over_pi = V2 (0x1.45f306dc9c883p-1), .shift = V2 (0x1.8p52), #if !WANT_SIMD_EXCEPT .range_val = V2 (0x1p23), #endif }; #define RangeVal 0x4160000000000000 /* asuint64(0x1p23). */ #define TinyBound 0x3e50000000000000 /* asuint64(2^-26). */ #define Thresh 0x310000000000000 /* RangeVal - TinyBound. */ /* Special cases (fall back to scalar calls). */ static float64x2_t VPCS_ATTR NOINLINE special_case (float64x2_t x) { return v_call_f64 (tan, x, x, v_u64 (-1)); } /* Vector approximation for double-precision tan. Maximum measured error is 3.48 ULP: _ZGVnN2v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37 want -0x1.f6ccd8ecf7deap+37. */ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) { const struct data *dat = ptr_barrier (&data); /* Our argument reduction cannot calculate q with sufficient accuracy for very large inputs. Fall back to scalar routine for all lanes if any are too large, or Inf/NaN. 
If fenv exceptions are expected, also fall back for tiny input to avoid underflow. */ #if WANT_SIMD_EXCEPT uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); /* iax - tiny_bound > range_val - tiny_bound. */ uint64x2_t special = vcgtq_u64 (vsubq_u64 (iax, v_u64 (TinyBound)), v_u64 (Thresh)); if (unlikely (v_any_u64 (special))) return special_case (x); #endif /* q = nearest integer to 2 * x / pi. */ float64x2_t q = vsubq_f64 (vfmaq_f64 (dat->shift, x, dat->two_over_pi), dat->shift); int64x2_t qi = vcvtq_s64_f64 (q); /* Use q to reduce x to r in [-pi/4, pi/4], by: r = x - q * pi/2, in extended precision. */ float64x2_t r = x; - r = vfmsq_laneq_f64 (r, q, dat->half_pi, 0); - r = vfmsq_laneq_f64 (r, q, dat->half_pi, 1); + float64x2_t half_pi = vld1q_f64 (dat->half_pi); + r = vfmsq_laneq_f64 (r, q, half_pi, 0); + r = vfmsq_laneq_f64 (r, q, half_pi, 1); /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle formula. */ r = vmulq_n_f64 (r, 0.5); /* Approximate tan(r) using order 8 polynomial. tan(x) is odd, so polynomial has the form: tan(x) ~= x + C0 * x^3 + C1 * x^5 + C3 * x^7 + ... Hence we first approximate P(r) = C1 + C2 * r^2 + C3 * r^4 + ... Then compute the approximation by: tan(r) ~= r + r^3 * (C0 + r^2 * P(r)). */ float64x2_t r2 = vmulq_f64 (r, r), r4 = vmulq_f64 (r2, r2), r8 = vmulq_f64 (r4, r4); /* Offset coefficients to evaluate from C1 onwards. */ float64x2_t p = v_estrin_7_f64 (r2, r4, r8, dat->poly + 1); p = vfmaq_f64 (dat->poly[0], p, r2); p = vfmaq_f64 (r, r2, vmulq_f64 (p, r)); /* Recombination uses double-angle formula: tan(2x) = 2 * tan(x) / (1 - (tan(x))^2) and reciprocity around pi/2: tan(x) = 1 / (tan(pi/2 - x)) to assemble result using change-of-sign and conditional selection of numerator/denominator, dependent on odd/even-ness of q (hence quadrant). */ float64x2_t n = vfmaq_f64 (v_f64 (-1), p, p); float64x2_t d = vaddq_f64 (p, p); uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1)); #if !WANT_SIMD_EXCEPT uint64x2_t special = vcageq_f64 (x, dat->range_val); if (unlikely (v_any_u64 (special))) return special_case (x); #endif return vdivq_f64 (vbslq_f64 (no_recip, n, vnegq_f64 (d)), vbslq_f64 (no_recip, d, n)); } -PL_SIG (V, D, 1, tan, -3.1, 3.1) -PL_TEST_ULP (V_NAME_D1 (tan), 2.99) -PL_TEST_EXPECT_FENV (V_NAME_D1 (tan), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (tan), 0, TinyBound, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (tan), TinyBound, RangeVal, 100000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (tan), RangeVal, inf, 5000) +TEST_SIG (V, D, 1, tan, -3.1, 3.1) +TEST_ULP (V_NAME_D1 (tan), 2.99) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (tan), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (tan), 0, TinyBound, 5000) +TEST_SYM_INTERVAL (V_NAME_D1 (tan), TinyBound, RangeVal, 100000) +TEST_SYM_INTERVAL (V_NAME_D1 (tan), RangeVal, inf, 5000) diff --git a/contrib/arm-optimized-routines/pl/math/v_tanf_3u5.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/tanf.c similarity index 83% rename from contrib/arm-optimized-routines/pl/math/v_tanf_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/tanf.c index 98948b0a9ecf..ed5448649f6c 100644 --- a/contrib/arm-optimized-routines/pl/math/v_tanf_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/tanf.c @@ -1,127 +1,130 @@ /* * Single-precision vector tan(x) function. * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "poly_advsimd_f32.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "v_poly_f32.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float32x4_t poly[6]; - float32x4_t pi_consts; + float pi_consts[4]; float32x4_t shift; #if !WANT_SIMD_EXCEPT float32x4_t range_val; #endif } data = { /* Coefficients generated using FPMinimax. */ .poly = { V4 (0x1.55555p-2f), V4 (0x1.11166p-3f), V4 (0x1.b88a78p-5f), V4 (0x1.7b5756p-6f), V4 (0x1.4ef4cep-8f), V4 (0x1.0e1e74p-7f) }, /* Stores constants: (-pi/2)_high, (-pi/2)_mid, (-pi/2)_low, and 2/pi. */ .pi_consts = { -0x1.921fb6p+0f, 0x1.777a5cp-25f, 0x1.ee59dap-50f, 0x1.45f306p-1f }, .shift = V4 (0x1.8p+23f), #if !WANT_SIMD_EXCEPT .range_val = V4 (0x1p15f), #endif }; #define RangeVal v_u32 (0x47000000) /* asuint32(0x1p15f). */ #define TinyBound v_u32 (0x30000000) /* asuint32 (0x1p-31f). */ #define Thresh v_u32 (0x16000000) /* asuint32(RangeVal) - TinyBound. */ /* Special cases (fall back to scalar calls). */ static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) { return v_call_f32 (tanf, x, y, cmp); } /* Use a full Estrin scheme to evaluate polynomial. */ static inline float32x4_t eval_poly (float32x4_t z, const struct data *d) { float32x4_t z2 = vmulq_f32 (z, z); #if WANT_SIMD_EXCEPT /* Tiny z (<= 0x1p-31) will underflow when calculating z^4. If fp exceptions are to be triggered correctly, sidestep this by fixing such lanes to 0. */ uint32x4_t will_uflow = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound); if (unlikely (v_any_u32 (will_uflow))) z2 = vbslq_f32 (will_uflow, v_f32 (0), z2); #endif float32x4_t z4 = vmulq_f32 (z2, z2); return v_estrin_5_f32 (z, z2, z4, d->poly); } /* Fast implementation of AdvSIMD tanf. Maximum error is 3.45 ULP: __v_tanf(-0x1.e5f0cap+13) got 0x1.ff9856p-1 want 0x1.ff9850p-1. */ -float32x4_t VPCS_ATTR V_NAME_F1 (tan) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tan) (float32x4_t x) { const struct data *d = ptr_barrier (&data); float32x4_t special_arg = x; /* iax >= RangeVal means x, if not inf or NaN, is too large to perform fast regression. */ #if WANT_SIMD_EXCEPT uint32x4_t iax = vreinterpretq_u32_f32 (vabsq_f32 (x)); /* If fp exceptions are to be triggered correctly, also special-case tiny input, as this will load to overflow later. Fix any special lanes to 1 to prevent any exceptions being triggered. */ uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, TinyBound), Thresh); if (unlikely (v_any_u32 (special))) x = vbslq_f32 (special, v_f32 (1.0f), x); #else /* Otherwise, special-case large and special values. */ uint32x4_t special = vcageq_f32 (x, d->range_val); #endif /* n = rint(x/(pi/2)). */ - float32x4_t q = vfmaq_laneq_f32 (d->shift, x, d->pi_consts, 3); + float32x4_t pi_consts = vld1q_f32 (d->pi_consts); + float32x4_t q = vfmaq_laneq_f32 (d->shift, x, pi_consts, 3); float32x4_t n = vsubq_f32 (q, d->shift); /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */ uint32x4_t pred_alt = vtstq_u32 (vreinterpretq_u32_f32 (q), v_u32 (1)); /* r = x - n * (pi/2) (range reduction into -pi./4 .. pi/4). 
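   i.e. into -pi/4 .. pi/4. Odd quadrants are later folded back with the
   identity tan(x) = -1 / tan(x + pi/2); in scalar terms (illustrative
   names, not this file's variables):

     /* q = rint(x / (pi/2)); r = x - q * pi/2; t ~= tan(r).  */
     float result = q_is_odd ? -1.0f / t : t;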
*/ float32x4_t r; - r = vfmaq_laneq_f32 (x, n, d->pi_consts, 0); - r = vfmaq_laneq_f32 (r, n, d->pi_consts, 1); - r = vfmaq_laneq_f32 (r, n, d->pi_consts, 2); + r = vfmaq_laneq_f32 (x, n, pi_consts, 0); + r = vfmaq_laneq_f32 (r, n, pi_consts, 1); + r = vfmaq_laneq_f32 (r, n, pi_consts, 2); /* If x lives in an interval, where |tan(x)| - is finite, then use a polynomial approximation of the form tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2). - grows to infinity then use symmetries of tangent and the identity tan(r) = cotan(pi/2 - r) to express tan(x) as 1/tan(-r). Finally, use the same polynomial approximation of tan as above. */ /* Invert sign of r if odd quadrant. */ float32x4_t z = vmulq_f32 (r, vbslq_f32 (pred_alt, v_f32 (-1), v_f32 (1))); /* Evaluate polynomial approximation of tangent on [-pi/4, pi/4]. */ float32x4_t z2 = vmulq_f32 (r, r); float32x4_t p = eval_poly (z2, d); float32x4_t y = vfmaq_f32 (z, vmulq_f32 (z, z2), p); /* Compute reciprocal and apply if required. */ float32x4_t inv_y = vdivq_f32 (v_f32 (1.0f), y); if (unlikely (v_any_u32 (special))) return special_case (special_arg, vbslq_f32 (pred_alt, inv_y, y), special); return vbslq_f32 (pred_alt, inv_y, y); } -PL_SIG (V, F, 1, tan, -3.1, 3.1) -PL_TEST_ULP (V_NAME_F1 (tan), 2.96) -PL_TEST_EXPECT_FENV (V_NAME_F1 (tan), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0, 0x1p-31, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0x1p-31, 0x1p15, 500000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0x1p15, inf, 5000) +HALF_WIDTH_ALIAS_F1 (tan) + +TEST_SIG (V, F, 1, tan, -3.1, 3.1) +TEST_ULP (V_NAME_F1 (tan), 2.96) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (tan), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0, 0x1p-31, 5000) +TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0x1p-31, 0x1p15, 500000) +TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0x1p15, inf, 5000) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/tanh.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/tanh.c new file mode 100644 index 000000000000..3dc6e5527ffc --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/tanh.c @@ -0,0 +1,67 @@ +/* + * Double-precision vector tanh(x) function. + * Copyright (c) 2023-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "v_expm1_inline.h" + +static const struct data +{ + struct v_expm1_data d; + uint64x2_t thresh, tiny_bound; +} data = { + .d = V_EXPM1_DATA, + .tiny_bound = V2 (0x3e40000000000000), /* asuint64 (0x1p-27). */ + /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound). */ + .thresh = V2 (0x01f241bf835f9d5f), +}; + +static float64x2_t NOINLINE VPCS_ATTR +special_case (float64x2_t x, float64x2_t q, float64x2_t qp2, + uint64x2_t special) +{ + return v_call_f64 (tanh, x, vdivq_f64 (q, qp2), special); +} + +/* Vector approximation for double-precision tanh(x), using a simplified + version of expm1. The greatest observed error is 2.70 ULP: + _ZGVnN2v_tanh(-0x1.c59aa220cb177p-3) got -0x1.be5452a6459fep-3 + want -0x1.be5452a6459fbp-3. */ +float64x2_t VPCS_ATTR V_NAME_D1 (tanh) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + + uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); + + float64x2_t u = x; + + /* Trigger special-cases for tiny, boring and infinity/NaN. */ + uint64x2_t special = vcgtq_u64 (vsubq_u64 (ia, d->tiny_bound), d->thresh); +#if WANT_SIMD_EXCEPT + /* To trigger fp exceptions correctly, set special lanes to a neutral value. 
+ They will be fixed up later by the special-case handler. */ + if (unlikely (v_any_u64 (special))) + u = v_zerofy_f64 (u, special); +#endif + + u = vaddq_f64 (u, u); + + /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ + float64x2_t q = expm1_inline (u, &d->d); + float64x2_t qp2 = vaddq_f64 (q, v_f64 (2.0)); + + if (unlikely (v_any_u64 (special))) + return special_case (x, q, qp2, special); + return vdivq_f64 (q, qp2); +} + +TEST_SIG (V, D, 1, tanh, -10.0, 10.0) +TEST_ULP (V_NAME_D1 (tanh), 2.21) +TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (tanh), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0, 0x1p-27, 5000) +TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0x1p-27, 0x1.241bf835f9d5fp+4, 50000) +TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0x1.241bf835f9d5fp+4, inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/v_tanhf_2u6.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/tanhf.c similarity index 62% rename from contrib/arm-optimized-routines/pl/math/v_tanhf_2u6.c rename to contrib/arm-optimized-routines/math/aarch64/advsimd/tanhf.c index d1cb9fb6eeb3..18fe93c7e7ba 100644 --- a/contrib/arm-optimized-routines/pl/math/v_tanhf_2u6.c +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/tanhf.c @@ -1,73 +1,81 @@ /* * Single-precision vector tanh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" - +#include "test_sig.h" +#include "test_defs.h" #include "v_expm1f_inline.h" static const struct data { struct v_expm1f_data expm1f_consts; uint32x4_t boring_bound, large_bound, onef; } data = { .expm1f_consts = V_EXPM1F_DATA, /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */ .boring_bound = V4 (0x41102cb3), .large_bound = V4 (0x7f800000), - .onef = V4 (0x3f800000), }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t is_boring, float32x4_t boring, + float32x4_t q, uint32x4_t special) { - return v_call_f32 (tanhf, x, y, special); + return v_call_f32 ( + tanhf, x, + vbslq_f32 (is_boring, boring, vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)))), + special); } /* Approximation for single-precision vector tanh(x), using a simplified version of expm1f. The maximum error is 2.58 ULP: _ZGVnN4v_tanhf (0x1.fa5eep-5) got 0x1.f9ba02p-5 want 0x1.f9ba08p-5. */ -float32x4_t VPCS_ATTR V_NAME_F1 (tanh) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); float32x4_t ax = vabsq_f32 (x); uint32x4_t iax = vreinterpretq_u32_f32 (ax); uint32x4_t sign = veorq_u32 (ix, iax); uint32x4_t is_boring = vcgtq_u32 (iax, d->boring_bound); - float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->onef)); + /* expm1 exponent bias is 1.0f reinterpreted to int. */ + float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 ( + sign, vreinterpretq_u32_s32 (d->expm1f_consts.exponent_bias))); #if WANT_SIMD_EXCEPT /* If fp exceptions are to be triggered properly, set all special and boring lanes to 0, which will trigger no exceptions, and fix them up later. 
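   For reference, the identity driving the routine, as a scalar sketch
   (hypothetical helper, not part of this file):

     #include <math.h>
     static float tanhf_via_expm1 (float x)
     {
       /* tanh(x) = (e^2x - 1) / (e^2x + 1) = q / (q + 2)
          with q = expm1(2x); no special-case handling here.  */
       float q = expm1f (2.0f * x);
       return q / (q + 2.0f);
     }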
*/ uint32x4_t special = vorrq_u32 (vcgtq_u32 (iax, d->large_bound), vcltq_u32 (iax, v_u32 (0x34000000))); x = v_zerofy_f32 (x, is_boring); if (unlikely (v_any_u32 (special))) x = v_zerofy_f32 (x, special); #else uint32x4_t special = vcgtq_u32 (iax, d->large_bound); #endif /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ float32x4_t q = expm1f_inline (vmulq_n_f32 (x, 2), &d->expm1f_consts); - float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0))); + if (unlikely (v_any_u32 (special))) - return special_case (vreinterpretq_f32_u32 (ix), - vbslq_f32 (is_boring, boring, y), special); + return special_case (vreinterpretq_f32_u32 (ix), is_boring, boring, q, + special); + + float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0))); return vbslq_f32 (is_boring, boring, y); } -PL_SIG (V, F, 1, tanh, -10.0, 10.0) -PL_TEST_ULP (V_NAME_F1 (tanh), 2.09) -PL_TEST_EXPECT_FENV (V_NAME_F1 (tanh), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0, 0x1p-23, 1000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0x1p-23, 0x1.205966p+3, 100000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0x1.205966p+3, inf, 100) +HALF_WIDTH_ALIAS_F1 (tanh) + +TEST_SIG (V, F, 1, tanh, -10.0, 10.0) +TEST_ULP (V_NAME_F1 (tanh), 2.09) +TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (tanh), WANT_SIMD_EXCEPT) +TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0, 0x1p-23, 1000) +TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0x1p-23, 0x1.205966p+3, 100000) +TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0x1.205966p+3, inf, 100) diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/tanpi.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/tanpi.c new file mode 100644 index 000000000000..16de00ad5556 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/tanpi.c @@ -0,0 +1,88 @@ +/* + * Double-precision vector tanpi(x) function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" + +const static struct v_tanpi_data +{ + float64x2_t c0, c2, c4, c6, c8, c10, c12; + double c1, c3, c5, c7, c9, c11, c13, c14; +} tanpi_data = { + /* Coefficents for tan(pi * x) computed with fpminimax + on [ 0x1p-1022 0x1p-2 ] + approx rel error: 0x1.7eap-55 + approx abs error: 0x1.7eap-55. */ + .c0 = V2 (0x1.921fb54442d18p1), /* pi. */ + .c1 = 0x1.4abbce625be52p3, .c2 = V2 (0x1.466bc6775b0f9p5), + .c3 = 0x1.45fff9b426f5ep7, .c4 = V2 (0x1.45f4730dbca5cp9), + .c5 = 0x1.45f3265994f85p11, .c6 = V2 (0x1.45f4234b330cap13), + .c7 = 0x1.45dca11be79ebp15, .c8 = V2 (0x1.47283fc5eea69p17), + .c9 = 0x1.3a6d958cdefaep19, .c10 = V2 (0x1.927896baee627p21), + .c11 = -0x1.89333f6acd922p19, .c12 = V2 (0x1.5d4e912bb8456p27), + .c13 = -0x1.a854d53ab6874p29, .c14 = 0x1.1b76de7681424p32, +}; + +/* Approximation for double-precision vector tanpi(x) + The maximum error is 3.06 ULP: + _ZGVnN2v_tanpi(0x1.0a4a07dfcca3ep-1) got -0x1.fa30112702c98p+3 + want -0x1.fa30112702c95p+3. */ +float64x2_t VPCS_ATTR V_NAME_D1 (tanpi) (float64x2_t x) +{ + const struct v_tanpi_data *d = ptr_barrier (&tanpi_data); + + float64x2_t n = vrndnq_f64 (x); + + /* inf produces nan that propagates. */ + float64x2_t xr = vsubq_f64 (x, n); + float64x2_t ar = vabdq_f64 (x, n); + uint64x2_t flip = vcgtq_f64 (ar, v_f64 (0.25)); + float64x2_t r = vbslq_f64 (flip, vsubq_f64 (v_f64 (0.5), ar), ar); + + /* Order-14 pairwise Horner. 
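   in r2: adjacent coefficients are paired with one FMA each, then the
   pairs are folded with r4 = r2^2, roughly halving the dependency chain
   of plain Horner. Generic scalar sketch (illustrative):

     static double pairwise_horner_4 (double r2, const double c[4])
     {
       double r4 = r2 * r2;
       double p01 = c[0] + r2 * c[1];
       double p23 = c[2] + r2 * c[3];
       return p01 + r4 * p23; /* == c0 + c1 r2 + c2 r2^2 + c3 r2^3.  */
     }
   */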
*/ + float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t r4 = vmulq_f64 (r2, r2); + + float64x2_t c_1_3 = vld1q_f64 (&d->c1); + float64x2_t c_5_7 = vld1q_f64 (&d->c5); + float64x2_t c_9_11 = vld1q_f64 (&d->c9); + float64x2_t c_13_14 = vld1q_f64 (&d->c13); + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, r2, c_1_3, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, r2, c_1_3, 1); + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, r2, c_5_7, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, r2, c_5_7, 1); + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, r2, c_9_11, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, r2, c_9_11, 1); + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, r2, c_13_14, 0); + + float64x2_t p = vfmaq_laneq_f64 (p1213, r4, c_13_14, 1); + p = vfmaq_f64 (p1011, r4, p); + p = vfmaq_f64 (p89, r4, p); + p = vfmaq_f64 (p67, r4, p); + p = vfmaq_f64 (p45, r4, p); + p = vfmaq_f64 (p23, r4, p); + p = vfmaq_f64 (p01, r4, p); + p = vmulq_f64 (r, p); + + float64x2_t p_recip = vdivq_f64 (v_f64 (1.0), p); + float64x2_t y = vbslq_f64 (flip, p_recip, p); + + uint64x2_t sign + = veorq_u64 (vreinterpretq_u64_f64 (xr), vreinterpretq_u64_f64 (ar)); + return vreinterpretq_f64_u64 (vorrq_u64 (vreinterpretq_u64_f64 (y), sign)); +} + +#if WANT_TRIGPI_TESTS +TEST_DISABLE_FENV (V_NAME_D1 (tanpi)) +TEST_ULP (V_NAME_D1 (tanpi), 2.57) +TEST_SYM_INTERVAL (V_NAME_D1 (tanpi), 0, 0x1p-31, 50000) +TEST_SYM_INTERVAL (V_NAME_D1 (tanpi), 0x1p-31, 0.5, 50000) +TEST_SYM_INTERVAL (V_NAME_D1 (tanpi), 0.5, 1.0, 200000) +TEST_SYM_INTERVAL (V_NAME_D1 (tanpi), 1.0, 0x1p23, 50000) +TEST_SYM_INTERVAL (V_NAME_D1 (tanpi), 0x1p23, inf, 50000) +#endif diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/tanpif.c b/contrib/arm-optimized-routines/math/aarch64/advsimd/tanpif.c new file mode 100644 index 000000000000..7bd6d206819f --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/tanpif.c @@ -0,0 +1,70 @@ +/* + * Single-precision vector tanpi(x) function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "test_sig.h" +#include "test_defs.h" + +const static struct v_tanpif_data +{ + float32x4_t c0, c2, c4, c6; + float c1, c3, c5, c7; +} tanpif_data = { + /* Coefficents for tan(pi * x). */ + .c0 = V4 (0x1.921fb4p1f), .c1 = 0x1.4abbcep3f, .c2 = V4 (0x1.466b8p5f), + .c3 = 0x1.461c72p7f, .c4 = V4 (0x1.42e9d4p9f), .c5 = 0x1.69e2c4p11f, + .c6 = V4 (0x1.e85558p11f), .c7 = 0x1.a52e08p16f, +}; + +/* Approximation for single-precision vector tanpi(x) + The maximum error is 3.34 ULP: + _ZGVnN4v_tanpif(0x1.d6c09ap-2) got 0x1.f70aacp+2 + want 0x1.f70aa6p+2. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanpi) (float32x4_t x) +{ + const struct v_tanpif_data *d = ptr_barrier (&tanpif_data); + + float32x4_t n = vrndnq_f32 (x); + + /* inf produces nan that propagates. */ + float32x4_t xr = vsubq_f32 (x, n); + float32x4_t ar = vabdq_f32 (x, n); + uint32x4_t flip = vcgtq_f32 (ar, v_f32 (0.25f)); + float32x4_t r = vbslq_f32 (flip, vsubq_f32 (v_f32 (0.5f), ar), ar); + + /* Order-7 pairwise Horner polynomial evaluation scheme. 
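   (the same pairing idea as the double-precision tanpi above). The flip
   performed earlier rests on the identity tanpi(x) = 1 / tanpi(1/2 - x),
   so the polynomial only ever sees |r| <= 1/4; in scalar terms
   (illustrative):

     /* ar = |x - round(x)|; flip = ar > 0.25; r = flip ? 0.5 - ar : ar. */
     float y = flip ? 1.0f / p : p; /* p ~= tanpi(r) on [0, 1/4].  */
   */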
*/ + float32x4_t r2 = vmulq_f32 (r, r); + float32x4_t r4 = vmulq_f32 (r2, r2); + + float32x4_t odd_coeffs = vld1q_f32 (&d->c1); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, r2, odd_coeffs, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, r2, odd_coeffs, 1); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, r2, odd_coeffs, 2); + float32x4_t p67 = vfmaq_laneq_f32 (d->c6, r2, odd_coeffs, 3); + float32x4_t p = vfmaq_f32 (p45, r4, p67); + p = vfmaq_f32 (p23, r4, p); + p = vfmaq_f32 (p01, r4, p); + + p = vmulq_f32 (r, p); + float32x4_t p_recip = vdivq_f32 (v_f32 (1.0f), p); + float32x4_t y = vbslq_f32 (flip, p_recip, p); + + uint32x4_t sign + = veorq_u32 (vreinterpretq_u32_f32 (xr), vreinterpretq_u32_f32 (ar)); + return vreinterpretq_f32_u32 (vorrq_u32 (vreinterpretq_u32_f32 (y), sign)); +} + +HALF_WIDTH_ALIAS_F1 (tanpi) + +#if WANT_TRIGPI_TESTS +TEST_DISABLE_FENV (V_NAME_F1 (tanpi)) +TEST_ULP (V_NAME_F1 (tanpi), 2.84) +TEST_SYM_INTERVAL (V_NAME_F1 (tanpi), 0, 0x1p-31, 50000) +TEST_SYM_INTERVAL (V_NAME_F1 (tanpi), 0x1p-31, 0.5, 100000) +TEST_SYM_INTERVAL (V_NAME_F1 (tanpi), 0.5, 0x1p23f, 100000) +TEST_SYM_INTERVAL (V_NAME_F1 (tanpi), 0x1p23f, inf, 100000) +#endif diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/v_expf_inline.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_expf_inline.h new file mode 100644 index 000000000000..797d217820c3 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_expf_inline.h @@ -0,0 +1,58 @@ +/* + * Helper for single-precision routines which calculate exp(ax) and do not + * need special-case handling + * + * Copyright (c) 2019-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#ifndef MATH_V_EXPF_INLINE_H +#define MATH_V_EXPF_INLINE_H + +#include "v_math.h" + +struct v_expf_data +{ + float ln2_hi, ln2_lo, c0, c2; + float32x4_t inv_ln2, c1, c3, c4; + /* asuint(1.0f). */ + uint32x4_t exponent_bias; +}; + +/* maxerr: 1.45358 +0.5 ulp. */ +#define V_EXPF_DATA \ + { \ + .c0 = 0x1.0e4020p-7f, .c1 = V4 (0x1.573e2ep-5f), .c2 = 0x1.555e66p-3f, \ + .c3 = V4 (0x1.fffdb6p-2f), .c4 = V4 (0x1.ffffecp-1f), \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ + .inv_ln2 = V4 (0x1.715476p+0f), .exponent_bias = V4 (0x3f800000), \ + } + +static inline float32x4_t +v_expf_inline (float32x4_t x, const struct v_expf_data *d) +{ + /* Helper routine for calculating exp(ax). + Copied from v_expf.c, with all special-case handling removed - the + calling routine should handle special values if required. */ + + /* exp(ax) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + ax = ln2*n + r, with r in [-ln2/2, ln2/2]. */ + float32x4_t ax = vabsq_f32 (x); + float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi); + float32x4_t n = vrndaq_f32 (vmulq_f32 (ax, d->inv_ln2)); + float32x4_t r = vfmsq_laneq_f32 (ax, n, ln2_c02, 0); + r = vfmsq_laneq_f32 (r, n, ln2_c02, 1); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); + + /* Custom order-4 Estrin avoids building high order monomial. 
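   i.e. poly(r) = c4 r + r2 (c3 + c2 r) + r4 (c1 + c0 r), needing only
   r2 = r * r instead of an explicit r^5 power. The evaluation below in
   scalar form (illustrative):

     float r2 = r * r;
     float p = c1 + r * c0;
     float q = c3 + r * c2;
     q = q + p * r2;               /* c3 + c2 r + c1 r2 + c0 r^3.  */
     float poly = c4 * r + q * r2;
     /* exp(r) ~= 1 + poly; result = scale * (1 + poly).  */
   */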
*/ + float32x4_t r2 = vmulq_f32 (r, r); + float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2); + float32x4_t q = vfmaq_laneq_f32 (d->c3, r, ln2_c02, 3); + q = vfmaq_f32 (q, p, r2); + p = vmulq_f32 (d->c4, r); + float32x4_t poly = vfmaq_f32 (p, q, r2); + return vfmaq_f32 (scale, poly, scale); +} + +#endif // MATH_V_EXPF_INLINE_H diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/v_expm1_inline.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_expm1_inline.h new file mode 100644 index 000000000000..82d2e9415d93 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_expm1_inline.h @@ -0,0 +1,86 @@ +/* + * Helper for double-precision routines which calculate exp(x) - 1 and do not + * need special-case handling + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#ifndef MATH_V_EXPM1_INLINE_H +#define MATH_V_EXPM1_INLINE_H + +#include "v_math.h" + +struct v_expm1_data +{ + float64x2_t c2, c4, c6, c8; + float64x2_t invln2; + int64x2_t exponent_bias; + double c1, c3, c5, c7, c9, c10; + double ln2[2]; +}; + +/* Generated using fpminimax, with degree=12 in [log(2)/2, log(2)/2]. */ +#define V_EXPM1_DATA \ + { \ + .c1 = 0x1.5555555555559p-3, .c2 = V2 (0x1.555555555554bp-5), \ + .c3 = 0x1.111111110f663p-7, .c4 = V2 (0x1.6c16c16c1b5f3p-10), \ + .c5 = 0x1.a01a01affa35dp-13, .c6 = V2 (0x1.a01a018b4ecbbp-16), \ + .c7 = 0x1.71ddf82db5bb4p-19, .c8 = V2 (0x1.27e517fc0d54bp-22), \ + .c9 = 0x1.af5eedae67435p-26, .c10 = 0x1.1f143d060a28ap-29, \ + .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 }, \ + .invln2 = V2 (0x1.71547652b82fep0), \ + .exponent_bias = V2 (0x3ff0000000000000), \ + } + +static inline float64x2_t +expm1_inline (float64x2_t x, const struct v_expm1_data *d) +{ + /* Helper routine for calculating exp(x) - 1. */ + + float64x2_t ln2 = vld1q_f64 (&d->ln2[0]); + + /* Reduce argument to smaller range: + Let i = round(x / ln2) + and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. + exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 + where 2^i is exact because i is an integer. */ + float64x2_t n = vrndaq_f64 (vmulq_f64 (x, d->invln2)); + int64x2_t i = vcvtq_s64_f64 (n); + float64x2_t f = vfmsq_laneq_f64 (x, n, ln2, 0); + f = vfmsq_laneq_f64 (f, n, ln2, 1); + + /* Approximate expm1(f) using polynomial. + Taylor expansion for expm1(x) has the form: + x + ax^2 + bx^3 + cx^4 .... + So we calculate the polynomial P(f) = a + bf + cf^2 + ... + and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ + float64x2_t f2 = vmulq_f64 (f, f); + float64x2_t f4 = vmulq_f64 (f2, f2); + float64x2_t lane_consts_13 = vld1q_f64 (&d->c1); + float64x2_t lane_consts_57 = vld1q_f64 (&d->c5); + float64x2_t lane_consts_910 = vld1q_f64 (&d->c9); + float64x2_t p01 = vfmaq_laneq_f64 (v_f64 (0.5), f, lane_consts_13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, f, lane_consts_13, 1); + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, f, lane_consts_57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, f, lane_consts_57, 1); + float64x2_t p03 = vfmaq_f64 (p01, f2, p23); + float64x2_t p47 = vfmaq_f64 (p45, f2, p67); + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, f, lane_consts_910, 0); + float64x2_t p = vfmaq_laneq_f64 (p89, f2, lane_consts_910, 1); + p = vfmaq_f64 (p47, f4, p); + p = vfmaq_f64 (p03, f4, p); + + p = vfmaq_f64 (f, f2, p); + + /* Assemble the result. + expm1(x) ~= 2^i * (p + 1) - 1 + Let t = 2^i. 
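   Then expm1(x) ~= p * t + (t - 1): t and t - 1 are exact, which avoids
   the cancellation of forming 2^i * (p + 1) first and subtracting 1
   afterwards. Scalar rendering (illustrative):

     double t = scalbn (1.0, i);     /* t = 2^i, exactly.            */
     double res = p * t + (t - 1.0); /* expm1(x) approximation.      */
   */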
*/ + int64x2_t u = vaddq_s64 (vshlq_n_s64 (i, 52), d->exponent_bias); + float64x2_t t = vreinterpretq_f64_s64 (u); + + /* expm1(x) ~= p * t + (t - 1). */ + return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t); +} + +#endif // MATH_V_EXPM1_INLINE_H diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/v_expm1f_inline.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_expm1f_inline.h new file mode 100644 index 000000000000..463b07aa7705 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_expm1f_inline.h @@ -0,0 +1,62 @@ +/* + * Helper for single-precision routines which calculate exp(x) - 1 and do not + * need special-case handling + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#ifndef MATH_V_EXPM1F_INLINE_H +#define MATH_V_EXPM1F_INLINE_H + +#include "v_math.h" + +struct v_expm1f_data +{ + float32x4_t c0, c2; + int32x4_t exponent_bias; + float c1, c3, inv_ln2, c4; + float ln2_hi, ln2_lo; +}; + +/* Coefficients generated using fpminimax with degree=5 in [-log(2)/2, + log(2)/2]. Exponent bias is asuint(1.0f). */ +#define V_EXPM1F_DATA \ + { \ + .c0 = V4 (0x1.fffffep-2), .c1 = 0x1.5554aep-3, .c2 = V4 (0x1.555736p-5), \ + .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \ + .exponent_bias = V4 (0x3f800000), .inv_ln2 = 0x1.715476p+0f, \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ + } + +static inline float32x4_t +expm1f_inline (float32x4_t x, const struct v_expm1f_data *d) +{ + /* Helper routine for calculating exp(x) - 1. */ + + float32x2_t ln2 = vld1_f32 (&d->ln2_hi); + float32x4_t lane_consts = vld1q_f32 (&d->c1); + + /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ + float32x4_t j = vrndaq_f32 (vmulq_laneq_f32 (x, lane_consts, 2)); + int32x4_t i = vcvtq_s32_f32 (j); + float32x4_t f = vfmsq_lane_f32 (x, j, ln2, 0); + f = vfmsq_lane_f32 (f, j, ln2, 1); + + /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). */ + float32x4_t f2 = vmulq_f32 (f, f); + float32x4_t f4 = vmulq_f32 (f2, f2); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, f, lane_consts, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, f, lane_consts, 1); + float32x4_t p = vfmaq_f32 (p01, f2, p23); + p = vfmaq_laneq_f32 (p, f4, lane_consts, 3); + p = vfmaq_f32 (f, f2, p); + + /* t = 2^i. */ + int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias); + float32x4_t t = vreinterpretq_f32_s32 (u); + /* expm1(x) ~= p * t + (t - 1). */ + return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t); +} + +#endif // MATH_V_EXPM1F_INLINE_H diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/v_log1p_inline.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_log1p_inline.h new file mode 100644 index 000000000000..ef906ae4b603 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_log1p_inline.h @@ -0,0 +1,119 @@ +/* + * Helper for vector double-precision routines which calculate log(1 + x) and + * do not need special-case handling + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#ifndef MATH_V_LOG1P_INLINE_H +#define MATH_V_LOG1P_INLINE_H + +#include "v_math.h" + +struct v_log1p_data +{ + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16; + uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask; + int64x2_t one_top; + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c18; + double ln2[2]; +}; + +/* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. 
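+   (This interval matches the reduction in log1p_inline below: m = x + 1 is
+   scaled so that the polynomial argument f lies in [sqrt(2)/2 - 1,
+   sqrt(2) - 1], roughly [-0.293, 0.414].)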
*/ +#define V_LOG1P_CONSTANTS_TABLE \ + { \ + .c0 = V2 (-0x1.ffffffffffffbp-2), .c1 = 0x1.55555555551a9p-2, \ + .c2 = V2 (-0x1.00000000008e3p-2), .c3 = 0x1.9999999a32797p-3, \ + .c4 = V2 (-0x1.555555552fecfp-3), .c5 = 0x1.249248e071e5ap-3, \ + .c6 = V2 (-0x1.ffffff8bf8482p-4), .c7 = 0x1.c71c8f07da57ap-4, \ + .c8 = V2 (-0x1.9999ca4ccb617p-4), .c9 = 0x1.7459ad2e1dfa3p-4, \ + .c10 = V2 (-0x1.554d2680a3ff2p-4), .c11 = 0x1.3b4c54d487455p-4, \ + .c12 = V2 (-0x1.2548a9ffe80e6p-4), .c13 = 0x1.0f389a24b2e07p-4, \ + .c14 = V2 (-0x1.eee4db15db335p-5), .c15 = 0x1.e95b494d4a5ddp-5, \ + .c16 = V2 (-0x1.15fdf07cb7c73p-4), .c17 = 0x1.0310b70800fcfp-4, \ + .c18 = -0x1.cfa7385bdb37ep-6, \ + .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 }, \ + .hf_rt2_top = V2 (0x3fe6a09e00000000), \ + .one_m_hf_rt2_top = V2 (0x00095f6200000000), \ + .umask = V2 (0x000fffff00000000), .one_top = V2 (0x3ff) \ + } + +#define BottomMask v_u64 (0xffffffff) + +static inline float64x2_t +eval_poly (float64x2_t m, float64x2_t m2, const struct v_log1p_data *d) +{ + /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner. */ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + float64x2_t c1718 = vld1q_f64 (&d->c17); + float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, m, c1718, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, m, c1315, 1); + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, m, c1315, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, m, c911, 1); + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, m, c911, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, m, c57, 1); + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, m, c57, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, m, c13, 1); + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, m, c13, 0); + float64x2_t p = vfmaq_laneq_f64 (p1617, m2, c1718, 1); + p = vfmaq_f64 (p1415, m2, p); + p = vfmaq_f64 (p1213, m2, p); + p = vfmaq_f64 (p1011, m2, p); + p = vfmaq_f64 (p89, m2, p); + p = vfmaq_f64 (p67, m2, p); + p = vfmaq_f64 (p45, m2, p); + p = vfmaq_f64 (p23, m2, p); + return vfmaq_f64 (p01, m2, p); +} + +static inline float64x2_t +log1p_inline (float64x2_t x, const struct v_log1p_data *d) +{ + /* Helper for calculating log(x + 1): + - No special-case handling - this should be dealt with by the caller. + - Optionally simulate the shortcut for k=0, used in the scalar routine, + using v_sel, for improved accuracy when the argument to log1p is close + to 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1 + in the source of the caller before including this file. */ + float64x2_t m = vaddq_f64 (x, v_f64 (1.0)); + uint64x2_t mi = vreinterpretq_u64_f64 (m); + uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top); + + int64x2_t ki + = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top); + float64x2_t k = vcvtq_f64_s64 (ki); + + /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ + uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top); + uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask)); + float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1.0)); + + /* Correction term c/m. */ + float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1.0))), m); + +#ifndef WANT_V_LOG1P_K0_SHORTCUT +# error \ + "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0" +#elif WANT_V_LOG1P_K0_SHORTCUT + /* Shortcut if k is 0 - set correction term to 0 and f to x. 
The result is + that the approximation is solely the polynomial. */ + uint64x2_t k0 = vceqzq_f64 (k); + cm = v_zerofy_f64 (cm, k0); + f = vbslq_f64 (k0, x, f); +#endif + + /* Approximate log1p(f) on the reduced input using a polynomial. */ + float64x2_t f2 = vmulq_f64 (f, f); + float64x2_t p = eval_poly (f, f2, d); + + /* Assemble log1p(x) = k * log2 + log1p(f) + c/m. */ + float64x2_t ln2 = vld1q_f64 (&d->ln2[0]); + float64x2_t ylo = vfmaq_laneq_f64 (cm, k, ln2, 1); + float64x2_t yhi = vfmaq_laneq_f64 (f, k, ln2, 0); + return vfmaq_f64 (vaddq_f64 (ylo, yhi), f2, p); +} + +#endif // MATH_V_LOG1P_INLINE_H diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/v_log1pf_inline.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_log1pf_inline.h new file mode 100644 index 000000000000..e81fa24486ae --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_log1pf_inline.h @@ -0,0 +1,94 @@ +/* + * Helper for single-precision routines which calculate log(1 + x) and do not + * need special-case handling + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#ifndef MATH_V_LOG1PF_INLINE_H +#define MATH_V_LOG1PF_INLINE_H + +#include "v_math.h" +#include "v_poly_f32.h" + +struct v_log1pf_data +{ + uint32x4_t four; + int32x4_t three_quarters; + float c0, c3, c5, c7; + float32x4_t c4, c6, c1, c2, ln2; +}; + +/* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients + (1, -0.5) are not stored as they can be generated more efficiently. */ +#define V_LOG1PF_CONSTANTS_TABLE \ + { \ + .c0 = 0x1.5555aap-2f, .c1 = V4 (-0x1.000038p-2f), \ + .c2 = V4 (0x1.99675cp-3f), .c3 = -0x1.54ef78p-3f, \ + .c4 = V4 (0x1.28a1f4p-3f), .c5 = -0x1.0da91p-3f, \ + .c6 = V4 (0x1.abcb6p-4f), .c7 = -0x1.6f0d5ep-5f, \ + .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \ + .three_quarters = V4 (0x3f400000) \ + } + +static inline float32x4_t +eval_poly (float32x4_t m, const struct v_log1pf_data *d) +{ + /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner. */ + float32x4_t c0357 = vld1q_f32 (&d->c0); + float32x4_t q = vfmaq_laneq_f32 (v_f32 (-0.5), m, c0357, 0); + float32x4_t m2 = vmulq_f32 (m, m); + float32x4_t p67 = vfmaq_laneq_f32 (d->c6, m, c0357, 3); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, m, c0357, 2); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, m, c0357, 1); + float32x4_t p = vfmaq_f32 (p45, m2, p67); + p = vfmaq_f32 (p23, m2, p); + p = vfmaq_f32 (d->c1, m, p); + p = vmulq_f32 (m2, p); + p = vfmaq_f32 (m, m2, p); + return vfmaq_f32 (p, m2, q); +} + +static inline float32x4_t +log1pf_inline (float32x4_t x, const struct v_log1pf_data *d) +{ + /* Helper for calculating log(x + 1). */ + + /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m + is in [-0.25, 0.5]): + log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). + + We approximate log1p(m) with a polynomial, then scale by + k*log(2). Instead of doing this directly, we use an intermediate + scale factor s = 4*k*log(2) to ensure the scale is representable + as a normalised fp32 number. */ + float32x4_t m = vaddq_f32 (x, v_f32 (1.0f)); + + /* Choose k to scale x to the range [-1/4, 1/2]. */ + int32x4_t k + = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters), + v_s32 (0xff800000)); + uint32x4_t ku = vreinterpretq_u32_s32 (k); + + /* Scale up to ensure that the scale factor is representable as normalised + fp32 number, and scale m down accordingly. 
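+     (A sketch of the bit-level effect, inferred from the code rather than
+     the upstream comments: s = asfloat (asuint (4.0f) - k) = 4 * 2^-k, so
+     0.25 * s = 2^-k and m_scale becomes x * 2^-k + (2^-k - 1)
+     = (x + 1) * 2^-k - 1, i.e. the reduced m, assuming no exponent
+     underflow.)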
*/ + float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku)); + + /* Scale x by exponent manipulation. */ + float32x4_t m_scale + = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku)); + m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s)); + + /* Evaluate polynomial on the reduced interval. */ + float32x4_t p = eval_poly (m_scale, d); + + /* The scale factor to be applied back at the end - by multiplying float(k) + by 2^-23 we get the unbiased exponent of k. */ + float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f)); + + /* Apply the scaling back. */ + return vfmaq_f32 (p, scale_back, d->ln2); +} + +#endif // MATH_V_LOG1PF_INLINE_H diff --git a/contrib/arm-optimized-routines/pl/math/v_log_inline.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_log_inline.h similarity index 94% rename from contrib/arm-optimized-routines/pl/math/v_log_inline.h rename to contrib/arm-optimized-routines/math/aarch64/advsimd/v_log_inline.h index 2df00cf4ddf4..770f9e81c195 100644 --- a/contrib/arm-optimized-routines/pl/math/v_log_inline.h +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_log_inline.h @@ -1,104 +1,104 @@ /* * Double-precision vector log(x) function - inline version * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" #include "math_config.h" #ifndef V_LOG_INLINE_POLY_ORDER # error Cannot use inline log helper without specifying poly order (options are 4 or 5) #endif #if V_LOG_INLINE_POLY_ORDER == 4 # define POLY \ { \ V2 (-0x1.ffffffffcbad3p-2), V2 (0x1.555555578ed68p-2), \ V2 (-0x1.0000d3a1e7055p-2), V2 (0x1.999392d02a63ep-3) \ } #elif V_LOG_INLINE_POLY_ORDER == 5 # define POLY \ { \ V2 (-0x1.ffffffffffff7p-2), V2 (0x1.55555555170d4p-2), \ V2 (-0x1.0000000399c27p-2), V2 (0x1.999b2e90e94cap-3), \ V2 (-0x1.554e550bd501ep-3) \ } #else # error Can only choose order 4 or 5 for log poly #endif struct v_log_inline_data { float64x2_t poly[V_LOG_INLINE_POLY_ORDER]; float64x2_t ln2; uint64x2_t off, sign_exp_mask; }; #define V_LOG_CONSTANTS \ { \ .poly = POLY, .ln2 = V2 (0x1.62e42fefa39efp-1), \ .sign_exp_mask = V2 (0xfff0000000000000), .off = V2 (0x3fe6900900000000) \ } #define A(i) d->poly[i] #define N (1 << V_LOG_TABLE_BITS) #define IndexMask (N - 1) struct entry { float64x2_t invc; float64x2_t logc; }; static inline struct entry log_lookup (uint64x2_t i) { /* Since N is a power of 2, n % N = n & (N - 1). */ struct entry e; - uint64_t i0 = (i[0] >> (52 - V_LOG_TABLE_BITS)) & IndexMask; - uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + uint64_t i0 = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + uint64_t i1 = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG_TABLE_BITS)) & IndexMask; float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); e.invc = vuzp1q_f64 (e0, e1); e.logc = vuzp2q_f64 (e0, e1); return e; } static inline float64x2_t v_log_inline (float64x2_t x, const struct v_log_inline_data *d) { float64x2_t z, r, r2, p, y, kd, hi; uint64x2_t ix, iz, tmp; int64x2_t k; struct entry e; ix = vreinterpretq_u64_f64 (x); /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ tmp = vsubq_u64 (ix, d->off); k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. 
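     (The shift must be arithmetic: for inputs whose exponent is below that
     of Off, tmp wraps to a negative signed value and k has to stay
     negative.)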
*/ iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask)); z = vreinterpretq_f64_u64 (iz); e = log_lookup (tmp); /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); kd = vcvtq_f64_s64 (k); /* hi = r + log(c) + k*Ln2. */ hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2); /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ r2 = vmulq_f64 (r, r); y = vfmaq_f64 (A (2), A (3), r); p = vfmaq_f64 (A (0), A (1), r); #if V_LOG_INLINE_POLY_ORDER == 5 y = vfmaq_f64 (y, A (4), r2); #endif y = vfmaq_f64 (p, y, r2); return vfmaq_f64 (hi, y, r2); } diff --git a/contrib/arm-optimized-routines/pl/math/v_math.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_math.h similarity index 58% rename from contrib/arm-optimized-routines/pl/math/v_math.h rename to contrib/arm-optimized-routines/math/aarch64/advsimd/v_math.h index 1b10929faccc..75cd71cc87a7 100644 --- a/contrib/arm-optimized-routines/pl/math/v_math.h +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_math.h @@ -1,175 +1,202 @@ /* * Vector math abstractions. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef _V_MATH_H #define _V_MATH_H -#ifndef WANT_VMATH -/* Enable the build of vector math code. */ -# define WANT_VMATH 1 +#if !__aarch64__ +# error "Cannot build without AArch64" #endif -#if WANT_VMATH - -# if __aarch64__ -# define VPCS_ATTR __attribute__ ((aarch64_vector_pcs)) -# else -# error "Cannot build without AArch64" -# endif - -# include -# include "math_config.h" -# if __aarch64__ +#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs)) + +#define V_NAME_F1(fun) _ZGVnN4v_##fun##f +#define V_NAME_D1(fun) _ZGVnN2v_##fun +#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f +#define V_NAME_D2(fun) _ZGVnN2vv_##fun +#define V_NAME_F1_L1(fun) _ZGVnN4vl4_##fun##f +#define V_NAME_D1_L1(fun) _ZGVnN2vl8_##fun + +#if USE_GLIBC_ABI + +# define HALF_WIDTH_ALIAS_F1(fun) \ + float32x2_t VPCS_ATTR _ZGVnN2v_##fun##f (float32x2_t x) \ + { \ + return vget_low_f32 (_ZGVnN4v_##fun##f (vcombine_f32 (x, x))); \ + } + +# define HALF_WIDTH_ALIAS_F2(fun) \ + float32x2_t VPCS_ATTR _ZGVnN2vv_##fun##f (float32x2_t x, float32x2_t y) \ + { \ + return vget_low_f32 ( \ + _ZGVnN4vv_##fun##f (vcombine_f32 (x, x), vcombine_f32 (y, y))); \ + } + +#else +# define HALF_WIDTH_ALIAS_F1(fun) +# define HALF_WIDTH_ALIAS_F2(fun) +#endif -#include +#include +#include "math_config.h" +#include /* Shorthand helpers for declaring constants. */ -# define V2(X) { X, X } -# define V4(X) { X, X, X, X } -# define V8(X) { X, X, X, X, X, X, X, X } +#define V2(X) \ + { \ + X, X \ + } +#define V4(X) \ + { \ + X, X, X, X \ + } +#define V8(X) \ + { \ + X, X, X, X, X, X, X, X \ + } static inline int v_any_u16h (uint16x4_t x) { return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0; } +static inline int +v_lanes32 (void) +{ + return 4; +} + static inline float32x4_t v_f32 (float x) { return (float32x4_t) V4 (x); } static inline uint32x4_t v_u32 (uint32_t x) { return (uint32x4_t) V4 (x); } static inline int32x4_t v_s32 (int32_t x) { return (int32x4_t) V4 (x); } -/* true if any elements of a vector compare result is non-zero. */ +/* true if any element of a v_cond result is non-zero. */ static inline int v_any_u32 (uint32x4_t x) { /* assume elements in x are either 0 or -1u.
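     (Under that assumption the two 64-bit halves summed by vpaddd_u64
     cannot cancel to zero, so a non-zero sum implies at least one set
     lane.)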
*/ return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0; } static inline int v_any_u32h (uint32x2_t x) { return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0; } static inline float32x4_t v_lookup_f32 (const float *tab, uint32x4_t idx) { return (float32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] }; } static inline uint32x4_t v_lookup_u32 (const uint32_t *tab, uint32x4_t idx) { return (uint32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] }; } static inline float32x4_t v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p) { return (float32x4_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1], p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3] }; } static inline float32x4_t v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2, float32x4_t y, uint32x4_t p) { return (float32x4_t){ p[0] ? f (x1[0], x2[0]) : y[0], p[1] ? f (x1[1], x2[1]) : y[1], p[2] ? f (x1[2], x2[2]) : y[2], p[3] ? f (x1[3], x2[3]) : y[3] }; } static inline float32x4_t v_zerofy_f32 (float32x4_t x, uint32x4_t mask) { return vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), mask)); } +static inline int +v_lanes64 (void) +{ + return 2; +} static inline float64x2_t v_f64 (double x) { return (float64x2_t) V2 (x); } static inline uint64x2_t v_u64 (uint64_t x) { return (uint64x2_t) V2 (x); } static inline int64x2_t v_s64 (int64_t x) { return (int64x2_t) V2 (x); } -/* true if any elements of a vector compare result is non-zero. */ +/* true if any element of a v_cond result is non-zero. */ static inline int v_any_u64 (uint64x2_t x) { /* assume elements in x are either 0 or -1u. */ return vpaddd_u64 (x) != 0; } -/* true if all elements of a vector compare result is 1. */ -static inline int -v_all_u64 (uint64x2_t x) -{ - /* assume elements in x are either 0 or -1u. */ - return vpaddd_s64 (vreinterpretq_s64_u64 (x)) == -2; -} static inline float64x2_t v_lookup_f64 (const double *tab, uint64x2_t idx) { return (float64x2_t){ tab[idx[0]], tab[idx[1]] }; } static inline uint64x2_t v_lookup_u64 (const uint64_t *tab, uint64x2_t idx) { return (uint64x2_t){ tab[idx[0]], tab[idx[1]] }; } - static inline float64x2_t v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p) { double p1 = p[1]; double x1 = x[1]; if (likely (p[0])) y[0] = f (x[0]); if (likely (p1)) y[1] = f (x1); return y; } static inline float64x2_t v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2, float64x2_t y, uint64x2_t p) { double p1 = p[1]; double x1h = x1[1]; double x2h = x2[1]; if (likely (p[0])) y[0] = f (x1[0], x2[0]); if (likely (p1)) y[1] = f (x1h, x2h); return y; } static inline float64x2_t v_zerofy_f64 (float64x2_t x, uint64x2_t mask) { return vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), mask)); } -# endif -#endif - #endif diff --git a/contrib/arm-optimized-routines/pl/math/poly_advsimd_f32.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_poly_f32.h similarity index 81% rename from contrib/arm-optimized-routines/pl/math/poly_advsimd_f32.h rename to contrib/arm-optimized-routines/math/aarch64/advsimd/v_poly_f32.h index 438e153dff90..9a9c5c1ac15b 100644 --- a/contrib/arm-optimized-routines/pl/math/poly_advsimd_f32.h +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_poly_f32.h @@ -1,24 +1,24 @@ /* * Helpers for evaluating polynomials on single-precision AdvSIMD input, using * various schemes. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#ifndef PL_MATH_POLY_ADVSIMD_F32_H -#define PL_MATH_POLY_ADVSIMD_F32_H +#ifndef MATH_POLY_ADVSIMD_F32_H +#define MATH_POLY_ADVSIMD_F32_H #include /* Wrap AdvSIMD f32 helpers: evaluation of some scheme/order has form: v_[scheme]_[order]_f32. */ #define VTYPE float32x4_t #define FMA(x, y, z) vfmaq_f32 (z, x, y) #define VWRAP(f) v_##f##_f32 #include "poly_generic.h" #undef VWRAP #undef FMA #undef VTYPE #endif diff --git a/contrib/arm-optimized-routines/pl/math/poly_advsimd_f64.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_poly_f64.h similarity index 81% rename from contrib/arm-optimized-routines/pl/math/poly_advsimd_f64.h rename to contrib/arm-optimized-routines/math/aarch64/advsimd/v_poly_f64.h index 7ea249a91225..4331bfbd03b0 100644 --- a/contrib/arm-optimized-routines/pl/math/poly_advsimd_f64.h +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_poly_f64.h @@ -1,24 +1,24 @@ /* * Helpers for evaluating polynomials on double-precision AdvSIMD input, using * various schemes. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#ifndef PL_MATH_POLY_ADVSIMD_F64_H -#define PL_MATH_POLY_ADVSIMD_F64_H +#ifndef MATH_POLY_ADVSIMD_F64_H +#define MATH_POLY_ADVSIMD_F64_H #include /* Wrap AdvSIMD f64 helpers: evaluation of some scheme/order has form: v_[scheme]_[order]_f64. */ #define VTYPE float64x2_t #define FMA(x, y, z) vfmaq_f64 (z, x, y) #define VWRAP(f) v_##f##_f64 #include "poly_generic.h" #undef VWRAP #undef FMA #undef VTYPE #endif diff --git a/contrib/arm-optimized-routines/pl/math/v_sincos_common.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincos_common.h similarity index 97% rename from contrib/arm-optimized-routines/pl/math/v_sincos_common.h rename to contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincos_common.h index ee7937e0785a..14227d9339a8 100644 --- a/contrib/arm-optimized-routines/pl/math/v_sincos_common.h +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincos_common.h @@ -1,86 +1,86 @@ /* * Core approximation for double-precision vector sincos * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "poly_advsimd_f64.h" +#include "v_poly_f64.h" static const struct v_sincos_data { float64x2_t sin_poly[7], cos_poly[6], pio2[3]; float64x2_t inv_pio2, shift, range_val; } v_sincos_data = { .inv_pio2 = V2 (0x1.45f306dc9c882p-1), .pio2 = { V2 (0x1.921fb50000000p+0), V2 (0x1.110b460000000p-26), V2 (0x1.1a62633145c07p-54) }, .shift = V2 (0x1.8p52), .sin_poly = { /* Computed using Remez in [-pi/2, pi/2]. */ V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19), V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33), V2 (-0x1.9e9540300a1p-41) }, .cos_poly = { /* Computed using Remez in [-pi/4, pi/4]. */ V2 (0x1.555555555554cp-5), V2 (-0x1.6c16c16c1521fp-10), V2 (0x1.a01a019cbf62ap-16), V2 (-0x1.27e4f812b681ep-22), V2 (0x1.1ee9f152a57cdp-29), V2 (-0x1.8fb131098404bp-37) }, .range_val = V2 (0x1p23), }; static inline uint64x2_t check_ge_rangeval (float64x2_t x, const struct v_sincos_data *d) { return vcagtq_f64 (x, d->range_val); } /* Double-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate polynomials. 
Largest observed error is for sin, 3.22 ULP: v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3 want -0x1.ffe9537d5dbb4p-3. */ static inline float64x2x2_t v_sincos_inline (float64x2_t x, const struct v_sincos_data *d) { /* q = nearest integer to 2 * x / pi. */ float64x2_t q = vsubq_f64 (vfmaq_f64 (d->shift, x, d->inv_pio2), d->shift); int64x2_t n = vcvtq_s64_f64 (q); /* Use q to reduce x to r in [-pi/4, pi/4], by: r = x - q * pi/2, in extended precision. */ float64x2_t r = x; r = vfmsq_f64 (r, q, d->pio2[0]); r = vfmsq_f64 (r, q, d->pio2[1]); r = vfmsq_f64 (r, q, d->pio2[2]); float64x2_t r2 = r * r, r3 = r2 * r, r4 = r2 * r2; /* Approximate sin(r) ~= r + r^3 * poly_sin(r^2). */ float64x2_t s = v_pw_horner_6_f64 (r2, r4, d->sin_poly); s = vfmaq_f64 (r, r3, s); /* Approximate cos(r) ~= 1 - (r^2)/2 + r^4 * poly_cos(r^2). */ float64x2_t c = v_pw_horner_5_f64 (r2, r4, d->cos_poly); c = vfmaq_f64 (v_f64 (-0.5), r2, c); c = vfmaq_f64 (v_f64 (1), r2, c); /* If odd quadrant, swap cos and sin. */ uint64x2_t swap = vtstq_s64 (n, v_s64 (1)); float64x2_t ss = vbslq_f64 (swap, c, s); float64x2_t cc = vbslq_f64 (swap, s, c); /* Fix signs according to quadrant. ss = asdouble(asuint64(ss) ^ ((n & 2) << 62)) cc = asdouble(asuint64(cc) & (((n + 1) & 2) << 62)). */ uint64x2_t sin_sign = vshlq_n_u64 (vandq_u64 (vreinterpretq_u64_s64 (n), v_u64 (2)), 62); uint64x2_t cos_sign = vshlq_n_u64 ( vandq_u64 (vreinterpretq_u64_s64 (vaddq_s64 (n, v_s64 (1))), v_u64 (2)), 62); ss = vreinterpretq_f64_u64 ( veorq_u64 (vreinterpretq_u64_f64 (ss), sin_sign)); cc = vreinterpretq_f64_u64 ( veorq_u64 (vreinterpretq_u64_f64 (cc), cos_sign)); return (float64x2x2_t){ ss, cc }; } diff --git a/contrib/arm-optimized-routines/pl/math/v_sincosf_common.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincosf_common.h similarity index 98% rename from contrib/arm-optimized-routines/pl/math/v_sincosf_common.h rename to contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincosf_common.h index 8239bd9f0176..7c29eded14d6 100644 --- a/contrib/arm-optimized-routines/pl/math/v_sincosf_common.h +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincosf_common.h @@ -1,84 +1,84 @@ /* * Core approximation for single-precision vector sincos * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" const static struct v_sincosf_data { float32x4_t poly_sin[3], poly_cos[3], pio2[3], inv_pio2, shift, range_val; } v_sincosf_data = { .poly_sin = { /* Generated using Remez, odd coeffs only, in [-pi/4, pi/4]. */ V4 (-0x1.555546p-3), V4 (0x1.11076p-7), V4 (-0x1.994eb4p-13) }, .poly_cos = { /* Generated using Remez, even coeffs only, in [-pi/4, pi/4]. */ V4 (0x1.55554ap-5), V4 (-0x1.6c0c1ap-10), V4 (0x1.99e0eep-16) }, .pio2 = { V4 (0x1.921fb6p+0f), V4 (-0x1.777a5cp-25f), V4 (-0x1.ee59dap-50f) }, .inv_pio2 = V4 (0x1.45f306p-1f), .shift = V4 (0x1.8p23), .range_val = V4 (0x1p20), }; static inline uint32x4_t check_ge_rangeval (float32x4_t x, const struct v_sincosf_data *d) { return vcagtq_f32 (x, d->range_val); } /* Single-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate low-order polynomials. Worst-case error for sin is 1.67 ULP: v_sincosf_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5 Worst-case error for cos is 1.81 ULP: v_sincosf_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. 
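   (Implementation note, taken from the body below: the quadrant index is
   obtained with the 0x1.8p23 shift trick - q = (x * 2/pi + shift) - shift
   rounds to the nearest integer in the default rounding mode without an
   explicit round instruction.)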
*/ static inline float32x4x2_t v_sincosf_inline (float32x4_t x, const struct v_sincosf_data *d) { /* n = rint ( x / (pi/2) ). */ float32x4_t shift = d->shift; float32x4_t q = vfmaq_f32 (shift, x, d->inv_pio2); q = vsubq_f32 (q, shift); int32x4_t n = vcvtq_s32_f32 (q); /* Reduce x such that r is in [ -pi/4, pi/4 ]. */ float32x4_t r = x; r = vfmsq_f32 (r, q, d->pio2[0]); r = vfmsq_f32 (r, q, d->pio2[1]); r = vfmsq_f32 (r, q, d->pio2[2]); /* Approximate sin(r) ~= r + r^3 * poly_sin(r^2). */ float32x4_t r2 = vmulq_f32 (r, r), r3 = vmulq_f32 (r, r2); float32x4_t s = vfmaq_f32 (d->poly_sin[1], r2, d->poly_sin[2]); s = vfmaq_f32 (d->poly_sin[0], r2, s); s = vfmaq_f32 (r, r3, s); /* Approximate cos(r) ~= 1 - (r^2)/2 + r^4 * poly_cos(r^2). */ float32x4_t r4 = vmulq_f32 (r2, r2); float32x4_t p = vfmaq_f32 (d->poly_cos[1], r2, d->poly_cos[2]); float32x4_t c = vfmaq_f32 (v_f32 (-0.5), r2, d->poly_cos[0]); c = vfmaq_f32 (c, r4, p); c = vfmaq_f32 (v_f32 (1), c, r2); /* If odd quadrant, swap cos and sin. */ uint32x4_t swap = vtstq_u32 (vreinterpretq_u32_s32 (n), v_u32 (1)); float32x4_t ss = vbslq_f32 (swap, c, s); float32x4_t cc = vbslq_f32 (swap, s, c); /* Fix signs according to quadrant. ss = asfloat(asuint(ss) ^ ((n & 2) << 30)) cc = asfloat(asuint(cc) & (((n + 1) & 2) << 30)). */ uint32x4_t sin_sign = vshlq_n_u32 (vandq_u32 (vreinterpretq_u32_s32 (n), v_u32 (2)), 30); uint32x4_t cos_sign = vshlq_n_u32 ( vandq_u32 (vreinterpretq_u32_s32 (vaddq_s32 (n, v_s32 (1))), v_u32 (2)), 30); ss = vreinterpretq_f32_u32 ( veorq_u32 (vreinterpretq_u32_f32 (ss), sin_sign)); cc = vreinterpretq_f32_u32 ( veorq_u32 (vreinterpretq_u32_f32 (cc), cos_sign)); return (float32x4x2_t){ ss, cc }; } diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincospi_common.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincospi_common.h new file mode 100644 index 000000000000..438b141b9174 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincospi_common.h @@ -0,0 +1,64 @@ +/* + * Helper for Double-precision vector sincospi function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "v_math.h" +#include "v_poly_f64.h" + +static const struct v_sincospi_data +{ + float64x2_t poly[10], range_val; +} v_sincospi_data = { + /* Polynomial coefficients generated using Remez algorithm, + see sinpi.sollya for details. */ + .poly = { V2 (0x1.921fb54442d184p1), V2 (-0x1.4abbce625be53p2), + V2 (0x1.466bc6775ab16p1), V2 (-0x1.32d2cce62dc33p-1), + V2 (0x1.507834891188ep-4), V2 (-0x1.e30750a28c88ep-8), + V2 (0x1.e8f48308acda4p-12), V2 (-0x1.6fc0032b3c29fp-16), + V2 (0x1.af86ae521260bp-21), V2 (-0x1.012a9870eeb7dp-25) }, + .range_val = V2 (0x1p63), +}; + +/* Double-precision vector function allowing calculation of both sin and cos in + one function call, using separate argument reduction and shared low-order + polynomials. + Approximation for vector double-precision sincospi(x). + Maximum Error 3.09 ULP: + _ZGVnN2v_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1 + want 0x1.fd54d0b327cf4p-1 + Maximum Error 3.16 ULP: + _ZGVnN2v_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1 + want 0x1.fd2da484ff402p-1. */ +static inline float64x2x2_t +v_sincospi_inline (float64x2_t x, const struct v_sincospi_data *d) +{ + /* If r is odd, the sign of the result should be inverted for sinpi + and reintroduced for cospi. 
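+     (Concretely: vcvtaq_s64_f64 rounds x to the nearest integer with ties
+     away from zero; its parity bit is shifted into the sign position, and
+     the cmp mask forces lanes at or beyond range_val to even, where the
+     conversion is not meaningful.)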
*/ + uint64x2_t cmp = vcgeq_f64 (x, d->range_val); + uint64x2_t odd = vshlq_n_u64 ( + vbicq_u64 (vreinterpretq_u64_s64 (vcvtaq_s64_f64 (x)), cmp), 63); + + /* r = x - rint(x). */ + float64x2_t sr = vsubq_f64 (x, vrndaq_f64 (x)); + /* cospi(x) = sinpi(0.5 - abs(x)) for values -1/2 .. 1/2. */ + float64x2_t cr = vsubq_f64 (v_f64 (0.5), vabsq_f64 (sr)); + + /* Pairwise Horner approximation for y = sin(r * pi). */ + float64x2_t sr2 = vmulq_f64 (sr, sr); + float64x2_t sr4 = vmulq_f64 (sr2, sr2); + float64x2_t cr2 = vmulq_f64 (cr, cr); + float64x2_t cr4 = vmulq_f64 (cr2, cr2); + + float64x2_t ss = vmulq_f64 (v_pw_horner_9_f64 (sr2, sr4, d->poly), sr); + float64x2_t cc = vmulq_f64 (v_pw_horner_9_f64 (cr2, cr4, d->poly), cr); + + float64x2_t sinpix + = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (ss), odd)); + + float64x2_t cospix + = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (cc), odd)); + + return (float64x2x2_t){ sinpix, cospix }; +} diff --git a/contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincospif_common.h b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincospif_common.h new file mode 100644 index 000000000000..8d4177dd871e --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/advsimd/v_sincospif_common.h @@ -0,0 +1,57 @@ +/* + * Helper for Single-precision vector sincospi function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "mathlib.h" +#include "v_math.h" +#include "v_poly_f32.h" + +const static struct v_sincospif_data +{ + float32x4_t poly[6], range_val; +} v_sincospif_data = { + /* Taylor series coefficients for sin(pi * x). */ + .poly = { V4 (0x1.921fb6p1f), V4 (-0x1.4abbcep2f), V4 (0x1.466bc6p1f), + V4 (-0x1.32d2ccp-1f), V4 (0x1.50783p-4f), V4 (-0x1.e30750p-8f) }, + .range_val = V4 (0x1p31f), +}; + +/* Single-precision vector function allowing calculation of both sinpi and + cospi in one function call, using shared argument reduction and polynomials. + Worst-case error for sin is 3.04 ULP: + _ZGVnN4v_sincospif_sin(0x1.1d341ap-1) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1. + Worst-case error for cos is 3.18 ULP: + _ZGVnN4v_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1. + */ +static inline float32x4x2_t +v_sincospif_inline (float32x4_t x, const struct v_sincospif_data *d) +{ + /* If r is odd, the sign of the result should be inverted for sinpi and + reintroduced for cospi. */ + uint32x4_t cmp = vcgeq_f32 (x, d->range_val); + uint32x4_t odd = vshlq_n_u32 ( + vbicq_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), cmp), 31); + + /* r = x - rint(x). */ + float32x4_t sr = vsubq_f32 (x, vrndaq_f32 (x)); + /* cospi(x) = sinpi(0.5 - abs(x)) for values -1/2 .. 1/2. */ + float32x4_t cr = vsubq_f32 (v_f32 (0.5f), vabsq_f32 (sr)); + + /* Pairwise Horner approximation for y = sin(r * pi).
*/ + float32x4_t sr2 = vmulq_f32 (sr, sr); + float32x4_t sr4 = vmulq_f32 (sr2, sr2); + float32x4_t cr2 = vmulq_f32 (cr, cr); + float32x4_t cr4 = vmulq_f32 (cr2, cr2); + + float32x4_t ss = vmulq_f32 (v_pw_horner_5_f32 (sr2, sr4, d->poly), sr); + float32x4_t cc = vmulq_f32 (v_pw_horner_5_f32 (cr2, cr4, d->poly), cr); + + float32x4_t sinpix + = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (ss), odd)); + float32x4_t cospix + = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (cc), odd)); + + return (float32x4x2_t){ sinpix, cospix }; +} diff --git a/contrib/arm-optimized-routines/pl/math/cospi_3u1.c b/contrib/arm-optimized-routines/math/aarch64/cospi_3u5.c similarity index 82% rename from contrib/arm-optimized-routines/pl/math/cospi_3u1.c rename to contrib/arm-optimized-routines/math/aarch64/cospi_3u5.c index 4a688a076829..4131f6c816a1 100644 --- a/contrib/arm-optimized-routines/pl/math/cospi_3u1.c +++ b/contrib/arm-optimized-routines/math/aarch64/cospi_3u5.c @@ -1,89 +1,98 @@ /* * Double-precision scalar cospi function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #include "poly_scalar_f64.h" /* Taylor series coefficients for sin(pi * x). C2 coefficient (originally ~=5.16771278) has been split into two parts: C2_hi = 4, C2_lo = C2 - C2_hi (~=1.16771278). This change in magnitude reduces floating point rounding errors. C2_hi is then reintroduced after the polynomial approximation. */ static const double poly[] = { 0x1.921fb54442d184p1, -0x1.2aef39896f94bp0, 0x1.466bc6775ab16p1, -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8, 0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16, 0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 }; #define Shift 0x1.8p+52 /* Approximation for scalar double-precision cospi(x). Maximum error: 3.13 ULP: cospi(0x1.160b129300112p-21) got 0x1.fffffffffd16bp-1 want 0x1.fffffffffd16ep-1. */ double -cospi (double x) +arm_math_cospi (double x) { - if (isinf (x)) + if (isinf (x) || isnan (x)) return __math_invalid (x); double ax = asdouble (asuint64 (x) & ~0x8000000000000000); /* Edge cases for when cospi should be exactly +/- 1 (integers); 0x1p53 is the limit for double precision to store any fractional part. */ if (ax >= 0x1p53) return 1; /* If x is an integer, return +- 1, based upon whether x is odd. */ uint64_t m = (uint64_t) ax; if (m == ax) return (m & 1) ? -1 : 1; /* For very small inputs, squaring r causes underflow. Values below this threshold can be approximated via cospi(x) ~= 1. */ if (ax < 0x1p-63) return 1; /* Any non-integer value >= 0x1p51 will be an integer + 0.5. These values should return exactly 0. */ if (ax >= 0x1p51) return 0; /* n = rint(|x|). */ double n = ax + Shift; uint64_t sign = asuint64 (n) << 63; n = n - Shift; /* We know that cospi(x) = sinpi(0.5 - x) range reduction and offset into sinpi range -1/2 .. 1/2 r = 0.5 - |x - rint(x)|. */ double r = 0.5 - fabs (ax - n); /* y = sin(pi * r). */ double r2 = r * r; double y = horner_9_f64 (r2, poly); y = y * r; /* Reintroduce C2_hi. */ y = fma (-4 * r2, r, y); /* As all values are reduced to -1/2 .. 1/2, the result will always be positive; the sign must therefore be introduced based upon whether x rounds to odd or even.
*/ return asdouble (asuint64 (y) ^ sign); } -PL_SIG (S, D, 1, cospi, -0.9, 0.9) -PL_TEST_ULP (cospi, 2.63) -PL_TEST_SYM_INTERVAL (cospi, 0, 0x1p-63, 5000) -PL_TEST_SYM_INTERVAL (cospi, 0x1p-63, 0.5, 10000) -PL_TEST_SYM_INTERVAL (cospi, 0.5, 0x1p51f, 10000) -PL_TEST_SYM_INTERVAL (cospi, 0x1p51f, inf, 10000) +#if WANT_EXPERIMENTAL_MATH +double +cospi (double x) +{ + return arm_math_cospi (x); +} +#endif + +#if WANT_TRIGPI_TESTS +TEST_ULP (arm_math_cospi, 2.63) +TEST_SYM_INTERVAL (arm_math_cospi, 0, 0x1p-63, 5000) +TEST_SYM_INTERVAL (arm_math_cospi, 0x1p-63, 0.5, 10000) +TEST_SYM_INTERVAL (arm_math_cospi, 0.5, 0x1p51f, 10000) +TEST_SYM_INTERVAL (arm_math_cospi, 0x1p51f, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/cospif_2u6.c b/contrib/arm-optimized-routines/math/aarch64/cospif_2u6.c similarity index 79% rename from contrib/arm-optimized-routines/pl/math/cospif_2u6.c rename to contrib/arm-optimized-routines/math/aarch64/cospif_2u6.c index d78a98ed8b2d..eb5b75402a63 100644 --- a/contrib/arm-optimized-routines/pl/math/cospif_2u6.c +++ b/contrib/arm-optimized-routines/math/aarch64/cospif_2u6.c @@ -1,84 +1,93 @@ /* * Single-precision scalar cospi function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" /* Taylor series coefficients for sin(pi * x). */ #define C0 0x1.921fb6p1f #define C1 -0x1.4abbcep2f #define C2 0x1.466bc6p1f #define C3 -0x1.32d2ccp-1f #define C4 0x1.50783p-4f #define C5 -0x1.e30750p-8f #define Shift 0x1.0p+23f /* Approximation for scalar single-precision cospi(x) - cospif. Maximum error: 2.64 ULP: cospif(0x1.37e844p-4) got 0x1.f16b3p-1 want 0x1.f16b2ap-1. */ float -cospif (float x) +arm_math_cospif (float x) { - if (isinf (x)) + if (isinf (x) || isnan (x)) return __math_invalidf (x); float ax = asfloat (asuint (x) & ~0x80000000); /* Edge cases for when cospif should be exactly +/- 1. (Integers) 0x1p23 is the limit for single precision to store any fractional part. */ if (ax >= 0x1p24f) return 1; uint32_t m = roundf (ax); if (m == ax) return (m & 1) ? -1 : 1; /* Any non-integer value >= 0x1p22f will be an integer + 0.5. These values should return exactly 0. */ if (ax >= 0x1p22f) return 0; /* For very small inputs, squaring r causes underflow. Values below this threshold can be approximated by cospi(x) ~= 1; computing 1 - (pi*x) keeps the result correctly rounded in directed rounding modes (it still rounds to 1 to nearest). */ if (ax < 0x1p-31f) return 1 - (C0 * x); /* n = rint(|x|). */ float n = ax + Shift; uint32_t sign = asuint (n) << 31; n = n - Shift; /* We know that cospi(x) = sinpi(0.5 - x) range reduction and offset into sinpi range -1/2 .. 1/2 r = 0.5 - |x - rint(x)|. */ float r = 0.5f - fabs (ax - n); /* y = sin(pi * r). */ float r2 = r * r; float y = fmaf (C5, r2, C4); y = fmaf (y, r2, C3); y = fmaf (y, r2, C2); y = fmaf (y, r2, C1); y = fmaf (y, r2, C0); /* As all values are reduced to -1/2 .. 1/2, the result will always be positive; the sign must therefore be introduced based upon whether x rounds to odd or even.
*/ return asfloat (asuint (y * r) ^ sign); } -PL_SIG (S, F, 1, cospi, -0.9, 0.9) -PL_TEST_ULP (cospif, 2.15) -PL_TEST_SYM_INTERVAL (cospif, 0, 0x1p-31, 5000) -PL_TEST_SYM_INTERVAL (cospif, 0x1p-31, 0.5, 10000) -PL_TEST_SYM_INTERVAL (cospif, 0.5, 0x1p22f, 10000) -PL_TEST_SYM_INTERVAL (cospif, 0x1p22f, inf, 10000) +#if WANT_EXPERIMENTAL_MATH +float +cospif (float x) +{ + return arm_math_cospif (x); +} +#endif + +#if WANT_TRIGPI_TESTS +TEST_ULP (arm_math_cospif, 2.15) +TEST_SYM_INTERVAL (arm_math_cospif, 0, 0x1p-31, 5000) +TEST_SYM_INTERVAL (arm_math_cospif, 0x1p-31, 0.5, 10000) +TEST_SYM_INTERVAL (arm_math_cospif, 0.5, 0x1p22f, 10000) +TEST_SYM_INTERVAL (arm_math_cospif, 0x1p22f, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/pl/README.contributors b/contrib/arm-optimized-routines/math/aarch64/experimental/README.contributors similarity index 71% rename from contrib/arm-optimized-routines/pl/README.contributors rename to contrib/arm-optimized-routines/math/aarch64/experimental/README.contributors index 3af9b1fc7741..abb749485ba3 100644 --- a/contrib/arm-optimized-routines/pl/README.contributors +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/README.contributors @@ -1,23 +1,16 @@ Code in this sub-directory should follow the GNU Coding Standard, but it is not expected to be upstreamed into glibc without modification, so glibc-specific conventions need not be followed. The requirements for portable code apply to non-portable code with the following differences: - 1. Worst-case ULP error should be encoded in filenames (e.g. sin_u35.c). There are no specific restrictions on acceptable ULP error, but if functions provide significantly less accuracy than portable equivalents then a clear justification for inclusion should be stated in comments at the top of the source file. Error bounds of the approximation should be clearly documented in comments. 2. Functions are assumed to support round-to-nearest mode by default, unless stated; other rounding modes are not required to be provided. - -3. Handling of special cases may be relaxed for vector functions. Checking - whether each vector lane contains special values such as NaN, Inf or - denormal numbers can prove too costly for vector functions. This is often - not required since vector functions are typically used along with aggressive - compiler optimization flags. diff --git a/contrib/arm-optimized-routines/pl/math/acos_2u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/acos_2u.c similarity index 76% rename from contrib/arm-optimized-routines/pl/math/acos_2u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/acos_2u.c index 9ec6894f1d81..062215c92248 100644 --- a/contrib/arm-optimized-routines/pl/math/acos_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/acos_2u.c @@ -1,100 +1,100 @@ /* * Double-precision acos(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #include "poly_scalar_f64.h" -#include "pl_sig.h" -#include "pl_test.h" - -#define AbsMask (0x7fffffffffffffff) -#define Half (0x3fe0000000000000) -#define One (0x3ff0000000000000) -#define PiOver2 (0x1.921fb54442d18p+0) -#define Pi (0x1.921fb54442d18p+1) -#define Small (0x3c90000000000000) /* 2^-53. 
*/ -#define Small16 (0x3c90) -#define QNaN (0x7ff8) +#include "test_sig.h" +#include "test_defs.h" + +#define AbsMask 0x7fffffffffffffff +#define Half 0x3fe0000000000000 +#define One 0x3ff0000000000000 +#define PiOver2 0x1.921fb54442d18p+0 +#define Pi 0x1.921fb54442d18p+1 +#define Small 0x3c90000000000000 /* 2^-53. */ +#define Small16 0x3c90 +#define QNaN 0x7ff8 /* Fast implementation of double-precision acos(x) based on polynomial approximation of double-precision asin(x). For x < Small, approximate acos(x) by pi/2 - x. Small = 2^-53 for correct rounding. For |x| in [Small, 0.5], use the trigonometric identity acos(x) = pi/2 - asin(x) - and use an order 11 polynomial P such that the final approximation of asin is - an odd polynomial: asin(x) ~ x + x^3 * P(x^2). + and use an order 11 polynomial P such that the final approximation of asin + is an odd polynomial: asin(x) ~ x + x^3 * P(x^2). The largest observed error in this region is 1.18 ulps, acos(0x1.fbab0a7c460f6p-2) got 0x1.0d54d1985c068p+0 want 0x1.0d54d1985c069p+0. For |x| in [0.5, 1.0], use the following development of acos(x) near x = 1 acos(x) ~ pi/2 - 2 * sqrt(z) (1 + z * P(z)) where z = (1-x)/2, z is near 0 when x approaches 1, and P contributes to the approximation of asin near 0. The largest observed error in this region is 1.52 ulps, acos(0x1.23d362722f591p-1) got 0x1.edbbedf8a7d6ep-1 want 0x1.edbbedf8a7d6cp-1. For x in [-1.0, -0.5], use this other identity to deduce the negative inputs from their absolute value: acos(x) = pi - acos(-x). */ double acos (double x) { uint64_t ix = asuint64 (x); uint64_t ia = ix & AbsMask; uint64_t ia16 = ia >> 48; double ax = asdouble (ia); uint64_t sign = ix & ~AbsMask; /* Special values and invalid range. */ if (unlikely (ia16 == QNaN)) return x; if (ia > One) return __math_invalid (x); if (ia16 < Small16) return PiOver2 - x; /* Evaluate polynomial Q(|x|) = z + z * z2 * P(z2) with z2 = x ^ 2 and z = |x| , if |x| < 0.5 z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ double z2 = ax < 0.5 ? x * x : fma (-0.5, ax, 0.5); double z = ax < 0.5 ? ax : sqrt (z2); /* Use a single polynomial approximation P for both intervals. */ double z4 = z2 * z2; double z8 = z4 * z4; double z16 = z8 * z8; double p = estrin_11_f64 (z2, z4, z8, z16, __asin_poly); /* Finalize polynomial: z + z * z2 * P(z2). */ p = fma (z * z2, p, z); /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 = pi - 2 Q(|x|), for -1.0 < x <= -0.5 = 2 Q(|x|) , for -0.5 < x < 0.0. */ if (ax < 0.5) return PiOver2 - asdouble (asuint64 (p) | sign); return (x <= -0.5) ? 
fma (-2.0, p, Pi) : 2.0 * p; } -PL_SIG (S, D, 1, acos, -1.0, 1.0) -PL_TEST_ULP (acos, 1.02) -PL_TEST_INTERVAL (acos, 0, Small, 5000) -PL_TEST_INTERVAL (acos, Small, 0.5, 50000) -PL_TEST_INTERVAL (acos, 0.5, 1.0, 50000) -PL_TEST_INTERVAL (acos, 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (acos, 0x1p11, inf, 20000) -PL_TEST_INTERVAL (acos, -0, -inf, 20000) +TEST_SIG (S, D, 1, acos, -1.0, 1.0) +TEST_ULP (acos, 1.02) +TEST_INTERVAL (acos, 0, Small, 5000) +TEST_INTERVAL (acos, Small, 0.5, 50000) +TEST_INTERVAL (acos, 0.5, 1.0, 50000) +TEST_INTERVAL (acos, 1.0, 0x1p11, 50000) +TEST_INTERVAL (acos, 0x1p11, inf, 20000) +TEST_INTERVAL (acos, -0, -inf, 20000) diff --git a/contrib/arm-optimized-routines/pl/math/acosf_1u4.c b/contrib/arm-optimized-routines/math/aarch64/experimental/acosf_1u4.c similarity index 79% rename from contrib/arm-optimized-routines/pl/math/acosf_1u4.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/acosf_1u4.c index 6dde422ef85a..d207f5e89f26 100644 --- a/contrib/arm-optimized-routines/pl/math/acosf_1u4.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/acosf_1u4.c @@ -1,99 +1,99 @@ /* * Single-precision acos(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "poly_scalar_f32.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" - -#define AbsMask (0x7fffffff) -#define Half (0x3f000000) -#define One (0x3f800000) -#define PiOver2f (0x1.921fb6p+0f) -#define Pif (0x1.921fb6p+1f) -#define Small (0x32800000) /* 2^-26. */ -#define Small12 (0x328) -#define QNaN (0x7fc) +#include "test_sig.h" +#include "test_defs.h" + +#define AbsMask 0x7fffffff +#define Half 0x3f000000 +#define One 0x3f800000 +#define PiOver2f 0x1.921fb6p+0f +#define Pif 0x1.921fb6p+1f +#define Small 0x32800000 /* 2^-26. */ +#define Small12 0x328 +#define QNaN 0x7fc /* Fast implementation of single-precision acos(x) based on polynomial approximation of single-precision asin(x). For x < Small, approximate acos(x) by pi/2 - x. Small = 2^-26 for correct rounding. For |x| in [Small, 0.5], use the trigonometric identity acos(x) = pi/2 - asin(x) and use an order 4 polynomial P such that the final approximation of asin is an odd polynomial: asin(x) ~ x + x^3 * P(x^2). The largest observed error in this region is 1.16 ulps, acosf(0x1.ffbeccp-2) got 0x1.0c27f8p+0 want 0x1.0c27f6p+0. For |x| in [0.5, 1.0], use the following development of acos(x) near x = 1 acos(x) ~ pi/2 - 2 * sqrt(z) (1 + z * P(z)) where z = (1-x)/2, z is near 0 when x approaches 1, and P contributes to the approximation of asin near 0. The largest observed error in this region is 1.32 ulps, acosf(0x1.15ba56p-1) got 0x1.feb33p-1 want 0x1.feb32ep-1. For x in [-1.0, -0.5], use this other identity to deduce the negative inputs from their absolute value. acos(x) = pi - acos(-x) The largest observed error in this region is 1.28 ulps, acosf(-0x1.002072p-1) got 0x1.0c1e84p+1 want 0x1.0c1e82p+1. */ float acosf (float x) { uint32_t ix = asuint (x); uint32_t ia = ix & AbsMask; uint32_t ia12 = ia >> 20; float ax = asfloat (ia); uint32_t sign = ix & ~AbsMask; /* Special values and invalid range. */ if (unlikely (ia12 == QNaN)) return x; if (ia > One) return __math_invalidf (x); if (ia12 < Small12) return PiOver2f - x; /* Evaluate polynomial Q(|x|) = z + z * z2 * P(z2) with z2 = x ^ 2 and z = |x| , if |x| < 0.5 z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ float z2 = ax < 0.5 ? 
x * x : fmaf (-0.5f, ax, 0.5f); float z = ax < 0.5 ? ax : sqrtf (z2); /* Use a single polynomial approximation P for both intervals. */ float p = horner_4_f32 (z2, __asinf_poly); /* Finalize polynomial: z + z * z2 * P(z2). */ p = fmaf (z * z2, p, z); /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 = pi - 2 Q(|x|), for -1.0 < x <= -0.5 = 2 Q(|x|) , for -0.5 < x < 0.0. */ if (ax < 0.5) return PiOver2f - asfloat (asuint (p) | sign); return (x <= -0.5) ? fmaf (-2.0f, p, Pif) : 2.0f * p; } -PL_SIG (S, F, 1, acos, -1.0, 1.0) -PL_TEST_ULP (acosf, 0.82) -PL_TEST_INTERVAL (acosf, 0, Small, 5000) -PL_TEST_INTERVAL (acosf, Small, 0.5, 50000) -PL_TEST_INTERVAL (acosf, 0.5, 1.0, 50000) -PL_TEST_INTERVAL (acosf, 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (acosf, 0x1p11, inf, 20000) -PL_TEST_INTERVAL (acosf, -0, -inf, 20000) +TEST_SIG (S, F, 1, acos, -1.0, 1.0) +TEST_ULP (acosf, 0.82) +TEST_INTERVAL (acosf, 0, Small, 5000) +TEST_INTERVAL (acosf, Small, 0.5, 50000) +TEST_INTERVAL (acosf, 0.5, 1.0, 50000) +TEST_INTERVAL (acosf, 1.0, 0x1p11, 50000) +TEST_INTERVAL (acosf, 0x1p11, inf, 20000) +TEST_INTERVAL (acosf, -0, -inf, 20000) diff --git a/contrib/arm-optimized-routines/pl/math/acosh_3u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/acosh_3u.c similarity index 69% rename from contrib/arm-optimized-routines/pl/math/acosh_3u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/acosh_3u.c index 4e2cb6737ba8..19da82f4f3e5 100644 --- a/contrib/arm-optimized-routines/pl/math/acosh_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/acosh_3u.c @@ -1,66 +1,61 @@ /* * Double-precision acosh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +#include "mathlib.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define Ln2 (0x1.62e42fefa39efp-1) #define MinusZero (0x8000000000000000) #define SquareLim (0x5fe0000000000000) /* asuint64(0x1.0p511). */ #define Two (0x4000000000000000) /* asuint64(2.0). */ -double -optr_aor_log_f64 (double); - -double -log1p (double); - /* acosh approximation using a variety of approaches on different intervals: acosh(x) = ln(x + sqrt(x * x - 1)). - x >= 2^511: We cannot square x without overflow. For huge x, sqrt(x*x - 1) is - close enough to x that we can calculate the result by ln(2x) == ln(x) + + x >= 2^511: We cannot square x without overflow. For huge x, sqrt(x*x - 1) + is close enough to x that we can calculate the result by ln(2x) == ln(x) + ln(2). The greatest observed error in this region is 0.98 ULP: acosh(0x1.1b9bf42923d1dp+853) got 0x1.28066a11a7c7fp+9 want 0x1.28066a11a7c8p+9. x > 2: Calculate the result directly using definition of acosh(x). Greatest observed error in this region is 1.33 ULP: acosh(0x1.1e45d14bfcfa2p+1) got 0x1.71a06f50c34b5p+0 want 0x1.71a06f50c34b6p+0. 0 <= x <= 2: Calculate the result using log1p. For x < 1, acosh(x) is undefined. For 1 <= x <= 2, the largest observed error is 2.69 ULP: acosh(0x1.073528248093p+0) got 0x1.e4d9bd20684f3p-3 want 0x1.e4d9bd20684f6p-3. 
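   (Note derived from the code below: the log1p argument
   xm1 + sqrt (2 * xm1 + xm1 * xm1) is an algebraic rewrite of
   x + sqrt (x * x - 1) - 1, which avoids the cancellation in x * x - 1 when
   x is close to 1.)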
*/ double acosh (double x) { uint64_t ix = asuint64 (x); if (unlikely (ix >= MinusZero)) return __math_invalid (x); if (unlikely (ix >= SquareLim)) - return optr_aor_log_f64 (x) + Ln2; + return log (x) + Ln2; if (ix >= Two) - return optr_aor_log_f64 (x + sqrt (x * x - 1)); + return log (x + sqrt (x * x - 1)); double xm1 = x - 1; return log1p (xm1 + sqrt (2 * xm1 + xm1 * xm1)); } -PL_SIG (S, D, 1, acosh, 1.0, 10.0) -PL_TEST_ULP (acosh, 2.19) -PL_TEST_INTERVAL (acosh, 0, 1, 10000) -PL_TEST_INTERVAL (acosh, 1, 2, 100000) -PL_TEST_INTERVAL (acosh, 2, 0x1p511, 100000) -PL_TEST_INTERVAL (acosh, 0x1p511, inf, 100000) -PL_TEST_INTERVAL (acosh, -0, -inf, 10000) +TEST_SIG (S, D, 1, acosh, 1.0, 10.0) +TEST_ULP (acosh, 2.19) +TEST_INTERVAL (acosh, 0, 1, 10000) +TEST_INTERVAL (acosh, 1, 2, 100000) +TEST_INTERVAL (acosh, 2, 0x1p511, 100000) +TEST_INTERVAL (acosh, 0x1p511, inf, 100000) +TEST_INTERVAL (acosh, -0, -inf, 10000) diff --git a/contrib/arm-optimized-routines/pl/math/acoshf_2u8.c b/contrib/arm-optimized-routines/math/aarch64/experimental/acoshf_2u8.c similarity index 68% rename from contrib/arm-optimized-routines/pl/math/acoshf_2u8.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/acoshf_2u8.c index c9cded7fd2ff..a46b310ee312 100644 --- a/contrib/arm-optimized-routines/pl/math/acoshf_2u8.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/acoshf_2u8.c @@ -1,63 +1,55 @@ /* * Single-precision acosh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define Ln2 (0x1.62e4p-1f) #define MinusZero 0x80000000 #define SquareLim 0x5f800000 /* asuint(0x1p64). */ #define Two 0x40000000 -/* Single-precision log from math/. */ -float -optr_aor_log_f32 (float); - -/* Single-precision log(1+x) from pl/math. */ -float -log1pf (float); - /* acoshf approximation using a variety of approaches on different intervals: x >= 2^64: We cannot square x without overflow. For huge x, sqrt(x*x - 1) is close enough to x that we can calculate the result by ln(2x) == ln(x) + ln(2). The greatest error in the region is 0.94 ULP: acoshf(0x1.15f706p+92) got 0x1.022e14p+6 want 0x1.022e16p+6. x > 2: Calculate the result directly using definition of acosh(x) = ln(x + sqrt(x*x - 1)). Greatest error in this region is 1.30 ULP: acoshf(0x1.249d8p+1) got 0x1.77e1aep+0 want 0x1.77e1bp+0. 0 <= x <= 2: Calculate the result using log1p. For x < 1, acosh(x) is undefined. For 1 <= x <= 2, the greatest error is 2.78 ULP: acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 want 0x1.ef9ea2p-3.
*/ float acoshf (float x) { uint32_t ix = asuint (x); if (unlikely (ix >= MinusZero)) return __math_invalidf (x); if (unlikely (ix >= SquareLim)) - return optr_aor_log_f32 (x) + Ln2; + return logf (x) + Ln2; if (ix > Two) - return optr_aor_log_f32 (x + sqrtf (x * x - 1)); + return logf (x + sqrtf (x * x - 1)); float xm1 = x - 1; return log1pf (xm1 + sqrtf (2 * xm1 + xm1 * xm1)); } -PL_SIG (S, F, 1, acosh, 1.0, 10.0) -PL_TEST_ULP (acoshf, 2.30) -PL_TEST_INTERVAL (acoshf, 0, 1, 100) -PL_TEST_INTERVAL (acoshf, 1, 2, 10000) -PL_TEST_INTERVAL (acoshf, 2, 0x1p64, 100000) -PL_TEST_INTERVAL (acoshf, 0x1p64, inf, 100000) -PL_TEST_INTERVAL (acoshf, -0, -inf, 10000) +TEST_SIG (S, F, 1, acosh, 1.0, 10.0) +TEST_ULP (acoshf, 2.30) +TEST_INTERVAL (acoshf, 0, 1, 100) +TEST_INTERVAL (acoshf, 1, 2, 10000) +TEST_INTERVAL (acoshf, 2, 0x1p64, 100000) +TEST_INTERVAL (acoshf, 0x1p64, inf, 100000) +TEST_INTERVAL (acoshf, -0, -inf, 10000) diff --git a/contrib/arm-optimized-routines/pl/math/v_erfinv_25u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/advsimd/erfinv_25u.c similarity index 88% rename from contrib/arm-optimized-routines/pl/math/v_erfinv_25u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/advsimd/erfinv_25u.c index 654a7336e85b..2fa2f0beb8b7 100644 --- a/contrib/arm-optimized-routines/pl/math/v_erfinv_25u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/advsimd/erfinv_25u.c @@ -1,161 +1,166 @@ /* * Double-precision inverse error function (AdvSIMD variant). * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_test.h" +#include "test_defs.h" #include "mathlib.h" #include "math_config.h" -#include "pl_sig.h" -#include "poly_advsimd_f64.h" +#include "test_sig.h" +#include "v_poly_f64.h" #define V_LOG_INLINE_POLY_ORDER 4 #include "v_log_inline.h" const static struct data { /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs of the denominator. P is interleaved P_17 and P_37, similar for Q. P17 and Q17 are provided as homogenous vectors as well for when the shortcut can be taken. 
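   (Layout note, inferred from lookup () and the idx table below: each P[i]
   holds the pair { P_17[i], P_37[i] }, so adding 8 to a lane's byte indices
   selects the tail-interval coefficient for that lane only.)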
*/ double P[8][2], Q[7][2]; float64x2_t tailshift; - uint8x16_t idx; + uint8_t idx[16]; struct v_log_inline_data log_tbl; float64x2_t P_57[9], Q_57[10], P_17[7], Q_17[6]; } data = { .P = { { 0x1.007ce8f01b2e8p+4, -0x1.f3596123109edp-7 }, { -0x1.6b23cc5c6c6d7p+6, 0x1.60b8fe375999ep-2 }, { 0x1.74e5f6ceb3548p+7, -0x1.779bb9bef7c0fp+1 }, { -0x1.5200bb15cc6bbp+7, 0x1.786ea384470a2p+3 }, { 0x1.05d193233a849p+6, -0x1.6a7c1453c85d3p+4 }, { -0x1.148c5474ee5e1p+3, 0x1.31f0fc5613142p+4 }, { 0x1.689181bbafd0cp-3, -0x1.5ea6c007d4dbbp+2 }, { 0, 0x1.e66f265ce9e5p-3 } }, .Q = { { 0x1.d8fb0f913bd7bp+3, -0x1.636b2dcf4edbep-7 }, { -0x1.6d7f25a3f1c24p+6, 0x1.0b5411e2acf29p-2 }, { 0x1.a450d8e7f4cbbp+7, -0x1.3413109467a0bp+1 }, { -0x1.bc3480485857p+7, 0x1.563e8136c554ap+3 }, { 0x1.ae6b0c504ee02p+6, -0x1.7b77aab1dcafbp+4 }, { -0x1.499dfec1a7f5fp+4, 0x1.8a3e174e05ddcp+4 }, { 0x1p+0, -0x1.4075c56404eecp+3 } }, .P_57 = { V2 (0x1.b874f9516f7f1p-14), V2 (0x1.5921f2916c1c4p-7), V2 (0x1.145ae7d5b8fa4p-2), V2 (0x1.29d6dcc3b2fb7p+1), V2 (0x1.cabe2209a7985p+2), V2 (0x1.11859f0745c4p+3), V2 (0x1.b7ec7bc6a2ce5p+2), V2 (0x1.d0419e0bb42aep+1), V2 (0x1.c5aa03eef7258p-1) }, .Q_57 = { V2 (0x1.b8747e12691f1p-14), V2 (0x1.59240d8ed1e0ap-7), V2 (0x1.14aef2b181e2p-2), V2 (0x1.2cd181bcea52p+1), V2 (0x1.e6e63e0b7aa4cp+2), V2 (0x1.65cf8da94aa3ap+3), V2 (0x1.7e5c787b10a36p+3), V2 (0x1.0626d68b6cea3p+3), V2 (0x1.065c5f193abf6p+2), V2 (0x1p+0) }, .P_17 = { V2 (0x1.007ce8f01b2e8p+4), V2 (-0x1.6b23cc5c6c6d7p+6), V2 (0x1.74e5f6ceb3548p+7), V2 (-0x1.5200bb15cc6bbp+7), V2 (0x1.05d193233a849p+6), V2 (-0x1.148c5474ee5e1p+3), V2 (0x1.689181bbafd0cp-3) }, .Q_17 = { V2 (0x1.d8fb0f913bd7bp+3), V2 (-0x1.6d7f25a3f1c24p+6), V2 (0x1.a450d8e7f4cbbp+7), V2 (-0x1.bc3480485857p+7), V2 (0x1.ae6b0c504ee02p+6), V2 (-0x1.499dfec1a7f5fp+4) }, .tailshift = V2 (-0.87890625), - .idx = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + .idx = { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 }, .log_tbl = V_LOG_CONSTANTS }; static inline float64x2_t special (float64x2_t x, const struct data *d) { /* Note erfinv(inf) should return NaN, and erfinv(1) should return Inf. By using log here, instead of log1p, we return finite values for both these inputs, and values outside [-1, 1]. This is non-compliant, but is an acceptable optimisation at Ofast. To get correct behaviour for all finite values use the log1p_inline helper on -abs(x) - note that erfinv(inf) will still be finite. */ float64x2_t t = vnegq_f64 ( v_log_inline (vsubq_f64 (v_f64 (1), vabsq_f64 (x)), &d->log_tbl)); t = vdivq_f64 (v_f64 (1), vsqrtq_f64 (t)); float64x2_t ts = vbslq_f64 (v_u64 (0x7fffffffffffffff), t, x); return vdivq_f64 (v_horner_8_f64 (t, d->P_57), vmulq_f64 (ts, v_horner_9_f64 (t, d->Q_57))); } static inline float64x2_t lookup (const double *c, uint8x16_t idx) { float64x2_t x = vld1q_f64 (c); return vreinterpretq_f64_u8 (vqtbl1q_u8 (vreinterpretq_u8_f64 (x), idx)); } static inline float64x2_t VPCS_ATTR notails (float64x2_t x, const struct data *d) { /* Shortcut when no input is in a tail region - no need to gather shift or coefficients. */ float64x2_t t = vfmaq_f64 (v_f64 (-0.5625), x, x); float64x2_t p = vmulq_f64 (v_horner_6_f64 (t, d->P_17), x); float64x2_t q = vaddq_f64 (d->Q_17[5], t); for (int i = 4; i >= 0; i--) q = vfmaq_f64 (d->Q_17[i], q, t); return vdivq_f64 (p, q); } /* Vector implementation of Blair et al's rational approximation to inverse error function in single-precision. 
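The idx change above (uint8x16_t member replaced by a plain uint8_t[16] loaded with vld1q_u8) leaves the lookup scheme intact: each 16-byte vector holds one interleaved coefficient pair { P_17[i], P_37[i] }, the base indices are { 0..7, 0..7 }, and adding 8 in tail lanes selects the second double. A portable sketch of the vqtbl1q_u8 semantics, assuming a little-endian target; tbl1q_emulate is a made-up name.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* vqtbl1q_u8: per-byte table select, out-of-range indices give 0. */
static void
tbl1q_emulate (const uint8_t tbl[16], const uint8_t idx[16], uint8_t out[16])
{
  for (int i = 0; i < 16; i++)
    out[i] = idx[i] < 16 ? tbl[idx[i]] : 0;
}

int
main (void)
{
  /* One interleaved pair, as in d->P above: bytes 0-7 hold the normal
     coefficient, bytes 8-15 the tail one (little-endian doubles). */
  double pair[2] = { 1.5, -2.5 };
  uint8_t bytes[16], res[16];
  memcpy (bytes, pair, 16);

  /* Base { 0..7, 0..7 } after vaddq_u8 with off = { 0 x8, 8 x8 }:
     lane 0 on the normal path, lane 1 in the tail. */
  uint8_t idx[16] = { 0, 1, 2, 3, 4, 5, 6, 7,
                      8, 9, 10, 11, 12, 13, 14, 15 };
  tbl1q_emulate (bytes, idx, res);

  double out[2];
  memcpy (out, res, 16);
  printf ("%g %g\n", out[0], out[1]);  /* 1.5 -2.5 */
  return 0;
}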
Largest observed error is 24.75 ULP: _ZGVnN2v_erfinv(0x1.fc861d81c2ba8p-1) got 0x1.ea05472686625p+0 want 0x1.ea0547268660cp+0. */ float64x2_t VPCS_ATTR V_NAME_D1 (erfinv) (float64x2_t x) { const struct data *d = ptr_barrier (&data); /* Calculate inverse error using algorithm described in J. M. Blair, C. A. Edwards, and J. H. Johnson, "Rational Chebyshev approximations for the inverse of the error function", Math. Comp. 30, pp. 827--830 (1976). https://doi.org/10.1090/S0025-5718-1976-0421040-7. Algorithm has 3 intervals: - 'Normal' region [-0.75, 0.75] - Tail region [0.75, 0.9375] U [-0.9375, -0.75] - Extreme tail [-1, -0.9375] U [0.9375, 1] Normal and tail are both rational approximation of similar order on shifted input - these are typically performed in parallel using gather loads to obtain correct coefficients depending on interval. */ uint64x2_t is_tail = vcagtq_f64 (x, v_f64 (0.75)); if (unlikely (!v_any_u64 (is_tail))) /* If input is normally distributed in [-1, 1] then likelihood of this is 0.75^2 ~= 0.56. */ return notails (x, d); uint64x2_t extreme_tail = vcagtq_f64 (x, v_f64 (0.9375)); uint8x16_t off = vandq_u8 (vreinterpretq_u8_u64 (is_tail), vdupq_n_u8 (8)); - uint8x16_t idx = vaddq_u8 (d->idx, off); + uint8x16_t idx = vaddq_u8 (vld1q_u8 (d->idx), off); float64x2_t t = vbslq_f64 (is_tail, d->tailshift, v_f64 (-0.5625)); t = vfmaq_f64 (t, x, x); float64x2_t p = lookup (&d->P[7][0], idx); /* Last coeff of q is either 0 or 1 - use mask instead of load. */ float64x2_t q = vreinterpretq_f64_u64 ( vandq_u64 (is_tail, vreinterpretq_u64_f64 (v_f64 (1)))); for (int i = 6; i >= 0; i--) { p = vfmaq_f64 (lookup (&d->P[i][0], idx), p, t); q = vfmaq_f64 (lookup (&d->Q[i][0], idx), q, t); } p = vmulq_f64 (p, x); if (unlikely (v_any_u64 (extreme_tail))) return vbslq_f64 (extreme_tail, special (x, d), vdivq_f64 (p, q)); return vdivq_f64 (p, q); } -PL_SIG (V, D, 1, erfinv, -0.99, 0.99) -PL_TEST_ULP (V_NAME_D1 (erfinv), 24.8) +#if USE_MPFR +# warning Not generating tests for _ZGVnN2v_erfinv, as MPFR has no suitable reference +#else +TEST_SIG (V, D, 1, erfinv, -0.99, 0.99) +TEST_ULP (V_NAME_D1 (erfinv), 24.8) +TEST_DISABLE_FENV (V_NAME_D1 (erfinv)) +TEST_SYM_INTERVAL (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000) +TEST_SYM_INTERVAL (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000) +TEST_SYM_INTERVAL (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000) /* Test with control lane in each interval. */ -PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000, - 0.5) -PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000, - 0.8) -PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000, - 0.95) +TEST_CONTROL_VALUE (V_NAME_D1 (erfinv), 0.5) +TEST_CONTROL_VALUE (V_NAME_D1 (erfinv), 0.8) +TEST_CONTROL_VALUE (V_NAME_D1 (erfinv), 0.95) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/v_erfinvf_5u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/advsimd/erfinvf_5u.c similarity index 83% rename from contrib/arm-optimized-routines/pl/math/v_erfinvf_5u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/advsimd/erfinvf_5u.c index 5a6800b86ae9..254d50feb289 100644 --- a/contrib/arm-optimized-routines/pl/math/v_erfinvf_5u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/advsimd/erfinvf_5u.c @@ -1,163 +1,172 @@ /* * Single-precision inverse error function (AdvSIMD variant). * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. 
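One detail above worth unpacking: "Last coeff of q is either 0 or 1 - use mask instead of load". AND-ing the all-ones/all-zeros compare mask with the bit pattern of 1.0 yields the per-lane constant without touching the coefficient table. A scalar sketch; masked_one is an illustrative name.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* lane_mask is 0 or 0xffffffffffffffff, as produced by a vector compare. */
static double
masked_one (uint64_t lane_mask)
{
  uint64_t one_bits;
  double one = 1.0, r;
  memcpy (&one_bits, &one, 8);
  one_bits &= lane_mask;        /* bits of 1.0 in tail lanes, 0 otherwise */
  memcpy (&r, &one_bits, 8);
  return r;
}

int
main (void)
{
  printf ("%g %g\n", masked_one (~0ULL), masked_one (0)); /* 1 0 */
  return 0;
}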
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_advsimd_f32.h" +#include "test_sig.h" +#include "test_defs.h" +#include "v_poly_f32.h" #include "v_logf_inline.h" const static struct data { /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs of the denominator. Coefficients are stored in various interleaved formats to allow for table-based (vector-to-vector) lookup. Plo is first two coefficients of P_10 and P_29 interleaved. PQ is third coeff of P_10 and first of Q_29 interleaved. Qhi is second and third coeffs of Q_29 interleaved. P29_3 is a homogenous vector with fourth coeff of P_29. P_10 and Q_10 are also stored in homogenous vectors to allow better memory access when no lanes are in a tail region. */ - float32x4_t Plo, PQ, Qhi, P29_3, tailshift; + float Plo[4], PQ[4], Qhi[4]; + float32x4_t P29_3, tailshift; float32x4_t P_50[6], Q_50[2]; float32x4_t P_10[3], Q_10[3]; - uint8x16_t idxhi, idxlo; + uint8_t idxhi[16], idxlo[16]; struct v_logf_data logf_tbl; } data = { - .idxlo = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - .idxhi = { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, + .idxlo = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 }, + .idxhi = { 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11 }, .P29_3 = V4 (0x1.b13626p-2), .tailshift = V4 (-0.87890625), .Plo = { -0x1.a31268p+3, -0x1.fc0252p-4, 0x1.ac9048p+4, 0x1.119d44p+0 }, .PQ = { -0x1.293ff6p+3, -0x1.f59ee2p+0, -0x1.8265eep+3, -0x1.69952p-4 }, .Qhi = { 0x1.ef5eaep+4, 0x1.c7b7d2p-1, -0x1.12665p+4, -0x1.167d7p+1 }, .P_50 = { V4 (0x1.3d8948p-3), V4 (0x1.61f9eap+0), V4 (0x1.61c6bcp-1), V4 (-0x1.20c9f2p+0), V4 (0x1.5c704cp-1), V4 (-0x1.50c6bep-3) }, .Q_50 = { V4 (0x1.3d7dacp-3), V4 (0x1.629e5p+0) }, .P_10 = { V4 (-0x1.a31268p+3), V4 (0x1.ac9048p+4), V4 (-0x1.293ff6p+3) }, .Q_10 = { V4 (-0x1.8265eep+3), V4 (0x1.ef5eaep+4), V4 (-0x1.12665p+4) }, .logf_tbl = V_LOGF_CONSTANTS }; static inline float32x4_t special (float32x4_t x, const struct data *d) { /* Note erfinvf(inf) should return NaN, and erfinvf(1) should return Inf. By using log here, instead of log1p, we return finite values for both these inputs, and values outside [-1, 1]. This is non-compliant, but is an acceptable optimisation at Ofast. To get correct behaviour for all finite values use the log1pf_inline helper on -abs(x) - note that erfinvf(inf) will still be finite. */ float32x4_t t = vdivq_f32 ( v_f32 (1), vsqrtq_f32 (vnegq_f32 (v_logf_inline ( vsubq_f32 (v_f32 (1), vabsq_f32 (x)), &d->logf_tbl)))); float32x4_t ts = vbslq_f32 (v_u32 (0x7fffffff), t, x); float32x4_t q = vfmaq_f32 (d->Q_50[0], vaddq_f32 (t, d->Q_50[1]), t); return vdivq_f32 (v_horner_5_f32 (t, d->P_50), vmulq_f32 (ts, q)); } static inline float32x4_t notails (float32x4_t x, const struct data *d) { /* Shortcut when no input is in a tail region - no need to gather shift or coefficients. */ float32x4_t t = vfmaq_f32 (v_f32 (-0.5625), x, x); float32x4_t q = vaddq_f32 (t, d->Q_10[2]); q = vfmaq_f32 (d->Q_10[1], t, q); q = vfmaq_f32 (d->Q_10[0], t, q); return vdivq_f32 (vmulq_f32 (x, v_horner_2_f32 (t, d->P_10)), q); } static inline float32x4_t lookup (float32x4_t tbl, uint8x16_t idx) { return vreinterpretq_f32_u8 (vqtbl1q_u8 (vreinterpretq_u8_f32 (tbl), idx)); } /* Vector implementation of Blair et al's rational approximation to inverse error function in single-precision. 
Worst-case error is 4.98 ULP, in the tail region: _ZGVnN4v_erfinvf(0x1.f7dbeep-1) got 0x1.b4793p+0 want 0x1.b4793ap+0 . */ -float32x4_t VPCS_ATTR V_NAME_F1 (erfinv) (float32x4_t x) +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (erfinv) (float32x4_t x) { const struct data *d = ptr_barrier (&data); /* Calculate inverse error using algorithm described in J. M. Blair, C. A. Edwards, and J. H. Johnson, "Rational Chebyshev approximations for the inverse of the error function", Math. Comp. 30, pp. 827--830 (1976). https://doi.org/10.1090/S0025-5718-1976-0421040-7. Algorithm has 3 intervals: - 'Normal' region [-0.75, 0.75] - Tail region [0.75, 0.9375] U [-0.9375, -0.75] - Extreme tail [-1, -0.9375] U [0.9375, 1] Normal and tail are both rational approximation of similar order on shifted input - these are typically performed in parallel using gather loads to obtain correct coefficients depending on interval. */ uint32x4_t is_tail = vcageq_f32 (x, v_f32 (0.75)); uint32x4_t extreme_tail = vcageq_f32 (x, v_f32 (0.9375)); if (unlikely (!v_any_u32 (is_tail))) /* Shortcut for if all lanes are in [-0.75, 0.75] - can avoid having to gather coefficients. If input is uniform in [-1, 1] then likelihood of this is 0.75^4 ~= 0.31. */ return notails (x, d); /* Select requisite shift depending on interval: polynomial is evaluated on x * x - shift. Normal shift = 0.5625 Tail shift = 0.87890625. */ float32x4_t t = vfmaq_f32 (vbslq_f32 (is_tail, d->tailshift, v_f32 (-0.5625)), x, x); /* Calculate indexes for tbl: tbl is byte-wise, so: [0, 1, 2, 3, 4, 5, 6, ....] copies the vector Add 4 * i to a group of 4 lanes to copy 32-bit lane i. Each vector stores two pairs of coeffs, so we need two idx vectors - one for each pair. */ uint8x16_t off = vandq_u8 (vreinterpretq_u8_u32 (is_tail), vdupq_n_u8 (4)); - uint8x16_t idx_lo = vaddq_u8 (d->idxlo, off); - uint8x16_t idx_hi = vaddq_u8 (d->idxhi, off); + uint8x16_t idx_lo = vaddq_u8 (vld1q_u8 (d->idxlo), off); + uint8x16_t idx_hi = vaddq_u8 (vld1q_u8 (d->idxhi), off); /* Load the tables. */ - float32x4_t p_lo = d->Plo; - float32x4_t pq = d->PQ; - float32x4_t qhi = d->Qhi; + float32x4_t plo = vld1q_f32 (d->Plo); + float32x4_t pq = vld1q_f32 (d->PQ); + float32x4_t qhi = vld1q_f32 (d->Qhi); /* Do the lookup (and calculate p3 by masking non-tail lanes). */ float32x4_t p3 = vreinterpretq_f32_u32 ( vandq_u32 (is_tail, vreinterpretq_u32_f32 (d->P29_3))); - float32x4_t p0 = lookup (p_lo, idx_lo), p1 = lookup (p_lo, idx_hi), + float32x4_t p0 = lookup (plo, idx_lo), p1 = lookup (plo, idx_hi), p2 = lookup (pq, idx_lo), q0 = lookup (pq, idx_hi), q1 = lookup (qhi, idx_lo), q2 = lookup (qhi, idx_hi); float32x4_t p = vfmaq_f32 (p2, p3, t); p = vfmaq_f32 (p1, p, t); p = vfmaq_f32 (p0, p, t); p = vmulq_f32 (x, p); float32x4_t q = vfmaq_f32 (q1, vaddq_f32 (q2, t), t); q = vfmaq_f32 (q0, q, t); if (unlikely (v_any_u32 (extreme_tail))) /* At least one lane is in the extreme tail - if input is uniform in [-1, 1] the likelihood of this is ~0.23. */ return vbslq_f32 (extreme_tail, special (x, d), vdivq_f32 (p, q)); return vdivq_f32 (p, q); } -PL_SIG (V, F, 1, erfinv, -0.99, 0.99) -PL_TEST_ULP (V_NAME_F1 (erfinv), 4.49) +HALF_WIDTH_ALIAS_F1 (erfinv) + +#if USE_MPFR +# warning Not generating tests for _ZGVnN4v_erfinvf, as MPFR has no suitable reference +#else +TEST_SIG (V, F, 1, erfinv, -0.99, 0.99) +TEST_DISABLE_FENV (V_NAME_F1 (erfinv)) +TEST_ULP (V_NAME_F1 (erfinv), 4.49) +TEST_SYM_INTERVAL (V_NAME_F1 (erfinv), 0, 0x1.fffffep-1, 40000) /* Test with control lane in each interval. 
*/ -PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (erfinv), 0, 0x1.fffffep-1, 40000, 0.5) -PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (erfinv), 0, 0x1.fffffep-1, 40000, 0.8) -PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (erfinv), 0, 0x1.fffffep-1, 40000, 0.95) +TEST_CONTROL_VALUE (V_NAME_F1 (erfinv), 0.5) +TEST_CONTROL_VALUE (V_NAME_F1 (erfinv), 0.8) +TEST_CONTROL_VALUE (V_NAME_F1 (erfinv), 0.95) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/v_logf_inline.h b/contrib/arm-optimized-routines/math/aarch64/experimental/advsimd/v_logf_inline.h similarity index 97% rename from contrib/arm-optimized-routines/pl/math/v_logf_inline.h rename to contrib/arm-optimized-routines/math/aarch64/experimental/advsimd/v_logf_inline.h index c00fe0909afc..3f4534173289 100644 --- a/contrib/arm-optimized-routines/pl/math/v_logf_inline.h +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/advsimd/v_logf_inline.h @@ -1,59 +1,59 @@ /* * Single-precision vector log function - inline version * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" struct v_logf_data { float32x4_t poly[7]; float32x4_t ln2; uint32x4_t off, mantissa_mask; }; #define V_LOGF_CONSTANTS \ { \ .poly \ = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f), \ V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f), \ V4 (-0x1.ffffc8p-2f) }, \ .ln2 = V4 (0x1.62e43p-1f), .off = V4 (0x3f2aaaab), \ .mantissa_mask = V4 (0x007fffff) \ } #define P(i) d->poly[7 - i] static inline float32x4_t v_logf_inline (float32x4_t x, const struct v_logf_data *d) { float32x4_t n, p, q, r, r2, y; uint32x4_t u; u = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ u = vsubq_u32 (u, d->off); n = vcvtq_f32_s32 ( vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ u = vandq_u32 (u, d->mantissa_mask); u = vaddq_u32 (u, d->off); r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); /* y = log(1+r) + n*ln2. */ r2 = vmulq_f32 (r, r); /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). */ p = vfmaq_f32 (P (5), P (6), r); q = vfmaq_f32 (P (3), P (4), r); y = vfmaq_f32 (P (1), P (2), r); p = vfmaq_f32 (p, P (7), r2); q = vfmaq_f32 (q, p, r2); y = vfmaq_f32 (y, q, r2); p = vfmaq_f32 (r, d->ln2, n); return vfmaq_f32 (p, y, r2); } #undef P diff --git a/contrib/arm-optimized-routines/pl/math/asin_3u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/asin_3u.c similarity index 78% rename from contrib/arm-optimized-routines/pl/math/asin_3u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/asin_3u.c index 0b50995449ce..56e63e451ba1 100644 --- a/contrib/arm-optimized-routines/pl/math/asin_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/asin_3u.c @@ -1,106 +1,106 @@ /* * Double-precision asin(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "poly_scalar_f64.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" -#define AbsMask (0x7fffffffffffffff) -#define Half (0x3fe0000000000000) -#define One (0x3ff0000000000000) -#define PiOver2 (0x1.921fb54442d18p+0) -#define Small (0x3e50000000000000) /* 2^-26. 
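v_logf_inline above rests on a compact bit-level range reduction. A scalar rendition, assuming the usual arithmetic right shift for signed integers; logf_reduction_demo is a made-up name, and where it calls libm logf the real code evaluates its own degree-7 polynomial in r.

#include <stdint.h>
#include <string.h>
#include <math.h>
#include <stdio.h>

/* Split x into 2^n * (1+r) with 1+r in [2/3, 4/3), so the polynomial
   only has to cover a small interval around 1. */
static float
logf_reduction_demo (float x)
{
  const uint32_t off = 0x3f2aaaab;   /* ~asuint(2/3), as in the struct */
  uint32_t u;
  memcpy (&u, &x, 4);
  u -= off;
  int32_t n = (int32_t) u >> 23;     /* arithmetic shift sign-extends */
  u = (u & 0x007fffff) + off;
  float r1;                          /* r1 = 1 + r in [2/3, 4/3) */
  memcpy (&r1, &u, 4);
  return (float) n * 0x1.62e43p-1f + logf (r1);  /* n*ln2 + log(1+r) */
}

int
main (void)
{
  printf ("%a %a\n", logf_reduction_demo (10.0f), logf (10.0f));
  return 0;
}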
*/ -#define Small16 (0x3e50) -#define QNaN (0x7ff8) +#define AbsMask 0x7fffffffffffffff +#define Half 0x3fe0000000000000 +#define One 0x3ff0000000000000 +#define PiOver2 0x1.921fb54442d18p+0 +#define Small 0x3e50000000000000 /* 2^-26. */ +#define Small16 0x3e50 +#define QNaN 0x7ff8 /* Fast implementation of double-precision asin(x) based on polynomial approximation. For x < Small, approximate asin(x) by x. Small = 2^-26 for correct rounding. For x in [Small, 0.5], use an order 11 polynomial P such that the final approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). The largest observed error in this region is 1.01 ulps, asin(0x1.da9735b5a9277p-2) got 0x1.ed78525a927efp-2 want 0x1.ed78525a927eep-2. No cheap approximation can be obtained near x = 1, since the function is not continuously differentiable on 1. For x in [0.5, 1.0], we use a method based on a trigonometric identity asin(x) = pi/2 - acos(x) and a generalized power series expansion of acos(y) near y=1, that reads as acos(y)/sqrt(2y) ~ 1 + 1/12 * y + 3/160 * y^2 + ... (1) The Taylor series of asin(z) near z = 0, reads as asin(z) ~ z + z^3 P(z^2) = z + z^3 * (1/6 + 3/40 z^2 + ...). Therefore, (1) can be written in terms of P(y/2) or even asin(y/2) acos(y) ~ sqrt(2y) (1 + y/2 * P(y/2)) = 2 * sqrt(y/2) (1 + y/2 * P(y/2) Hence, if we write z = (1-x)/2, z is near 0 when x approaches 1 and asin(x) ~ pi/2 - acos(x) ~ pi/2 - 2 * sqrt(z) (1 + z * P(z)). The largest observed error in this region is 2.69 ulps, - asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1 - want 0x1.110d7e85fdd53p-1. */ + asin(0x1.044e8cefee301p-1) got 0x1.1111dd54ddf96p-1 + want 0x1.1111dd54ddf99p-1. */ double asin (double x) { uint64_t ix = asuint64 (x); uint64_t ia = ix & AbsMask; uint64_t ia16 = ia >> 48; double ax = asdouble (ia); uint64_t sign = ix & ~AbsMask; /* Special values and invalid range. */ if (unlikely (ia16 == QNaN)) return x; if (ia > One) return __math_invalid (x); if (ia16 < Small16) return x; /* Evaluate polynomial Q(x) = y + y * z * P(z) with z2 = x ^ 2 and z = |x| , if |x| < 0.5 z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ double z2 = ax < 0.5 ? x * x : fma (-0.5, ax, 0.5); double z = ax < 0.5 ? ax : sqrt (z2); /* Use a single polynomial approximation P for both intervals. */ double z4 = z2 * z2; double z8 = z4 * z4; double z16 = z8 * z8; double p = estrin_11_f64 (z2, z4, z8, z16, __asin_poly); /* Finalize polynomial: z + z * z2 * P(z2). */ p = fma (z * z2, p, z); /* asin(|x|) = Q(|x|) , for |x| < 0.5 = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ double y = ax < 0.5 ? p : fma (-2.0, p, PiOver2); /* Copy sign. 
*/ return asdouble (asuint64 (y) | sign); } -PL_SIG (S, D, 1, asin, -1.0, 1.0) -PL_TEST_ULP (asin, 2.19) -PL_TEST_INTERVAL (asin, 0, Small, 5000) -PL_TEST_INTERVAL (asin, Small, 0.5, 50000) -PL_TEST_INTERVAL (asin, 0.5, 1.0, 50000) -PL_TEST_INTERVAL (asin, 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (asin, 0x1p11, inf, 20000) -PL_TEST_INTERVAL (asin, -0, -inf, 20000) +TEST_SIG (S, D, 1, asin, -1.0, 1.0) +TEST_ULP (asin, 2.20) +TEST_INTERVAL (asin, 0, Small, 5000) +TEST_INTERVAL (asin, Small, 0.5, 50000) +TEST_INTERVAL (asin, 0.5, 1.0, 50000) +TEST_INTERVAL (asin, 1.0, 0x1p11, 50000) +TEST_INTERVAL (asin, 0x1p11, inf, 20000) +TEST_INTERVAL (asin, -0, -inf, 20000) diff --git a/contrib/arm-optimized-routines/pl/math/asin_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/asin_data.c similarity index 94% rename from contrib/arm-optimized-routines/pl/math/asin_data.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/asin_data.c index b5517731c7f4..60ab476e7ec9 100644 --- a/contrib/arm-optimized-routines/pl/math/asin_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/asin_data.c @@ -1,19 +1,19 @@ /* * Coefficients for single-precision asin(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" /* Approximate asin(x) directly in [0x1p-106, 0.25]. See tools/asin.sollya for these coeffcients were generated. */ const double __asin_poly[] = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */ 0x1.555555555554ep-3, 0x1.3333333337233p-4, 0x1.6db6db67f6d9fp-5, 0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6, 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, 0x1.fd1151acb6bedp-8, 0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, }; diff --git a/contrib/arm-optimized-routines/pl/math/asinf_2u5.c b/contrib/arm-optimized-routines/math/aarch64/experimental/asinf_2u5.c similarity index 80% rename from contrib/arm-optimized-routines/pl/math/asinf_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/asinf_2u5.c index ec608146ff66..1136da01550e 100644 --- a/contrib/arm-optimized-routines/pl/math/asinf_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/asinf_2u5.c @@ -1,100 +1,100 @@ /* * Single-precision asin(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "poly_scalar_f32.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" -#define AbsMask (0x7fffffff) -#define Half (0x3f000000) -#define One (0x3f800000) -#define PiOver2f (0x1.921fb6p+0f) -#define Small (0x39800000) /* 2^-12. */ -#define Small12 (0x398) -#define QNaN (0x7fc) +#define AbsMask 0x7fffffff +#define Half 0x3f000000 +#define One 0x3f800000 +#define PiOver2f 0x1.921fb6p+0f +#define Small 0x39800000 /* 2^-12. */ +#define Small12 0x398 +#define QNaN 0x7fc /* Fast implementation of single-precision asin(x) based on polynomial approximation. For x < Small, approximate asin(x) by x. Small = 2^-12 for correct rounding. For x in [Small, 0.5], use order 4 polynomial P such that the final approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). 
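The |x| >= 0.5 path above leans on acos(x) = 2*asin(sqrt((1-x)/2)). A quick numeric check of the resulting identity, using only C99 libm:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 0.75;
  double z = (1 - x) / 2;                 /* = fma(-0.5, x, 0.5) above */
  double pi_over_2 = 2 * atan2 (1, 1);
  /* asin(x) = pi/2 - acos(x) = pi/2 - 2*asin(sqrt(z)). */
  printf ("%.17g %.17g\n", asin (x), pi_over_2 - 2 * asin (sqrt (z)));
  return 0;
}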
The largest observed error in this region is 0.83 ulps, asinf(0x1.ea00f4p-2) got 0x1.fef15ep-2 want 0x1.fef15cp-2. No cheap approximation can be obtained near x = 1, since the function is not continuously differentiable on 1. For x in [0.5, 1.0], we use a method based on a trigonometric identity asin(x) = pi/2 - acos(x) and a generalized power series expansion of acos(y) near y=1, that reads as acos(y)/sqrt(2y) ~ 1 + 1/12 * y + 3/160 * y^2 + ... (1) The Taylor series of asin(z) near z = 0, reads as asin(z) ~ z + z^3 P(z^2) = z + z^3 * (1/6 + 3/40 z^2 + ...). Therefore, (1) can be written in terms of P(y/2) or even asin(y/2) acos(y) ~ sqrt(2y) (1 + y/2 * P(y/2)) = 2 * sqrt(y/2) (1 + y/2 * P(y/2) Hence, if we write z = (1-x)/2, z is near 0 when x approaches 1 and asin(x) ~ pi/2 - acos(x) ~ pi/2 - 2 * sqrt(z) (1 + z * P(z)). The largest observed error in this region is 2.41 ulps, asinf(0x1.00203ep-1) got 0x1.0c3a64p-1 want 0x1.0c3a6p-1. */ float asinf (float x) { uint32_t ix = asuint (x); uint32_t ia = ix & AbsMask; uint32_t ia12 = ia >> 20; float ax = asfloat (ia); uint32_t sign = ix & ~AbsMask; /* Special values and invalid range. */ if (unlikely (ia12 == QNaN)) return x; if (ia > One) return __math_invalidf (x); if (ia12 < Small12) return x; /* Evaluate polynomial Q(x) = y + y * z * P(z) with z2 = x ^ 2 and z = |x| , if |x| < 0.5 z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ float z2 = ax < 0.5 ? x * x : fmaf (-0.5f, ax, 0.5f); float z = ax < 0.5 ? ax : sqrtf (z2); /* Use a single polynomial approximation P for both intervals. */ float p = horner_4_f32 (z2, __asinf_poly); /* Finalize polynomial: z + z * z2 * P(z2). */ p = fmaf (z * z2, p, z); /* asin(|x|) = Q(|x|) , for |x| < 0.5 = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ float y = ax < 0.5 ? p : fmaf (-2.0f, p, PiOver2f); /* Copy sign. */ return asfloat (asuint (y) | sign); } -PL_SIG (S, F, 1, asin, -1.0, 1.0) -PL_TEST_ULP (asinf, 1.91) -PL_TEST_INTERVAL (asinf, 0, Small, 5000) -PL_TEST_INTERVAL (asinf, Small, 0.5, 50000) -PL_TEST_INTERVAL (asinf, 0.5, 1.0, 50000) -PL_TEST_INTERVAL (asinf, 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (asinf, 0x1p11, inf, 20000) -PL_TEST_INTERVAL (asinf, -0, -inf, 20000) +TEST_SIG (S, F, 1, asin, -1.0, 1.0) +TEST_ULP (asinf, 1.91) +TEST_INTERVAL (asinf, 0, Small, 5000) +TEST_INTERVAL (asinf, Small, 0.5, 50000) +TEST_INTERVAL (asinf, 0.5, 1.0, 50000) +TEST_INTERVAL (asinf, 1.0, 0x1p11, 50000) +TEST_INTERVAL (asinf, 0x1p11, inf, 20000) +TEST_INTERVAL (asinf, -0, -inf, 20000) diff --git a/contrib/arm-optimized-routines/pl/math/asinf_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/asinf_data.c similarity index 92% rename from contrib/arm-optimized-routines/pl/math/asinf_data.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/asinf_data.c index 1652025e2920..15f331dde5a7 100644 --- a/contrib/arm-optimized-routines/pl/math/asinf_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/asinf_data.c @@ -1,16 +1,16 @@ /* * Coefficients for single-precision asin(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" /* Approximate asinf(x) directly in [0x1p-24, 0.25]. See for tools/asinf.sollya for these coeffs were generated. */ const float __asinf_poly[] = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . 
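Both asin and asinf finish by OR-ing the original sign bit back onto a result computed from |x|, which is valid because asin is odd. A stand-alone sketch of that idiom; copy_sign_bit is an illustrative name.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* sign = asuint(x) & 0x80000000, as computed in the routines above. */
static float
copy_sign_bit (float y, uint32_t sign)
{
  uint32_t iy;
  memcpy (&iy, &y, 4);
  iy |= sign;                   /* transfer the sign of x onto y */
  memcpy (&y, &iy, 4);
  return y;
}

int
main (void)
{
  printf ("%g\n", copy_sign_bit (0.5f, 0x80000000u)); /* -0.5 */
  return 0;
}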
*/ 0x1.55555ep-3, 0x1.33261ap-4, 0x1.70d7dcp-5, 0x1.b059dp-6, 0x1.3af7d8p-5, }; diff --git a/contrib/arm-optimized-routines/pl/math/asinh_2u5.c b/contrib/arm-optimized-routines/math/aarch64/experimental/asinh_2u5.c similarity index 75% rename from contrib/arm-optimized-routines/pl/math/asinh_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/asinh_2u5.c index b7fc81a2b94f..9d2d160a1453 100644 --- a/contrib/arm-optimized-routines/pl/math/asinh_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/asinh_2u5.c @@ -1,85 +1,82 @@ /* * Double-precision asinh(x) function * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +#include "mathlib.h" #include "poly_scalar_f64.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define AbsMask 0x7fffffffffffffff #define ExpM26 0x3e50000000000000 /* asuint64(0x1.0p-26). */ #define One 0x3ff0000000000000 /* asuint64(1.0). */ #define Exp511 0x5fe0000000000000 /* asuint64(0x1.0p511). */ #define Ln2 0x1.62e42fefa39efp-1 -double -optr_aor_log_f64 (double); - /* Scalar double-precision asinh implementation. This routine uses different approaches on different intervals: |x| < 2^-26: Return x. Function is exact in this region. |x| < 1: Use custom order-17 polynomial. This is least accurate close to 1. The largest observed error in this region is 1.47 ULPs: asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1 want 0x1.c1d6bf874019cp-1. |x| < 2^511: Upper bound of this region is close to sqrt(DBL_MAX). Calculate the result directly using the definition asinh(x) = ln(x + sqrt(x*x + 1)). The largest observed error in this region is 2.03 ULPs: asinh(-0x1.00094e0f39574p+0) got -0x1.c3508eb6a681ep-1 want -0x1.c3508eb6a682p-1. |x| >= 2^511: We cannot square x without overflow at a low cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot even double x without overflow, so calculate this as ln(x) + ln(2). The largest observed error in this region is 0.98 ULPs at many values, for instance: asinh(0x1.5255a4cf10319p+975) got 0x1.52652f4cb26cbp+9 want 0x1.52652f4cb26ccp+9. 
*/ double asinh (double x) { uint64_t ix = asuint64 (x); uint64_t ia = ix & AbsMask; double ax = asdouble (ia); uint64_t sign = ix & ~AbsMask; if (ia < ExpM26) { return x; } if (ia < One) { double x2 = x * x; double z2 = x2 * x2; double z4 = z2 * z2; double z8 = z4 * z4; double p = estrin_17_f64 (x2, z2, z4, z8, z8 * z8, __asinh_data.poly); double y = fma (p, x2 * ax, ax); return asdouble (asuint64 (y) | sign); } if (unlikely (ia >= Exp511)) { - return asdouble (asuint64 (optr_aor_log_f64 (ax) + Ln2) | sign); + return asdouble (asuint64 (log (ax) + Ln2) | sign); } - return asdouble (asuint64 (optr_aor_log_f64 (ax + sqrt (ax * ax + 1))) - | sign); + return asdouble (asuint64 (log (ax + sqrt (ax * ax + 1))) | sign); } -PL_SIG (S, D, 1, asinh, -10.0, 10.0) -PL_TEST_ULP (asinh, 1.54) -PL_TEST_INTERVAL (asinh, -0x1p-26, 0x1p-26, 50000) -PL_TEST_INTERVAL (asinh, 0x1p-26, 1.0, 40000) -PL_TEST_INTERVAL (asinh, -0x1p-26, -1.0, 10000) -PL_TEST_INTERVAL (asinh, 1.0, 100.0, 40000) -PL_TEST_INTERVAL (asinh, -1.0, -100.0, 10000) -PL_TEST_INTERVAL (asinh, 100.0, inf, 50000) -PL_TEST_INTERVAL (asinh, -100.0, -inf, 10000) +TEST_SIG (S, D, 1, asinh, -10.0, 10.0) +TEST_ULP (asinh, 1.54) +TEST_INTERVAL (asinh, -0x1p-26, 0x1p-26, 50000) +TEST_INTERVAL (asinh, 0x1p-26, 1.0, 40000) +TEST_INTERVAL (asinh, -0x1p-26, -1.0, 10000) +TEST_INTERVAL (asinh, 1.0, 100.0, 40000) +TEST_INTERVAL (asinh, -1.0, -100.0, 10000) +TEST_INTERVAL (asinh, 100.0, inf, 50000) +TEST_INTERVAL (asinh, -100.0, -inf, 10000) diff --git a/contrib/arm-optimized-routines/pl/math/asinh_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/asinh_data.c similarity index 51% rename from contrib/arm-optimized-routines/pl/math/asinh_data.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/asinh_data.c index 073b19799bda..7afaf6960130 100644 --- a/contrib/arm-optimized-routines/pl/math/asinh_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/asinh_data.c @@ -1,22 +1,23 @@ /* * Double-precision polynomial coefficients for scalar asinh(x) * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" /* asinh(x) is odd, and the first term of the Taylor expansion is x, so we can approximate the function by x + x^3 * P(x^2), where P(z) has the form: C0 + C1 * z + C2 * z^2 + C3 * z^3 + ... Note P is evaluated on even powers of x only. See tools/asinh.sollya for the algorithm used to generate these coefficients. 
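A numeric illustration of the |x| >= 2^511 branch above, assuming C99 libm: once x*x would overflow, asinh(x) collapses to ln(2x), computed as ln(x) + ln(2) because doubling x itself may be unrepresentable at the very top of the range.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 0x1p600;                           /* x*x would overflow */
  double ln2 = 0x1.62e42fefa39efp-1;
  printf ("%a %a\n", log (x) + ln2, asinh (x)); /* agree to ~1 ULP */
  return 0;
}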
*/ const struct asinh_data __asinh_data - = {.poly - = {-0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5, - 0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6, - -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7, - 0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8, - -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11, - 0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14, 0x1.93d4ba83d34dap-18}}; + = { .poly + = { -0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5, + 0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6, + -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7, + 0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8, + -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11, + 0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14, + 0x1.93d4ba83d34dap-18 } }; diff --git a/contrib/arm-optimized-routines/pl/math/asinhf_3u5.c b/contrib/arm-optimized-routines/math/aarch64/experimental/asinhf_3u5.c similarity index 77% rename from contrib/arm-optimized-routines/pl/math/asinhf_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/asinhf_3u5.c index ec26b80ec2ec..92c6dfd9b43d 100644 --- a/contrib/arm-optimized-routines/pl/math/asinhf_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/asinhf_3u5.c @@ -1,76 +1,73 @@ /* * Single-precision asinh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "poly_scalar_f32.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define AbsMask (0x7fffffff) #define SqrtFltMax (0x1.749e96p+10f) #define Ln2 (0x1.62e4p-1f) #define One (0x3f8) #define ExpM12 (0x398) -float -optr_aor_log_f32 (float); - /* asinhf approximation using a variety of approaches on different intervals: |x| < 2^-12: Return x. Function is exactly rounded in this region. |x| < 1.0: Use custom order-8 polynomial. The largest observed error in this region is 1.3ulps: asinhf(0x1.f0f74cp-1) got 0x1.b88de4p-1 want 0x1.b88de2p-1. |x| <= SqrtFltMax: Calculate the result directly using the definition of asinh(x) = ln(x + sqrt(x*x + 1)). The largest observed error in this region is 1.99ulps. asinhf(0x1.00e358p+0) got 0x1.c4849ep-1 want 0x1.c484a2p-1. |x| > SqrtFltMax: We cannot square x without overflow at a low cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot even double x without overflow, so calculate this as ln(x) + ln(2). This largest observed error in this region is 3.39ulps. asinhf(0x1.749e9ep+10) got 0x1.fffff8p+2 want 0x1.fffffep+2. 
*/ float asinhf (float x) { uint32_t ix = asuint (x); uint32_t ia = ix & AbsMask; uint32_t ia12 = ia >> 20; float ax = asfloat (ia); uint32_t sign = ix & ~AbsMask; if (unlikely (ia12 < ExpM12 || ia == 0x7f800000)) return x; if (unlikely (ia12 >= 0x7f8)) return __math_invalidf (x); if (ia12 < One) { float x2 = ax * ax; float p = estrin_7_f32 (ax, x2, x2 * x2, __asinhf_data.coeffs); float y = fmaf (x2, p, ax); return asfloat (asuint (y) | sign); } if (unlikely (ax > SqrtFltMax)) { - return asfloat (asuint (optr_aor_log_f32 (ax) + Ln2) | sign); + return asfloat (asuint (logf (ax) + Ln2) | sign); } - return asfloat (asuint (optr_aor_log_f32 (ax + sqrtf (ax * ax + 1))) | sign); + return asfloat (asuint (logf (ax + sqrtf (ax * ax + 1))) | sign); } -PL_SIG (S, F, 1, asinh, -10.0, 10.0) -PL_TEST_ULP (asinhf, 2.9) -PL_TEST_INTERVAL (asinhf, 0, 0x1p-12, 5000) -PL_TEST_INTERVAL (asinhf, 0x1p-12, 1.0, 50000) -PL_TEST_INTERVAL (asinhf, 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (asinhf, 0x1p11, 0x1p127, 20000) +TEST_SIG (S, F, 1, asinh, -10.0, 10.0) +TEST_ULP (asinhf, 2.9) +TEST_INTERVAL (asinhf, 0, 0x1p-12, 5000) +TEST_INTERVAL (asinhf, 0x1p-12, 1.0, 50000) +TEST_INTERVAL (asinhf, 1.0, 0x1p11, 50000) +TEST_INTERVAL (asinhf, 0x1p11, 0x1p127, 20000) diff --git a/contrib/arm-optimized-routines/math/aarch64/experimental/asinhf_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/asinhf_data.c new file mode 100644 index 000000000000..5ed261ba835b --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/asinhf_data.c @@ -0,0 +1,15 @@ +/* + * Coefficients for single-precision asinh(x) function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +/* Approximate asinhf(x) directly in [2^-12, 1]. See for tools/asinhf.sollya + for these coeffs were generated. */ +const struct asinhf_data __asinhf_data + = { .coeffs = { -0x1.9b16fap-19f, -0x1.552baap-3f, -0x1.4e572ap-11f, + 0x1.3a81dcp-4f, 0x1.65bbaap-10f, -0x1.057f1p-4f, + 0x1.6c1d46p-5f, -0x1.4cafe8p-7f } }; diff --git a/contrib/arm-optimized-routines/pl/math/atan2_2u5.c b/contrib/arm-optimized-routines/math/aarch64/experimental/atan2_2u5.c similarity index 91% rename from contrib/arm-optimized-routines/pl/math/atan2_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/atan2_2u5.c index c909ac99fa22..518e34589e5b 100644 --- a/contrib/arm-optimized-routines/pl/math/atan2_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/atan2_2u5.c @@ -1,159 +1,159 @@ /* * Double-precision scalar atan2(x) function. * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include "atan_common.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define Pi (0x1.921fb54442d18p+1) #define PiOver2 (0x1.921fb54442d18p+0) #define PiOver4 (0x1.921fb54442d18p-1) #define SignMask (0x8000000000000000) #define ExpMask (0x7ff0000000000000) /* We calculate atan2 by P(n/d), where n and d are similar to the input arguments, and P is a polynomial. Evaluating P(x) requires calculating x^8, which may underflow if n and d have very different magnitude. POW8_EXP_UFLOW_BOUND is the lower bound of the difference in exponents of n and d for which P underflows, and is used to special-case such inputs. 
*/ #define POW8_EXP_UFLOW_BOUND 62 static inline int64_t biased_exponent (double f) { uint64_t fi = asuint64 (f); return (fi & ExpMask) >> 52; } /* Fast implementation of scalar atan2. Largest errors are when y and x are close together. The greatest observed error is 2.28 ULP: atan2(-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732) got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */ double atan2 (double y, double x) { uint64_t ix = asuint64 (x); uint64_t iy = asuint64 (y); uint64_t sign_x = ix & SignMask; uint64_t sign_y = iy & SignMask; uint64_t iax = ix & ~SignMask; uint64_t iay = iy & ~SignMask; bool xisnan = isnan (x); if (unlikely (isnan (y) && !xisnan)) return __math_invalid (y); if (unlikely (xisnan)) return __math_invalid (x); /* m = 2 * sign(x) + sign(y). */ uint32_t m = ((iy >> 63) & 1) | ((ix >> 62) & 2); int64_t exp_diff = biased_exponent (x) - biased_exponent (y); /* y = 0. */ if (iay == 0) { switch (m) { case 0: case 1: return y; /* atan(+-0,+anything)=+-0. */ case 2: return Pi; /* atan(+0,-anything) = pi. */ case 3: return -Pi; /* atan(-0,-anything) =-pi. */ } } /* Special case for (x, y) either on or very close to the y axis. Either x = 0, or y is much larger than x (difference in exponents >= POW8_EXP_UFLOW_BOUND). */ if (unlikely (iax == 0 || exp_diff <= -POW8_EXP_UFLOW_BOUND)) return sign_y ? -PiOver2 : PiOver2; - /* Special case for either x is INF or (x, y) is very close to x axis and x is - negative. */ + /* Special case for either x is INF or (x, y) is very close to x axis and x + is negative. */ if (unlikely (iax == 0x7ff0000000000000 || (exp_diff >= POW8_EXP_UFLOW_BOUND && m >= 2))) { if (iay == 0x7ff0000000000000) { switch (m) { case 0: return PiOver4; /* atan(+INF,+INF). */ case 1: return -PiOver4; /* atan(-INF,+INF). */ case 2: return 3.0 * PiOver4; /* atan(+INF,-INF). */ case 3: return -3.0 * PiOver4; /* atan(-INF,-INF). */ } } else { switch (m) { case 0: return 0.0; /* atan(+...,+INF). */ case 1: return -0.0; /* atan(-...,+INF). */ case 2: return Pi; /* atan(+...,-INF). */ case 3: return -Pi; /* atan(-...,-INF). */ } } } /* y is INF. */ if (iay == 0x7ff0000000000000) return sign_y ? -PiOver2 : PiOver2; uint64_t sign_xy = sign_x ^ sign_y; double ax = asdouble (iax); double ay = asdouble (iay); uint64_t pred_aygtax = (ay > ax); /* Set up z for call to atan. */ double n = pred_aygtax ? -ax : ay; double d = pred_aygtax ? ay : ax; double z = n / d; double ret; if (unlikely (m < 2 && exp_diff >= POW8_EXP_UFLOW_BOUND)) { /* If (x, y) is very close to x axis and x is positive, the polynomial will underflow and evaluate to z. */ ret = z; } else { /* Work out the correct shift. */ double shift = sign_x ? -2.0 : 0.0; shift = pred_aygtax ? shift + 1.0 : shift; shift *= PiOver2; ret = eval_poly (z, z, shift); } /* Account for the sign of x and y. */ return asdouble (asuint64 (ret) ^ sign_xy); } /* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. 
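The quadrant handling above hinges on m = 2*sign(x) + sign(y), built from raw sign bits so that -0.0 classifies like a negative. A sketch; quadrant is a made-up name.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* m = 0: x>=0, y>=0   m = 1: x>=0, y<0
   m = 2: x<0,  y>=0   m = 3: x<0,  y<0
   (-0.0 counts as negative, since only the sign bit is read). */
static uint32_t
quadrant (double x, double y)
{
  uint64_t ix, iy;
  memcpy (&ix, &x, 8);
  memcpy (&iy, &y, 8);
  return ((iy >> 63) & 1) | ((ix >> 62) & 2);
}

int
main (void)
{
  printf ("%u %u %u %u\n", quadrant (1, 1), quadrant (1, -1),
          quadrant (-1, 1), quadrant (-1, -1));  /* 0 1 2 3 */
  return 0;
}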
*/ -PL_SIG (S, D, 2, atan2) -PL_TEST_ULP (atan2, 1.78) -PL_TEST_INTERVAL (atan2, -10.0, 10.0, 50000) -PL_TEST_INTERVAL (atan2, -1.0, 1.0, 40000) -PL_TEST_INTERVAL (atan2, 0.0, 1.0, 40000) -PL_TEST_INTERVAL (atan2, 1.0, 100.0, 40000) -PL_TEST_INTERVAL (atan2, 1e6, 1e32, 40000) +TEST_SIG (S, D, 2, atan2) +TEST_ULP (atan2, 1.78) +TEST_INTERVAL (atan2, -10.0, 10.0, 50000) +TEST_INTERVAL (atan2, -1.0, 1.0, 40000) +TEST_INTERVAL (atan2, 0.0, 1.0, 40000) +TEST_INTERVAL (atan2, 1.0, 100.0, 40000) +TEST_INTERVAL (atan2, 1e6, 1e32, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/atan2f_3u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/atan2f_3u.c similarity index 90% rename from contrib/arm-optimized-routines/pl/math/atan2f_3u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/atan2f_3u.c index 38e1df59c102..245ba551566c 100644 --- a/contrib/arm-optimized-routines/pl/math/atan2f_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/atan2f_3u.c @@ -1,167 +1,167 @@ /* * Single-precision scalar atan2(x) function. * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include "atanf_common.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define Pi (0x1.921fb6p+1f) #define PiOver2 (0x1.921fb6p+0f) #define PiOver4 (0x1.921fb6p-1f) #define SignMask (0x80000000) /* We calculate atan2f by P(n/d), where n and d are similar to the input arguments, and P is a polynomial. The polynomial may underflow. - POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d - for which P underflows, and is used to special-case such inputs. */ + POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and + d for which P underflows, and is used to special-case such inputs. */ #define POLY_UFLOW_BOUND 24 static inline int32_t biased_exponent (float f) { uint32_t fi = asuint (f); int32_t ex = (int32_t) ((fi & 0x7f800000) >> 23); if (unlikely (ex == 0)) { /* Subnormal case - we still need to get the exponent right for subnormal numbers as division may take us back inside the normal range. */ return ex - __builtin_clz (fi << 9); } return ex; } /* Fast implementation of scalar atan2f. Largest observed error is 2.88ulps in [99.0, 101.0] x [99.0, 101.0]: atan2f(0x1.9332d8p+6, 0x1.8cb6c4p+6) got 0x1.964646p-1 want 0x1.964640p-1. */ float atan2f (float y, float x) { uint32_t ix = asuint (x); uint32_t iy = asuint (y); uint32_t sign_x = ix & SignMask; uint32_t sign_y = iy & SignMask; uint32_t iax = ix & ~SignMask; uint32_t iay = iy & ~SignMask; /* x or y is NaN. */ if ((iax > 0x7f800000) || (iay > 0x7f800000)) return x + y; /* m = 2 * sign(x) + sign(y). */ uint32_t m = ((iy >> 31) & 1) | ((ix >> 30) & 2); /* The following follows glibc ieee754 implementation, except that we do not use +-tiny shifts (non-nearest rounding mode). */ int32_t exp_diff = biased_exponent (x) - biased_exponent (y); /* Special case for (x, y) either on or very close to the x axis. Either y = 0, or y is tiny and x is huge (difference in exponents >= POLY_UFLOW_BOUND). In the second case, we only want to use this special case when x is negative (i.e. quadrants 2 or 3). */ if (unlikely (iay == 0 || (exp_diff >= POLY_UFLOW_BOUND && m >= 2))) { switch (m) { case 0: case 1: return y; /* atan(+-0,+anything)=+-0. */ case 2: return Pi; /* atan(+0,-anything) = pi. 
*/ case 3: return -Pi; /* atan(-0,-anything) =-pi. */ } } /* Special case for (x, y) either on or very close to the y axis. Either x = 0, or x is tiny and y is huge (difference in exponents >= POLY_UFLOW_BOUND). */ if (unlikely (iax == 0 || exp_diff <= -POLY_UFLOW_BOUND)) return sign_y ? -PiOver2 : PiOver2; /* x is INF. */ if (iax == 0x7f800000) { if (iay == 0x7f800000) { switch (m) { case 0: return PiOver4; /* atan(+INF,+INF). */ case 1: return -PiOver4; /* atan(-INF,+INF). */ case 2: return 3.0f * PiOver4; /* atan(+INF,-INF). */ case 3: return -3.0f * PiOver4; /* atan(-INF,-INF). */ } } else { switch (m) { case 0: return 0.0f; /* atan(+...,+INF). */ case 1: return -0.0f; /* atan(-...,+INF). */ case 2: return Pi; /* atan(+...,-INF). */ case 3: return -Pi; /* atan(-...,-INF). */ } } } /* y is INF. */ if (iay == 0x7f800000) return sign_y ? -PiOver2 : PiOver2; uint32_t sign_xy = sign_x ^ sign_y; float ax = asfloat (iax); float ay = asfloat (iay); bool pred_aygtax = (ay > ax); /* Set up z for call to atanf. */ float n = pred_aygtax ? -ax : ay; float d = pred_aygtax ? ay : ax; float z = n / d; float ret; if (unlikely (m < 2 && exp_diff >= POLY_UFLOW_BOUND)) { /* If (x, y) is very close to x axis and x is positive, the polynomial will underflow and evaluate to z. */ ret = z; } else { /* Work out the correct shift. */ float shift = sign_x ? -2.0f : 0.0f; shift = pred_aygtax ? shift + 1.0f : shift; shift *= PiOver2; ret = eval_poly (z, z, shift); } /* Account for the sign of x and y. */ return asfloat (asuint (ret) ^ sign_xy); } /* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */ -PL_SIG (S, F, 2, atan2) -PL_TEST_ULP (atan2f, 2.4) -PL_TEST_INTERVAL (atan2f, -10.0, 10.0, 50000) -PL_TEST_INTERVAL (atan2f, -1.0, 1.0, 40000) -PL_TEST_INTERVAL (atan2f, 0.0, 1.0, 40000) -PL_TEST_INTERVAL (atan2f, 1.0, 100.0, 40000) -PL_TEST_INTERVAL (atan2f, 1e6, 1e32, 40000) +TEST_SIG (S, F, 2, atan2) +TEST_ULP (atan2f, 2.4) +TEST_INTERVAL (atan2f, -10.0, 10.0, 50000) +TEST_INTERVAL (atan2f, -1.0, 1.0, 40000) +TEST_INTERVAL (atan2f, 0.0, 1.0, 40000) +TEST_INTERVAL (atan2f, 1.0, 100.0, 40000) +TEST_INTERVAL (atan2f, 1e6, 1e32, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/atan_2u5.c b/contrib/arm-optimized-routines/math/aarch64/experimental/atan_2u5.c similarity index 79% rename from contrib/arm-optimized-routines/pl/math/atan_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/atan_2u5.c index ee4770101758..9c9c77d98cd3 100644 --- a/contrib/arm-optimized-routines/pl/math/atan_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/atan_2u5.c @@ -1,73 +1,73 @@ /* * Double-precision atan(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #include "atan_common.h" #define AbsMask 0x7fffffffffffffff #define PiOver2 0x1.921fb54442d18p+0 #define TinyBound 0x3e1 /* top12(asuint64(0x1p-30)). */ #define BigBound 0x434 /* top12(asuint64(0x1p53)). */ #define OneTop 0x3ff /* Fast implementation of double-precision atan. Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Maximum observed error is 2.27 ulps: atan(0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1 want 0x1.9225645bdd7c3p-1. 
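The subnormal branch of biased_exponent above deserves a note: shifting out the sign and exponent bits (fi << 9) and counting leading zeros recovers the exponent the value would have if it could be normalized, which keeps the exp_diff tests meaningful after division. A boundary check; biased_exponent_demo is illustrative, and __builtin_clz is the same GCC/Clang builtin the source uses (undefined for a zero argument, so f must be nonzero here).

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static int32_t
biased_exponent_demo (float f)
{
  uint32_t fi;
  memcpy (&fi, &f, 4);
  int32_t ex = (int32_t) ((fi & 0x7f800000) >> 23);
  if (ex == 0)                        /* subnormal */
    return ex - __builtin_clz (fi << 9);
  return ex;
}

int
main (void)
{
  /* 0x1p-126 is the smallest normal (biased exponent 1); 0x1p-127 is
     subnormal and normalizes to biased exponent 0. */
  printf ("%d %d\n", biased_exponent_demo (0x1p-126f),
          biased_exponent_demo (0x1p-127f));   /* 1 0 */
  return 0;
}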
*/ double atan (double x) { uint64_t ix = asuint64 (x); uint64_t sign = ix & ~AbsMask; uint64_t ia = ix & AbsMask; uint32_t ia12 = ia >> 52; if (unlikely (ia12 >= BigBound || ia12 < TinyBound)) { if (ia12 < TinyBound) /* Avoid underflow by returning x. */ return x; if (ia > 0x7ff0000000000000) /* Propagate NaN. */ return __math_invalid (x); /* atan(x) rounds to PiOver2 for large x. */ return asdouble (asuint64 (PiOver2) ^ sign); } double z, az, shift; if (ia12 >= OneTop) { /* For x > 1, use atan(x) = pi / 2 + atan(-1 / x). */ z = -1.0 / x; shift = PiOver2; /* Use absolute value only when needed (odd powers of z). */ az = -fabs (z); } else { /* For x < 1, approximate atan(x) directly. */ z = x; shift = 0; az = asdouble (ia); } /* Calculate polynomial, shift + z + z^3 * P(z^2). */ double y = eval_poly (z, az, shift); /* Copy sign. */ return asdouble (asuint64 (y) ^ sign); } -PL_SIG (S, D, 1, atan, -10.0, 10.0) -PL_TEST_ULP (atan, 1.78) -PL_TEST_INTERVAL (atan, 0, 0x1p-30, 10000) -PL_TEST_INTERVAL (atan, -0, -0x1p-30, 1000) -PL_TEST_INTERVAL (atan, 0x1p-30, 0x1p53, 900000) -PL_TEST_INTERVAL (atan, -0x1p-30, -0x1p53, 90000) -PL_TEST_INTERVAL (atan, 0x1p53, inf, 10000) -PL_TEST_INTERVAL (atan, -0x1p53, -inf, 1000) +TEST_SIG (S, D, 1, atan, -10.0, 10.0) +TEST_ULP (atan, 1.78) +TEST_INTERVAL (atan, 0, 0x1p-30, 10000) +TEST_INTERVAL (atan, -0, -0x1p-30, 1000) +TEST_INTERVAL (atan, 0x1p-30, 0x1p53, 900000) +TEST_INTERVAL (atan, -0x1p-30, -0x1p53, 90000) +TEST_INTERVAL (atan, 0x1p53, inf, 10000) +TEST_INTERVAL (atan, -0x1p53, -inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/atan_common.h b/contrib/arm-optimized-routines/math/aarch64/experimental/atan_common.h similarity index 95% rename from contrib/arm-optimized-routines/pl/math/atan_common.h rename to contrib/arm-optimized-routines/math/aarch64/experimental/atan_common.h index 798cc22cc40a..1fd83860219b 100644 --- a/contrib/arm-optimized-routines/pl/math/atan_common.h +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/atan_common.h @@ -1,33 +1,33 @@ /* * Double-precision polynomial evaluation function for scalar * atan(x) and atan2(y,x). * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #include "poly_scalar_f64.h" /* Polynomial used in fast atan(x) and atan2(y,x) implementations The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */ static inline double eval_poly (double z, double az, double shift) { /* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of full scheme to avoid underflow in x^16. */ double z2 = z * z; double x2 = z2 * z2; double x4 = x2 * x2; double x8 = x4 * x4; double y = fma (estrin_11_f64 (z2, x2, x4, x8, __atan_poly_data.poly + 8), x8, estrin_7_f64 (z2, x2, x4, __atan_poly_data.poly)); /* Finalize. y = shift + z + z^3 * P(z^2). */ y = fma (y, z2 * az, az); y = y + shift; return y; } #undef P diff --git a/contrib/arm-optimized-routines/math/aarch64/experimental/atan_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/atan_data.c new file mode 100644 index 000000000000..5d24fa912d02 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/atan_data.c @@ -0,0 +1,23 @@ +/* + * Double-precision polynomial coefficients for vector atan(x) and atan2(y,x). + * + * Copyright (c) 2019-2024, Arm Limited. 
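Quick numeric check of the reduction above for |x| > 1, using C99 libm: atan(x) = pi/2 + atan(-1/x) for positive x, which maps the polynomial argument back into [0, 1].

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 3.0;
  double pi_over_2 = 0x1.921fb54442d18p+0;   /* PiOver2 above */
  printf ("%.17g %.17g\n", atan (x), pi_over_2 + atan (-1.0 / x));
  return 0;
}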
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +const struct atan_poly_data __atan_poly_data + = { .poly = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) + on [2**-1022, 1.0]. See atan.sollya for details of how + these were generated. */ + -0x1.5555555555555p-2, 0x1.99999999996c1p-3, + -0x1.2492492478f88p-3, 0x1.c71c71bc3951cp-4, + -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, + -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, + -0x1.aebfe7b418581p-5, 0x1.842dbe9b0d916p-5, + -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, + -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, + -0x1.0051381722a59p-6, 0x1.14e9dc19a4a4ep-7, + -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, + -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16 } }; diff --git a/contrib/arm-optimized-routines/pl/math/atanf_2u9.c b/contrib/arm-optimized-routines/math/aarch64/experimental/atanf_2u9.c similarity index 82% rename from contrib/arm-optimized-routines/pl/math/atanf_2u9.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/atanf_2u9.c index ba6f68089de1..518415ded634 100644 --- a/contrib/arm-optimized-routines/pl/math/atanf_2u9.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/atanf_2u9.c @@ -1,72 +1,72 @@ /* * Single-precision atan(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "atanf_common.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define PiOver2 0x1.921fb6p+0f #define AbsMask 0x7fffffff #define TinyBound 0x30800000 /* asuint(0x1p-30). */ #define BigBound 0x4e800000 /* asuint(0x1p30). */ #define One 0x3f800000 /* Approximation of single-precision atan(x) based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=-1/x and shift = pi/2. Maximum error is 2.88 ulps: atanf(0x1.0565ccp+0) got 0x1.97771p-1 want 0x1.97770ap-1. */ float atanf (float x) { uint32_t ix = asuint (x); uint32_t sign = ix & ~AbsMask; uint32_t ia = ix & AbsMask; if (unlikely (ia < TinyBound)) /* Avoid underflow by returning x. */ return x; if (unlikely (ia > BigBound)) { if (ia > 0x7f800000) /* Propagate NaN. */ return __math_invalidf (x); /* atan(x) rounds to PiOver2 for large x. */ return asfloat (asuint (PiOver2) ^ sign); } float z, az, shift; if (ia > One) { /* For x > 1, use atan(x) = pi / 2 + atan(-1 / x). */ z = -1.0f / x; shift = PiOver2; /* Use absolute value only when needed (odd powers of z). */ az = -fabsf (z); } else { /* For x < 1, approximate atan(x) directly. */ z = x; az = asfloat (ia); shift = 0; } /* Calculate polynomial, shift + z + z^3 * P(z^2). */ float y = eval_poly (z, az, shift); /* Copy sign. 
*/ return asfloat (asuint (y) ^ sign); } -PL_SIG (S, F, 1, atan, -10.0, 10.0) -PL_TEST_ULP (atanf, 2.38) -PL_TEST_SYM_INTERVAL (atanf, 0, 0x1p-30, 5000) -PL_TEST_SYM_INTERVAL (atanf, 0x1p-30, 1, 40000) -PL_TEST_SYM_INTERVAL (atanf, 1, 0x1p30, 40000) -PL_TEST_SYM_INTERVAL (atanf, 0x1p30, inf, 1000) +TEST_SIG (S, F, 1, atan, -10.0, 10.0) +TEST_ULP (atanf, 2.38) +TEST_SYM_INTERVAL (atanf, 0, 0x1p-30, 5000) +TEST_SYM_INTERVAL (atanf, 0x1p-30, 1, 40000) +TEST_SYM_INTERVAL (atanf, 1, 0x1p30, 40000) +TEST_SYM_INTERVAL (atanf, 0x1p30, inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/atanf_common.h b/contrib/arm-optimized-routines/math/aarch64/experimental/atanf_common.h similarity index 96% rename from contrib/arm-optimized-routines/pl/math/atanf_common.h rename to contrib/arm-optimized-routines/math/aarch64/experimental/atanf_common.h index 8952e7e0078b..3e6542047309 100644 --- a/contrib/arm-optimized-routines/pl/math/atanf_common.h +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/atanf_common.h @@ -1,38 +1,38 @@ /* * Single-precision polynomial evaluation function for scalar * atan(x) and atan2(y,x). * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef PL_MATH_ATANF_COMMON_H #define PL_MATH_ATANF_COMMON_H #include "math_config.h" #include "poly_scalar_f32.h" /* Polynomial used in fast atanf(x) and atan2f(y,x) implementations The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */ static inline float eval_poly (float z, float az, float shift) { /* Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However, a standard implementation using z8 creates spurious underflow in the very last fma (when z^8 is small enough). Therefore, we split the last fma into a mul and and an fma. Horner and single-level Estrin have higher errors that exceed threshold. */ float z2 = z * z; float z4 = z2 * z2; /* Then assemble polynomial. */ float y = fmaf ( z4, z4 * pairwise_poly_3_f32 (z2, z4, __atanf_poly_data.poly + 4), pairwise_poly_3_f32 (z2, z4, __atanf_poly_data.poly)); /* Finalize: y = shift + z * P(z^2). */ return fmaf (y, z2 * az, az) + shift; } #endif // PL_MATH_ATANF_COMMON_H diff --git a/contrib/arm-optimized-routines/math/aarch64/experimental/atanf_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/atanf_data.c new file mode 100644 index 000000000000..f4d607c2a12d --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/atanf_data.c @@ -0,0 +1,17 @@ +/* + * Single-precision polynomial coefficients for vector atan(x) and atan2(y,x). + * + * Copyright (c) 2019-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. + */ +const struct atanf_poly_data __atanf_poly_data + = { .poly + = { /* See atanf.sollya for details of how these were generated. 
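The comment in eval_poly above (split the last fma into a mul and an fma) can be demonstrated in isolation. A sketch, assuming a compiler that honours IEEE exception flags (build at -O0, no fast-math); combine_naive/combine_split are made-up names and hi/lo stand for the two pairwise-polynomial halves.

#include <fenv.h>
#include <math.h>
#include <stdio.h>

static float
combine_naive (float z4, float hi, float lo)
{
  float z8 = z4 * z4;             /* 2^-240 here: underflows, sets flag */
  return fmaf (z8, hi, lo);
}

static float
combine_split (float z4, float hi, float lo)
{
  /* The doubly-tiny product z4*(z4*hi) only exists inside the fma,
     where it contributes exactly without an intermediate rounding. */
  return fmaf (z4, z4 * hi, lo);
}

int
main (void)
{
  volatile float z4 = 0x1p-120f;  /* volatile: defeat constant folding */
  float hi = -0x1.5p-4f, lo = -0x1.55p-2f;

  feclearexcept (FE_ALL_EXCEPT);
  combine_naive (z4, hi, lo);
  int naive_uf = !!fetestexcept (FE_UNDERFLOW);

  feclearexcept (FE_ALL_EXCEPT);
  combine_split (z4, hi, lo);
  int split_uf = !!fetestexcept (FE_UNDERFLOW);

  printf ("naive: %d  split: %d\n", naive_uf, split_uf); /* 1 0 */
  return 0;
}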
*/ + -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, + -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, + 0x1.01fd88p-8f } }; diff --git a/contrib/arm-optimized-routines/pl/math/atanh_3u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/atanh_3u.c similarity index 88% rename from contrib/arm-optimized-routines/pl/math/atanh_3u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/atanh_3u.c index dcfbe8192a22..d01b8bacd46a 100644 --- a/contrib/arm-optimized-routines/pl/math/atanh_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/atanh_3u.c @@ -1,83 +1,83 @@ /* * Double-precision atanh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #include "poly_scalar_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define AbsMask 0x7fffffffffffffff #define Half 0x3fe0000000000000 #define One 0x3ff0000000000000 #define Ln2Hi 0x1.62e42fefa3800p-1 #define Ln2Lo 0x1.ef35793c76730p-45 -#define OneMHfRt2Top \ +#define OneMHfRt2Top \ 0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)). */ #define OneTop12 0x3ff #define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)). */ #define BottomMask 0xffffffff static inline double log1p_inline (double x) { /* Helper for calculating log(1 + x) using order-18 polynomial on a reduced interval. Copied from log1p_2u.c, with no special-case handling. See that file for details of the algorithm. */ double m = x + 1; uint64_t mi = asuint64 (m); /* Decompose x + 1 into (f + 1) * 2^k, with k chosen such that f is in [sqrt(2)/2, sqrt(2)]. */ uint32_t u = (mi >> 32) + OneMHfRt2Top; int32_t k = (int32_t) (u >> 20) - OneTop12; uint32_t utop = (u & 0x000fffff) + HfRt2Top; uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask); double f = asdouble (u_red) - 1; /* Correction term for round-off in f. */ double cm = (x - (m - 1)) / m; /* Approximate log1p(f) with polynomial. */ double f2 = f * f; double f4 = f2 * f2; double f8 = f4 * f4; double p = fma ( f, estrin_18_f64 (f, f2, f4, f8, f8 * f8, __log1p_data.coeffs) * f, f); /* Recombine log1p(x) = k*log2 + log1p(f) + c/m. */ double kd = k; double y = fma (Ln2Lo, kd, cm); return y + fma (Ln2Hi, kd, p); } /* Approximation for double-precision inverse tanh(x), using a simplified version of log1p. Greatest observed error is 3.00 ULP: atanh(0x1.e58f3c108d714p-4) got 0x1.e7da77672a647p-4 want 0x1.e7da77672a64ap-4. 
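The reduction below relies on the identity atanh(|x|) = 0.5 * log ((1 + |x|) / (1 - |x|)) = 0.5 * log1p (2|x| / (1 - |x|)), so a single call to the inlined log1p suffices; the factor 0.5 and the sign of x are combined in halfsign.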
*/ double atanh (double x) { uint64_t ix = asuint64 (x); uint64_t sign = ix & ~AbsMask; uint64_t ia = ix & AbsMask; if (unlikely (ia == One)) return __math_divzero (sign >> 32); if (unlikely (ia > One)) return __math_invalid (x); double halfsign = asdouble (Half | sign); double ax = asdouble (ia); return halfsign * log1p_inline ((2 * ax) / (1 - ax)); } -PL_SIG (S, D, 1, atanh, -1.0, 1.0) -PL_TEST_ULP (atanh, 3.00) -PL_TEST_SYM_INTERVAL (atanh, 0, 0x1p-23, 10000) -PL_TEST_SYM_INTERVAL (atanh, 0x1p-23, 1, 90000) -PL_TEST_SYM_INTERVAL (atanh, 1, inf, 100) +TEST_SIG (S, D, 1, atanh, -1.0, 1.0) +TEST_ULP (atanh, 3.00) +TEST_SYM_INTERVAL (atanh, 0, 0x1p-23, 10000) +TEST_SYM_INTERVAL (atanh, 0x1p-23, 1, 90000) +TEST_SYM_INTERVAL (atanh, 1, inf, 100) diff --git a/contrib/arm-optimized-routines/pl/math/atanhf_3u1.c b/contrib/arm-optimized-routines/math/aarch64/experimental/atanhf_3u1.c similarity index 87% rename from contrib/arm-optimized-routines/pl/math/atanhf_3u1.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/atanhf_3u1.c index e99d5a9900a9..c452bab91f97 100644 --- a/contrib/arm-optimized-routines/pl/math/atanhf_3u1.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/atanhf_3u1.c @@ -1,86 +1,86 @@ /* * Single-precision atanh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #include "mathlib.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define AbsMask 0x7fffffff #define Half 0x3f000000 #define One 0x3f800000 #define Four 0x40800000 #define Ln2 0x1.62e43p-1f /* asuint(0x1p-12), below which atanhf(x) rounds to x. */ #define TinyBound 0x39800000 #define C(i) __log1pf_data.coeffs[i] static inline float eval_poly (float m) { /* Approximate log(1+m) on [-0.25, 0.5] using Estrin scheme. */ float p_12 = fmaf (m, C (1), C (0)); float p_34 = fmaf (m, C (3), C (2)); float p_56 = fmaf (m, C (5), C (4)); float p_78 = fmaf (m, C (7), C (6)); float m2 = m * m; float p_02 = fmaf (m2, p_12, m); float p_36 = fmaf (m2, p_56, p_34); float p_79 = fmaf (m2, C (8), p_78); float m4 = m2 * m2; float p_06 = fmaf (m4, p_36, p_02); return fmaf (m4 * p_79, m4, p_06); } static inline float log1pf_inline (float x) { /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no special-case handling. See that file for details of the algorithm. */ float m = x + 1.0f; int k = (asuint (m) - 0x3f400000) & 0xff800000; float s = asfloat (Four - k); float m_scale = asfloat (asuint (x) - k) + fmaf (0.25f, s, -1.0f); float p = eval_poly (m_scale); float scale_back = (float) k * 0x1.0p-23f; return fmaf (scale_back, Ln2, p); } /* Approximation for single-precision inverse tanh(x), using a simplified version of log1p. Maximum error is 3.08 ULP: atanhf(0x1.ff0d5p-5) got 0x1.ffb768p-5 want 0x1.ffb76ep-5. 
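As in the double-precision routine, the identity atanh(|x|) = 0.5 * log1p (2|x| / (1 - |x|)) is used, with halfsign = +/-0.5 folding the leading half and the sign of x into one multiply; below TinyBound the result rounds to x and the identity is skipped entirely.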
*/ float atanhf (float x) { uint32_t ix = asuint (x); uint32_t iax = ix & AbsMask; uint32_t sign = ix & ~AbsMask; if (unlikely (iax < TinyBound)) return x; if (iax == One) return __math_divzero (sign); if (unlikely (iax > One)) return __math_invalidf (x); float halfsign = asfloat (Half | sign); float ax = asfloat (iax); return halfsign * log1pf_inline ((2 * ax) / (1 - ax)); } -PL_SIG (S, F, 1, atanh, -1.0, 1.0) -PL_TEST_ULP (atanhf, 2.59) -PL_TEST_SYM_INTERVAL (atanhf, 0, 0x1p-12, 500) -PL_TEST_SYM_INTERVAL (atanhf, 0x1p-12, 1, 200000) -PL_TEST_SYM_INTERVAL (atanhf, 1, inf, 1000) +TEST_SIG (S, F, 1, atanh, -1.0, 1.0) +TEST_ULP (atanhf, 2.59) +TEST_SYM_INTERVAL (atanhf, 0, 0x1p-12, 500) +TEST_SYM_INTERVAL (atanhf, 0x1p-12, 1, 200000) +TEST_SYM_INTERVAL (atanhf, 1, inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/cbrt_2u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/cbrt_2u.c similarity index 89% rename from contrib/arm-optimized-routines/pl/math/cbrt_2u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/cbrt_2u.c index 80be83c4470c..cf31627e43dc 100644 --- a/contrib/arm-optimized-routines/pl/math/cbrt_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/cbrt_2u.c @@ -1,69 +1,69 @@ /* * Double-precision cbrt(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" -PL_SIG (S, D, 1, cbrt, -10.0, 10.0) +TEST_SIG (S, D, 1, cbrt, -10.0, 10.0) #define AbsMask 0x7fffffffffffffff #define TwoThirds 0x1.5555555555555p-1 #define C(i) __cbrt_data.poly[i] #define T(i) __cbrt_data.table[i] /* Approximation for double-precision cbrt(x), using low-order polynomial and two Newton iterations. Greatest observed error is 1.79 ULP. Errors repeat according to the exponent, for instance an error observed for double value m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an integer. cbrt(0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0 want 0x1.965fe72821e99p+0. */ double cbrt (double x) { uint64_t ix = asuint64 (x); uint64_t iax = ix & AbsMask; uint64_t sign = ix & ~AbsMask; if (unlikely (iax == 0 || iax == 0x7ff0000000000000)) return x; /* |x| = m * 2^e, where m is in [0.5, 1.0]. We can easily decompose x into m and e using frexp. */ int e; double m = frexp (asdouble (iax), &e); - /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point for - Newton iterations. */ + /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point + for Newton iterations. */ double p_01 = fma (C (1), m, C (0)); double p_23 = fma (C (3), m, C (2)); double p = fma (p_23, m * m, p_01); /* Two iterations of Newton's method for iteratively approximating cbrt. */ double m_by_3 = m / 3; double a = fma (TwoThirds, p, m_by_3 / (p * p)); a = fma (TwoThirds, a, m_by_3 / (a * a)); /* Assemble the result by the following: cbrt(x) = cbrt(m) * 2 ^ (e / 3). Let t = (2 ^ (e / 3)) / (2 ^ round(e / 3)). Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3. i is an integer in [-2, 2], so t can be looked up in the table T. Hence the result is assembled as: cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. Which can be done easily using ldexp. 
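For example, e = 5 gives e / 3 = 1 and e % 3 = 2 (C division truncates towards zero), selecting t = T(4) = 2^(2/3), so the total scaling is 2^(2/3) * 2^1 = 2^(5/3); for e = -5, e / 3 = -1 and e % 3 = -2 select T(0) = 2^(-2/3), giving 2^(-2/3) * 2^-1 = 2^(-5/3).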
*/ return asdouble (asuint64 (ldexp (a * T (2 + e % 3), e / 3)) | sign); } -PL_TEST_ULP (cbrt, 1.30) -PL_TEST_SYM_INTERVAL (cbrt, 0, inf, 1000000) +TEST_ULP (cbrt, 1.30) +TEST_SYM_INTERVAL (cbrt, 0, inf, 1000000) diff --git a/contrib/arm-optimized-routines/pl/math/cbrt_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/cbrt_data.c similarity index 93% rename from contrib/arm-optimized-routines/pl/math/cbrt_data.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/cbrt_data.c index 3d484c2779e2..dabcb6aff2d4 100644 --- a/contrib/arm-optimized-routines/pl/math/cbrt_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/cbrt_data.c @@ -1,15 +1,15 @@ /* * Coefficients and table entries for double-precision cbrt(x). * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" const struct cbrt_data __cbrt_data = {.poly = { /* Coefficients for very rough approximation of cbrt(x) in [0.5, 1]. See cbrt.sollya for details of generation. */ 0x1.c14e8ee44767p-2, 0x1.dd2d3f99e4c0ep-1, -0x1.08e83026b7e74p-1, 0x1.2c74eaa3ba428p-3}, .table = { /* table[i] = 2^((i - 2) / 3). */ 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0}}; diff --git a/contrib/arm-optimized-routines/pl/math/cbrtf_1u5.c b/contrib/arm-optimized-routines/math/aarch64/experimental/cbrtf_1u5.c similarity index 88% rename from contrib/arm-optimized-routines/pl/math/cbrtf_1u5.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/cbrtf_1u5.c index 88fcb7162ef6..5f0288e6d27a 100644 --- a/contrib/arm-optimized-routines/pl/math/cbrtf_1u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/cbrtf_1u5.c @@ -1,66 +1,66 @@ /* * Single-precision cbrt(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "poly_scalar_f32.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define AbsMask 0x7fffffff #define SignMask 0x80000000 #define TwoThirds 0x1.555556p-1f #define T(i) __cbrtf_data.table[i] /* Approximation for single-precision cbrt(x), using low-order polynomial and one Newton iteration on a reduced interval. Greatest error is 1.5 ULP. This - is observed for every value where the mantissa is 0x1.81410e and the exponent - is a multiple of 3, for example: + is observed for every value where the mantissa is 0x1.81410e and the + exponent is a multiple of 3, for example: cbrtf(0x1.81410ep+30) got 0x1.255d96p+10 want 0x1.255d92p+10. */ float cbrtf (float x) { uint32_t ix = asuint (x); uint32_t iax = ix & AbsMask; uint32_t sign = ix & SignMask; if (unlikely (iax == 0 || iax == 0x7f800000)) return x; /* |x| = m * 2^e, where m is in [0.5, 1.0]. We can easily decompose x into m and e using frexpf. */ int e; float m = frexpf (asfloat (iax), &e); /* p is a rough approximation for cbrt(m) in [0.5, 1.0]. The better this is, the less accurate the next stage of the algorithm needs to be. An order-4 polynomial is enough for one Newton iteration. */ float p = pairwise_poly_3_f32 (m, m * m, __cbrtf_data.poly); /* One iteration of Newton's method for iteratively approximating cbrt. */ float m_by_3 = m / 3; float a = fmaf (TwoThirds, p, m_by_3 / (p * p)); /* Assemble the result by the following: cbrt(x) = cbrt(m) * 2 ^ (e / 3). 
Let t = (2 ^ (e / 3)) / (2 ^ round(e / 3)). Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3. i is an integer in [-2, 2], so t can be looked up in the table T. Hence the result is assembled as: cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. Which can be done easily using ldexpf. */ return asfloat (asuint (ldexpf (a * T (2 + e % 3), e / 3)) | sign); } -PL_SIG (S, F, 1, cbrt, -10.0, 10.0) -PL_TEST_ULP (cbrtf, 1.03) -PL_TEST_SYM_INTERVAL (cbrtf, 0, inf, 1000000) +TEST_SIG (S, F, 1, cbrt, -10.0, 10.0) +TEST_ULP (cbrtf, 1.03) +TEST_SYM_INTERVAL (cbrtf, 0, inf, 1000000) diff --git a/contrib/arm-optimized-routines/pl/math/cbrtf_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/cbrtf_data.c similarity index 93% rename from contrib/arm-optimized-routines/pl/math/cbrtf_data.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/cbrtf_data.c index c6cdb4de0d65..7b5c53f4a606 100644 --- a/contrib/arm-optimized-routines/pl/math/cbrtf_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/cbrtf_data.c @@ -1,15 +1,15 @@ /* * Coefficients and table entries for single-precision cbrt(x). * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" const struct cbrtf_data __cbrtf_data = {.poly = { /* Coefficients for very rough approximation of cbrt(x) in [0.5, 1]. See cbrtf.sollya for details of generation. */ 0x1.c14e96p-2, 0x1.dd2d3p-1, -0x1.08e81ap-1, 0x1.2c74c2p-3}, .table = { /* table[i] = 2^((i - 2) / 3). */ 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0}}; diff --git a/contrib/arm-optimized-routines/pl/math/cosh_2u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/cosh_2u.c similarity index 70% rename from contrib/arm-optimized-routines/pl/math/cosh_2u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/cosh_2u.c index 2240a9c56f15..f5bc73b85df8 100644 --- a/contrib/arm-optimized-routines/pl/math/cosh_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/cosh_2u.c @@ -1,63 +1,61 @@ /* * Double-precision cosh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" +#include "exp_inline.h" #define AbsMask 0x7fffffffffffffff -#define SpecialBound \ +#define SpecialBound \ 0x40861da04cbafe44 /* 0x1.61da04cbafe44p+9, above which exp overflows. */ -double -__exp_dd (double, double); - static double specialcase (double x, uint64_t iax) { if (iax == 0x7ff0000000000000) return INFINITY; if (iax > 0x7ff0000000000000) return __math_invalid (x); - /* exp overflows above SpecialBound. At this magnitude cosh(x) is dominated by - exp(x), so we can approximate cosh(x) by (exp(|x|/2)) ^ 2 / 2. */ - double t = __exp_dd (asdouble (iax) / 2, 0); + /* exp overflows above SpecialBound. At this magnitude cosh(x) is dominated + by exp(x), so we can approximate cosh(x) by (exp(|x|/2)) ^ 2 / 2. */ + double t = exp_inline (asdouble (iax) / 2, 0); return (0.5 * t) * t; } /* Approximation for double-precision cosh(x). cosh(x) = (exp(x) + exp(-x)) / 2. The greatest observed error is in the special region, 1.93 ULP: cosh(0x1.628af341989dap+9) got 0x1.fdf28623ef921p+1021 want 0x1.fdf28623ef923p+1021. 
The greatest observed error in the non-special region is 1.03 ULP: cosh(0x1.502cd8e56ab3bp+0) got 0x1.fe54962842d0ep+0 want 0x1.fe54962842d0fp+0. */ double cosh (double x) { uint64_t ix = asuint64 (x); uint64_t iax = ix & AbsMask; /* exp overflows a little bit before cosh, so use special-case handler for the - gap, as well as special values. */ + /* exp overflows a little bit before cosh, so use special-case handler for + the gap, as well as special values. */ if (unlikely (iax >= SpecialBound)) return specialcase (x, iax); double ax = asdouble (iax); /* Use double-precision exp helper to calculate exp(x), then: cosh(x) = exp(|x|) / 2 + 1 / (exp(|x|) * 2). */ - double t = __exp_dd (ax, 0); + double t = exp_inline (ax, 0); return 0.5 * t + 0.5 / t; } -PL_SIG (S, D, 1, cosh, -10.0, 10.0) -PL_TEST_ULP (cosh, 1.43) -PL_TEST_SYM_INTERVAL (cosh, 0, 0x1.61da04cbafe44p+9, 100000) -PL_TEST_SYM_INTERVAL (cosh, 0x1.61da04cbafe44p+9, 0x1p10, 1000) -PL_TEST_SYM_INTERVAL (cosh, 0x1p10, inf, 100) +TEST_SIG (S, D, 1, cosh, -10.0, 10.0) +TEST_ULP (cosh, 1.43) +TEST_SYM_INTERVAL (cosh, 0, 0x1.61da04cbafe44p+9, 100000) +TEST_SYM_INTERVAL (cosh, 0x1.61da04cbafe44p+9, 0x1p10, 1000) +TEST_SYM_INTERVAL (cosh, 0x1p10, inf, 100) diff --git a/contrib/arm-optimized-routines/pl/math/coshf_1u9.c b/contrib/arm-optimized-routines/math/aarch64/experimental/coshf_1u9.c similarity index 71% rename from contrib/arm-optimized-routines/pl/math/coshf_1u9.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/coshf_1u9.c index cf737840e0d6..b7e7720a472e 100644 --- a/contrib/arm-optimized-routines/pl/math/coshf_1u9.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/coshf_1u9.c @@ -1,68 +1,65 @@ /* * Single-precision cosh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +#include "mathlib.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define AbsMask 0x7fffffff #define TinyBound 0x20000000 /* 0x1p-63: Round to 1 below this. */ -#define SpecialBound \ - 0x42ad496c /* 0x1.5a92d8p+6: expf overflows above this, so have to use \ - special case. */ - -float -optr_aor_exp_f32 (float); +/* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */ +#define SpecialBound 0x42ad496c static NOINLINE float specialcase (float x, uint32_t iax) { if (iax == 0x7f800000) return INFINITY; if (iax > 0x7f800000) return __math_invalidf (x); if (iax <= TinyBound) /* For tiny x, avoid underflow by just returning 1. */ return 1; /* Otherwise SpecialBound <= |x| < Inf. x is too large to calculate exp(x) without overflow, so use exp(|x|/2) instead. For large x cosh(x) is dominated by exp(x), so return: cosh(x) ~= (exp(|x|/2))^2 / 2. */ - float t = optr_aor_exp_f32 (asfloat (iax) / 2); + float t = expf (asfloat (iax) / 2); return (0.5 * t) * t; } /* Approximation for single-precision cosh(x) using exp. cosh(x) = (exp(x) + exp(-x)) / 2. The maximum error is 1.89 ULP, observed for |x| > SpecialBound: coshf(0x1.65898cp+6) got 0x1.f00aep+127 want 0x1.f00adcp+127. The maximum error observed for TinyBound < |x| < SpecialBound is 1.02 ULP: coshf(0x1.50a3cp+0) got 0x1.ff21dcp+0 want 0x1.ff21dap+0. */ float coshf (float x) { uint32_t ix = asuint (x); uint32_t iax = ix & AbsMask; float ax = asfloat (iax); if (unlikely (iax <= TinyBound || iax >= SpecialBound)) { /* x is tiny, large or special.
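All three cases go to the NOINLINE helper above: tiny arguments return 1, arguments at or above SpecialBound use the overflow-safe (expf (|x| / 2))^2 / 2 form, and NaN/Inf are handled there as well.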
*/ return specialcase (x, iax); } /* Compute cosh using the definition: coshf(x) = exp(x) / 2 + exp(-x) / 2. */ - float t = optr_aor_exp_f32 (ax); + float t = expf (ax); return 0.5f * t + 0.5f / t; } -PL_SIG (S, F, 1, cosh, -10.0, 10.0) -PL_TEST_ULP (coshf, 1.89) -PL_TEST_SYM_INTERVAL (coshf, 0, 0x1p-63, 100) -PL_TEST_SYM_INTERVAL (coshf, 0, 0x1.5a92d8p+6, 80000) -PL_TEST_SYM_INTERVAL (coshf, 0x1.5a92d8p+6, inf, 2000) +TEST_SIG (S, F, 1, cosh, -10.0, 10.0) +TEST_ULP (coshf, 1.89) +TEST_SYM_INTERVAL (coshf, 0, 0x1p-63, 100) +TEST_SYM_INTERVAL (coshf, 0, 0x1.5a92d8p+6, 80000) +TEST_SYM_INTERVAL (coshf, 0x1.5a92d8p+6, inf, 2000) diff --git a/contrib/arm-optimized-routines/pl/math/erf_2u5.c b/contrib/arm-optimized-routines/math/aarch64/experimental/erf_2u5.c similarity index 87% rename from contrib/arm-optimized-routines/pl/math/erf_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/erf_2u5.c index 3ca2a1332c1f..0bbe3e9548f8 100644 --- a/contrib/arm-optimized-routines/pl/math/erf_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/erf_2u5.c @@ -1,102 +1,101 @@ /* * Double-precision erf(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3 #define Shift 0x1p45 /* Polynomial coefficients. */ #define OneThird 0x1.5555555555555p-2 #define TwoThird 0x1.5555555555555p-1 #define TwoOverFifteen 0x1.1111111111111p-3 #define TwoOverFive 0x1.999999999999ap-2 #define Tenth 0x1.999999999999ap-4 #define TwoOverNine 0x1.c71c71c71c71cp-3 #define TwoOverFortyFive 0x1.6c16c16c16c17p-5 #define Sixth 0x1.555555555555p-3 /* Fast erf approximation based on series expansion near x rounded to nearest multiple of 1/128. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erf(x) ~ erf(r) + scale * d * [ + 1 - r d + 1/3 (2 r^2 - 1) d^2 - 1/6 (r (2 r^2 - 3)) d^3 + 1/30 (4 r^4 - 12 r^2 + 3) d^4 - 1/90 (4 r^4 - 20 r^2 + 15) d^5 ] Maximum measured error: 2.29 ULP erf(-0x1.00003c924e5d1p-8) got -0x1.20dd59132ebadp-8 want -0x1.20dd59132ebafp-8. */ double -erf (double x) +arm_math_erf (double x) { /* Get absolute value and sign. */ uint64_t ix = asuint64 (x); uint64_t ia = ix & 0x7fffffffffffffff; uint64_t sign = ix & ~0x7fffffffffffffff; /* |x| < 0x1p-508. Triggers exceptions. */ if (unlikely (ia < 0x2030000000000000)) return fma (TwoOverSqrtPiMinusOne, x, x); if (ia < 0x4017f80000000000) /* |x| < 6 - 1 / 128 = 5.9921875. */ { /* Set r to multiple of 1/128 nearest to |x|. */ double a = asdouble (ia); double z = a + Shift; uint64_t i = asuint64 (z) - asuint64 (Shift); double r = z - Shift; /* Lookup erf(r) and scale(r) in table. Set erf(r) to 0 and scale to 2/sqrt(pi) for |x| <= 0x1.cp-9. */ - double erfr = __erf_data.tab[i].erf; - double scale = __erf_data.tab[i].scale; + double erfr = __v_erf_data.tab[i].erf; + double scale = __v_erf_data.tab[i].scale; /* erf(x) ~ erf(r) + scale * d * poly (d, r). */ double d = a - r; double r2 = r * r; double d2 = d * d; /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p5(r) * d^5.
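Matching the bracketed series above term by term: p1 = -r, p2 = (2/3) r^2 - 1/3, p3 = -r ((1/3) r^2 - 1/2), p4 = (2/15) r^4 - (2/5) r^2 + 1/10 and p5 = -r ((2/45) r^4 - (2/9) r^2 + 1/6), which is exactly what the fma chains below evaluate.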
*/ double p1 = -r; double p2 = fma (TwoThird, r2, -OneThird); double p3 = -r * fma (OneThird, r2, -0.5); double p4 = fma (fma (TwoOverFifteen, r2, -TwoOverFive), r2, Tenth); double p5 = -r * fma (fma (TwoOverFortyFive, r2, -TwoOverNine), r2, Sixth); double p34 = fma (p4, d, p3); double p12 = fma (p2, d, p1); double y = fma (p5, d2, p34); y = fma (y, d2, p12); y = fma (fma (y, d2, d), scale, erfr); return asdouble (asuint64 (y) | sign); } /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */ if (unlikely (ia >= 0x7ff0000000000000)) return (1.0 - (double) (sign >> 62)) + 1.0 / x; /* Boring domain (|x| >= 6.0). */ return asdouble (sign | asuint64 (1.0)); } -PL_SIG (S, D, 1, erf, -6.0, 6.0) -PL_TEST_ULP (erf, 1.79) -PL_TEST_SYM_INTERVAL (erf, 0, 5.9921875, 40000) -PL_TEST_SYM_INTERVAL (erf, 5.9921875, inf, 40000) -PL_TEST_SYM_INTERVAL (erf, 0, inf, 40000) +TEST_ULP (arm_math_erf, 1.79) +TEST_SYM_INTERVAL (arm_math_erf, 0, 5.9921875, 40000) +TEST_SYM_INTERVAL (arm_math_erf, 5.9921875, inf, 40000) +TEST_SYM_INTERVAL (arm_math_erf, 0, inf, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/erfc_1u8.c b/contrib/arm-optimized-routines/math/aarch64/experimental/erfc_1u8.c similarity index 90% rename from contrib/arm-optimized-routines/pl/math/erfc_1u8.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/erfc_1u8.c index 7f2004e9335d..5357e9329433 100644 --- a/contrib/arm-optimized-routines/pl/math/erfc_1u8.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/erfc_1u8.c @@ -1,153 +1,153 @@ /* * Double-precision erfc(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define Shift 0x1p45 #define P20 0x1.5555555555555p-2 /* 1/3. */ #define P21 0x1.5555555555555p-1 /* 2/3. */ #define P40 0x1.999999999999ap-4 /* 1/10. */ #define P41 0x1.999999999999ap-2 /* 2/5. */ #define P42 0x1.11111111111111p-3 /* 2/15. */ #define P50 0x1.5555555555555p-3 /* 1/6. */ #define P51 0x1.c71c71c71c71cp-3 /* 2/9. */ #define P52 0x1.6c16c16c16c17p-5 /* 2/45. */ /* Qi = (i+1) / i. */ #define Q5 0x1.3333333333333p0 #define Q6 0x1.2aaaaaaaaaaabp0 #define Q7 0x1.2492492492492p0 #define Q8 0x1.2p0 #define Q9 0x1.1c71c71c71c72p0 /* Ri = -2 * i / ((i+1)*(i+2)). */ #define R5 -0x1.e79e79e79e79ep-3 #define R6 -0x1.b6db6db6db6dbp-3 #define R7 -0x1.8e38e38e38e39p-3 #define R8 -0x1.6c16c16c16c17p-3 #define R9 -0x1.4f2094f2094f2p-3 /* Fast erfc approximation based on series expansion near x rounded to nearest multiple of 1/128. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erfc(x) ~ erfc(r) - scale * d * poly(r, d), with poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3 + (2/15 r^4 - 2/5 r^2 + 1/10) d^4 - r * (2/45 r^4 - 2/9 r^2 + 1/6) d^5 + p6(r) d^6 + ... + p10(r) d^10 Polynomials p6(r) to p10(r) are computed using recurrence relation 2(i+1)p_i + 2r(i+2)p_{i+1} + (i+2)(i+3)p_{i+2} = 0, with p0 = 1, and p1(r) = -r. Values of erfc(r) and scale(r) are read from lookup tables. Stored values are scaled to avoid hitting the subnormal range. Note that for x < 0, erfc(x) = 2.0 - erfc(-x). Maximum measured error: 1.71 ULP erfc(0x1.46cfe976733p+4) got 0x1.e15fcbea3e7afp-608 want 0x1.e15fcbea3e7adp-608. */ double erfc (double x) { /* Get top words and sign. 
*/ uint64_t ix = asuint64 (x); uint64_t ia = ix & 0x7fffffffffffffff; double a = asdouble (ia); uint64_t sign = ix & ~0x7fffffffffffffff; /* erfc(nan)=nan, erfc(+inf)=0 and erfc(-inf)=2. */ if (unlikely (ia >= 0x7ff0000000000000)) return asdouble (sign >> 1) + 1.0 / x; /* Special cases. */ /* Return early for large enough negative values. */ if (x < -6.0) return 2.0; /* For |x| < 3487.0/128.0, the following approximation holds. */ if (likely (ia < 0x403b3e0000000000)) { /* |x| < 0x1p-511 => accurate to 0.5 ULP. */ if (unlikely (ia < asuint64 (0x1p-511))) return 1.0 - x; /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 1 and scale to 2/sqrt(pi), when x reduced to r = 0. */ double z = a + Shift; - uint64_t i = asuint64 (z); + uint64_t i = asuint64 (z) - asuint64 (Shift); double r = z - Shift; /* These values are scaled by 2^128. */ - double erfcr = __erfc_data.tab[i].erfc; - double scale = __erfc_data.tab[i].scale; + double erfcr = __v_erfc_data.tab[i].erfc; + double scale = __v_erfc_data.tab[i].scale; /* erfc(x) ~ erfc(r) - scale * d * poly (r, d). */ double d = a - r; double d2 = d * d; double r2 = r * r; /* Compute p_i as a regular (low-order) polynomial. */ double p1 = -r; double p2 = fma (P21, r2, -P20); double p3 = -r * fma (P20, r2, -0.5); double p4 = fma (fma (P42, r2, -P41), r2, P40); double p5 = -r * fma (fma (P52, r2, -P51), r2, P50); /* Compute p_i using recurrence relation: p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */ double p6 = fma (Q5 * r, p5, p4) * R5; double p7 = fma (Q6 * r, p6, p5) * R6; double p8 = fma (Q7 * r, p7, p6) * R7; double p9 = fma (Q8 * r, p8, p7) * R8; double p10 = fma (Q9 * r, p9, p8) * R9; /* Compute polynomial in d using pairwise Horner scheme. */ double p90 = fma (p10, d, p9); double p78 = fma (p8, d, p7); double p56 = fma (p6, d, p5); double p34 = fma (p4, d, p3); double p12 = fma (p2, d, p1); double y = fma (p90, d2, p78); y = fma (y, d2, p56); y = fma (y, d2, p34); y = fma (y, d2, p12); y = fma (-fma (y, d2, d), scale, erfcr); /* Handle sign and scale back in a single fma. */ double off = asdouble (sign >> 1); double fac = asdouble (asuint64 (0x1p-128) | sign); y = fma (y, fac, off); if (unlikely (x > 26.0)) { /* The underflow exception needs to be signaled explicitly when result gets into the subnormal range. */ if (unlikely (y < 0x1p-1022)) force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); /* Set errno to ERANGE if result rounds to 0. */ return __math_check_uflow (y); } return y; } /* Above the threshold (x > 3487.0/128.0) erfc is constant and needs to raise underflow exception for positive x. 
*/ return __math_uflow (0); } -PL_SIG (S, D, 1, erfc, -6.0, 28.0) -PL_TEST_ULP (erfc, 1.21) -PL_TEST_SYM_INTERVAL (erfc, 0, 0x1p-26, 40000) -PL_TEST_INTERVAL (erfc, 0x1p-26, 28.0, 100000) -PL_TEST_INTERVAL (erfc, -0x1p-26, -6.0, 100000) -PL_TEST_INTERVAL (erfc, 28.0, inf, 40000) -PL_TEST_INTERVAL (erfc, -6.0, -inf, 40000) +TEST_SIG (S, D, 1, erfc, -6.0, 28.0) +TEST_ULP (erfc, 1.21) +TEST_SYM_INTERVAL (erfc, 0, 0x1p-26, 40000) +TEST_INTERVAL (erfc, 0x1p-26, 28.0, 100000) +TEST_INTERVAL (erfc, -0x1p-26, -6.0, 100000) +TEST_INTERVAL (erfc, 28.0, inf, 40000) +TEST_INTERVAL (erfc, -6.0, -inf, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/erfcf_1u7.c b/contrib/arm-optimized-routines/math/aarch64/experimental/erfcf_1u7.c similarity index 86% rename from contrib/arm-optimized-routines/pl/math/erfcf_1u7.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/erfcf_1u7.c index c8ce95cca058..e56193c8a103 100644 --- a/contrib/arm-optimized-routines/pl/math/erfcf_1u7.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/erfcf_1u7.c @@ -1,103 +1,103 @@ /* * Single-precision erfc(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define Shift 0x1p17f #define OneThird 0x1.555556p-2f #define TwoThird 0x1.555556p-1f #define TwoOverFifteen 0x1.111112p-3f #define TwoOverFive 0x1.99999ap-2f #define Tenth 0x1.99999ap-4f #define SignMask 0x7fffffff /* Fast erfcf approximation based on series expansion near x rounded to nearest multiple of 1/64. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erfc(x) ~ erfc(r) - scale * d * poly(r, d), with poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3 + (2/15 r^4 - 2/5 r^2 + 1/10) d^4 Values of erfc(r) and scale are read from lookup tables. Stored values are scaled to avoid hitting the subnormal range. Note that for x < 0, erfc(x) = 2.0 - erfc(-x). Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0). erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120 want 0x1.f51216p-120. */ float erfcf (float x) { /* Get top words and sign. */ uint32_t ix = asuint (x); uint32_t ia = ix & SignMask; uint32_t sign = ix & ~SignMask; /* |x| < 0x1.0p-26 => accurate to 0.5 ULP (top12(0x1p-26) = 0x328). */ if (unlikely (ia < 0x32800000)) return 1.0f - x; /* Small case. */ /* For |x| < 10.0625, the following approximation holds. */ if (likely (ia < 0x41210000)) { /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 1 and scale to 2/sqrt(pi), when x reduced to r = 0. */ float a = asfloat (ia); float z = a + Shift; uint32_t i = asuint (z) - asuint (Shift); float r = z - Shift; /* These values are scaled by 2^-47. */ - float erfcr = __erfcf_data.tab[i].erfc; - float scale = __erfcf_data.tab[i].scale; + float erfcr = __v_erfcf_data.tab[i].erfc; + float scale = __v_erfcf_data.tab[i].scale; /* erfc(x) ~ erfc(r) - scale * d * poly (r, d). */ float d = a - r; float d2 = d * d; float r2 = r * r; float p1 = -r; float p2 = fmaf (TwoThird, r2, -OneThird); float p3 = -r * fmaf (OneThird, r2, -0.5f); float p4 = fmaf (fmaf (TwoOverFifteen, r2, -TwoOverFive), r2, Tenth); float y = fmaf (p4, d, p3); y = fmaf (y, d, p2); y = fmaf (y, d, p1); y = fmaf (-fmaf (y, d2, d), scale, erfcr); /* Handle sign and scale back in a single fma. 
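For x >= 0, off = 0 and fac = 0x1p-47 simply removes the table scaling; for x < 0 the shifted sign bit turns off into 2.0f and fac into -0x1p-47, so the same fma also applies erfc(-x) = 2 - erfc(x).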
*/ float off = asfloat (sign >> 1); float fac = asfloat (asuint (0x1p-47f) | sign); y = fmaf (y, fac, off); /* The underflow exception needs to be signaled explicitly when result gets into subnormal range. */ if (x >= 0x1.2639cp+3f) force_eval_float (opt_barrier_float (0x1p-123f) * 0x1p-123f); return y; } /* erfcf(nan)=nan, erfcf(+inf)=0 and erfcf(-inf)=2. */ if (unlikely (ia >= 0x7f800000)) return asfloat (sign >> 1) + 1.0f / x; /* Special cases. */ /* Above this threshold erfcf is constant and needs to raise underflow exception for positive x. */ return sign ? 2.0f : __math_uflowf (0); } -PL_SIG (S, F, 1, erfc, -4.0, 10.0) -PL_TEST_ULP (erfcf, 1.14) -PL_TEST_SYM_INTERVAL (erfcf, 0, 0x1p-26, 40000) -PL_TEST_INTERVAL (erfcf, 0x1p-26, 10.0625, 40000) -PL_TEST_INTERVAL (erfcf, -0x1p-26, -4.0, 40000) -PL_TEST_INTERVAL (erfcf, 10.0625, inf, 40000) -PL_TEST_INTERVAL (erfcf, -4.0, -inf, 40000) +TEST_SIG (S, F, 1, erfc, -4.0, 10.0) +TEST_ULP (erfcf, 1.14) +TEST_SYM_INTERVAL (erfcf, 0, 0x1p-26, 40000) +TEST_INTERVAL (erfcf, 0x1p-26, 10.0625, 40000) +TEST_INTERVAL (erfcf, -0x1p-26, -4.0, 40000) +TEST_INTERVAL (erfcf, 10.0625, inf, 40000) +TEST_INTERVAL (erfcf, -4.0, -inf, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/erff_2u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/erff_2u.c similarity index 83% rename from contrib/arm-optimized-routines/pl/math/erff_2u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/erff_2u.c index f43e647072f8..9487f60dd1e3 100644 --- a/contrib/arm-optimized-routines/pl/math/erff_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/erff_2u.c @@ -1,82 +1,81 @@ /* * Single-precision erf(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f #define Shift 0x1p16f #define OneThird 0x1.555556p-2f /* Fast erff approximation based on series expansion near x rounded to nearest multiple of 1/128. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erf(x) ~ erf(r) + scale * d * [ + 1 - r d + 1/3 (2 r^2 - 1) d^2 - 1/6 (r (2 r^2 - 3) ) d^3 + 1/30 (4 r^4 - 12 r^2 + 3) d^4 ] This single precision implementation uses only the following terms: erf(x) ~ erf(r) + scale * d * [1 - r * d - 1/3 * d^2] Values of erf(r) and scale are read from lookup tables. For |x| > 3.9375, erf(|x|) rounds to 1.0f. Maximum error: 1.93 ULP erff(0x1.c373e6p-9) got 0x1.fd686cp-9 want 0x1.fd6868p-9. */ float -erff (float x) +arm_math_erff (float x) { /* Get absolute value and sign. */ uint32_t ix = asuint (x); uint32_t ia = ix & 0x7fffffff; uint32_t sign = ix & ~0x7fffffff; /* |x| < 0x1p-62. Triggers exceptions. */ if (unlikely (ia < 0x20800000)) return fmaf (TwoOverSqrtPiMinusOne, x, x); if (ia < 0x407b8000) /* |x| < 4 - 8 / 128 = 3.9375. */ { /* Lookup erf(r) and scale(r) in tables, e.g. set erf(r) to 0 and scale to 2/sqrt(pi), when x reduced to r = 0. */ float a = asfloat (ia); float z = a + Shift; uint32_t i = asuint (z) - asuint (Shift); float r = z - Shift; - float erfr = __erff_data.tab[i].erf; - float scale = __erff_data.tab[i].scale; + float erfr = __v_erff_data.tab[i].erf; + float scale = __v_erff_data.tab[i].scale; /* erf(x) ~ erf(r) + scale * d * (1 - r * d - 1/3 * d^2).
*/ float d = a - r; float d2 = d * d; float y = -fmaf (OneThird, d, r); y = fmaf (fmaf (y, d2, d), scale, erfr); return asfloat (asuint (y) | sign); } /* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1. */ if (unlikely (ia >= 0x7f800000)) return (1.0f - (float) (sign >> 30)) + 1.0f / x; /* Boring domain (|x| >= 4.0). */ return asfloat (sign | asuint (1.0f)); } -PL_SIG (S, F, 1, erf, -4.0, 4.0) -PL_TEST_ULP (erff, 1.43) -PL_TEST_SYM_INTERVAL (erff, 0, 3.9375, 40000) -PL_TEST_SYM_INTERVAL (erff, 3.9375, inf, 40000) -PL_TEST_SYM_INTERVAL (erff, 0, inf, 40000) +TEST_ULP (arm_math_erff, 1.43) +TEST_SYM_INTERVAL (arm_math_erff, 0, 3.9375, 40000) +TEST_SYM_INTERVAL (arm_math_erff, 3.9375, inf, 40000) +TEST_SYM_INTERVAL (arm_math_erff, 0, inf, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/erfinv_24u5.c b/contrib/arm-optimized-routines/math/aarch64/experimental/erfinv_24u5.c similarity index 88% rename from contrib/arm-optimized-routines/pl/math/erfinv_24u5.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/erfinv_24u5.c index 20e1e361befc..753f38a79f66 100644 --- a/contrib/arm-optimized-routines/pl/math/erfinv_24u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/erfinv_24u5.c @@ -1,81 +1,85 @@ /* * Double-precision inverse error function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #include "poly_scalar_f64.h" -#include "pl_sig.h" -#define IGNORE_SCALAR_FENV -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" const static struct { /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs of the denominator. */ double P_17[7], Q_17[7], P_37[8], Q_37[8], P_57[9], Q_57[10]; } data = { .P_17 = { 0x1.007ce8f01b2e8p+4, -0x1.6b23cc5c6c6d7p+6, 0x1.74e5f6ceb3548p+7, -0x1.5200bb15cc6bbp+7, 0x1.05d193233a849p+6, -0x1.148c5474ee5e1p+3, 0x1.689181bbafd0cp-3 }, .Q_17 = { 0x1.d8fb0f913bd7bp+3, -0x1.6d7f25a3f1c24p+6, 0x1.a450d8e7f4cbbp+7, -0x1.bc3480485857p+7, 0x1.ae6b0c504ee02p+6, -0x1.499dfec1a7f5fp+4, 0x1p+0 }, .P_37 = { -0x1.f3596123109edp-7, 0x1.60b8fe375999ep-2, -0x1.779bb9bef7c0fp+1, 0x1.786ea384470a2p+3, -0x1.6a7c1453c85d3p+4, 0x1.31f0fc5613142p+4, -0x1.5ea6c007d4dbbp+2, 0x1.e66f265ce9e5p-3 }, .Q_37 = { -0x1.636b2dcf4edbep-7, 0x1.0b5411e2acf29p-2, -0x1.3413109467a0bp+1, 0x1.563e8136c554ap+3, -0x1.7b77aab1dcafbp+4, 0x1.8a3e174e05ddcp+4, -0x1.4075c56404eecp+3, 0x1p+0 }, .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2, 0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3, 0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 }, .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2, 0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3, 0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2, 0x1p+0 } }; /* Inverse error function approximation, based on rational approximation as described in J. M. Blair, C. A. Edwards, and J. H. Johnson, "Rational Chebyshev approximations for the inverse of the error function", Math. Comp. 30, pp. 827--830 (1976). https://doi.org/10.1090/S0025-5718-1976-0421040-7 Largest observed error is 24.46 ULP, in the extreme tail: erfinv(0x1.fd9504351b757p-1) got 0x1.ff72c1092917p+0 want 0x1.ff72c10929158p+0. 
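The three regions below use the rational forms from tables 17, 37 and 57 of that paper: t = x^2 - 0.5625 (0.75^2) for |x| <= 0.75, t = x^2 - 0.87890625 (0.9375^2) for |x| <= 0.9375, and t = 1 / sqrt (-log (1 - |x|)) in the tail.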
*/ double erfinv (double x) { double a = fabs (x); if (a <= 0.75) { /* Largest observed error in this region is 6.06 ULP: erfinv(0x1.1884650fd2d41p-2) got 0x1.fb65998cbd3fep-3 want 0x1.fb65998cbd404p-3. */ double t = x * x - 0.5625; return x * horner_6_f64 (t, data.P_17) / horner_6_f64 (t, data.Q_17); } if (a <= 0.9375) { /* Largest observed error in this region is 6.95 ULP: erfinv(0x1.a8d65b94d8c6p-1) got 0x1.f08325591b54p-1 want 0x1.f08325591b547p-1. */ double t = x * x - 0.87890625; return x * horner_7_f64 (t, data.P_37) / horner_7_f64 (t, data.Q_37); } double t = 1.0 / (sqrt (-log (1 - a))); return horner_8_f64 (t, data.P_57) / (copysign (t, x) * horner_9_f64 (t, data.Q_57)); } -PL_SIG (S, D, 1, erfinv, -0.99, 0.99) -PL_TEST_ULP (erfinv, 24.0) -PL_TEST_INTERVAL (erfinv, 0, 1, 40000) -PL_TEST_INTERVAL (erfinv, -0x1p-1022, -1, 40000) +#if USE_MPFR +# warning Not generating tests for erfinv, as MPFR has no suitable reference +#else +TEST_DISABLE_FENV (erfinv) +TEST_SIG (S, D, 1, erfinv, -0.99, 0.99) +TEST_ULP (erfinv, 24.0) +TEST_INTERVAL (erfinv, 0, 1, 40000) +TEST_INTERVAL (erfinv, -0x1p-1022, -1, 40000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/erfinvf_4u7.c b/contrib/arm-optimized-routines/math/aarch64/experimental/erfinvf_4u7.c similarity index 88% rename from contrib/arm-optimized-routines/pl/math/erfinvf_4u7.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/erfinvf_4u7.c index 40736da08be8..152994f6336a 100644 --- a/contrib/arm-optimized-routines/pl/math/erfinvf_4u7.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/erfinvf_4u7.c @@ -1,74 +1,78 @@ /* * Single-precision inverse error function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "poly_scalar_f32.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" const static struct { /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs of the denominator. */ float P_10[3], Q_10[4], P_29[4], Q_29[4], P_50[6], Q_50[3]; } data = { .P_10 = { -0x1.a31268p+3, 0x1.ac9048p+4, -0x1.293ff6p+3 }, .Q_10 = { -0x1.8265eep+3, 0x1.ef5eaep+4, -0x1.12665p+4, 0x1p+0 }, .P_29 = { -0x1.fc0252p-4, 0x1.119d44p+0, -0x1.f59ee2p+0, 0x1.b13626p-2 }, .Q_29 = { -0x1.69952p-4, 0x1.c7b7d2p-1, -0x1.167d7p+1, 0x1p+0 }, .P_50 = { 0x1.3d8948p-3, 0x1.61f9eap+0, 0x1.61c6bcp-1, -0x1.20c9f2p+0, 0x1.5c704cp-1, -0x1.50c6bep-3 }, .Q_50 = { 0x1.3d7dacp-3, 0x1.629e5p+0, 0x1p+0 } }; /* Inverse error function approximation, based on rational approximation as described in J. M. Blair, C. A. Edwards, and J. H. Johnson, "Rational Chebyshev approximations for the inverse of the error function", Math. Comp. 30, pp. 827--830 (1976). https://doi.org/10.1090/S0025-5718-1976-0421040-7 Largest error is 4.71 ULP, in the tail region: erfinvf(0x1.f84e9ap-1) got 0x1.b8326ap+0 want 0x1.b83274p+0. */ float erfinvf (float x) { if (x == 1.0f) return __math_oflowf (0); if (x == -1.0f) return __math_oflowf (1); float a = fabsf (x); if (a > 1.0f) return __math_invalidf (x); if (a <= 0.75f) { /* Greatest error in this region is 4.60 ULP: erfinvf(0x1.0a98bap-5) got 0x1.d8a93ep-6 want 0x1.d8a948p-6. 
*/ float t = x * x - 0.5625f; return x * horner_2_f32 (t, data.P_10) / horner_3_f32 (t, data.Q_10); } if (a < 0.9375f) { /* Greatest error in this region is 3.79 ULP: erfinvf(0x1.ac82d6p-1) got 0x1.f8fc54p-1 want 0x1.f8fc5cp-1. */ float t = x * x - 0.87890625f; return x * horner_3_f32 (t, data.P_29) / horner_3_f32 (t, data.Q_29); } /* Tail region, where error is greatest (and sensitive to sqrt and log1p implementations). */ float t = 1.0 / sqrtf (-log1pf (-a)); return horner_5_f32 (t, data.P_50) / (copysignf (t, x) * horner_2_f32 (t, data.Q_50)); } -PL_SIG (S, F, 1, erfinv, -0.99, 0.99) -PL_TEST_ULP (erfinvf, 4.09) -PL_TEST_SYM_INTERVAL (erfinvf, 0, 1, 40000) +#if USE_MPFR +# warning Not generating tests for erfinvf, as MPFR has no suitable reference +#else +TEST_SIG (S, F, 1, erfinv, -0.99, 0.99) +TEST_ULP (erfinvf, 4.09) +TEST_SYM_INTERVAL (erfinvf, 0, 1, 40000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/erfinvl.c b/contrib/arm-optimized-routines/math/aarch64/experimental/erfinvl.c similarity index 98% rename from contrib/arm-optimized-routines/pl/math/erfinvl.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/erfinvl.c index ea4aadfccd00..4d91410f1a5c 100644 --- a/contrib/arm-optimized-routines/pl/math/erfinvl.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/erfinvl.c @@ -1,114 +1,114 @@ /* * Extended precision inverse error function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define _GNU_SOURCE #include <math.h> #include <stdbool.h> #include <float.h> #include "math_config.h" #include "poly_scalar_f64.h" #define SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p0l #define HF_SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p-1l const static struct { /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs of the denominator. */ double P_17[7], Q_17[7], P_37[8], Q_37[8], P_57[9], Q_57[10]; } data = { .P_17 = { 0x1.007ce8f01b2e8p+4, -0x1.6b23cc5c6c6d7p+6, 0x1.74e5f6ceb3548p+7, -0x1.5200bb15cc6bbp+7, 0x1.05d193233a849p+6, -0x1.148c5474ee5e1p+3, 0x1.689181bbafd0cp-3 }, .Q_17 = { 0x1.d8fb0f913bd7bp+3, -0x1.6d7f25a3f1c24p+6, 0x1.a450d8e7f4cbbp+7, -0x1.bc3480485857p+7, 0x1.ae6b0c504ee02p+6, -0x1.499dfec1a7f5fp+4, 0x1p+0 }, .P_37 = { -0x1.f3596123109edp-7, 0x1.60b8fe375999ep-2, -0x1.779bb9bef7c0fp+1, 0x1.786ea384470a2p+3, -0x1.6a7c1453c85d3p+4, 0x1.31f0fc5613142p+4, -0x1.5ea6c007d4dbbp+2, 0x1.e66f265ce9e5p-3 }, .Q_37 = { -0x1.636b2dcf4edbep-7, 0x1.0b5411e2acf29p-2, -0x1.3413109467a0bp+1, 0x1.563e8136c554ap+3, -0x1.7b77aab1dcafbp+4, 0x1.8a3e174e05ddcp+4, -0x1.4075c56404eecp+3, 0x1p+0 }, .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2, 0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3, 0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 }, .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2, 0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3, 0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2, 0x1p+0 } }; /* Inverse error function approximation, based on rational approximation as described in J. M. Blair, C. A. Edwards, and J. H. Johnson, "Rational Chebyshev approximations for the inverse of the error function", Math. Comp. 30, pp. 827--830 (1976). https://doi.org/10.1090/S0025-5718-1976-0421040-7.
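The Newton refinement in erfinvl below solves erf(y) = x directly: since erf'(y) = (2 / sqrt(pi)) exp(-y^2), each step subtracts dy = (sqrt(pi) / 2) (erf(y) - x) exp(y^2), which is the HF_SQRT_PIl expression in the loop.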
*/ static inline double __erfinv (double x) { if (x == 1.0) return __math_oflow (0); if (x == -1.0) return __math_oflow (1); double a = fabs (x); if (a > 1) return __math_invalid (x); if (a <= 0.75) { double t = x * x - 0.5625; return x * horner_6_f64 (t, data.P_17) / horner_6_f64 (t, data.Q_17); } if (a <= 0.9375) { double t = x * x - 0.87890625; return x * horner_7_f64 (t, data.P_37) / horner_7_f64 (t, data.Q_37); } double t = 1.0 / (sqrtl (-log1pl (-a))); return horner_8_f64 (t, data.P_57) / (copysign (t, x) * horner_9_f64 (t, data.Q_57)); } /* Extended-precision variant, which uses the above (or asymptotic estimate) as starting point for Newton refinement. This implementation is a port to C of the version in the SpecialFunctions.jl Julia package, with relaxed stopping criteria for the Newton refinement. */ long double erfinvl (long double x) { if (x == 0) return 0; double yf = __erfinv (x); long double y; if (isfinite (yf)) y = yf; else { /* Double overflowed, use asymptotic estimate instead. */ y = copysignl (sqrtl (-logl (1.0l - fabsl (x)) * SQRT_PIl), x); if (!isfinite (y)) return y; } double eps = fabs (yf - nextafter (yf, 0)); while (true) { long double dy = HF_SQRT_PIl * (erfl (y) - x) * exp (y * y); y -= dy; /* Stopping criterion is different to Julia implementation, but is enough to ensure result is accurate when rounded to double-precision. */ if (fabsl (dy) < eps) break; } return y; } diff --git a/contrib/arm-optimized-routines/pl/math/exp.c b/contrib/arm-optimized-routines/math/aarch64/experimental/exp_inline.h similarity index 93% rename from contrib/arm-optimized-routines/pl/math/exp.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/exp_inline.h index 90253b68875d..1a327c1e67d3 100644 --- a/contrib/arm-optimized-routines/pl/math/exp.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/exp_inline.h @@ -1,163 +1,159 @@ /* * Double-precision e^x function. * - * Copyright (c) 2018-2023, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +#ifndef PL_MATH_EXP_INLINE_H +#define PL_MATH_EXP_INLINE_H + #include <float.h> #include <math.h> #include <stdint.h> #include "math_config.h" #define N (1 << EXP_TABLE_BITS) #define InvLn2N __exp_data.invln2N #define NegLn2hiN __exp_data.negln2hiN #define NegLn2loN __exp_data.negln2loN #define Shift __exp_data.shift #define T __exp_data.tab #define C2 __exp_data.poly[5 - EXP_POLY_ORDER] #define C3 __exp_data.poly[6 - EXP_POLY_ORDER] #define C4 __exp_data.poly[7 - EXP_POLY_ORDER] #define C5 __exp_data.poly[8 - EXP_POLY_ORDER] #define C6 __exp_data.poly[9 - EXP_POLY_ORDER] /* Handle cases that may overflow or underflow when computing the result that is scale*(1+TMP) without intermediate rounding. The bit representation of scale is in SBITS, however it has a computed exponent that may have overflown into the sign bit so that needs to be adjusted before using it as a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ static inline double -specialcase (double_t tmp, uint64_t sbits, uint64_t ki) +exp_inline_special_case (double_t tmp, uint64_t sbits, uint64_t ki) { double_t scale, y; if ((ki & 0x80000000) == 0) { /* k > 0, the exponent of scale might have overflowed by <= 460.
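Subtracting 1009 from the biased exponent keeps scale representable as a finite double, and the final multiply by 0x1p1009 restores the intended magnitude, so overflow can only occur in the final result.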
*/ sbits -= 1009ull << 52; scale = asdouble (sbits); y = 0x1p1009 * (scale + scale * tmp); return check_oflow (eval_as_double (y)); } /* k < 0, need special care in the subnormal range. */ sbits += 1022ull << 52; scale = asdouble (sbits); y = scale + scale * tmp; if (y < 1.0) { /* Round y to the right precision before scaling it into the subnormal range to avoid double rounding that can cause 0.5+E/2 ulp error where E is the worst-case ulp error outside the subnormal range. So this is only useful if the goal is better than 1 ulp worst-case error. */ double_t hi, lo; lo = scale - y + scale * tmp; hi = 1.0 + y; lo = 1.0 - hi + y + lo; y = eval_as_double (hi + lo) - 1.0; /* Avoid -0.0 with downward rounding. */ if (WANT_ROUNDING && y == 0.0) y = 0.0; /* The underflow exception needs to be signaled explicitly. */ force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); } y = 0x1p-1022 * y; return check_uflow (eval_as_double (y)); } /* Top 12 bits of a double (sign and exponent bits). */ static inline uint32_t top12 (double x) { return asuint64 (x) >> 52; } /* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. If hastail is 0 then xtail is assumed to be 0 too. */ static inline double -exp_inline (double x, double xtail, int hastail) +exp_inline (double x, double xtail) { uint32_t abstop; uint64_t ki, idx, top, sbits; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t kd, z, r, r2, scale, tail, tmp; abstop = top12 (x) & 0x7ff; if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54))) { if (abstop - top12 (0x1p-54) >= 0x80000000) /* Avoid spurious underflow for tiny x. */ /* Note: 0 is common input. */ return WANT_ROUNDING ? 1.0 + x : 1.0; if (abstop >= top12 (1024.0)) { if (asuint64 (x) == asuint64 (-INFINITY)) return 0.0; if (abstop >= top12 (INFINITY)) return 1.0 + x; if (asuint64 (x) >> 63) return __math_uflow (0); else return __math_oflow (0); } /* Large x is special cased below. */ abstop = 0; } /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ z = InvLn2N * x; #if TOINT_INTRINSICS kd = roundtoint (z); ki = converttoint (z); #elif EXP_USE_TOINT_NARROW /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ kd = eval_as_double (z + Shift); ki = asuint64 (kd) >> 16; kd = (double_t) (int32_t) ki; #else /* z - kd is in [-1, 1] in non-nearest rounding modes. */ kd = eval_as_double (z + Shift); ki = asuint64 (kd); kd -= Shift; #endif r = x + kd * NegLn2hiN + kd * NegLn2loN; /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ - if (hastail) + if (!__builtin_constant_p (xtail) || xtail != 0.0) r += xtail; /* 2^(k/N) ~= scale * (1 + tail). */ idx = 2 * (ki % N); top = ki << (52 - EXP_TABLE_BITS); tail = asdouble (T[idx]); /* This is only a valid scale when -1023*N < k < 1024*N. */ sbits = T[idx + 1] + top; /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ /* Evaluation is optimized assuming superscalar pipelined execution. */ r2 = r * r; /* Without fma the worst case error is 0.25/N ulp larger. */ /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. 
*/ #if EXP_POLY_ORDER == 4 tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4); #elif EXP_POLY_ORDER == 5 tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); #elif EXP_POLY_ORDER == 6 tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6); #endif if (unlikely (abstop == 0)) - return specialcase (tmp, sbits, ki); + return exp_inline_special_case (tmp, sbits, ki); scale = asdouble (sbits); /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there is no spurious underflow here even without fma. */ return eval_as_double (scale + scale * tmp); } -/* May be useful for implementing pow where more than double - precision input is needed. */ -double -__exp_dd (double x, double xtail) -{ - return exp_inline (x, xtail, 1); -} - +#endif diff --git a/contrib/arm-optimized-routines/pl/math/expf_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/expf_data.c similarity index 93% rename from contrib/arm-optimized-routines/pl/math/expf_data.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/expf_data.c index 474ad57a29a0..958f705cc676 100644 --- a/contrib/arm-optimized-routines/pl/math/expf_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/expf_data.c @@ -1,31 +1,31 @@ /* * Coeffs and table entries for single-precision exp. Copied from * math/exp2f_data.c, with EXP2F_TABLE_BITS == 32. * - * Copyright (c) 2017-2023, Arm Limited. + * Copyright (c) 2017-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #define N (1 << EXPF_TABLE_BITS) const struct expf_data __expf_data = { /* tab[i] = uint(2^(i/N)) - (i << 52-BITS) used for computing 2^(k/N) for an int |k| < 150 N as - double(tab[k%N] + (k << 52-BITS)) */ + double(tab[k%N] + (k << 52-BITS)). */ .tab = { 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, }, .invln2_scaled = 0x1.71547652b82fep+0 * N, .poly_scaled = { 0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N, }, }; diff --git a/contrib/arm-optimized-routines/pl/math/expm1_2u5.c b/contrib/arm-optimized-routines/math/aarch64/experimental/expm1_2u5.c similarity index 83% rename from contrib/arm-optimized-routines/pl/math/expm1_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/expm1_2u5.c index f7d431198614..a4805e832af3 100644 --- a/contrib/arm-optimized-routines/pl/math/expm1_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/expm1_2u5.c @@ -1,85 +1,85 @@ /* * Double-precision e^x - 1 function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "poly_scalar_f64.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define InvLn2 0x1.71547652b82fep0 #define Ln2hi 0x1.62e42fefa39efp-1 #define Ln2lo 0x1.abc9e3b39803fp-56 #define Shift 0x1.8p52 /* 0x1p-51, below which expm1(x) is within 2 ULP of x. */ #define TinyBound 0x3cc0000000000000 /* Above which expm1(x) overflows. */ #define BigBound 0x1.63108c75a1937p+9 /* Below which expm1(x) rounds to -1. */ #define NegBound -0x1.740bf7c0d927dp+9 #define AbsMask 0x7fffffffffffffff /* Approximation for exp(x) - 1 using polynomial on a reduced interval. The maximum observed error is 2.17 ULP: expm1(0x1.63f90a866748dp-2) got 0x1.a9af56603878ap-2 want 0x1.a9af566038788p-2. */ double expm1 (double x) { uint64_t ix = asuint64 (x); uint64_t ax = ix & AbsMask; /* Tiny, +Infinity. */ if (ax <= TinyBound || ix == 0x7ff0000000000000) return x; /* +/-NaN. */ if (ax > 0x7ff0000000000000) return __math_invalid (x); /* Result is too large to be represented as a double. */ if (x >= 0x1.63108c75a1937p+9) return __math_oflow (0); /* Result rounds to -1 in double precision. */ if (x <= NegBound) return -1; /* Reduce argument to smaller range: Let i = round(x / ln2) and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where 2^i is exact because i is an integer. */ double j = fma (InvLn2, x, Shift) - Shift; int64_t i = j; double f = fma (j, -Ln2hi, x); f = fma (j, -Ln2lo, f); /* Approximate expm1(f) using polynomial. Taylor expansion for expm1(x) has the form: x + ax^2 + bx^3 + cx^4 .... So we calculate the polynomial P(f) = a + bf + cf^2 + ... and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ double f2 = f * f; double f4 = f2 * f2; double p = fma (f2, estrin_10_f64 (f, f2, f4, f4 * f4, __expm1_poly), f); /* Assemble the result, using a slight rearrangement to achieve acceptable accuracy. expm1(x) ~= 2^i * (p + 1) - 1 Let t = 2^(i - 1). */ double t = ldexp (0.5, i); /* expm1(x) ~= 2 * (p * t + (t - 1/2)). */ return 2 * fma (p, t, t - 0.5); } -PL_SIG (S, D, 1, expm1, -9.9, 9.9) -PL_TEST_ULP (expm1, 1.68) -PL_TEST_SYM_INTERVAL (expm1, 0, 0x1p-51, 1000) -PL_TEST_INTERVAL (expm1, 0x1p-51, 0x1.63108c75a1937p+9, 100000) -PL_TEST_INTERVAL (expm1, -0x1p-51, -0x1.740bf7c0d927dp+9, 100000) -PL_TEST_INTERVAL (expm1, 0x1.63108c75a1937p+9, inf, 100) -PL_TEST_INTERVAL (expm1, -0x1.740bf7c0d927dp+9, -inf, 100) +TEST_SIG (S, D, 1, expm1, -9.9, 9.9) +TEST_ULP (expm1, 1.68) +TEST_SYM_INTERVAL (expm1, 0, 0x1p-51, 1000) +TEST_INTERVAL (expm1, 0x1p-51, 0x1.63108c75a1937p+9, 100000) +TEST_INTERVAL (expm1, -0x1p-51, -0x1.740bf7c0d927dp+9, 100000) +TEST_INTERVAL (expm1, 0x1.63108c75a1937p+9, inf, 100) +TEST_INTERVAL (expm1, -0x1.740bf7c0d927dp+9, -inf, 100) diff --git a/contrib/arm-optimized-routines/math/aarch64/experimental/expm1_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/expm1_data.c new file mode 100644 index 000000000000..955895056924 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/expm1_data.c @@ -0,0 +1,21 @@ +/* + * Coefficients for double-precision e^x - 1 function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +/* Generated using fpminimax, see tools/expm1.sollya for details.
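+ The array holds P in expm1(f) ~ f + f^2 * P(f) on the reduced interval; the leading coefficient 0x1p-1 is the exact Taylor term 1/2, and the next entries approximate 1/6, 1/24 and so on.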
*/ +const double __expm1_poly[] = { 0x1p-1, + 0x1.5555555555559p-3, + 0x1.555555555554bp-5, + 0x1.111111110f663p-7, + 0x1.6c16c16c1b5f3p-10, + 0x1.a01a01affa35dp-13, + 0x1.a01a018b4ecbbp-16, + 0x1.71ddf82db5bb4p-19, + 0x1.27e517fc0d54bp-22, + 0x1.af5eedae67435p-26, + 0x1.1f143d060a28ap-29 }; diff --git a/contrib/arm-optimized-routines/pl/math/expm1f_1u6.c b/contrib/arm-optimized-routines/math/aarch64/experimental/expm1f_1u6.c similarity index 82% rename from contrib/arm-optimized-routines/pl/math/expm1f_1u6.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/expm1f_1u6.c index e12c9ba9a8a2..03d1e9dc31ef 100644 --- a/contrib/arm-optimized-routines/pl/math/expm1f_1u6.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/expm1f_1u6.c @@ -1,79 +1,79 @@ /* * Single-precision e^x - 1 function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "poly_scalar_f32.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define Shift (0x1.8p23f) #define InvLn2 (0x1.715476p+0f) #define Ln2hi (0x1.62e4p-1f) #define Ln2lo (0x1.7f7d1cp-20f) #define AbsMask (0x7fffffff) -#define InfLimit \ +#define InfLimit \ (0x1.644716p6) /* Smallest value of x for which expm1(x) overflows. */ -#define NegLimit \ +#define NegLimit \ (-0x1.9bbabcp+6) /* Largest value of x for which expm1(x) rounds to -1. */ /* Approximation for exp(x) - 1 using polynomial on a reduced interval. The maximum error is 1.51 ULP: expm1f(0x1.8baa96p-2) got 0x1.e2fb9p-2 want 0x1.e2fb94p-2. */ float expm1f (float x) { uint32_t ix = asuint (x); uint32_t ax = ix & AbsMask; /* Tiny: |x| < 0x1p-23. expm1(x) is closely approximated by x. Inf: x == +Inf => expm1(x) = x. */ if (ax <= 0x34000000 || (ix == 0x7f800000)) return x; /* +/-NaN. */ if (ax > 0x7f800000) return __math_invalidf (x); if (x >= InfLimit) return __math_oflowf (0); if (x <= NegLimit || ix == 0xff800000) return -1; /* Reduce argument to smaller range: Let i = round(x / ln2) and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where 2^i is exact because i is an integer. */ float j = fmaf (InvLn2, x, Shift) - Shift; int32_t i = j; float f = fmaf (j, -Ln2hi, x); f = fmaf (j, -Ln2lo, f); /* Approximate expm1(f) using polynomial. Taylor expansion for expm1(x) has the form: x + ax^2 + bx^3 + cx^4 .... So we calculate the polynomial P(f) = a + bf + cf^2 + ... and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ float p = fmaf (f * f, horner_4_f32 (f, __expm1f_poly), f); /* Assemble the result, using a slight rearrangement to achieve acceptable accuracy. expm1(x) ~= 2^i * (p + 1) - 1 Let t = 2^(i - 1). */ float t = ldexpf (0.5f, i); /* expm1(x) ~= 2 * (p * t + (t - 1/2)).
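The reduction and reconstruction shared by both expm1 variants above can be seen end to end in a minimal scalar sketch. This is an illustration only: the name expm1_sketch is hypothetical, there is no special-case handling, and a truncated Taylor series stands in for the fpminimax polynomial, so its accuracy is far below that of the routines in this patch.

#include <math.h>

/* Sketch: reduce x to f = x - j*ln2 with |f| <= ln2/2 using the
   round-to-nearest Shift trick, approximate expm1(f) ~= f + f^2 * P(f),
   then reconstruct expm1(x) = 2 * (p * t + (t - 0.5)) with t = 2^(i-1),
   which avoids forming 2^i directly.  */
static double
expm1_sketch (double x)
{
  const double InvLn2 = 0x1.71547652b82fep0;
  const double Ln2hi = 0x1.62e42fefa39efp-1, Ln2lo = 0x1.abc9e3b39803fp-56;
  const double Shift = 0x1.8p52;
  double j = fma (InvLn2, x, Shift) - Shift;
  int i = (int) j;
  double f = fma (j, -Ln2hi, x);
  f = fma (j, -Ln2lo, f);
  /* Truncated Taylor terms of (expm1(f) - f) / f^2, illustrative only.  */
  double p = fma (f * f, 0.5 + f * (1.0 / 6.0 + f * (1.0 / 24.0)), f);
  double t = ldexp (0.5, i);
  return 2 * fma (p, t, t - 0.5);
}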
*/ return 2 * fmaf (p, t, t - 0.5f); } -PL_SIG (S, F, 1, expm1, -9.9, 9.9) -PL_TEST_ULP (expm1f, 1.02) -PL_TEST_SYM_INTERVAL (expm1f, 0, 0x1p-23, 1000) -PL_TEST_INTERVAL (expm1f, 0x1p-23, 0x1.644716p6, 100000) -PL_TEST_INTERVAL (expm1f, 0x1.644716p6, inf, 1000) -PL_TEST_INTERVAL (expm1f, -0x1p-23, -0x1.9bbabcp+6, 100000) -PL_TEST_INTERVAL (expm1f, -0x1.9bbabcp+6, -inf, 1000) +TEST_SIG (S, F, 1, expm1, -9.9, 9.9) +TEST_ULP (expm1f, 1.02) +TEST_SYM_INTERVAL (expm1f, 0, 0x1p-23, 1000) +TEST_INTERVAL (expm1f, 0x1p-23, 0x1.644716p6, 100000) +TEST_INTERVAL (expm1f, 0x1.644716p6, inf, 1000) +TEST_INTERVAL (expm1f, -0x1p-23, -0x1.9bbabcp+6, 100000) +TEST_INTERVAL (expm1f, -0x1.9bbabcp+6, -inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/expm1f_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/expm1f_data.c similarity index 59% rename from contrib/arm-optimized-routines/pl/math/expm1f_data.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/expm1f_data.c index 9d02dc448ebb..92d9189ff503 100644 --- a/contrib/arm-optimized-routines/pl/math/expm1f_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/expm1f_data.c @@ -1,12 +1,12 @@ /* * Coefficients for single-precision e^x - 1 function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" /* Generated using fpminimax, see tools/expm1f.sollya for details. */ -const float __expm1f_poly[] = {0x1.fffffep-2, 0x1.5554aep-3, 0x1.555736p-5, - 0x1.12287cp-7, 0x1.6b55a2p-10}; +const float __expm1f_poly[] = { 0x1.fffffep-2, 0x1.5554aep-3, 0x1.555736p-5, + 0x1.12287cp-7, 0x1.6b55a2p-10 }; diff --git a/contrib/arm-optimized-routines/pl/math/log10_2u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/log10_2u.c similarity index 84% rename from contrib/arm-optimized-routines/pl/math/log10_2u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/log10_2u.c index 74828ea9ef3c..84ee1544fe1a 100644 --- a/contrib/arm-optimized-routines/pl/math/log10_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/log10_2u.c @@ -1,150 +1,151 @@ /* * Double-precision log10(x) function. * - * Copyright (c) 2020-2023, Arm Limited. + * Copyright (c) 2020-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" /* Polynomial coefficients and lookup tables. */ #define T __log10_data.tab #define T2 __log10_data.tab2 #define B __log10_data.poly1 #define A __log10_data.poly #define Ln2hi __log10_data.ln2hi #define Ln2lo __log10_data.ln2lo #define InvLn10 __log10_data.invln10 #define N (1 << LOG10_TABLE_BITS) #define OFF 0x3fe6000000000000 #define LO asuint64 (1.0 - 0x1p-4) #define HI asuint64 (1.0 + 0x1.09p-4) /* Top 16 bits of a double. */ static inline uint32_t top16 (double x) { return asuint64 (x) >> 48; } /* Fast and low accuracy implementation of log10. The implementation is similar to that of math/log, except that: - Polynomials are computed for log10(1+r) with r on same intervals as log. - - Lookup parameters are scaled (at runtime) to switch from base e to base 10. - Many errors above 1.59 ulp are observed across the whole range of doubles. - The greatest observed error is 1.61 ulp, at around 0.965: - log10(0x1.dc8710333a29bp-1) got -0x1.fee26884905a6p-6 - want -0x1.fee26884905a8p-6. 
*/ + - Lookup parameters are scaled (at runtime) to switch from base e to + base 10. Many errors above 1.59 ulp are observed across the whole range of + doubles. The greatest observed error is 1.61 ulp, at around 0.965: + log10(0x1.dc8710333a29bp-1) got -0x1.fee26884905a6p-6 + want -0x1.fee26884905a8p-6. */ double log10 (double x) { /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo; uint64_t ix, iz, tmp; uint32_t top; int k, i; ix = asuint64 (x); top = top16 (x); if (unlikely (ix - LO < HI - LO)) { /* Handle close to 1.0 inputs separately. */ /* Fix sign of zero with downward rounding when x==1. */ if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0))) return 0; r = x - 1.0; r2 = r * r; r3 = r * r2; y = r3 * (B[1] + r * B[2] + r2 * B[3] + r3 - * (B[4] + r * B[5] + r2 * B[6] - + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); + * (B[4] + r * B[5] + r2 * B[6] + + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); /* Worst-case error is around 0.507 ULP. */ w = r * 0x1p27; double_t rhi = r + w - w; double_t rlo = r - rhi; w = rhi * rhi * B[0]; hi = r + w; lo = r - hi + w; lo += B[0] * rlo * (rhi + r); y += lo; y += hi; /* Scale by 1/ln(10). Polynomial already contains scaling. */ y = y * InvLn10; return eval_as_double (y); } if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010)) { /* x < 0x1p-1022 or inf or nan. */ if (ix * 2 == 0) return __math_divzero (1); if (ix == asuint64 (INFINITY)) /* log10(inf) == inf. */ return x; if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) return __math_invalid (x); /* x is subnormal, normalize it. */ ix = asuint64 (x * 0x1p52); ix -= 52ULL << 52; } /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ tmp = ix - OFF; i = (tmp >> (52 - LOG10_TABLE_BITS)) % N; k = (int64_t) tmp >> 52; /* arithmetic shift. */ iz = ix - (tmp & 0xfffULL << 52); invc = T[i].invc; logc = T[i].logc; z = asdouble (iz); /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ /* r ~= z/c - 1, |r| < 1/(2*N). */ #if HAVE_FAST_FMA /* rounding error: 0x1p-55/N. */ r = fma (z, invc, -1.0); #else /* rounding error: 0x1p-55/N + 0x1p-66. */ r = (z - T2[i].chi - T2[i].clo) * invc; #endif kd = (double_t) k; /* w = log(c) + k*Ln2hi. */ w = kd * Ln2hi + logc; hi = w + r; lo = w - hi + r + kd * Ln2lo; /* log10(x) = (w + r)/log(10) + (log10(1+r) - r/log(10)). */ r2 = r * r; /* rounding error: 0x1p-54/N^2. */ /* Scale by 1/ln(10). Polynomial already contains scaling. 
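The exponent/index extraction above is dense, so a standalone sketch of just that split may help. Names here are hypothetical; TABLE_BITS = 7 matches the N == 128 tables in this file but is an assumption about LOG10_TABLE_BITS.

#include <stdint.h>
#include <string.h>

/* Sketch of the split x = 2^k * z used above: subtracting OFF re-biases
   the exponent so that z lands in [0x1.6p-1, 0x1.6p0), the top mantissa
   bits of the difference give the subinterval index i, and the
   arithmetically shifted top bits give k.  */
#define TABLE_BITS 7
#define OFF 0x3fe6000000000000ULL

static void
log_split (double x, int *k, int *i, double *z)
{
  uint64_t ix;
  memcpy (&ix, &x, sizeof ix);
  uint64_t tmp = ix - OFF;
  *i = (int) ((tmp >> (52 - TABLE_BITS)) % (1 << TABLE_BITS));
  *k = (int) ((int64_t) tmp >> 52); /* Arithmetic shift.  */
  uint64_t iz = ix - (tmp & 0xfffULL << 52);
  memcpy (z, &iz, sizeof iz);
}

With invc and logc looked up at index i, the routine above only needs r ~= z * invc - 1 and the short polynomial in r.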
*/ - y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi; + y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + + hi; y = y * InvLn10; return eval_as_double (y); } // clang-format off #if USE_GLIBC_ABI strong_alias (log10, __log10_finite) hidden_alias (log10, __ieee754_log10) #if LDBL_MANT_DIG == 53 long double log10l (long double x) { return log10 (x); } #endif #endif // clang-format on -PL_SIG (S, D, 1, log10, 0.01, 11.1) -PL_TEST_ULP (log10, 1.11) -PL_TEST_INTERVAL (log10, 0, 0xffff000000000000, 10000) -PL_TEST_INTERVAL (log10, 0x1p-4, 0x1p4, 40000) -PL_TEST_INTERVAL (log10, 0, inf, 40000) +TEST_SIG (S, D, 1, log10, 0.01, 11.1) +TEST_ULP (log10, 1.11) +TEST_INTERVAL (log10, 0, 0xffff000000000000, 10000) +TEST_INTERVAL (log10, 0x1p-4, 0x1p4, 40000) +TEST_INTERVAL (log10, 0, inf, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/log10_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/log10_data.c similarity index 99% rename from contrib/arm-optimized-routines/pl/math/log10_data.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/log10_data.c index 9976f19cd6df..20b5ef883ed8 100644 --- a/contrib/arm-optimized-routines/pl/math/log10_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/log10_data.c @@ -1,337 +1,337 @@ /* * Data for log10. * - * Copyright (c) 2020-2023, Arm Limited. + * Copyright (c) 2020-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #define N (1 << LOG10_TABLE_BITS) const struct log10_data __log10_data = { .ln2hi = 0x1.62e42fefa3800p-1, .ln2lo = 0x1.ef35793c76730p-45, .invln10 = 0x1.bcb7b1526e50ep-2, .poly1 = { #if LOG10_POLY1_ORDER == 12 // relative error: 0x1.c04d76cp-63 // in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval) -0x1p-1, 0x1.5555555555577p-2, -0x1.ffffffffffdcbp-3, 0x1.999999995dd0cp-3, -0x1.55555556745a7p-3, 0x1.24924a344de3p-3, -0x1.fffffa4423d65p-4, 0x1.c7184282ad6cap-4, -0x1.999eb43b068ffp-4, 0x1.78182f7afd085p-4, -0x1.5521375d145cdp-4, #endif }, .poly = { #if N == 128 && LOG10_POLY_ORDER == 6 // relative error: 0x1.926199e8p-56 // abs error: 0x1.882ff33p-65 // in -0x1.fp-9 0x1.fp-9 -0x1.0000000000001p-1, 0x1.555555551305bp-2, -0x1.fffffffeb459p-3, 0x1.999b324f10111p-3, -0x1.55575e506c89fp-3, #endif }, /* Algorithm: x = 2^k z log(x) = k ln2 + log(c) + log(z/c) log(z/c) = poly(z/c - 1) where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls into the ith one, then table entries are computed as tab[i].invc = 1/c tab[i].logc = (double)log(c) tab2[i].chi = (double)c tab2[i].clo = (double)(c - (double)c) where c is near the center of the subinterval and is chosen by trying +-2^29 floating point invc candidates around 1/center and selecting one for which 1) the rounding error in 0x1.8p9 + logc is 0, 2) the rounding error in z - chi - clo is < 0x1p-66 and 3) the rounding error in (double)log(c) is minimized (< 0x1p-66). Note: 1) ensures that k*ln2hi + logc can be computed without rounding error, 2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a single rounding error when there is no fast fma for z*invc - 1, 3) ensures that logc + poly(z/c - 1) has small error, however near x == 1 when |log(x)| < 0x1p-4, this is not enough so that is special cased. 
*/ .tab = { #if N == 128 {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2}, {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2}, {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2}, {0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2}, {0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2}, {0x1.69147332f0cbap+0, -0x1.602d076180000p-2}, {0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2}, {0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2}, {0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2}, {0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2}, {0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2}, {0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2}, {0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2}, {0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2}, {0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2}, {0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2}, {0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2}, {0x1.52aff42064583p+0, -0x1.1e9e129279000p-2}, {0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2}, {0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2}, {0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2}, {0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2}, {0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2}, {0x1.4880524d48434p+0, -0x1.feb224586f000p-3}, {0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3}, {0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3}, {0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3}, {0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3}, {0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3}, {0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3}, {0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3}, {0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3}, {0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3}, {0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3}, {0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3}, {0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3}, {0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3}, {0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3}, {0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3}, {0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3}, {0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3}, {0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3}, {0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3}, {0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3}, {0x1.293726014b530p+0, -0x1.31b996b490000p-3}, {0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3}, {0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3}, {0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3}, {0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3}, {0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3}, {0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4}, {0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4}, {0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4}, {0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4}, {0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4}, {0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4}, {0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4}, {0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4}, {0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4}, {0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4}, {0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4}, {0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4}, {0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4}, {0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4}, {0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5}, {0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5}, {0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5}, {0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5}, {0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5}, {0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5}, {0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5}, {0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5}, {0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6}, {0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6}, {0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6}, 
{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6}, {0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7}, {0x1.02865137932a9p+0, -0x1.419355daa0000p-7}, {0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8}, {0x1.008040614b195p+0, -0x1.0040979240000p-9}, {0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9}, {0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7}, {0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6}, {0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6}, {0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5}, {0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5}, {0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5}, {0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5}, {0x1.e01e009609a56p-1, 0x1.07598e598c000p-4}, {0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4}, {0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4}, {0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4}, {0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4}, {0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4}, {0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4}, {0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4}, {0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4}, {0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3}, {0x1.bf583eeece73fp-1, 0x1.147858292b000p-3}, {0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3}, {0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3}, {0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3}, {0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3}, {0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3}, {0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3}, {0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3}, {0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3}, {0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3}, {0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3}, {0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3}, {0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3}, {0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3}, {0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3}, {0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3}, {0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3}, {0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3}, {0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2}, {0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2}, {0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2}, {0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2}, {0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2}, {0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2}, {0x1.8060195f40260p-1, 0x1.2595fd7636800p-2}, {0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2}, {0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2}, {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2}, {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2}, {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}, #endif }, #if !HAVE_FAST_FMA .tab2 = { #if N == 128 {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56}, {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55}, {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55}, {0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57}, {0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56}, {0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55}, {0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55}, {0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56}, {0x1.710000e86978p-1, 0x1.bff6671097952p-56}, {0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55}, {0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57}, {0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57}, {0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55}, {0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56}, {0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55}, {0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55}, {0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55}, {0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55}, {0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55}, {0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55}, {0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55}, {0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56}, 
{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55}, {0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55}, {0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55}, {0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56}, {0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55}, {0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56}, {0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55}, {0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55}, {0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60}, {0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55}, {0x1.a10001145b006p-1, 0x1.4ff489958da56p-56}, {0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55}, {0x1.a500010971d79p-1, 0x1.8fecadd78793p-55}, {0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55}, {0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55}, {0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57}, {0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55}, {0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57}, {0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58}, {0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56}, {0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56}, {0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55}, {0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56}, {0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57}, {0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57}, {0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55}, {0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55}, {0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57}, {0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55}, {0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55}, {0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56}, {0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57}, {0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55}, {0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55}, {0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56}, {0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55}, {0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58}, {0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56}, {0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56}, {0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55}, {0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55}, {0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57}, {0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56}, {0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56}, {0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56}, {0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58}, {0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55}, {0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56}, {0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58}, {0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55}, {0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59}, {0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55}, {0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55}, {0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57}, {0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56}, {0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57}, {0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56}, {0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57}, {0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55}, {0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54}, {0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54}, {0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55}, {0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57}, {0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54}, {0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55}, {0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56}, {0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55}, {0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54}, {0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54}, {0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55}, {0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54}, {0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54}, {0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57}, {0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54}, {0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54}, 
{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54}, {0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56}, {0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56}, {0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56}, {0x1.2b00014556313p+0, -0x1.2808233f21f02p-54}, {0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55}, {0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55}, {0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55}, {0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54}, {0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54}, {0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55}, {0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54}, {0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55}, {0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56}, {0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54}, {0x1.410001532aff4p+0, 0x1.7f8375f198524p-57}, {0x1.4300017478b29p+0, 0x1.301e672dc5143p-55}, {0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55}, {0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54}, {0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54}, {0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54}, {0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54}, {0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54}, {0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57}, {0x1.530001605277ap+0, -0x1.6bfcece233209p-54}, {0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55}, {0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54}, {0x1.5900017e61012p+0, 0x1.87ec581afef9p-55}, {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54}, {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54}, {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}, #endif }, -#endif /* !HAVE_FAST_FMA */ +#endif /* !HAVE_FAST_FMA. */ }; diff --git a/contrib/arm-optimized-routines/pl/math/log1p_2u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/log1p_2u.c similarity index 91% rename from contrib/arm-optimized-routines/pl/math/log1p_2u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/log1p_2u.c index f9491ce52b44..a1ff309ecb5f 100644 --- a/contrib/arm-optimized-routines/pl/math/log1p_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/log1p_2u.c @@ -1,131 +1,131 @@ /* * Double-precision log(1+x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "poly_scalar_f64.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define Ln2Hi 0x1.62e42fefa3800p-1 #define Ln2Lo 0x1.ef35793c76730p-45 #define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)). */ -#define OneMHfRt2Top \ +#define OneMHfRt2Top \ 0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)). */ #define OneTop12 0x3ff #define BottomMask 0xffffffff #define OneMHfRt2 0x3fd2bec333018866 #define Rt2MOne 0x3fda827999fcef32 #define AbsMask 0x7fffffffffffffff #define ExpM63 0x3c00 static inline double eval_poly (double f) { double f2 = f * f; double f4 = f2 * f2; double f8 = f4 * f4; return estrin_18_f64 (f, f2, f4, f8, f8 * f8, __log1p_data.coeffs); } /* log1p approximation using polynomial on reduced interval. Largest observed errors are near the lower boundary of the region where k is 0. Maximum measured error: 1.75ULP. log1p(-0x1.2e1aea97b3e5cp-2) got -0x1.65fb8659a2f9p-2 want -0x1.65fb8659a2f92p-2. */ double log1p (double x) { uint64_t ix = asuint64 (x); uint64_t ia = ix & AbsMask; uint32_t ia16 = ia >> 48; /* Handle special cases first. */ if (unlikely (ia16 >= 0x7ff0 || ix >= 0xbff0000000000000 || ix == 0x8000000000000000)) { if (ix == 0x8000000000000000 || ix == 0x7ff0000000000000) { /* x == -0 => log1p(x) = -0. 
x == Inf => log1p(x) = Inf. */ return x; } if (ix == 0xbff0000000000000) { /* x == -1 => log1p(x) = -Inf. */ return __math_divzero (-1); ; } if (ia16 >= 0x7ff0) { /* x == +/-NaN => log1p(x) = NaN. */ return __math_invalid (asdouble (ia)); } /* x < -1 => log1p(x) = NaN. x == -Inf => log1p(x) = NaN. */ return __math_invalid (x); } /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f is in [sqrt(2)/2, sqrt(2)]): log1p(x) = k*log(2) + log1p(f). f may not be representable exactly, so we need a correction term: let m = round(1 + x), c = (1 + x) - m. c << m: at very small x, log1p(x) ~ x, hence: log(1+x) - log(m) ~ c/m. We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */ uint64_t sign = ix & ~AbsMask; if (ia <= OneMHfRt2 || (!sign && ia <= Rt2MOne)) { if (unlikely (ia16 <= ExpM63)) { /* If exponent of x <= -63 then shortcut the polynomial and avoid underflow by just returning x, which is exactly rounded in this region. */ return x; } /* If x is in [sqrt(2)/2 - 1, sqrt(2) - 1] then we can shortcut all the logic below, as k = 0 and f = x and therefore representable exactly. All we need is to return the polynomial. */ return fma (x, eval_poly (x) * x, x); } /* Obtain correctly scaled k by manipulation in the exponent. */ double m = x + 1; uint64_t mi = asuint64 (m); uint32_t u = (mi >> 32) + OneMHfRt2Top; int32_t k = (int32_t) (u >> 20) - OneTop12; /* Correction term c/m. */ double cm = (x - (m - 1)) / m; /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ uint32_t utop = (u & 0x000fffff) + HfRt2Top; uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask); double f = asdouble (u_red) - 1; /* Approximate log1p(x) on the reduced input using a polynomial. Because log1p(0)=0 we choose an approximation of the form: x + C0*x^2 + C1*x^3 + C2x^4 + ... Hence approximation has the form f + f^2 * P(f) where P(x) = C0 + C1*x + C2x^2 + ... */ double p = fma (f, eval_poly (f) * f, f); double kd = k; double y = fma (Ln2Lo, kd, cm); return y + fma (Ln2Hi, kd, p); } -PL_SIG (S, D, 1, log1p, -0.9, 10.0) -PL_TEST_ULP (log1p, 1.26) -PL_TEST_SYM_INTERVAL (log1p, 0.0, 0x1p-23, 50000) -PL_TEST_SYM_INTERVAL (log1p, 0x1p-23, 0.001, 50000) -PL_TEST_SYM_INTERVAL (log1p, 0.001, 1.0, 50000) -PL_TEST_SYM_INTERVAL (log1p, 1.0, inf, 5000) +TEST_SIG (S, D, 1, log1p, -0.9, 10.0) +TEST_ULP (log1p, 1.26) +TEST_SYM_INTERVAL (log1p, 0.0, 0x1p-23, 50000) +TEST_SYM_INTERVAL (log1p, 0x1p-23, 0.001, 50000) +TEST_SYM_INTERVAL (log1p, 0.001, 1.0, 50000) +TEST_SYM_INTERVAL (log1p, 1.0, inf, 5000) diff --git a/contrib/arm-optimized-routines/math/aarch64/experimental/log1p_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/log1p_data.c new file mode 100644 index 000000000000..91a7196d795f --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/log1p_data.c @@ -0,0 +1,20 @@ +/* + * Data used in double-precision log(1+x) function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +/* Polynomial coefficients generated using Remez algorithm, see + log1p.sollya for details. 
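The correction term is the subtle part of this routine: m = x + 1 rounds, and the rounding error c = (1 + x) - m would otherwise be lost, costing accuracy for small x. A scalar sketch of just the reduction (hypothetical name, constants copied from the defines above):

#include <stdint.h>
#include <string.h>

/* Sketch of the log1p reduction above: round m = 1 + x, recover the
   rounding error as c/m, and rebuild f + 1 in [sqrt(2)/2, sqrt(2)] by
   exponent manipulation.  The result is then k*ln2 + log1p(f) + c/m.  */
static void
log1p_reduce (double x, int *k, double *f, double *cm)
{
  double m = x + 1.0;
  uint64_t mi;
  memcpy (&mi, &m, sizeof mi);
  uint32_t u = (uint32_t) (mi >> 32) + 0x00095f62; /* OneMHfRt2Top.  */
  *k = (int) (u >> 20) - 0x3ff;
  *cm = (x - (m - 1.0)) / m; /* Correction term c/m.  */
  uint64_t fi = ((uint64_t) ((u & 0x000fffff) + 0x3fe6a09e) << 32)
		| (mi & 0xffffffff);
  double f1;
  memcpy (&f1, &fi, sizeof f1);
  *f = f1 - 1.0;
}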
*/ +const struct log1p_data __log1p_data + = { .coeffs + = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2, + 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3, + -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4, + 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4, + -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5, + 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4, + -0x1.cfa7385bdb37ep-6 } }; diff --git a/contrib/arm-optimized-routines/pl/math/log1pf_2u1.c b/contrib/arm-optimized-routines/math/aarch64/experimental/log1pf_2u1.c similarity index 93% rename from contrib/arm-optimized-routines/pl/math/log1pf_2u1.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/log1pf_2u1.c index e99174853720..fe4f93865220 100644 --- a/contrib/arm-optimized-routines/pl/math/log1pf_2u1.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/log1pf_2u1.c @@ -1,161 +1,161 @@ /* * Single-precision log(1+x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "poly_scalar_f32.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define Ln2 (0x1.62e43p-1f) #define SignMask (0x80000000) /* Biased exponent of the largest float m for which m^8 underflows. */ #define M8UFLOW_BOUND_BEXP 112 /* Biased exponent of the largest float for which we just return x. */ #define TINY_BOUND_BEXP 103 #define C(i) __log1pf_data.coeffs[i] static inline float eval_poly (float m, uint32_t e) { #ifdef LOG1PF_2U5 /* 2.5 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using slightly modified Estrin scheme (no x^0 term, and x term is just x). */ float p_12 = fmaf (m, C (1), C (0)); float p_34 = fmaf (m, C (3), C (2)); float p_56 = fmaf (m, C (5), C (4)); float p_78 = fmaf (m, C (7), C (6)); float m2 = m * m; float p_02 = fmaf (m2, p_12, m); float p_36 = fmaf (m2, p_56, p_34); float p_79 = fmaf (m2, C (8), p_78); float m4 = m2 * m2; float p_06 = fmaf (m4, p_36, p_02); if (unlikely (e < M8UFLOW_BOUND_BEXP)) return p_06; float m8 = m4 * m4; return fmaf (m8, p_79, p_06); #elif defined(LOG1PF_1U3) /* 1.3 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using Horner scheme. Our polynomial approximation for log1p has the form x + C1 * x^2 + C2 * x^3 + C3 * x^4 + ... Hence approximation has the form m + m^2 * P(m) where P(x) = C1 + C2 * x + C3 * x^2 + ... . */ return fmaf (m, m * horner_8_f32 (m, __log1pf_data.coeffs), m); #else #error No log1pf approximation exists with the requested precision. Options are 13 or 25. #endif } static inline uint32_t biased_exponent (uint32_t ix) { return (ix & 0x7f800000) >> 23; } /* log1pf approximation using polynomial on reduced interval. Worst-case error when using Estrin is roughly 2.02 ULP: log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */ float log1pf (float x) { uint32_t ix = asuint (x); uint32_t ia = ix & ~SignMask; uint32_t ia12 = ia >> 20; uint32_t e = biased_exponent (ix); /* Handle special cases first. */ if (unlikely (ia12 >= 0x7f8 || ix >= 0xbf800000 || ix == 0x80000000 || e <= TINY_BOUND_BEXP)) { if (ix == 0xff800000) { /* x == -Inf => log1pf(x) = NaN. */ return NAN; } if ((ix == 0x7f800000 || e <= TINY_BOUND_BEXP) && ia12 <= 0x7f8) { /* |x| < TinyBound => log1p(x) = x. x == Inf => log1pf(x) = Inf. */ return x; } if (ix == 0xbf800000) { /* x == -1.0 => log1pf(x) = -Inf. 
*/ return __math_divzerof (-1); } if (ia12 >= 0x7f8) { /* x == +/-NaN => log1pf(x) = NaN. */ return __math_invalidf (asfloat (ia)); } /* x < -1.0 => log1pf(x) = NaN. */ return __math_invalidf (x); } /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m is in [-0.25, 0.5]): log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). We approximate log1p(m) with a polynomial, then scale by k*log(2). Instead of doing this directly, we use an intermediate scale factor s = 4*k*log(2) to ensure the scale is representable as a normalised fp32 number. */ if (ix <= 0x3f000000 || ia <= 0x3e800000) { /* If x is in [-0.25, 0.5] then we can shortcut all the logic below, as k = 0 and m = x. All we need is to return the polynomial. */ return eval_poly (x, e); } float m = x + 1.0f; /* k is used to scale the input. 0x3f400000 is chosen as we are trying to reduce x to the range [-0.25, 0.5]. Inside this range, k is 0. Outside this range, if k is reinterpreted as (NOT CONVERTED TO) float: let k = sign * 2^p where sign = -1 if x < 0 1 otherwise and p is a negative integer whose magnitude increases with the magnitude of x. */ int k = (asuint (m) - 0x3f400000) & 0xff800000; /* By using integer arithmetic, we obtain the necessary scaling by subtracting the unbiased exponent of k from the exponent of x. */ float m_scale = asfloat (asuint (x) - k); /* Scale up to ensure that the scale factor is representable as normalised fp32 number (s in [2**-126,2**26]), and scale m down accordingly. */ float s = asfloat (asuint (4.0f) - k); m_scale = m_scale + fmaf (0.25f, s, -1.0f); float p = eval_poly (m_scale, biased_exponent (asuint (m_scale))); /* The scale factor to be applied back at the end - by multiplying float(k) by 2^-23 we get the unbiased exponent of k. */ float scale_back = (float) k * 0x1.0p-23f; /* Apply the scaling back. */ return fmaf (scale_back, Ln2, p); } -PL_SIG (S, F, 1, log1p, -0.9, 10.0) -PL_TEST_ULP (log1pf, 1.52) -PL_TEST_SYM_INTERVAL (log1pf, 0.0, 0x1p-23, 50000) -PL_TEST_SYM_INTERVAL (log1pf, 0x1p-23, 0.001, 50000) -PL_TEST_SYM_INTERVAL (log1pf, 0.001, 1.0, 50000) -PL_TEST_SYM_INTERVAL (log1pf, 1.0, inf, 5000) +TEST_SIG (S, F, 1, log1p, -0.9, 10.0) +TEST_ULP (log1pf, 1.52) +TEST_SYM_INTERVAL (log1pf, 0.0, 0x1p-23, 50000) +TEST_SYM_INTERVAL (log1pf, 0x1p-23, 0.001, 50000) +TEST_SYM_INTERVAL (log1pf, 0.001, 1.0, 50000) +TEST_SYM_INTERVAL (log1pf, 1.0, inf, 5000) diff --git a/contrib/arm-optimized-routines/pl/math/log1pf_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/log1pf_data.c similarity index 59% rename from contrib/arm-optimized-routines/pl/math/log1pf_data.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/log1pf_data.c index 8c92d5738fe8..e0ac269a1069 100644 --- a/contrib/arm-optimized-routines/pl/math/log1pf_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/log1pf_data.c @@ -1,14 +1,14 @@ /* * Data used in single-precision log1p(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" /* Polynomial coefficients generated using floating-point minimax algorithm, see tools/log1pf.sollya for details.
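A hand-worked example of the integer scaling trick in log1pf above (illustrative, worth re-checking): for x = 7.0f, m = 8.0f, so k = (asuint (8.0f) - 0x3f400000) & 0xff800000 = 0x01800000, i.e. 3 in the exponent field. Then m_scale = asfloat (asuint (7.0f) - k) = 0.875f and s = asfloat (asuint (4.0f) - k) = 0.5f, giving m_scale + fmaf (0.25f, s, -1.0f) = 0.875f - 0.875f = 0.0f, which is exactly (1 + x)/2^3 - 1. The polynomial is then evaluated at 0, scale_back = 0x1800000 * 0x1.0p-23f = 3.0f, and the result is fmaf (3.0f, Ln2, 0.0f) = 3*ln2 = log(8) = log1p(7), as required.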
*/ const struct log1pf_data __log1pf_data - = {.coeffs = {-0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f, - -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f, - -0x1.6f0d5ep-5f}}; + = { .coeffs = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f, + -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, + 0x1.abcb6p-4f, -0x1.6f0d5ep-5f } }; diff --git a/contrib/arm-optimized-routines/pl/math/sinh_3u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/sinh_3u.c similarity index 72% rename from contrib/arm-optimized-routines/pl/math/sinh_3u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/sinh_3u.c index 1d86629ee2a3..39030d2750a9 100644 --- a/contrib/arm-optimized-routines/pl/math/sinh_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/sinh_3u.c @@ -1,63 +1,60 @@ /* * Double-precision sinh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" +#include "exp_inline.h" #define AbsMask 0x7fffffffffffffff #define Half 0x3fe0000000000000 -#define OFlowBound \ - 0x40862e42fefa39f0 /* 0x1.62e42fefa39fp+9, above which using expm1 results \ - in NaN. */ - -double -__exp_dd (double, double); +/* 0x1.62e42fefa39fp+9, above which using expm1 results in NaN. */ +#define OFlowBound 0x40862e42fefa39f0 /* Approximation for double-precision sinh(x) using expm1. sinh(x) = (exp(x) - exp(-x)) / 2. The greatest observed error is 2.57 ULP: __v_sinh(0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2 want 0x1.ab34e59d678d9p-2. */ double sinh (double x) { uint64_t ix = asuint64 (x); uint64_t iax = ix & AbsMask; double ax = asdouble (iax); uint64_t sign = ix & ~AbsMask; double halfsign = asdouble (Half | sign); if (unlikely (iax >= OFlowBound)) { /* Special values and overflow. */ if (unlikely (iax > 0x7ff0000000000000)) return __math_invalidf (x); /* expm1 overflows a little before sinh. We have to fill this gap by using a different algorithm, in this case we use a double-precision exp helper. For large x sinh(x) is dominated by exp(x), however we cannot compute exp without overflow either. We use the identity: exp(a) = (exp(a / 2)) ^ 2 to compute sinh(x) ~= (exp(|x| / 2)) ^ 2 / 2 for x > 0 ~= (exp(|x| / 2)) ^ 2 / -2 for x < 0. */ - double e = __exp_dd (ax / 2, 0); + double e = exp_inline (ax / 2, 0); return (e * halfsign) * e; } /* Use expm1f to retain acceptable precision for small numbers. Let t = e^(|x|) - 1. */ double t = expm1 (ax); /* Then sinh(x) = (t + t / (t + 1)) / 2 for x > 0 (t + t / (t + 1)) / -2 for x < 0. 
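The identity behind this assembly follows in two lines (hand derivation, for x >= 0 with t = e^x - 1; the sign is reapplied via halfsign):

  e^x - e^-x = (1 + t) - 1/(1 + t) = ((1 + t)^2 - 1) / (1 + t)
             = (t^2 + 2t) / (1 + t) = t + t / (1 + t)

so sinh(x) = (t + t / (t + 1)) / 2, computed here without ever forming e^-x.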
*/ return (t + t / (t + 1)) * halfsign; } -PL_SIG (S, D, 1, sinh, -10.0, 10.0) -PL_TEST_ULP (sinh, 2.08) -PL_TEST_SYM_INTERVAL (sinh, 0, 0x1p-51, 100) -PL_TEST_SYM_INTERVAL (sinh, 0x1p-51, 0x1.62e42fefa39fp+9, 100000) -PL_TEST_SYM_INTERVAL (sinh, 0x1.62e42fefa39fp+9, inf, 1000) +TEST_SIG (S, D, 1, sinh, -10.0, 10.0) +TEST_ULP (sinh, 2.08) +TEST_SYM_INTERVAL (sinh, 0, 0x1p-51, 100) +TEST_SYM_INTERVAL (sinh, 0x1p-51, 0x1.62e42fefa39fp+9, 100000) +TEST_SYM_INTERVAL (sinh, 0x1.62e42fefa39fp+9, inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/sinhf_2u3.c b/contrib/arm-optimized-routines/math/aarch64/experimental/sinhf_2u3.c similarity index 69% rename from contrib/arm-optimized-routines/pl/math/sinhf_2u3.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/sinhf_2u3.c index aa7aadcf67c5..860ddc0fc83c 100644 --- a/contrib/arm-optimized-routines/pl/math/sinhf_2u3.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/sinhf_2u3.c @@ -1,73 +1,69 @@ /* * Single-precision sinh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +#include "mathlib.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define AbsMask 0x7fffffff #define Half 0x3f000000 -#define Expm1OFlowLimit \ 0x42b17218 /* 0x1.62e43p+6, 2^7*ln2, minimum value for which expm1f \ overflows. */ -#define OFlowLimit \ 0x42b2d4fd /* 0x1.65a9fap+6, minimum positive value for which sinhf should \ overflow. */ - -float -optr_aor_exp_f32 (float); +/* 0x1.62e43p+6, 2^7*ln2, minimum value for which expm1f overflows. */ +#define Expm1OFlowLimit 0x42b17218 +/* 0x1.65a9fap+6, minimum positive value for which sinhf should overflow. */ +#define OFlowLimit 0x42b2d4fd /* Approximation for single-precision sinh(x) using expm1. sinh(x) = (exp(x) - exp(-x)) / 2. The maximum error is 2.26 ULP: sinhf(0x1.e34a9ep-4) got 0x1.e469ep-4 want 0x1.e469e4p-4. */ float sinhf (float x) { uint32_t ix = asuint (x); uint32_t iax = ix & AbsMask; float ax = asfloat (iax); uint32_t sign = ix & ~AbsMask; float halfsign = asfloat (Half | sign); if (unlikely (iax >= Expm1OFlowLimit)) { /* Special values and overflow. */ if (iax >= 0x7fc00001 || iax == 0x7f800000) return x; if (iax >= 0x7f800000) return __math_invalidf (x); if (iax >= OFlowLimit) return __math_oflowf (sign); /* expm1f overflows a little before sinhf (~88.7 vs ~89.4). We have to fill this gap by using a different algorithm, in this case we use a double-precision exp helper. For large x sinh(x) is dominated by exp(x), however we cannot compute exp without overflow either. We use the identity: exp(a) = (exp(a / 2)) ^ 2 to compute sinh(x) ~= (exp(|x| / 2)) ^ 2 / 2 for x > 0 ~= (exp(|x| / 2)) ^ 2 / -2 for x < 0. Greatest error in this region is 1.89 ULP: sinhf(0x1.65898cp+6) got 0x1.f00aep+127 want 0x1.f00adcp+127. */ - float e = optr_aor_exp_f32 (ax / 2); + float e = expf (ax / 2); return (e * halfsign) * e; } /* Use expm1f to retain acceptable precision for small numbers. Let t = e^(|x|) - 1. */ float t = expm1f (ax); /* Then sinh(x) = (t + t / (t + 1)) / 2 for x > 0 (t + t / (t + 1)) / -2 for x < 0.
*/ return (t + t / (t + 1)) * halfsign; } -PL_SIG (S, F, 1, sinh, -10.0, 10.0) -PL_TEST_ULP (sinhf, 1.76) -PL_TEST_SYM_INTERVAL (sinhf, 0, 0x1.62e43p+6, 100000) -PL_TEST_SYM_INTERVAL (sinhf, 0x1.62e43p+6, 0x1.65a9fap+6, 100) -PL_TEST_SYM_INTERVAL (sinhf, 0x1.65a9fap+6, inf, 100) +TEST_SIG (S, F, 1, sinh, -10.0, 10.0) +TEST_ULP (sinhf, 1.76) +TEST_SYM_INTERVAL (sinhf, 0, 0x1.62e43p+6, 100000) +TEST_SYM_INTERVAL (sinhf, 0x1.62e43p+6, 0x1.65a9fap+6, 100) +TEST_SYM_INTERVAL (sinhf, 0x1.65a9fap+6, inf, 100) diff --git a/contrib/arm-optimized-routines/math/aarch64/experimental/sve/erfinv_25u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/sve/erfinv_25u.c new file mode 100644 index 000000000000..4de6d08ab80f --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/sve/erfinv_25u.c @@ -0,0 +1,156 @@ +/* + * Double-precision inverse error function (SVE variant). + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "sv_math.h" +#include "test_defs.h" +#include "math_config.h" +#include "test_sig.h" +#include "sv_poly_f64.h" +#define SV_LOG_INLINE_POLY_ORDER 4 +#include "sv_log_inline.h" + +const static struct data +{ + /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the + coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs + of the denominator. P is interleaved P_17 and P_37, similar for Q. */ + double P[7][2], Q[7][2]; + double P_57[9], Q_57[9], tailshift, P37_0; + struct sv_log_inline_data log_tbl; +} data = { + .P37_0 = -0x1.f3596123109edp-7, + .tailshift = -0.87890625, + .P = { { 0x1.007ce8f01b2e8p+4, 0x1.60b8fe375999ep-2 }, + { -0x1.6b23cc5c6c6d7p+6, -0x1.779bb9bef7c0fp+1 }, + { 0x1.74e5f6ceb3548p+7, 0x1.786ea384470a2p+3 }, + { -0x1.5200bb15cc6bbp+7, -0x1.6a7c1453c85d3p+4 }, + { 0x1.05d193233a849p+6, 0x1.31f0fc5613142p+4 }, + { -0x1.148c5474ee5e1p+3, -0x1.5ea6c007d4dbbp+2 }, + { 0x1.689181bbafd0cp-3, 0x1.e66f265ce9e5p-3 } }, + .Q = { { 0x1.d8fb0f913bd7bp+3, -0x1.636b2dcf4edbep-7 }, + { -0x1.6d7f25a3f1c24p+6, 0x1.0b5411e2acf29p-2 }, + { 0x1.a450d8e7f4cbbp+7, -0x1.3413109467a0bp+1 }, + { -0x1.bc3480485857p+7, 0x1.563e8136c554ap+3 }, + { 0x1.ae6b0c504ee02p+6, -0x1.7b77aab1dcafbp+4 }, + { -0x1.499dfec1a7f5fp+4, 0x1.8a3e174e05ddcp+4 }, + { 0x1p+0, -0x1.4075c56404eecp+3 } }, + .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2, + 0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3, + 0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 }, + .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2, + 0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3, + 0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2 }, + .log_tbl = SV_LOG_CONSTANTS +}; + +static inline svfloat64_t +special (svbool_t pg, svfloat64_t x, const struct data *d) +{ + /* Note erfinv(inf) should return NaN, and erfinv(1) should return Inf. + By using log here, instead of log1p, we return finite values for both + these inputs, and values outside [-1, 1]. This is non-compliant, but is an + acceptable optimisation at Ofast. To get correct behaviour for all finite + values use the log1p_inline helper on -abs(x) - note that erfinv(inf) + will still be finite. 
*/ + svfloat64_t ax = svabs_x (pg, x); + svfloat64_t t + = svneg_x (pg, sv_log_inline (pg, svsubr_x (pg, ax, 1), &d->log_tbl)); + t = svdivr_x (pg, svsqrt_x (pg, t), 1); + svuint64_t sign + = sveor_x (pg, svreinterpret_u64 (ax), svreinterpret_u64 (x)); + svfloat64_t ts + = svreinterpret_f64 (svorr_x (pg, sign, svreinterpret_u64 (t))); + + svfloat64_t q = svadd_x (pg, t, d->Q_57[8]); + for (int i = 7; i >= 0; i--) + q = svmad_x (pg, q, t, d->Q_57[i]); + + return svdiv_x (pg, sv_horner_8_f64_x (pg, t, d->P_57), svmul_x (pg, ts, q)); +} + +static inline svfloat64_t +lookup (const double *c, svuint64_t idx) +{ + svfloat64_t x = svld1rq_f64 (svptrue_b64 (), c); + return svtbl (x, idx); +} + +static inline svfloat64_t +notails (svbool_t pg, svfloat64_t x, const struct data *d) +{ + svfloat64_t t = svmad_x (pg, x, x, -0.5625); + svfloat64_t p = svmla_x (pg, sv_f64 (d->P[5][0]), t, d->P[6][0]); + svfloat64_t q = svadd_x (pg, t, d->Q[5][0]); + for (int i = 4; i >= 0; i--) + { + p = svmad_x (pg, t, p, d->P[i][0]); + q = svmad_x (pg, t, q, d->Q[i][0]); + } + p = svmul_x (pg, p, x); + return svdiv_x (pg, p, q); +} + +/* Vector implementation of Blair et al's rational approximation to inverse + error function in double precision. Largest observed error is 24.75 ULP: + _ZGVsMxv_erfinv(0x1.fc861d81c2ba8p-1) got 0x1.ea05472686625p+0 + want 0x1.ea0547268660cp+0. */ +svfloat64_t SV_NAME_D1 (erfinv) (svfloat64_t x, svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + /* Calculate inverse error using algorithm described in + J. M. Blair, C. A. Edwards, and J. H. Johnson, + "Rational Chebyshev approximations for the inverse of the error function", + Math. Comp. 30, pp. 827--830 (1976). + https://doi.org/10.1090/S0025-5718-1976-0421040-7. + + Algorithm has 3 intervals: + - 'Normal' region [-0.75, 0.75] + - Tail region [0.75, 0.9375] U [-0.9375, -0.75] + - Extreme tail [-1, -0.9375] U [0.9375, 1] + Normal and tail are both rational approximation of similar order on + shifted input - these are typically performed in parallel using gather + loads to obtain correct coefficients depending on interval. */ + + svbool_t no_tail = svacle (pg, x, 0.75); + if (unlikely (!svptest_any (pg, svnot_z (pg, no_tail)))) + return notails (pg, x, d); + + svbool_t is_tail = svnot_z (pg, no_tail); + svbool_t extreme_tail = svacgt (pg, x, 0.9375); + svuint64_t idx = svdup_n_u64_z (is_tail, 1); + + svfloat64_t t = svsel_f64 (is_tail, sv_f64 (d->tailshift), sv_f64 (-0.5625)); + t = svmla_x (pg, t, x, x); + + svfloat64_t p = lookup (&d->P[6][0], idx); + svfloat64_t q + = svmla_x (pg, lookup (&d->Q[6][0], idx), svdup_n_f64_z (is_tail, 1), t); + for (int i = 5; i >= 0; i--) + { + p = svmla_x (pg, lookup (&d->P[i][0], idx), p, t); + q = svmla_x (pg, lookup (&d->Q[i][0], idx), q, t); + } + p = svmad_m (is_tail, p, t, d->P37_0); + p = svmul_x (pg, p, x); + + if (likely (svptest_any (pg, extreme_tail))) + return svsel (extreme_tail, special (pg, x, d), svdiv_x (pg, p, q)); + return svdiv_x (pg, p, q); +} + +#if USE_MPFR +# warning Not generating tests for _ZGVsMxv_erfinv, as MPFR has no suitable reference +#else +TEST_SIG (SV, D, 1, erfinv, -0.99, 0.99) +TEST_ULP (SV_NAME_D1 (erfinv), 24.5) +TEST_DISABLE_FENV (SV_NAME_D1 (erfinv)) +/* Test with control lane in each interval. 
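The lookup helper above is what lets the tail and normal intervals share one polynomial evaluation without gather loads. A usage sketch of the same idiom in isolation (coeff_for_lane and the pair values are hypothetical):

#include <arm_sve.h>

/* pair[] is an interleaved {normal, tail} coefficient pair.  svld1rq
   replicates the pair across the vector, and svtbl with per-lane
   indices of 0 (normal) or 1 (tail) selects the right coefficient for
   each lane's interval.  */
static svfloat64_t
coeff_for_lane (svbool_t is_tail)
{
  static const double pair[2] = { 0.25, 0.75 }; /* Hypothetical coeffs.  */
  svuint64_t idx = svdup_n_u64_z (is_tail, 1);  /* 1 in tail lanes, else 0. */
  svfloat64_t v = svld1rq_f64 (svptrue_b64 (), pair);
  return svtbl (v, idx);
}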
*/ +TEST_SYM_INTERVAL (SV_NAME_F1 (erfinv), 0, 1, 100000) +TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.5) +TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.8) +TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.95) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/experimental/sve/erfinvf_5u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/sve/erfinvf_5u.c new file mode 100644 index 000000000000..2c81c4e0b9a2 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/sve/erfinvf_5u.c @@ -0,0 +1,156 @@ +/* + * Single-precision inverse error function (SVE variant). + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_poly_f32.h" +#include "sv_logf_inline.h" + +const static struct data +{ + /* We use P_N and Q_N to refer to arrays of coefficients, where P_N + is the coeffs of the numerator in table N of Blair et al, and + Q_N is the coeffs of the denominator. Coefficients stored in + interleaved format to support lookup scheme. */ + float P10_2, P29_3, Q10_2, Q29_2; + float P10_0, P29_1, P10_1, P29_2; + float Q10_0, Q29_0, Q10_1, Q29_1; + float P29_0, P_50[6], Q_50[2], tailshift; + struct sv_logf_data logf_tbl; +} data = { .P10_0 = -0x1.a31268p+3, + .P10_1 = 0x1.ac9048p+4, + .P10_2 = -0x1.293ff6p+3, + .P29_0 = -0x1.fc0252p-4, + .P29_1 = 0x1.119d44p+0, + .P29_2 = -0x1.f59ee2p+0, + .P29_3 = 0x1.b13626p-2, + .Q10_0 = -0x1.8265eep+3, + .Q10_1 = 0x1.ef5eaep+4, + .Q10_2 = -0x1.12665p+4, + .Q29_0 = -0x1.69952p-4, + .Q29_1 = 0x1.c7b7d2p-1, + .Q29_2 = -0x1.167d7p+1, + .P_50 = { 0x1.3d8948p-3, 0x1.61f9eap+0, 0x1.61c6bcp-1, + -0x1.20c9f2p+0, 0x1.5c704cp-1, -0x1.50c6bep-3 }, + .Q_50 = { 0x1.3d7dacp-3, 0x1.629e5p+0 }, + .tailshift = -0.87890625, + .logf_tbl = SV_LOGF_CONSTANTS }; + +static inline svfloat32_t +special (svbool_t pg, svfloat32_t x, const struct data *d) +{ + svfloat32_t ax = svabs_x (pg, x); + svfloat32_t t = svdivr_x ( + pg, + svsqrt_x (pg, svneg_x (pg, sv_logf_inline (pg, svsubr_x (pg, ax, 1), + &d->logf_tbl))), + 1); + svuint32_t sign + = sveor_x (pg, svreinterpret_u32 (ax), svreinterpret_u32 (x)); + svfloat32_t ts + = svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (t))); + svfloat32_t q + = svmla_x (pg, sv_f32 (d->Q_50[0]), svadd_x (pg, t, d->Q_50[1]), t); + return svdiv_x (pg, sv_horner_5_f32_x (pg, t, d->P_50), svmul_x (pg, ts, q)); +} + +static inline svfloat32_t +notails (svbool_t pg, svfloat32_t x, const struct data *d) +{ + /* Shortcut when no input is in a tail region - no need to gather shift or + coefficients. */ + svfloat32_t t = svmad_x (pg, x, x, -0.5625); + svfloat32_t q = svadd_x (pg, t, d->Q10_2); + q = svmad_x (pg, t, q, d->Q10_1); + q = svmad_x (pg, t, q, d->Q10_0); + + svfloat32_t p = svmla_x (pg, sv_f32 (d->P10_1), t, d->P10_2); + p = svmad_x (pg, p, t, d->P10_0); + + return svdiv_x (pg, svmul_x (pg, x, p), q); +} + +/* Vector implementation of Blair et al's rational approximation to inverse + error function in single-precision. Worst-case error is 4.71 ULP, in the + tail region: + _ZGVsMxv_erfinvf(0x1.f84e9ap-1) got 0x1.b8326ap+0 + want 0x1.b83274p+0. */ +svfloat32_t SV_NAME_F1 (erfinv) (svfloat32_t x, svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + /* Calculate inverse error using algorithm described in + J. M. Blair, C. A. Edwards, and J. H. Johnson, + "Rational Chebyshev approximations for the inverse of the error function", + Math. Comp. 30, pp. 
827--830 (1976). + https://doi.org/10.1090/S0025-5718-1976-0421040-7. */ + + /* Algorithm has 3 intervals: + - 'Normal' region [-0.75, 0.75] + - Tail region [0.75, 0.9375] U [-0.9375, -0.75] + - Extreme tail [-1, -0.9375] U [0.9375, 1] + Normal and tail are both rational approximation of similar order on + shifted input - these are typically performed in parallel using gather + loads to obtain correct coefficients depending on interval. */ + svbool_t is_tail = svacge (pg, x, 0.75); + svbool_t extreme_tail = svacge (pg, x, 0.9375); + + if (likely (!svptest_any (pg, is_tail))) + return notails (pg, x, d); + + /* Select requisite shift depending on interval: polynomial is evaluated on + x * x - shift. + Normal shift = 0.5625 + Tail shift = 0.87890625. */ + svfloat32_t t = svmla_x ( + pg, svsel (is_tail, sv_f32 (d->tailshift), sv_f32 (-0.5625)), x, x); + + svuint32_t idx = svdup_u32_z (is_tail, 1); + svuint32_t idxhi = svadd_x (pg, idx, 2); + + /* Load coeffs in quadwords and select them according to interval. */ + svfloat32_t pqhi = svld1rq (svptrue_b32 (), &d->P10_2); + svfloat32_t plo = svld1rq (svptrue_b32 (), &d->P10_0); + svfloat32_t qlo = svld1rq (svptrue_b32 (), &d->Q10_0); + + svfloat32_t p2 = svtbl (pqhi, idx); + svfloat32_t p1 = svtbl (plo, idxhi); + svfloat32_t p0 = svtbl (plo, idx); + svfloat32_t q0 = svtbl (qlo, idx); + svfloat32_t q1 = svtbl (qlo, idxhi); + svfloat32_t q2 = svtbl (pqhi, idxhi); + + svfloat32_t p = svmla_x (pg, p1, p2, t); + p = svmla_x (pg, p0, p, t); + /* Tail polynomial has higher order - merge with normal lanes. */ + p = svmad_m (is_tail, p, t, d->P29_0); + svfloat32_t y = svmul_x (pg, x, p); + + /* Least significant term of both Q polynomials is 1, so no need to generate + it. */ + svfloat32_t q = svadd_x (pg, t, q2); + q = svmla_x (pg, q1, q, t); + q = svmla_x (pg, q0, q, t); + + if (unlikely (svptest_any (pg, extreme_tail))) + return svsel (extreme_tail, special (extreme_tail, x, d), + svdiv_x (pg, y, q)); + return svdiv_x (pg, y, q); +} + +#if USE_MPFR +# warning Not generating tests for _ZGVsMxv_erfinvf, as MPFR has no suitable reference +#else +TEST_SIG (SV, F, 1, erfinv, -0.99, 0.99) +TEST_ULP (SV_NAME_F1 (erfinv), 4.09) +TEST_DISABLE_FENV (SV_NAME_F1 (erfinv)) +TEST_SYM_INTERVAL (SV_NAME_F1 (erfinv), 0, 1, 40000) +TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.5) +TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.8) +TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.95) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_powi.c b/contrib/arm-optimized-routines/math/aarch64/experimental/sve/powi.c similarity index 96% rename from contrib/arm-optimized-routines/pl/math/sv_powi.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/sve/powi.c index e53bf2195533..62dd1b114970 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_powi.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/sve/powi.c @@ -1,48 +1,49 @@ /* * Double-precision SVE powi(x, n) function. * - * Copyright (c) 2020-2023, Arm Limited. + * Copyright (c) 2020-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" /* Optimized double-precision vector powi (double base, long integer power). powi is developed for environments in which accuracy is of much less importance than performance, hence we provide no estimate for worst-case error. */ svfloat64_t _ZGVsMxvv_powk (svfloat64_t as, svint64_t ns, svbool_t p) { /* Compute powi by successive squaring, right to left. 
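The loop that follows is right-to-left binary exponentiation, vectorised; the scalar shape of the same algorithm (a sketch, hypothetical name) is:

/* Scalar equivalent of the vector loop below: scan the bits of |n| from
   least to most significant, squaring a running power of x and
   multiplying it into the accumulator whenever the current bit is set;
   negative powers take the reciprocal at the end.  */
static double
powi_sketch (double x, long n)
{
  unsigned long un = n < 0 ? -(unsigned long) n : (unsigned long) n;
  double acc = 1.0, c = x;
  for (; un != 0; un >>= 1)
    {
      if (un & 1)
	acc *= c;
      c *= c;
    }
  return n < 0 ? 1.0 / acc : acc;
}

The vector version keeps every lane in the loop until the largest |n| across the vector (max_n) is exhausted, using merging predication instead of the scalar if.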
*/ svfloat64_t acc = sv_f64 (1.0); svbool_t want_recip = svcmplt (p, ns, 0); svuint64_t ns_abs = svreinterpret_u64 (svabs_x (p, ns)); /* We use a max to avoid needing to check whether any lane != 0 on each iteration. */ uint64_t max_n = svmaxv (p, ns_abs); svfloat64_t c = as; /* Successively square c, and use merging predication (_m) to determine whether or not to perform the multiplication or keep the previous iteration. */ while (true) { svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1ull), 1ull); acc = svmul_m (px, acc, c); max_n >>= 1; if (max_n == 0) break; ns_abs = svlsr_x (p, ns_abs, 1); c = svmul_x (p, c, c); } /* Negative powers are handled by computing the abs(n) version and then taking the reciprocal. */ if (svptest_any (want_recip, want_recip)) acc = svdivr_m (want_recip, acc, 1.0); return acc; } +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_powif.c b/contrib/arm-optimized-routines/math/aarch64/experimental/sve/powif.c similarity index 96% rename from contrib/arm-optimized-routines/pl/math/sv_powif.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/sve/powif.c index 7e032fd86a20..fd74acf12df7 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_powif.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/sve/powif.c @@ -1,48 +1,49 @@ /* * Single-precision SVE powi(x, n) function. * - * Copyright (c) 2020-2023, Arm Limited. + * Copyright (c) 2020-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" /* Optimized single-precision vector powi (float base, integer power). powi is developed for environments in which accuracy is of much less importance than performance, hence we provide no estimate for worst-case error. */ svfloat32_t _ZGVsMxvv_powi (svfloat32_t as, svint32_t ns, svbool_t p) { /* Compute powi by successive squaring, right to left. */ svfloat32_t acc = sv_f32 (1.f); svbool_t want_recip = svcmplt (p, ns, 0); svuint32_t ns_abs = svreinterpret_u32 (svabs_x (p, ns)); /* We use a max to avoid needing to check whether any lane != 0 on each iteration. */ uint32_t max_n = svmaxv (p, ns_abs); svfloat32_t c = as; /* Successively square c, and use merging predication (_m) to determine whether or not to perform the multiplication or keep the previous iteration. */ while (true) { svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1), 1); acc = svmul_m (px, acc, c); max_n >>= 1; if (max_n == 0) break; ns_abs = svlsr_x (p, ns_abs, 1); c = svmul_x (p, c, c); } /* Negative powers are handled by computing the abs(n) version and then taking the reciprocal. */ if (svptest_any (want_recip, want_recip)) acc = svdivr_m (want_recip, acc, 1.0f); return acc; } +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/experimental/sve/sv_logf_inline.h b/contrib/arm-optimized-routines/math/aarch64/experimental/sve/sv_logf_inline.h new file mode 100644 index 000000000000..c317a23f6fc3 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/sve/sv_logf_inline.h @@ -0,0 +1,51 @@ +/* + * Single-precision vector log function - inline version + * + * Copyright (c) 2024, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" + +struct sv_logf_data +{ + float p1, p3, p5, p6, p0, p2, p4; + float ln2; + uint32_t off, mantissa_mask; +}; + +#define SV_LOGF_CONSTANTS \ + { \ + .p0 = -0x1.ffffc8p-2f, .p1 = 0x1.555d7cp-2f, .p2 = -0x1.00187cp-2f, \ + .p3 = 0x1.961348p-3f, .p4 = -0x1.4f9934p-3f, .p5 = 0x1.5a9aa2p-3f, \ + .p6 = -0x1.3e737cp-3f, .ln2 = 0x1.62e43p-1f, .off = 0x3f2aaaab, \ + .mantissa_mask = 0x007fffff \ + } + +static inline svfloat32_t +sv_logf_inline (svbool_t pg, svfloat32_t x, const struct sv_logf_data *d) +{ + svuint32_t u = svreinterpret_u32 (x); + + /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ + u = svsub_x (pg, u, d->off); + svfloat32_t n = svcvt_f32_s32_x ( + pg, svasr_x (pg, svreinterpret_s32_u32 (u), 23)); /* signextend. */ + u = svand_x (pg, u, d->mantissa_mask); + u = svadd_x (pg, u, d->off); + svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f); + + /* y = log(1+r) + n*ln2. */ + svfloat32_t r2 = svmul_x (pg, r, r); + /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). */ + svfloat32_t p1356 = svld1rq_f32 (svptrue_b32 (), &d->p1); + svfloat32_t p = svmla_lane (sv_f32 (d->p4), r, p1356, 2); + svfloat32_t q = svmla_lane (sv_f32 (d->p2), r, p1356, 1); + svfloat32_t y = svmla_lane (sv_f32 (d->p0), r, p1356, 0); + p = svmla_lane (p, r2, p1356, 3); + q = svmla_x (pg, q, p, r2); + y = svmla_x (pg, y, q, r2); + p = svmla_x (pg, r, n, d->ln2); + + return svmla_x (pg, p, y, r2); +} diff --git a/contrib/arm-optimized-routines/pl/math/tanf_3u3.c b/contrib/arm-optimized-routines/math/aarch64/experimental/tanf_3u3.c similarity index 80% rename from contrib/arm-optimized-routines/pl/math/tanf_3u3.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/tanf_3u3.c index 30c86fa89730..c26e92db588f 100644 --- a/contrib/arm-optimized-routines/pl/math/tanf_3u3.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/tanf_3u3.c @@ -1,193 +1,185 @@ /* * Single-precision scalar tan(x) function. * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #include "poly_scalar_f32.h" /* Useful constants. */ #define NegPio2_1 (-0x1.921fb6p+0f) #define NegPio2_2 (0x1.777a5cp-25f) #define NegPio2_3 (0x1.ee59dap-50f) /* Reduced from 0x1p20 to 0x1p17 to ensure 3.5ulps. */ #define RangeVal (0x1p17f) #define InvPio2 ((0x1.45f306p-1f)) #define Shift (0x1.8p+23f) #define AbsMask (0x7fffffff) #define Pio4 (0x1.921fb6p-1) /* 2PI * 2^-64. */ #define Pio2p63 (0x1.921FB54442D18p-62) static inline float eval_P (float z) { return pw_horner_5_f32 (z, z * z, __tanf_poly_data.poly_tan); } static inline float eval_Q (float z) { return pairwise_poly_3_f32 (z, z * z, __tanf_poly_data.poly_cotan); } /* Reduction of the input argument x using Cody-Waite approach, such that x = r + n * pi/2 with r lives in [-pi/4, pi/4] and n is a signed integer. */ static inline float reduce (float x, int32_t *in) { /* n = rint(x/(pi/2)). */ float r = x; float q = fmaf (InvPio2, r, Shift); float n = q - Shift; /* There is no rounding here, n is representable by a signed integer. */ *in = (int32_t) n; /* r = x - n * (pi/2) (range reduction into -pi/4 .. pi/4). */ r = fmaf (NegPio2_1, n, r); r = fmaf (NegPio2_2, n, r); r = fmaf (NegPio2_3, n, r); return r; } -/* Table with 4/PI to 192 bit precision. 
To avoid unaligned accesses - only 8 new bits are added per entry, making the table 4 times larger. */ -static const uint32_t __inv_pio4[24] - = {0x000000a2, 0x0000a2f9, 0x00a2f983, 0xa2f9836e, 0xf9836e4e, 0x836e4e44, - 0x6e4e4415, 0x4e441529, 0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1, - 0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0, 0x34ddc0db, 0xddc0db62, - 0xc0db6295, 0xdb629599, 0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041}; - /* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic. XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored). Return the modulo between -PI/4 and PI/4 and store the quadrant in NP. Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit multiply computes the exact 2.62-bit fixed-point modulo. Since the result can have at most 29 leading zeros after the binary point, the double precision result is accurate to 33 bits. */ static inline double reduce_large (uint32_t xi, int *np) { const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15]; int shift = (xi >> 23) & 7; uint64_t n, res0, res1, res2; xi = (xi & 0xffffff) | 0x800000; xi <<= shift; res0 = xi * arr[0]; res1 = (uint64_t) xi * arr[4]; res2 = (uint64_t) xi * arr[8]; res0 = (res2 >> 32) | (res0 << 32); res0 += res1; n = (res0 + (1ULL << 61)) >> 62; res0 -= n << 62; double x = (int64_t) res0; *np = n; return x * Pio2p63; } /* Top 12 bits of the float representation with the sign bit cleared. */ static inline uint32_t top12 (float x) { return (asuint (x) >> 20); } /* Fast single-precision tan implementation. Maximum ULP error: 3.293ulps. tanf(0x1.c849eap+16) got -0x1.fe8d98p-1 want -0x1.fe8d9ep-1. */ float tanf (float x) { /* Get top words. */ uint32_t ix = asuint (x); uint32_t ia = ix & AbsMask; uint32_t ia12 = ia >> 20; /* Dispatch between no reduction (small numbers), fast reduction and slow large numbers reduction. The reduction step determines r float (|r| < pi/4) and n signed integer such that x = r + n * pi/2. */ int32_t n; float r; if (ia12 < top12 (Pio4)) { /* Optimize small values. */ if (unlikely (ia12 < top12 (0x1p-12f))) { if (unlikely (ia12 < top12 (0x1p-126f))) /* Force underflow for tiny x. */ force_eval_float (x * x); return x; } /* tan (x) ~= x + x^3 * P(x^2). */ float x2 = x * x; float y = eval_P (x2); return fmaf (x2, x * y, x); } /* Similar to other trigonometric routines, fast inaccurate reduction is - performed for values of x from pi/4 up to RangeVal. In order to keep errors - below 3.5ulps, we set the value of RangeVal to 2^17. This might differ for - other trigonometric routines. Above this value more advanced but slower - reduction techniques need to be implemented to reach a similar accuracy. - */ + performed for values of x from pi/4 up to RangeVal. In order to keep + errors below 3.5ulps, we set the value of RangeVal to 2^17. This might + differ for other trigonometric routines. Above this value more advanced + but slower reduction techniques need to be implemented to reach a similar + accuracy. */ else if (ia12 < top12 (RangeVal)) { /* Fast inaccurate reduction. */ r = reduce (x, &n); } else if (ia12 < 0x7f8) { /* Slow accurate reduction. */ uint32_t sign = ix & ~AbsMask; double dar = reduce_large (ia, &n); float ar = (float) dar; r = asfloat (asuint (ar) ^ sign); } else { /* tan(Inf or NaN) is NaN. */ return __math_invalidf (x); } /* If x lives in an interval where |tan(x)| - is finite then use an approximation of tangent in the form tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2). 
- grows to infinity then use an approximation of cotangent in the form cotan(z) ~ 1/z + z * Q(z^2), where the reciprocal can be computed early. Using symmetries of tangent and the identity tan(r) = cotan(pi/2 - r), we only need to change the sign of r to obtain tan(x) from cotan(r). This 2-interval approach requires 2 different sets of coefficients P and Q, where Q is a lower order polynomial than P. */ /* Determine if x lives in an interval where |tan(x)| grows to infinity. */ uint32_t alt = (uint32_t) n & 1; /* Perform additional reduction if required. */ float z = alt ? -r : r; /* Prepare backward transformation. */ float z2 = r * r; float offset = alt ? 1.0f / z : z; float scale = alt ? z : z * z2; /* Evaluate polynomial approximation of tan or cotan. */ float p = alt ? eval_Q (z2) : eval_P (z2); /* A unified way of assembling the result on both interval types. */ return fmaf (scale, p, offset); } -PL_SIG (S, F, 1, tan, -3.1, 3.1) -PL_TEST_ULP (tanf, 2.80) -PL_TEST_INTERVAL (tanf, 0, 0xffff0000, 10000) -PL_TEST_SYM_INTERVAL (tanf, 0x1p-127, 0x1p-14, 50000) -PL_TEST_SYM_INTERVAL (tanf, 0x1p-14, 0.7, 50000) -PL_TEST_SYM_INTERVAL (tanf, 0.7, 1.5, 50000) -PL_TEST_SYM_INTERVAL (tanf, 1.5, 0x1p17, 50000) -PL_TEST_SYM_INTERVAL (tanf, 0x1p17, 0x1p54, 50000) -PL_TEST_SYM_INTERVAL (tanf, 0x1p54, inf, 50000) +TEST_SIG (S, F, 1, tan, -3.1, 3.1) +TEST_ULP (tanf, 2.80) +TEST_INTERVAL (tanf, 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (tanf, 0x1p-127, 0x1p-14, 50000) +TEST_SYM_INTERVAL (tanf, 0x1p-14, 0.7, 50000) +TEST_SYM_INTERVAL (tanf, 0.7, 1.5, 50000) +TEST_SYM_INTERVAL (tanf, 1.5, 0x1p17, 50000) +TEST_SYM_INTERVAL (tanf, 0x1p17, 0x1p54, 50000) +TEST_SYM_INTERVAL (tanf, 0x1p54, inf, 50000) diff --git a/contrib/arm-optimized-routines/pl/math/tanf_data.c b/contrib/arm-optimized-routines/math/aarch64/experimental/tanf_data.c similarity index 96% rename from contrib/arm-optimized-routines/pl/math/tanf_data.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/tanf_data.c index a6b9d512eed2..f310cd77d4ec 100644 --- a/contrib/arm-optimized-routines/pl/math/tanf_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/tanf_data.c @@ -1,45 +1,45 @@ /* * Data used in single-precision tan(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" const struct tanf_poly_data __tanf_poly_data = { .poly_tan = { /* Coefficients generated using: poly = fpminimax((tan(sqrt(x))-sqrt(x))/x^(3/2), deg, [|single ...|], [a*a;b*b]); optimize relative error final prec : 23 bits deg : 5 a : 0x1p-126 ^ 2 b : ((pi) / 0x1p2) ^ 2 dirty rel error: 0x1.f7c2e4p-25 dirty abs error: 0x1.f7c2ecp-25. */ 0x1.55555p-2, 0x1.11166p-3, 0x1.b88a78p-5, 0x1.7b5756p-6, 0x1.4ef4cep-8, 0x1.0e1e74p-7 }, .poly_cotan = { /* Coefficients generated using: fpminimax(f(x) = (0x1p0 / tan(sqrt(x)) - 0x1p0 / sqrt(x)) / sqrt(x), deg, [|dtype ...|], [a;b]) optimize a single polynomial optimize absolute error final prec : 23 bits working prec : 128 bits deg : 3 a : 0x1p-126 b : (pi) / 0x1p2 dirty rel error : 0x1.81298cp-25 dirty abs error : 0x1.a8acf4p-25. */ -0x1.55555p-2, /* -0.33333325. */ -0x1.6c23e4p-6, /* -2.2225354e-2. */ -0x1.12dbap-9, /* -2.0969994e-3. */ -0x1.05a1c2p-12, /* -2.495116e-4. 
*/ } }; diff --git a/contrib/arm-optimized-routines/pl/math/tanh_3u.c b/contrib/arm-optimized-routines/math/aarch64/experimental/tanh_3u.c similarity index 80% rename from contrib/arm-optimized-routines/pl/math/tanh_3u.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/tanh_3u.c index 86f2904afc32..838b6c4f12c1 100644 --- a/contrib/arm-optimized-routines/pl/math/tanh_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/tanh_3u.c @@ -1,78 +1,80 @@ /* * Double-precision tanh(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #include "poly_scalar_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define AbsMask 0x7fffffffffffffff #define InvLn2 0x1.71547652b82fep0 #define Ln2hi 0x1.62e42fefa39efp-1 #define Ln2lo 0x1.abc9e3b39803fp-56 #define Shift 0x1.8p52 -#define BoringBound 0x403241bf835f9d5f /* asuint64 (0x1.241bf835f9d5fp+4). */ -#define TinyBound 0x3e40000000000000 /* asuint64 (0x1p-27). */ +/* asuint64 (0x1.241bf835f9d5fp+4). */ +#define BoringBound 0x403241bf835f9d5f +/* asuint64 (0x1p-27). */ +#define TinyBound 0x3e40000000000000 #define One 0x3ff0000000000000 static inline double expm1_inline (double x) { /* Helper routine for calculating exp(x) - 1. Copied from expm1_2u5.c, with several simplifications: - No special-case handling for tiny or special values. - Simpler combination of p and t in final stage of the algorithm. - Use shift-and-add instead of ldexp to calculate t. */ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ double j = fma (InvLn2, x, Shift) - Shift; int64_t i = j; double f = fma (j, -Ln2hi, x); f = fma (j, -Ln2lo, f); /* Approximate expm1(f) using polynomial. */ double f2 = f * f; double f4 = f2 * f2; double p = fma (f2, estrin_10_f64 (f, f2, f4, f4 * f4, __expm1_poly), f); /* t = 2 ^ i. */ double t = asdouble ((uint64_t) (i + 1023) << 52); /* expm1(x) = p * t + (t - 1). */ return fma (p, t, t - 1); } /* Approximation for double-precision tanh(x), using a simplified version of expm1. The greatest observed error is 2.77 ULP: tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3 want -0x1.bd6a21a163624p-3. */ double tanh (double x) { uint64_t ix = asuint64 (x); uint64_t ia = ix & AbsMask; uint64_t sign = ix & ~AbsMask; if (unlikely (ia > BoringBound)) { if (ia > 0x7ff0000000000000) return __math_invalid (x); return asdouble (One | sign); } if (unlikely (ia < TinyBound)) return x; /* tanh(x) = (e^2x - 1) / (e^2x + 1). 
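   Writing q = expm1(2x) = e^2x - 1, this becomes tanh(x) = q / (q + 2),
   so a single expm1 evaluation and one division suffice. Computing
   e^2x - 1 naively would suffer cancellation for small x, which expm1
   avoids. Quick check: q = expm1(2) ~= 6.389 gives 6.389 / 8.389
   ~= 0.7616, matching tanh(1).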
*/ double q = expm1_inline (2 * x); return q / (q + 2); } -PL_SIG (S, D, 1, tanh, -10.0, 10.0) -PL_TEST_ULP (tanh, 2.27) -PL_TEST_SYM_INTERVAL (tanh, 0, TinyBound, 1000) -PL_TEST_SYM_INTERVAL (tanh, TinyBound, BoringBound, 100000) -PL_TEST_SYM_INTERVAL (tanh, BoringBound, inf, 1000) +TEST_SIG (S, D, 1, tanh, -10.0, 10.0) +TEST_ULP (tanh, 2.27) +TEST_SYM_INTERVAL (tanh, 0, TinyBound, 1000) +TEST_SYM_INTERVAL (tanh, TinyBound, BoringBound, 100000) +TEST_SYM_INTERVAL (tanh, BoringBound, inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/tanhf_2u6.c b/contrib/arm-optimized-routines/math/aarch64/experimental/tanhf_2u6.c similarity index 79% rename from contrib/arm-optimized-routines/pl/math/tanhf_2u6.c rename to contrib/arm-optimized-routines/math/aarch64/experimental/tanhf_2u6.c index 93ea3cf5d865..d9adae5c3a76 100644 --- a/contrib/arm-optimized-routines/pl/math/tanhf_2u6.c +++ b/contrib/arm-optimized-routines/math/aarch64/experimental/tanhf_2u6.c @@ -1,88 +1,87 @@ /* * Single-precision tanh(x) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" -#define BoringBound \ - 0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for \ - negative). */ +/* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */ +#define BoringBound 0x41102cb3 #define AbsMask 0x7fffffff #define One 0x3f800000 #define Shift (0x1.8p23f) #define InvLn2 (0x1.715476p+0f) #define Ln2hi (0x1.62e4p-1f) #define Ln2lo (0x1.7f7d1cp-20f) #define C(i) __expm1f_poly[i] static inline float expm1f_inline (float x) { /* Helper routine for calculating exp(x) - 1. Copied from expm1f_1u6.c, with several simplifications: - - No special-case handling for tiny or special values, instead return early - from the main routine. + - No special-case handling for tiny or special values, instead return + early from the main routine. - No special handling for large values: - No early return for infinity. - Simpler combination of p and t in final stage of algorithm. - |i| < 27, so can calculate t by simpler shift-and-add, instead of ldexpf (same as vector algorithm). */ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ float j = fmaf (InvLn2, x, Shift) - Shift; int32_t i = j; float f = fmaf (j, -Ln2hi, x); f = fmaf (j, -Ln2lo, f); /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). Uses Estrin scheme, where the main expm1f routine uses Horner. */ float f2 = f * f; float p_01 = fmaf (f, C (1), C (0)); float p_23 = fmaf (f, C (3), C (2)); float p = fmaf (f2, p_23, p_01); p = fmaf (f2 * f2, C (4), p); p = fmaf (f2, p, f); /* t = 2^i. */ float t = asfloat ((uint32_t) (i + 127) << 23); /* expm1(x) ~= p * t + (t - 1). */ return fmaf (p, t, t - 1); } /* Approximation for single-precision tanh(x), using a simplified version of expm1f. The maximum error is 2.58 ULP: tanhf(0x1.fa5eep-5) got 0x1.f9ba02p-5 want 0x1.f9ba08p-5. */ float tanhf (float x) { uint32_t ix = asuint (x); uint32_t iax = ix & AbsMask; uint32_t sign = ix & ~AbsMask; if (unlikely (iax > BoringBound)) { if (iax > 0x7f800000) return __math_invalidf (x); return asfloat (One | sign); } if (unlikely (iax < 0x34000000)) return x; /* tanh(x) = (e^2x - 1) / (e^2x + 1). 
*/ float q = expm1f_inline (2 * x); return q / (q + 2); } -PL_SIG (S, F, 1, tanh, -10.0, 10.0) -PL_TEST_ULP (tanhf, 2.09) -PL_TEST_SYM_INTERVAL (tanhf, 0, 0x1p-23, 1000) -PL_TEST_SYM_INTERVAL (tanhf, 0x1p-23, 0x1.205966p+3, 100000) -PL_TEST_SYM_INTERVAL (tanhf, 0x1.205966p+3, inf, 100) +TEST_SIG (S, F, 1, tanh, -10.0, 10.0) +TEST_ULP (tanhf, 2.09) +TEST_SYM_INTERVAL (tanhf, 0, 0x1p-23, 1000) +TEST_SYM_INTERVAL (tanhf, 0x1p-23, 0x1.205966p+3, 100000) +TEST_SYM_INTERVAL (tanhf, 0x1.205966p+3, inf, 100) diff --git a/contrib/arm-optimized-routines/math/aarch64/sincospi_4u.c b/contrib/arm-optimized-routines/math/aarch64/sincospi_4u.c new file mode 100644 index 000000000000..2a944bed23e1 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sincospi_4u.c @@ -0,0 +1,158 @@ +/* + * Double-precision scalar sincospi function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "math_config.h" +#include "test_sig.h" +#include "test_defs.h" +#include "poly_scalar_f64.h" + +/* Taylor series coefficients for sin(pi * x). + C2 coefficient (originally ~=5.16771278) has been split into two parts: + C2_hi = 4, C2_lo = C2 - C2_hi (~=1.16771278) + This change in magnitude reduces floating point rounding errors. + C2_hi is then reintroduced after the polynomial approximation. */ +const static struct sincospi_data +{ + double poly[10]; +} sincospi_data = { + /* Taylor series coefficients for sin(pi * x). */ + .poly = { 0x1.921fb54442d184p1, -0x1.2aef39896f94bp0, 0x1.466bc6775ab16p1, + -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8, + 0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16, + 0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 }, +}; + +/* Top 12 bits of a double (sign and exponent bits). */ +static inline uint64_t +abstop12 (double x) +{ + return (asuint64 (x) >> 52) & 0x7ff; +} + +/* Triages special cases into 4 categories: + -1 or +1 if iy represents half an integer + -1 if round(y) is odd. + +1 if round(y) is even. + -2 or +2 if iy represents an integer. + -2 if iy is odd. + +2 if iy is even. + The argument is the bit representation of a positive non-zero + finite floating-point value which is either a half or an integer. */ +static inline int +checkint (uint64_t iy) +{ + int e = iy >> 52; + if (e > 0x3ff + 52) + return 2; + if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) + { + if ((iy - 1) & 2) + return -1; + else + return 1; + } + if (iy & (1ULL << (0x3ff + 52 - e))) + return -2; + return 2; +} + +/* Approximation for scalar double-precision sincospi(x). + Maximum error for sin: 3.46 ULP: + sincospi_sin(0x1.3d8a067cd8961p+14) got 0x1.ffe609a279008p-1 want + 0x1.ffe609a27900cp-1. + Maximum error for cos: 3.66 ULP: + sincospi_cos(0x1.a0ec6997557eep-24) got 0x1.ffffffffffe59p-1 want + 0x1.ffffffffffe5dp-1. */ +void +arm_math_sincospi (double x, double *out_sin, double *out_cos) +{ + const struct sincospi_data *d = ptr_barrier (&sincospi_data); + uint64_t sign = asuint64 (x) & 0x8000000000000000; + + if (likely (abstop12 (x) < abstop12 (0x1p51))) + { + /* ax = |x| - n (range reduction into -1/2 .. 1/2). */ + double ar_s = x - rint (x); + + /* We know that cospi(x) = sinpi(0.5 - x) + range reduction and offset into sinpi range -1/2 .. 1/2 + ax = 0.5 - |x - rint(x)|. */ + double ar_c = 0.5 - fabs (ar_s); + + /* ss = sin(pi * ax). 
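+      Both outputs reuse the single Taylor polynomial in d->poly: ss
+      evaluates it at the reduction of x, cc at the reduction of
+      0.5 - |x|. Illustrative call (a usage sketch, not part of this
+      file):
+
+        double s, c;
+        arm_math_sincospi (0.25, &s, &c);
+        // s == c == sin(pi/4) ~= 0.70710678, within the ULP bounds
+        // quoted above.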
*/ + double ar2_s = ar_s * ar_s; + double ar2_c = ar_c * ar_c; + double ar4_s = ar2_s * ar2_s; + double ar4_c = ar2_c * ar2_c; + + uint64_t cc_sign = ((uint64_t) llrint (x)) << 63; + uint64_t ss_sign = cc_sign; + if (ar_s == 0) + ss_sign = sign; + + double ss = pw_horner_9_f64 (ar2_s, ar4_s, d->poly); + double cc = pw_horner_9_f64 (ar2_c, ar4_c, d->poly); + + /* As all values are reduced to -1/2 .. 1/2, the result of cos(x) + will always be positive, therefore, the sign must be introduced + based upon whether x rounds to odd or even. For sin(x) the sign is + copied from x. */ + *out_sin + = asdouble (asuint64 (fma (-4 * ar2_s, ar_s, ss * ar_s)) ^ ss_sign); + *out_cos + = asdouble (asuint64 (fma (-4 * ar2_c, ar_c, cc * ar_c)) ^ cc_sign); + } + else + { + /* When abs(x) > 0x1p51, x will be either + - Half integer (relevant if abs(x) in [0x1p51, 0x1p52]) + - Odd integer (relevant if abs(x) in [0x1p52, 0x1p53]) + - Even integer (relevant if abs(x) in [0x1p53, inf]) + - Inf or NaN. */ + if (abstop12 (x) >= 0x7ff) + { + double inv_result = __math_invalid (x); + *out_sin = inv_result; + *out_cos = inv_result; + return; + } + else + { + uint64_t ax = asuint64 (x) & 0x7fffffffffffffff; + int m = checkint (ax); + /* The case where ax is half integer. */ + if (m & 1) + { + *out_sin = sign ? -m : m; + *out_cos = 0; + return; + } + /* The case where ax is integer. */ + else + { + *out_sin = asdouble (sign); + *out_cos = m >> 1; + return; + } + } + } +} + +#if WANT_TRIGPI_TESTS +TEST_DISABLE_FENV (arm_math_sincospi_sin) +TEST_DISABLE_FENV (arm_math_sincospi_cos) +TEST_ULP (arm_math_sincospi_sin, 2.96) +TEST_ULP (arm_math_sincospi_cos, 3.16) +# define SINCOS_INTERVAL(lo, hi, n) \ + TEST_SYM_INTERVAL (arm_math_sincospi_sin, lo, hi, n) \ + TEST_SYM_INTERVAL (arm_math_sincospi_cos, lo, hi, n) +SINCOS_INTERVAL (0, 0x1p-63, 10000) +SINCOS_INTERVAL (0x1p-63, 0.5, 50000) +SINCOS_INTERVAL (0.5, 0x1p51, 50000) +SINCOS_INTERVAL (0x1p51, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/math/aarch64/sincospif_3u2.c b/contrib/arm-optimized-routines/math/aarch64/sincospif_3u2.c new file mode 100644 index 000000000000..b79694d2ac65 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sincospif_3u2.c @@ -0,0 +1,145 @@ +/* + * Single-precision scalar sincospi function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" +#include "test_sig.h" +#include "test_defs.h" +#include "poly_scalar_f32.h" + +/* Taylor series coefficients for sin(pi * x). */ +const static struct sincospif_data +{ + float poly[6]; +} sincospif_data = { + /* Taylor series coefficients for sin(pi * x). */ + .poly = { 0x1.921fb6p1f, -0x1.4abbcep2f, 0x1.466bc6p1f, -0x1.32d2ccp-1f, + 0x1.50783p-4f, -0x1.e30750p-8f }, +}; + +/* Top 12 bits of the float representation with the sign bit cleared. */ +static inline uint32_t +abstop12 (float x) +{ + return (asuint (x) >> 20) & 0x7ff; +} + +/* Triages special cases into 4 categories: + -1 or +1 if iy represents half an integer + -1 if round(y) is odd. + +1 if round(y) is even. + -2 or +2 if iy represents an integer. + -2 if iy is odd. + +2 if iy is even. + The argument is the bit representation of a positive non-zero + finite floating-point value which is either a half or an integer. 
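+   The triage suffices because at these magnitudes sinpi and cospi are
+   exact: for any integer k,
+     sinpi(k)       = 0,      cospi(k)       = (-1)^k,
+     sinpi(k + 1/2) = (-1)^k, cospi(k + 1/2) = 0,
+   so only the half/integer distinction and the parity encoded in the
+   return value are needed to assemble the result.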
*/ +static inline int +checkint (uint32_t iy) +{ + int e = iy >> 23; + if (e > 0x7f + 23) + return 2; + if (iy & ((1 << (0x7f + 23 - e)) - 1)) + { + if ((iy - 1) & 2) + return -1; + else + return 1; + } + if (iy & (1 << (0x7f + 23 - e))) + return -2; + return 2; +} + +/* Approximation for scalar single-precision sincospif(x). + Maximum error for sin: 3.04 ULP: + sincospif_sin(0x1.c597ccp-2) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1. + Maximum error for cos: 3.18 ULP: + sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1. */ +void +arm_math_sincospif (float x, float *out_sin, float *out_cos) +{ + + const struct sincospif_data *d = ptr_barrier (&sincospif_data); + uint32_t sign = asuint (x) & 0x80000000; + + /* abs(x) in [0, 0x1p22]. */ + if (likely (abstop12 (x) < abstop12 (0x1p22))) + { + /* ar_s = x - n (range reduction into -1/2 .. 1/2). */ + float ar_s = x - rintf (x); + /* We know that cospi(x) = sinpi(0.5 - x) + range reduction and offset into sinpi range -1/2 .. 1/2 + ar_c = 0.5 - |x - n|. */ + float ar_c = 0.5f - fabsf (ar_s); + + float ar2_s = ar_s * ar_s; + float ar2_c = ar_c * ar_c; + float ar4_s = ar2_s * ar2_s; + float ar4_c = ar2_c * ar2_c; + + uint32_t cc_sign = lrintf (x) << 31; + uint32_t ss_sign = cc_sign; + if (ar_s == 0) + ss_sign = sign; + + /* As all values are reduced to -1/2 .. 1/2, the result of cos(x) + will always be positive, therefore, the sign must be introduced + based upon whether x rounds to odd or even. For sin(x) the sign is + copied from x. */ + *out_sin = pw_horner_5_f32 (ar2_s, ar4_s, d->poly) + * asfloat (asuint (ar_s) ^ ss_sign); + *out_cos = pw_horner_5_f32 (ar2_c, ar4_c, d->poly) + * asfloat (asuint (ar_c) ^ cc_sign); + return; + } + else + { + /* When abs(x) > 0x1p22, x will be either + - Half integer (relevant if abs(x) in [0x1p22, 0x1p23]) + - Odd integer (relevant if abs(x) in [0x1p23, 0x1p24]) + - Even integer (relevant if abs(x) in [0x1p24, inf]) + - Inf or NaN. */ + if (abstop12 (x) >= 0x7f8) + { + float inv_result = __math_invalidf (x); + *out_sin = inv_result; + *out_cos = inv_result; + return; + } + else + { + uint32_t ax = asuint (x) & 0x7fffffff; + int m = checkint (ax); + if (m & 1) + { + *out_sin = sign ? -m : m; + *out_cos = 0; + return; + } + else + { + *out_sin = asfloat (sign); + *out_cos = m >> 1; + return; + } + } + } +} + +#if WANT_TRIGPI_TESTS +TEST_DISABLE_FENV (arm_math_sincospif_sin) +TEST_DISABLE_FENV (arm_math_sincospif_cos) +TEST_ULP (arm_math_sincospif_sin, 2.54) +TEST_ULP (arm_math_sincospif_cos, 2.68) +# define SINCOSPIF_INTERVAL(lo, hi, n) \ + TEST_SYM_INTERVAL (arm_math_sincospif_sin, lo, hi, n) \ + TEST_SYM_INTERVAL (arm_math_sincospif_cos, lo, hi, n) +SINCOSPIF_INTERVAL (0, 0x1p-31, 10000) +SINCOSPIF_INTERVAL (0x1p-31, 1, 50000) +SINCOSPIF_INTERVAL (1, 0x1p22f, 50000) +SINCOSPIF_INTERVAL (0x1p22f, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/sinpi_3u.c b/contrib/arm-optimized-routines/math/aarch64/sinpi_3u5.c similarity index 76% rename from contrib/arm-optimized-routines/pl/math/sinpi_3u.c rename to contrib/arm-optimized-routines/math/aarch64/sinpi_3u5.c index a04a352a62e6..f96d9a312b53 100644 --- a/contrib/arm-optimized-routines/pl/math/sinpi_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/sinpi_3u5.c @@ -1,90 +1,101 @@ /* * Double-precision scalar sinpi function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define _GNU_SOURCE #include <math.h> #include "mathlib.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #include "poly_scalar_f64.h" /* Taylor series coefficients for sin(pi * x). C2 coefficient (originally ~=5.16771278) has been split into two parts: C2_hi = 4, C2_lo = C2 - C2_hi (~=1.16771278) This change in magnitude reduces floating point rounding errors. C2_hi is then reintroduced after the polynomial approximation. */ static const double poly[] = { 0x1.921fb54442d184p1, -0x1.2aef39896f94bp0, 0x1.466bc6775ab16p1, -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8, 0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16, 0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 }; #define Shift 0x1.8p+52 +/* TODO Store constant in structure for more efficient load. */ +#define Pi 0x1.921fb54442d18p+1 /* Approximation for scalar double-precision sinpi(x). Maximum error: 3.03 ULP: sinpi(0x1.a90da2818f8b5p+7) got 0x1.fe358f255a4b3p-1 want 0x1.fe358f255a4b6p-1. */ double -sinpi (double x) +arm_math_sinpi (double x) { - if (isinf (x)) + if (isinf (x) || isnan (x)) return __math_invalid (x); double r = asdouble (asuint64 (x) & ~0x8000000000000000); uint64_t sign = asuint64 (x) & 0x8000000000000000; /* Edge cases for when sinpi should be exactly 0. (Integers) 0x1p53 is the limit for double precision to store any decimal places. */ if (r >= 0x1p53) - return 0; + return asdouble (sign); /* If x is an integer, return 0. */ uint64_t m = (uint64_t) r; if (r == m) - return 0; + return asdouble (sign); /* For very small inputs, squaring r causes underflow. Values below this threshold can be approximated via sinpi(x) ≈ pi*x. */ if (r < 0x1p-63) - return M_PI * x; + return Pi * x; /* Any non-integer values >= 0x1p51 will be int + 0.5. These values should return exactly 1 or -1. */ if (r >= 0x1p51) { uint64_t iy = ((m & 1) << 63) ^ asuint64 (1.0); return asdouble (sign ^ iy); } /* n = rint(|x|). */ double n = r + Shift; sign ^= (asuint64 (n) << 63); n = n - Shift; /* r = |x| - n (range reduction into -1/2 .. 1/2). */ r = r - n; /* y = sin(pi * r). */ double r2 = r * r; double y = horner_9_f64 (r2, poly); y = y * r; /* Reintroduce C2_hi. */ y = fma (-4 * r2, r, y); /* Copy sign of x to sin(|x|). */ return asdouble (asuint64 (y) ^ sign); } -PL_SIG (S, D, 1, sinpi, -0.9, 0.9) -PL_TEST_ULP (sinpi, 2.53) -PL_TEST_SYM_INTERVAL (sinpi, 0, 0x1p-63, 5000) -PL_TEST_SYM_INTERVAL (sinpi, 0x1p-63, 0.5, 10000) -PL_TEST_SYM_INTERVAL (sinpi, 0.5, 0x1p51, 10000) -PL_TEST_SYM_INTERVAL (sinpi, 0x1p51, inf, 10000) +#if WANT_EXPERIMENTAL_MATH +double +sinpi (double x) +{ + return arm_math_sinpi (x); +} +#endif + +#if WANT_TRIGPI_TESTS +TEST_ULP (arm_math_sinpi, 2.53) +TEST_SYM_INTERVAL (arm_math_sinpi, 0, 0x1p-63, 5000) +TEST_SYM_INTERVAL (arm_math_sinpi, 0x1p-63, 0.5, 10000) +TEST_SYM_INTERVAL (arm_math_sinpi, 0.5, 0x1p51, 10000) +TEST_SYM_INTERVAL (arm_math_sinpi, 0x1p51, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/sinpif_2u5.c b/contrib/arm-optimized-routines/math/aarch64/sinpif_2u5.c similarity index 75% rename from contrib/arm-optimized-routines/pl/math/sinpif_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/sinpif_2u5.c index af9ca0573b37..b5d9cd914577 100644 --- a/contrib/arm-optimized-routines/pl/math/sinpif_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sinpif_2u5.c @@ -1,83 +1,92 @@ /* * Single-precision scalar sinpi function. 
* - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" /* Taylor series coefficients for sin(pi * x). */ #define C0 0x1.921fb6p1f #define C1 -0x1.4abbcep2f #define C2 0x1.466bc6p1f #define C3 -0x1.32d2ccp-1f #define C4 0x1.50783p-4f #define C5 -0x1.e30750p-8f #define Shift 0x1.0p+23f /* Approximation for scalar single-precision sinpi(x) - sinpif. Maximum error: 2.48 ULP: sinpif(0x1.d062b6p-2) got 0x1.fa8c06p-1 want 0x1.fa8c02p-1. */ float -sinpif (float x) +arm_math_sinpif (float x) { - if (isinf (x)) + if (isinf (x) || isnan (x)) return __math_invalidf (x); float r = asfloat (asuint (x) & ~0x80000000); uint32_t sign = asuint (x) & 0x80000000; /* Edge cases for when sinpif should be exactly 0. (Integers) 0x1p23 is the limit for single precision to store any decimal places. */ if (r >= 0x1p23f) - return 0; + return asfloat (sign); int32_t m = roundf (r); if (m == r) - return 0; + return asfloat (sign); /* For very small inputs, squaring r causes underflow. Values below this threshold can be approximated via sinpi(x) ~= pi*x. */ if (r < 0x1p-31f) return C0 * x; /* Any non-integer values >= 0x1p22f will be int + 0.5. These values should return exactly 1 or -1. */ if (r >= 0x1p22f) { uint32_t iy = ((m & 1) << 31) ^ asuint (-1.0f); return asfloat (sign ^ iy); } /* n = rint(|x|). */ float n = r + Shift; sign ^= (asuint (n) << 31); n = n - Shift; /* r = |x| - n (range reduction into -1/2 .. 1/2). */ r = r - n; /* y = sin(pi * r). */ float r2 = r * r; float y = fmaf (C5, r2, C4); y = fmaf (y, r2, C3); y = fmaf (y, r2, C2); y = fmaf (y, r2, C1); y = fmaf (y, r2, C0); /* Copy sign of x to sin(|x|). */ return asfloat (asuint (y * r) ^ sign); } -PL_SIG (S, F, 1, sinpi, -0.9, 0.9) -PL_TEST_ULP (sinpif, 1.99) -PL_TEST_SYM_INTERVAL (sinpif, 0, 0x1p-31, 5000) -PL_TEST_SYM_INTERVAL (sinpif, 0x1p-31, 0.5, 10000) -PL_TEST_SYM_INTERVAL (sinpif, 0.5, 0x1p22f, 10000) -PL_TEST_SYM_INTERVAL (sinpif, 0x1p22f, inf, 10000) +#if WANT_EXPERIMENTAL_MATH +float +sinpif (float x) +{ + return arm_math_sinpif (x); +} +#endif + +#if WANT_TRIGPI_TESTS +TEST_ULP (arm_math_sinpif, 1.99) +TEST_SYM_INTERVAL (arm_math_sinpif, 0, 0x1p-31, 5000) +TEST_SYM_INTERVAL (arm_math_sinpif, 0x1p-31, 0.5, 10000) +TEST_SYM_INTERVAL (arm_math_sinpif, 0.5, 0x1p22f, 10000) +TEST_SYM_INTERVAL (arm_math_sinpif, 0x1p22f, inf, 10000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/sv_acos_2u.c b/contrib/arm-optimized-routines/math/aarch64/sve/acos.c similarity index 85% rename from contrib/arm-optimized-routines/pl/math/sv_acos_2u.c rename to contrib/arm-optimized-routines/math/aarch64/sve/acos.c index e06db6cae6af..da633392aa3e 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_acos_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/acos.c @@ -1,91 +1,93 @@ /* * Double-precision SVE acos(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "poly_sve_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "sv_poly_f64.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float64_t poly[12]; float64_t pi, pi_over_2; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */ .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4, 0x1.6db6db67f6d9fp-5, 0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6, 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, 0x1.fd1151acb6bedp-8, 0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, }, .pi = 0x1.921fb54442d18p+1, .pi_over_2 = 0x1.921fb54442d18p+0, }; /* Double-precision SVE implementation of vector acos(x). For |x| in [0, 0.5], use an order 11 polynomial P such that the final approximation of asin is an odd polynomial: acos(x) ~ pi/2 - (x + x^3 P(x^2)). The largest observed error in this region is 1.18 ulps, _ZGVsMxv_acos (0x1.fbc5fe28ee9e3p-2) got 0x1.0d4d0f55667f6p+0 want 0x1.0d4d0f55667f7p+0. For |x| in [0.5, 1.0], use same approximation with a change of variable acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). The largest observed error in this region is 1.52 ulps, _ZGVsMxv_acos (0x1.24024271a500ap-1) got 0x1.ed82df4243f0dp-1 want 0x1.ed82df4243f0bp-1. */ svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000); svfloat64_t ax = svabs_x (pg, x); svbool_t a_gt_half = svacgt (pg, x, 0.5); /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with z2 = x ^ 2 and z = |x| , if |x| < 0.5 z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ svfloat64_t z2 = svsel (a_gt_half, svmls_x (pg, sv_f64 (0.5), ax, 0.5), svmul_x (pg, x, x)); svfloat64_t z = svsqrt_m (ax, a_gt_half, z2); /* Use a single polynomial approximation P for both intervals. */ svfloat64_t z4 = svmul_x (pg, z2, z2); svfloat64_t z8 = svmul_x (pg, z4, z4); svfloat64_t z16 = svmul_x (pg, z8, z8); svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly); /* Finalize polynomial: z + z * z2 * P(z2). */ p = svmla_x (pg, z, svmul_x (pg, z, z2), p); /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 = 2 Q(|x|) , for 0.5 < x < 1.0 = pi - 2 Q(|x|) , for -1.0 < x < -0.5. 
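   These cases follow from acos(x) = pi/2 - asin(x) together with the
   identity asin(x) = pi/2 - 2 asin(sqrt((1 - x) / 2)) on [0.5, 1]:
   substituting gives acos(x) = 2 Q(|x|) there, and the reflection
   acos(-x) = pi - acos(x) supplies the remaining case. The selects
   below fold all three cases into the single expression add + mul * y.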
*/ svfloat64_t y = svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (p), sign)); svbool_t is_neg = svcmplt (pg, x, 0.0); svfloat64_t off = svdup_f64_z (is_neg, d->pi); svfloat64_t mul = svsel (a_gt_half, sv_f64 (2.0), sv_f64 (-1.0)); svfloat64_t add = svsel (a_gt_half, off, sv_f64 (d->pi_over_2)); return svmla_x (pg, add, mul, y); } -PL_SIG (SV, D, 1, acos, -1.0, 1.0) -PL_TEST_ULP (SV_NAME_D1 (acos), 1.02) -PL_TEST_INTERVAL (SV_NAME_D1 (acos), 0, 0.5, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (acos), 0.5, 1.0, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (acos), 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (acos), 0x1p11, inf, 20000) -PL_TEST_INTERVAL (SV_NAME_D1 (acos), -0, -inf, 20000) +TEST_SIG (SV, D, 1, acos, -1.0, 1.0) +TEST_ULP (SV_NAME_D1 (acos), 1.02) +TEST_DISABLE_FENV (SV_NAME_D1 (acos)) +TEST_INTERVAL (SV_NAME_D1 (acos), 0, 0.5, 50000) +TEST_INTERVAL (SV_NAME_D1 (acos), 0.5, 1.0, 50000) +TEST_INTERVAL (SV_NAME_D1 (acos), 1.0, 0x1p11, 50000) +TEST_INTERVAL (SV_NAME_D1 (acos), 0x1p11, inf, 20000) +TEST_INTERVAL (SV_NAME_D1 (acos), -0, -inf, 20000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_acosf_1u4.c b/contrib/arm-optimized-routines/math/aarch64/sve/acosf.c similarity index 83% rename from contrib/arm-optimized-routines/pl/math/sv_acosf_1u4.c rename to contrib/arm-optimized-routines/math/aarch64/sve/acosf.c index 7ac59ceedfbd..86b7822cefc3 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_acosf_1u4.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/acosf.c @@ -1,84 +1,86 @@ /* * Single-precision SVE acos(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "poly_sve_f32.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "sv_poly_f32.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float32_t poly[5]; float32_t pi, pi_over_2; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */ .poly = { 0x1.55555ep-3, 0x1.33261ap-4, 0x1.70d7dcp-5, 0x1.b059dp-6, 0x1.3af7d8p-5, }, .pi = 0x1.921fb6p+1f, .pi_over_2 = 0x1.921fb6p+0f, }; /* Single-precision SVE implementation of vector acos(x). For |x| in [0, 0.5], use order 4 polynomial P such that the final approximation of asin is an odd polynomial: acos(x) ~ pi/2 - (x + x^3 P(x^2)). The largest observed error in this region is 1.16 ulps, _ZGVsMxv_acosf(0x1.ffbeccp-2) got 0x1.0c27f8p+0 want 0x1.0c27f6p+0. For |x| in [0.5, 1.0], use same approximation with a change of variable acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). The largest observed error in this region is 1.32 ulps, _ZGVsMxv_acosf (0x1.15ba56p-1) got 0x1.feb33p-1 want 0x1.feb32ep-1. */ svfloat32_t SV_NAME_F1 (acos) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), 0x80000000); svfloat32_t ax = svabs_x (pg, x); svbool_t a_gt_half = svacgt (pg, x, 0.5); /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with z2 = x ^ 2 and z = |x| , if |x| < 0.5 z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ svfloat32_t z2 = svsel (a_gt_half, svmls_x (pg, sv_f32 (0.5), ax, 0.5), svmul_x (pg, x, x)); svfloat32_t z = svsqrt_m (ax, a_gt_half, z2); /* Use a single polynomial approximation P for both intervals. */ svfloat32_t p = sv_horner_4_f32_x (pg, z2, d->poly); /* Finalize polynomial: z + z * z2 * P(z2). 
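   (sv_horner_4_f32_x above expands to a fused multiply-add chain; as a
   scalar sketch of the same evaluation, for illustration only:

     float p = poly[4];
     for (int i = 3; i >= 0; i--)
       p = fmaf (p, z2, poly[i]); // p = P(z2), degree 4

   the step below then forms z + (z * z2) * p, keeping the overall
   approximation an odd function of z.)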
*/ p = svmla_x (pg, z, svmul_x (pg, z, z2), p); /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 = 2 Q(|x|) , for 0.5 < x < 1.0 = pi - 2 Q(|x|) , for -1.0 < x < -0.5. */ svfloat32_t y = svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (p), sign)); svbool_t is_neg = svcmplt (pg, x, 0.0); svfloat32_t off = svdup_f32_z (is_neg, d->pi); svfloat32_t mul = svsel (a_gt_half, sv_f32 (2.0), sv_f32 (-1.0)); svfloat32_t add = svsel (a_gt_half, off, sv_f32 (d->pi_over_2)); return svmla_x (pg, add, mul, y); } -PL_SIG (SV, F, 1, acos, -1.0, 1.0) -PL_TEST_ULP (SV_NAME_F1 (acos), 0.82) -PL_TEST_INTERVAL (SV_NAME_F1 (acos), 0, 0.5, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (acos), 0.5, 1.0, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (acos), 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (acos), 0x1p11, inf, 20000) -PL_TEST_INTERVAL (SV_NAME_F1 (acos), -0, -inf, 20000) +TEST_SIG (SV, F, 1, acos, -1.0, 1.0) +TEST_ULP (SV_NAME_F1 (acos), 0.82) +TEST_DISABLE_FENV (SV_NAME_F1 (acos)) +TEST_INTERVAL (SV_NAME_F1 (acos), 0, 0.5, 50000) +TEST_INTERVAL (SV_NAME_F1 (acos), 0.5, 1.0, 50000) +TEST_INTERVAL (SV_NAME_F1 (acos), 1.0, 0x1p11, 50000) +TEST_INTERVAL (SV_NAME_F1 (acos), 0x1p11, inf, 20000) +TEST_INTERVAL (SV_NAME_F1 (acos), -0, -inf, 20000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/acosh.c b/contrib/arm-optimized-routines/math/aarch64/sve/acosh.c new file mode 100644 index 000000000000..d54c21922e1b --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/acosh.c @@ -0,0 +1,51 @@ +/* + * Double-precision SVE acosh(x) function. + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +#define WANT_SV_LOG1P_K0_SHORTCUT 1 +#include "sv_log1p_inline.h" + +#define One (0x3ff0000000000000) +#define Thres (0x1ff0000000000000) /* asuint64 (0x1p511) - One. */ + +static svfloat64_t NOINLINE +special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +{ + return sv_call_f64 (acosh, x, y, special); +} + +/* SVE approximation for double-precision acosh, based on log1p. + The largest observed error is 3.19 ULP in the region where the + argument to log1p falls in the k=0 interval, i.e. x close to 1: + SV_NAME_D1 (acosh)(0x1.1e4388d4ca821p+0) got 0x1.ed23399f5137p-2 + want 0x1.ed23399f51373p-2. */ +svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg) +{ + /* (ix - One) >= (BigBound - One). */ + svuint64_t ix = svreinterpret_u64 (x); + svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres); + + svfloat64_t xm1 = svsub_x (pg, x, 1.0); + svfloat64_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0)); + svfloat64_t y = svadd_x (pg, xm1, svsqrt_x (pg, u)); + + /* Fall back to scalar routine for special lanes. 
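+   Per lane, sv_call_f64 in special_case behaves like
+     result[i] = special[i] ? acosh (x[i]) : y[i],
+   so only the out-of-range lanes pay for a scalar call. The fast path
+   relies on the identity
+     acosh(x) = log(x + sqrt(x^2 - 1))
+              = log1p((x - 1) + sqrt((x - 1) (x + 1))),
+   which is exactly the xm1 + sqrt(u) argument built above and is much
+   better conditioned near x = 1 than forming x + sqrt(x^2 - 1)
+   directly.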
*/ + if (unlikely (svptest_any (pg, special))) + return special_case (x, sv_log1p_inline (y, pg), special); + return sv_log1p_inline (y, pg); +} + +TEST_SIG (SV, D, 1, acosh, 1.0, 10.0) +TEST_ULP (SV_NAME_D1 (acosh), 2.69) +TEST_DISABLE_FENV (SV_NAME_D1 (acosh)) +TEST_INTERVAL (SV_NAME_D1 (acosh), 1, 0x1p511, 90000) +TEST_INTERVAL (SV_NAME_D1 (acosh), 0x1p511, inf, 10000) +TEST_INTERVAL (SV_NAME_D1 (acosh), 0, 1, 1000) +TEST_INTERVAL (SV_NAME_D1 (acosh), -0, -inf, 10000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/acoshf.c b/contrib/arm-optimized-routines/math/aarch64/sve/acoshf.c new file mode 100644 index 000000000000..f48ef724e8eb --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/acoshf.c @@ -0,0 +1,51 @@ +/* + * Single-precision SVE acosh(x) function. + * Copyright (c) 2023-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +#define One 0x3f800000 +#define Thres 0x20000000 /* asuint(0x1p64) - One. */ + +#include "sv_log1pf_inline.h" + +static svfloat32_t NOINLINE +special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special) +{ + svfloat32_t x = svadd_x (svptrue_b32 (), xm1, 1.0f); + svfloat32_t y = sv_log1pf_inline (tmp, svptrue_b32 ()); + return sv_call_f32 (acoshf, x, y, special); +} + +/* Single-precision SVE acosh(x) routine. Implements the same algorithm as + vector acoshf and log1p. + + Maximum error is 2.47 ULPs: + SV_NAME_F1 (acosh) (0x1.01ca76p+0) got 0x1.e435a6p-4 + want 0x1.e435a2p-4. */ +svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg) +{ + svuint32_t ix = svreinterpret_u32 (x); + svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres); + + svfloat32_t xm1 = svsub_x (pg, x, 1.0f); + svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f)); + svfloat32_t tmp = svadd_x (pg, xm1, svsqrt_x (pg, u)); + + if (unlikely (svptest_any (pg, special))) + return special_case (xm1, tmp, special); + return sv_log1pf_inline (tmp, pg); +} + +TEST_SIG (SV, F, 1, acosh, 1.0, 10.0) +TEST_ULP (SV_NAME_F1 (acosh), 1.97) +TEST_DISABLE_FENV (SV_NAME_F1 (acosh)) +TEST_INTERVAL (SV_NAME_F1 (acosh), 0, 1, 500) +TEST_INTERVAL (SV_NAME_F1 (acosh), 1, 0x1p64, 100000) +TEST_INTERVAL (SV_NAME_F1 (acosh), 0x1p64, inf, 1000) +TEST_INTERVAL (SV_NAME_F1 (acosh), -0, -inf, 1000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_asin_3u.c b/contrib/arm-optimized-routines/math/aarch64/sve/asin.c similarity index 80% rename from contrib/arm-optimized-routines/pl/math/sv_asin_3u.c rename to contrib/arm-optimized-routines/math/aarch64/sve/asin.c index c3dd37b145ae..cac629afae15 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_asin_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/asin.c @@ -1,84 +1,86 @@ /* * Double-precision SVE asin(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "poly_sve_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "sv_poly_f64.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float64_t poly[12]; float64_t pi_over_2f; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. 
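   A polynomial of this shape can be regenerated with a Remez tool; for
   instance, in the style of the Sollya recipes recorded in tanf_data.c
   (a hypothetical invocation, not necessarily the one used here):

     fpminimax((asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)), 11,
               [|double ...|], [0x1p-106; 0x1p-2]);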
*/ .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4, 0x1.6db6db67f6d9fp-5, 0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6, 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, 0x1.fd1151acb6bedp-8, 0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, }, .pi_over_2f = 0x1.921fb54442d18p+0, }; #define P(i) sv_f64 (d->poly[i]) /* Double-precision SVE implementation of vector asin(x). For |x| in [0, 0.5], use an order 11 polynomial P such that the final approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). The largest observed error in this region is 0.52 ulps, _ZGVsMxv_asin(0x1.d95ae04998b6cp-2) got 0x1.ec13757305f27p-2 want 0x1.ec13757305f26p-2. For |x| in [0.5, 1.0], use same approximation with a change of variable asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). The largest observed error in this region is 2.69 ulps, - _ZGVsMxv_asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1 - want 0x1.110d7e85fdd53p-1. */ + _ZGVsMxv_asin (0x1.044e8cefee301p-1) got 0x1.1111dd54ddf96p-1 + want 0x1.1111dd54ddf99p-1. */ svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000); svfloat64_t ax = svabs_x (pg, x); svbool_t a_ge_half = svacge (pg, x, 0.5); /* Evaluate polynomial Q(x) = y + y * z * P(z) with z = x ^ 2 and y = |x| , if |x| < 0.5 z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */ svfloat64_t z2 = svsel (a_ge_half, svmls_x (pg, sv_f64 (0.5), ax, 0.5), svmul_x (pg, x, x)); svfloat64_t z = svsqrt_m (ax, a_ge_half, z2); /* Use a single polynomial approximation P for both intervals. */ svfloat64_t z4 = svmul_x (pg, z2, z2); svfloat64_t z8 = svmul_x (pg, z4, z4); svfloat64_t z16 = svmul_x (pg, z8, z8); svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly); /* Finalize polynomial: z + z * z2 * P(z2). */ p = svmla_x (pg, z, svmul_x (pg, z, z2), p); /* asin(|x|) = Q(|x|) , for |x| < 0.5 = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2f); /* Copy sign. */ return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)); } -PL_SIG (SV, D, 1, asin, -1.0, 1.0) -PL_TEST_ULP (SV_NAME_D1 (asin), 2.19) -PL_TEST_INTERVAL (SV_NAME_D1 (asin), 0, 0.5, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (asin), 0.5, 1.0, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (asin), 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (asin), 0x1p11, inf, 20000) -PL_TEST_INTERVAL (SV_NAME_D1 (asin), -0, -inf, 20000) +TEST_SIG (SV, D, 1, asin, -1.0, 1.0) +TEST_ULP (SV_NAME_D1 (asin), 2.20) +TEST_DISABLE_FENV (SV_NAME_D1 (asin)) +TEST_INTERVAL (SV_NAME_D1 (asin), 0, 0.5, 50000) +TEST_INTERVAL (SV_NAME_D1 (asin), 0.5, 1.0, 50000) +TEST_INTERVAL (SV_NAME_D1 (asin), 1.0, 0x1p11, 50000) +TEST_INTERVAL (SV_NAME_D1 (asin), 0x1p11, inf, 20000) +TEST_INTERVAL (SV_NAME_D1 (asin), -0, -inf, 20000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_asinf_2u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/asinf.c similarity index 81% rename from contrib/arm-optimized-routines/pl/math/sv_asinf_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/asinf.c index 8e9edc2439f5..fe94feba7a42 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_asinf_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/asinf.c @@ -1,76 +1,78 @@ /* * Single-precision SVE asin(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "poly_sve_f32.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "sv_poly_f32.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float32_t poly[5]; float32_t pi_over_2f; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */ .poly = { 0x1.55555ep-3, 0x1.33261ap-4, 0x1.70d7dcp-5, 0x1.b059dp-6, 0x1.3af7d8p-5, }, .pi_over_2f = 0x1.921fb6p+0f, }; /* Single-precision SVE implementation of vector asin(x). For |x| in [0, 0.5], use order 4 polynomial P such that the final approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). The largest observed error in this region is 0.83 ulps, _ZGVsMxv_asinf (0x1.ea00f4p-2) got 0x1.fef15ep-2 want 0x1.fef15cp-2. For |x| in [0.5, 1.0], use same approximation with a change of variable asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). The largest observed error in this region is 2.41 ulps, _ZGVsMxv_asinf (-0x1.00203ep-1) got -0x1.0c3a64p-1 want -0x1.0c3a6p-1. */ svfloat32_t SV_NAME_F1 (asin) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), 0x80000000); svfloat32_t ax = svabs_x (pg, x); svbool_t a_ge_half = svacge (pg, x, 0.5); /* Evaluate polynomial Q(x) = y + y * z * P(z) with z = x ^ 2 and y = |x| , if |x| < 0.5 z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */ svfloat32_t z2 = svsel (a_ge_half, svmls_x (pg, sv_f32 (0.5), ax, 0.5), svmul_x (pg, x, x)); svfloat32_t z = svsqrt_m (ax, a_ge_half, z2); /* Use a single polynomial approximation P for both intervals. */ svfloat32_t p = sv_horner_4_f32_x (pg, z2, d->poly); /* Finalize polynomial: z + z * z2 * P(z2). */ p = svmla_x (pg, z, svmul_x (pg, z, z2), p); /* asin(|x|) = Q(|x|) , for |x| < 0.5 = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ svfloat32_t y = svmad_m (a_ge_half, p, sv_f32 (-2.0), d->pi_over_2f); /* Copy sign. */ return svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign)); } -PL_SIG (SV, F, 1, asin, -1.0, 1.0) -PL_TEST_ULP (SV_NAME_F1 (asin), 1.91) -PL_TEST_INTERVAL (SV_NAME_F1 (asin), 0, 0.5, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (asin), 0.5, 1.0, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (asin), 1.0, 0x1p11, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (asin), 0x1p11, inf, 20000) -PL_TEST_INTERVAL (SV_NAME_F1 (asin), -0, -inf, 20000) \ No newline at end of file +TEST_SIG (SV, F, 1, asin, -1.0, 1.0) +TEST_ULP (SV_NAME_F1 (asin), 1.91) +TEST_DISABLE_FENV (SV_NAME_F1 (asin)) +TEST_INTERVAL (SV_NAME_F1 (asin), 0, 0.5, 50000) +TEST_INTERVAL (SV_NAME_F1 (asin), 0.5, 1.0, 50000) +TEST_INTERVAL (SV_NAME_F1 (asin), 1.0, 0x1p11, 50000) +TEST_INTERVAL (SV_NAME_F1 (asin), 0x1p11, inf, 20000) +TEST_INTERVAL (SV_NAME_F1 (asin), -0, -inf, 20000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/asinh.c b/contrib/arm-optimized-routines/math/aarch64/sve/asinh.c new file mode 100644 index 000000000000..5574116de1e1 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/asinh.c @@ -0,0 +1,197 @@ +/* + * Double-precision SVE asinh(x) function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +#define SignMask (0x8000000000000000) +#define One (0x3ff0000000000000) +#define Thres (0x5fe0000000000000) /* asuint64 (0x1p511). 
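+   Lanes with |x| >= 0x1p511 are deferred to the scalar fallback: the
+   |x| >= 1 path squares x, and capping |x| at 2^511 bounds x^2 by
+   2^1022, keeping that intermediate finite.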
*/ +#define IndexMask (((1 << V_LOG_TABLE_BITS) - 1) << 1) + +static const struct data +{ + double even_coeffs[9]; + double ln2, p3, p1, p4, p0, p2, c1, c3, c5, c7, c9, c11, c13, c15, c17; + uint64_t off, mask; + +} data = { + /* Polynomial generated using Remez on [2^-26, 1]. */ + .even_coeffs ={ + -0x1.55555555554a7p-3, + -0x1.6db6db68332e6p-5, + -0x1.6e8b8b654a621p-6, + -0x1.c9871d10885afp-7, + -0x1.3ddca533e9f54p-7, + -0x1.b90c7099dd397p-8, + -0x1.d217026a669ecp-9, + -0x1.e0f37daef9127p-11, + -0x1.021a48685e287p-14, }, + + .c1 = 0x1.3333333326c7p-4, + .c3 = 0x1.f1c71b26fb40dp-6, + .c5 = 0x1.1c4daa9e67871p-6, + .c7 = 0x1.7a16e8d9d2ecfp-7, + .c9 = 0x1.0becef748dafcp-7, + .c11 = 0x1.541f2bb1ffe51p-8, + .c13 = 0x1.0b5c7977aaf7p-9, + .c15 = 0x1.388b5fe542a6p-12, + .c17 = 0x1.93d4ba83d34dap-18, + + .ln2 = 0x1.62e42fefa39efp-1, + .p0 = -0x1.ffffffffffff7p-2, + .p1 = 0x1.55555555170d4p-2, + .p2 = -0x1.0000000399c27p-2, + .p3 = 0x1.999b2e90e94cap-3, + .p4 = -0x1.554e550bd501ep-3, + .off = 0x3fe6900900000000, + .mask = 0xfffULL << 52, +}; + +static svfloat64_t NOINLINE +special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +{ + return sv_call_f64 (asinh, x, y, special); +} + +static inline svfloat64_t +__sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg) +{ + /* Double-precision SVE log, copied from SVE log implementation with some + cosmetic modification and special-cases removed. See that file for details + of the algorithm used. */ + + svuint64_t ix = svreinterpret_u64 (x); + svuint64_t i_off = svsub_x (pg, ix, d->off); + svuint64_t i + = svand_x (pg, svlsr_x (pg, i_off, (51 - V_LOG_TABLE_BITS)), IndexMask); + svuint64_t iz = svsub_x (pg, ix, svand_x (pg, i_off, d->mask)); + svfloat64_t z = svreinterpret_f64 (iz); + + svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i); + svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i); + + svfloat64_t ln2_p3 = svld1rq (svptrue_b64 (), &d->ln2); + svfloat64_t p1_p4 = svld1rq (svptrue_b64 (), &d->p1); + + svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), invc, z); + svfloat64_t kd + = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (i_off), 52)); + + svfloat64_t hi = svmla_lane (svadd_x (pg, logc, r), kd, ln2_p3, 0); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t y = svmla_lane (sv_f64 (d->p2), r, ln2_p3, 1); + svfloat64_t p = svmla_lane (sv_f64 (d->p0), r, p1_p4, 0); + + y = svmla_lane (y, r2, p1_p4, 1); + y = svmla_x (pg, p, r2, y); + y = svmla_x (pg, hi, r2, y); + return y; +} + +/* Double-precision implementation of SVE asinh(x). + asinh is very sensitive around 1, so it is impractical to devise a single + low-cost algorithm which is sufficiently accurate on a wide range of input. + Instead we use two different algorithms: + asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1) if |x| >= 1 + = sign(x) * (|x| + |x|^3 * P(x^2)) otherwise + where log(x) is an optimized log approximation, and P(x) is a polynomial + shared with the scalar routine. The greatest observed error 2.51 ULP, in + |x| >= 1: + _ZGVsMxv_asinh(0x1.170469d024505p+0) got 0x1.e3181c43b0f36p-1 + want 0x1.e3181c43b0f39p-1. 
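+   A convenient reference point for the |x| >= 1 branch is
+   asinh(1) = log(1 + sqrt(2)) ~= 0.8813735870. The input sign is
+   stripped before either branch and XORed back into the result below,
+   so only |x| is ever fed to the log or to the odd polynomial.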
*/ +svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svuint64_t ix = svreinterpret_u64 (x); + svuint64_t iax = svbic_x (pg, ix, SignMask); + svuint64_t sign = svand_x (pg, ix, SignMask); + svfloat64_t ax = svreinterpret_f64 (iax); + svbool_t ge1 = svcmpge (pg, iax, One); + svbool_t special = svcmpge (pg, iax, Thres); + + /* Option 1: |x| >= 1. + Compute asinh(x) according by asinh(x) = log(x + sqrt(x^2 + 1)). */ + svfloat64_t option_1 = sv_f64 (0); + if (likely (svptest_any (pg, ge1))) + { + svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax); + option_1 = __sv_log_inline ( + svadd_x (pg, ax, svsqrt_x (pg, svadd_x (pg, x2, 1))), d, pg); + } + + /* Option 2: |x| < 1. + Compute asinh(x) using a polynomial. + The largest observed error in this region is 1.51 ULPs: + _ZGVsMxv_asinh(0x1.fe12bf8c616a2p-1) got 0x1.c1e649ee2681bp-1 + want 0x1.c1e649ee2681dp-1. */ + + svfloat64_t option_2 = sv_f64 (0); + if (likely (svptest_any (pg, svnot_z (pg, ge1)))) + { + svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax); + svfloat64_t x4 = svmul_x (svptrue_b64 (), x2, x2); + /* Order-17 Pairwise Horner scheme. */ + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); + svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5); + svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9); + svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13); + + svfloat64_t p01 = svmla_lane (sv_f64 (d->even_coeffs[0]), x2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->even_coeffs[1]), x2, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->even_coeffs[2]), x2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->even_coeffs[3]), x2, c57, 1); + svfloat64_t p89 = svmla_lane (sv_f64 (d->even_coeffs[4]), x2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->even_coeffs[5]), x2, c911, 1); + svfloat64_t p1213 + = svmla_lane (sv_f64 (d->even_coeffs[6]), x2, c1315, 0); + svfloat64_t p1415 + = svmla_lane (sv_f64 (d->even_coeffs[7]), x2, c1315, 1); + svfloat64_t p1617 = svmla_x (pg, sv_f64 (d->even_coeffs[8]), x2, d->c17); + + svfloat64_t p = svmla_x (pg, p1415, x4, p1617); + p = svmla_x (pg, p1213, x4, p); + p = svmla_x (pg, p1011, x4, p); + p = svmla_x (pg, p89, x4, p); + + p = svmla_x (pg, p67, x4, p); + p = svmla_x (pg, p45, x4, p); + + p = svmla_x (pg, p23, x4, p); + + p = svmla_x (pg, p01, x4, p); + + option_2 = svmla_x (pg, ax, p, svmul_x (svptrue_b64 (), x2, ax)); + } + + if (unlikely (svptest_any (pg, special))) + return special_case ( + x, + svreinterpret_f64 (sveor_x ( + pg, svreinterpret_u64 (svsel (ge1, option_1, option_2)), sign)), + special); + + /* Choose the right option for each lane. */ + svfloat64_t y = svsel (ge1, option_1, option_2); + return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)); +} + +TEST_SIG (SV, D, 1, asinh, -10.0, 10.0) +TEST_ULP (SV_NAME_D1 (asinh), 2.52) +TEST_DISABLE_FENV (SV_NAME_D1 (asinh)) +TEST_SYM_INTERVAL (SV_NAME_D1 (asinh), 0, 0x1p-26, 50000) +TEST_SYM_INTERVAL (SV_NAME_D1 (asinh), 0x1p-26, 1, 50000) +TEST_SYM_INTERVAL (SV_NAME_D1 (asinh), 1, 0x1p511, 50000) +TEST_SYM_INTERVAL (SV_NAME_D1 (asinh), 0x1p511, inf, 40000) +/* Test vector asinh 3 times, with control lane < 1, > 1 and special. + Ensures the v_sel is choosing the right option in all cases. 
*/ +TEST_CONTROL_VALUE (SV_NAME_D1 (asinh), 0.5) +TEST_CONTROL_VALUE (SV_NAME_D1 (asinh), 2) +TEST_CONTROL_VALUE (SV_NAME_D1 (asinh), 0x1p600) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_asinhf_2u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/asinhf.c similarity index 53% rename from contrib/arm-optimized-routines/pl/math/sv_asinhf_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/asinhf.c index 1f1f6e5c846f..32aedbfd3a6d 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_asinhf_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/asinhf.c @@ -1,55 +1,57 @@ /* * Single-precision SVE asinh(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "include/mathlib.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #include "sv_log1pf_inline.h" -#define BigBound (0x5f800000) /* asuint(0x1p64). */ +#define BigBound 0x5f800000 /* asuint(0x1p64). */ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svuint32_t iax, svuint32_t sign, svfloat32_t y, svbool_t special) { + svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign)); + y = svreinterpret_f32 ( + svorr_x (svptrue_b32 (), sign, svreinterpret_u32 (y))); return sv_call_f32 (asinhf, x, y, special); } /* Single-precision SVE asinh(x) routine. Implements the same algorithm as vector asinhf and log1p. - Maximum error is 2.48 ULPs: - SV_NAME_F1 (asinh) (0x1.008864p-3) got 0x1.ffbbbcp-4 - want 0x1.ffbbb8p-4. */ + Maximum error is 1.92 ULPs: + SV_NAME_F1 (asinh) (-0x1.0922ecp-1) got -0x1.fd0bccp-2 + want -0x1.fd0bc8p-2. */ svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg) { svfloat32_t ax = svabs_x (pg, x); svuint32_t iax = svreinterpret_u32 (ax); svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax); svbool_t special = svcmpge (pg, iax, BigBound); /* asinh(x) = log(x + sqrt(x * x + 1)). For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). 
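     The second form follows by writing x + sqrt(x * x + 1) as
     1 + (x + sqrt(x * x + 1) - 1) and rationalising
     sqrt(x * x + 1) - 1 = x * x / (sqrt(x * x + 1) + 1); using log1p on
     this avoids the precision loss of evaluating log near 1.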
*/ svfloat32_t ax2 = svmul_x (pg, ax, ax); svfloat32_t d = svadd_x (pg, svsqrt_x (pg, svadd_x (pg, ax2, 1.0f)), 1.0f); svfloat32_t y = sv_log1pf_inline (svadd_x (pg, ax, svdiv_x (pg, ax2, d)), pg); if (unlikely (svptest_any (pg, special))) - return special_case ( - x, svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y))), - special); + return special_case (iax, sign, y, special); return svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y))); } -PL_SIG (SV, F, 1, asinh, -10.0, 10.0) -PL_TEST_ULP (SV_NAME_F1 (asinh), 1.98) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0, 0x1p-12, 4000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0x1p-12, 1.0, 20000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 1.0, 0x1p64, 20000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0x1p64, inf, 4000) +TEST_SIG (SV, F, 1, asinh, -10.0, 10.0) +TEST_ULP (SV_NAME_F1 (asinh), 1.43) +TEST_DISABLE_FENV (SV_NAME_F1 (asinh)) +TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0, 0x1p-12, 4000) +TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0x1p-12, 1.0, 20000) +TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 1.0, 0x1p64, 20000) +TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0x1p64, inf, 4000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_atan_2u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/atan.c similarity index 86% rename from contrib/arm-optimized-routines/pl/math/sv_atan_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/atan.c index 7ab486a4c9d2..73fc29a94f23 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_atan_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/atan.c @@ -1,87 +1,89 @@ /* * Double-precision vector atan(x) function. * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f64.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_poly_f64.h" static const struct data { float64_t poly[20]; float64_t pi_over_2; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-1022, 1.0]. */ .poly = { -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3, 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5, 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6, 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, }, .pi_over_2 = 0x1.921fb54442d18p+0, }; /* Useful constants. */ #define SignMask (0x8000000000000000) /* Fast implementation of SVE atan. Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Largest errors are close to 1. The maximum observed error is 2.27 ulps: _ZGVsMxv_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1 want 0x1.9225645bdd7c3p-1. */ svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); /* No need to trigger special case. Small cases, infs and nans are supported by our approximation technique. */ svuint64_t ix = svreinterpret_u64 (x); svuint64_t sign = svand_x (pg, ix, SignMask); /* Argument reduction: y := arctan(x) for x < 1 y := pi/2 + arctan(-1/x) for x > 1 Hence, use z=-1/a if x>=1, otherwise z=a. */ svbool_t red = svacgt (pg, x, 1.0); /* Avoid dependency in abs(x) in division (and comparison). 
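     Note svdivr_x (pg, x, 1.0) computes 1/x rather than -1/x; the missing
     negation for the z = -1/x branch is supplied by the predicated negation
     of az below.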
*/ svfloat64_t z = svsel (red, svdivr_x (pg, x, 1.0), x); /* Use absolute value only when needed (odd powers of z). */ svfloat64_t az = svabs_x (pg, z); az = svneg_m (az, red, az); /* Use split Estrin scheme for P(z^2) with deg(P)=19. */ svfloat64_t z2 = svmul_x (pg, z, z); svfloat64_t x2 = svmul_x (pg, z2, z2); svfloat64_t x4 = svmul_x (pg, x2, x2); svfloat64_t x8 = svmul_x (pg, x4, x4); svfloat64_t y = svmla_x (pg, sv_estrin_7_f64_x (pg, z2, x2, x4, d->poly), sv_estrin_11_f64_x (pg, z2, x2, x4, x8, d->poly + 8), x8); /* y = shift + z + z^3 * P(z^2). */ svfloat64_t z3 = svmul_x (pg, z2, az); y = svmla_x (pg, az, z3, y); /* Apply shift as indicated by `red` predicate. */ y = svadd_m (red, y, d->pi_over_2); /* y = atan(x) if x>0, -atan(-x) otherwise. */ y = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)); return y; } -PL_SIG (SV, D, 1, atan, -3.1, 3.1) -PL_TEST_ULP (SV_NAME_D1 (atan), 1.78) -PL_TEST_INTERVAL (SV_NAME_D1 (atan), 0.0, 1.0, 40000) -PL_TEST_INTERVAL (SV_NAME_D1 (atan), 1.0, 100.0, 40000) -PL_TEST_INTERVAL (SV_NAME_D1 (atan), 100, inf, 40000) -PL_TEST_INTERVAL (SV_NAME_D1 (atan), -0, -inf, 40000) +TEST_SIG (SV, D, 1, atan, -3.1, 3.1) +TEST_ULP (SV_NAME_D1 (atan), 1.78) +TEST_DISABLE_FENV (SV_NAME_D1 (atan)) +TEST_INTERVAL (SV_NAME_D1 (atan), 0.0, 1.0, 40000) +TEST_INTERVAL (SV_NAME_D1 (atan), 1.0, 100.0, 40000) +TEST_INTERVAL (SV_NAME_D1 (atan), 100, inf, 40000) +TEST_INTERVAL (SV_NAME_D1 (atan), -0, -inf, 40000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_atan2_2u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/atan2.c similarity index 74% rename from contrib/arm-optimized-routines/pl/math/sv_atan2_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/atan2.c index 00530a324a76..1e1d00678b1d 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_atan2_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/atan2.c @@ -1,116 +1,118 @@ /* * Double-precision vector atan2(x) function. * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f64.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_poly_f64.h" static const struct data { float64_t poly[20]; float64_t pi_over_2; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-1022, 1.0]. */ .poly = { -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3, 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5, 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6, 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, }, .pi_over_2 = 0x1.921fb54442d18p+0, }; -/* Useful constants. */ -#define SignMask sv_u64 (0x8000000000000000) - /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ static svfloat64_t NOINLINE special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret, const svbool_t cmp) { return sv_call2_f64 (atan2, y, x, ret, cmp); } /* Returns a predicate indicating true if the input is the bit representation of 0, infinity or nan. 
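   The test evaluates 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1 in unsigned
   arithmetic: the left shift discards the sign bit, +/-0 wraps to UINT64_MAX
   after the subtraction, and inf/nan encodings land at or above the
   threshold.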
*/
static inline svbool_t
zeroinfnan (svuint64_t i, const svbool_t pg)
{
  return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
		  sv_u64 (2 * asuint64 (INFINITY) - 1));
}

/* Fast implementation of SVE atan2. Errors are greatest when y and
   x are reasonably close together. The greatest observed error is 2.28 ULP:
   _ZGVsMxvv_atan2 (-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732)
   got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */
-svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
+svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x,
+				const svbool_t pg)
{
  const struct data *data_ptr = ptr_barrier (&data);

  svuint64_t ix = svreinterpret_u64 (x);
  svuint64_t iy = svreinterpret_u64 (y);

  svbool_t cmp_x = zeroinfnan (ix, pg);
  svbool_t cmp_y = zeroinfnan (iy, pg);
  svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);

-  svuint64_t sign_x = svand_x (pg, ix, SignMask);
-  svuint64_t sign_y = svand_x (pg, iy, SignMask);
-  svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
-
  svfloat64_t ax = svabs_x (pg, x);
  svfloat64_t ay = svabs_x (pg, y);
+  svuint64_t iax = svreinterpret_u64 (ax);
+  svuint64_t iay = svreinterpret_u64 (ay);
+
+  svuint64_t sign_x = sveor_x (pg, ix, iax);
+  svuint64_t sign_y = sveor_x (pg, iy, iay);
+  svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);

-  svbool_t pred_xlt0 = svcmplt (pg, x, 0.0);
  svbool_t pred_aygtax = svcmpgt (pg, ay, ax);

  /* Set up z for call to atan. */
  svfloat64_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay);
  svfloat64_t d = svsel (pred_aygtax, ay, ax);
  svfloat64_t z = svdiv_x (pg, n, d);

  /* Work out the correct shift. */
-  svfloat64_t shift = svsel (pred_xlt0, sv_f64 (-2.0), sv_f64 (0.0));
-  shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift);
+  svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1));
+  shift = svsel (pred_aygtax, sv_f64 (1.0), shift);
+  shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift)));
  shift = svmul_x (pg, shift, data_ptr->pi_over_2);

  /* Use split Estrin scheme for P(z^2) with deg(P)=19. */
  svfloat64_t z2 = svmul_x (pg, z, z);
  svfloat64_t x2 = svmul_x (pg, z2, z2);
  svfloat64_t x4 = svmul_x (pg, x2, x2);
  svfloat64_t x8 = svmul_x (pg, x4, x4);

  svfloat64_t ret = svmla_x (
      pg, sv_estrin_7_f64_x (pg, z2, x2, x4, data_ptr->poly),
      sv_estrin_11_f64_x (pg, z2, x2, x4, x8, data_ptr->poly + 8), x8);

  /* y = shift + z + z^3 * P(z^2). */
  svfloat64_t z3 = svmul_x (pg, z2, z);
  ret = svmla_x (pg, z, z3, ret);
  ret = svadd_m (pg, ret, shift);

  /* Account for the sign of x and y. */
-  ret = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
-  if (unlikely (svptest_any (pg, cmp_xy)))
-    return special_case (y, x, ret, cmp_xy);
-
-  return ret;
+  if (unlikely (svptest_any (pg, cmp_xy)))
+    return special_case (
+	y, x,
+	svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)),
+	cmp_xy);
+  return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
}

/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h.
*/ -PL_SIG (SV, D, 2, atan2) -PL_TEST_ULP (SV_NAME_D2 (atan2), 1.78) -PL_TEST_INTERVAL (SV_NAME_D2 (atan2), 0.0, 1.0, 40000) -PL_TEST_INTERVAL (SV_NAME_D2 (atan2), 1.0, 100.0, 40000) -PL_TEST_INTERVAL (SV_NAME_D2 (atan2), 100, inf, 40000) -PL_TEST_INTERVAL (SV_NAME_D2 (atan2), -0, -inf, 40000) +TEST_SIG (SV, D, 2, atan2) +TEST_ULP (SV_NAME_D2 (atan2), 1.78) +TEST_DISABLE_FENV (SV_NAME_D2 (atan2)) +TEST_INTERVAL (SV_NAME_D2 (atan2), 0.0, 1.0, 40000) +TEST_INTERVAL (SV_NAME_D2 (atan2), 1.0, 100.0, 40000) +TEST_INTERVAL (SV_NAME_D2 (atan2), 100, inf, 40000) +TEST_INTERVAL (SV_NAME_D2 (atan2), -0, -inf, 40000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_atan2f_3u.c b/contrib/arm-optimized-routines/math/aarch64/sve/atan2f.c similarity index 68% rename from contrib/arm-optimized-routines/pl/math/sv_atan2f_3u.c rename to contrib/arm-optimized-routines/math/aarch64/sve/atan2f.c index 9ff73ecb74ba..563b708cfcbb 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_atan2f_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/atan2f.c @@ -1,108 +1,113 @@ /* * Single-precision vector atan2f(x) function. * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f32.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_poly_f32.h" static const struct data { float32_t poly[8]; float32_t pi_over_2; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. */ .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f }, .pi_over_2 = 0x1.921fb6p+0f, }; -#define SignMask sv_u32 (0x80000000) - /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ -static inline svfloat32_t +static svfloat32_t NOINLINE special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret, const svbool_t cmp) { return sv_call2_f32 (atan2f, y, x, ret, cmp); } /* Returns a predicate indicating true if the input is the bit representation of 0, infinity or nan. */ static inline svbool_t zeroinfnan (svuint32_t i, const svbool_t pg) { return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1), sv_u32 (2 * 0x7f800000lu - 1)); } /* Fast implementation of SVE atan2f based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Maximum observed error is 2.95 ULP: _ZGVsMxvv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1 want 0x1.967f00p-1. 
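   The quadrant shift (0, +/-pi/2 or +/-pi) is rebuilt from the sign bit of x
   and the ay > ax predicate; in particular 0x80000000 >> 1 = 0x40000000 is
   the bit pattern of 2.0f, which provides the factor of pi when x < 0.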
*/ -svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) +svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, + const svbool_t pg) { const struct data *data_ptr = ptr_barrier (&data); svuint32_t ix = svreinterpret_u32 (x); svuint32_t iy = svreinterpret_u32 (y); svbool_t cmp_x = zeroinfnan (ix, pg); svbool_t cmp_y = zeroinfnan (iy, pg); svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y); - svuint32_t sign_x = svand_x (pg, ix, SignMask); - svuint32_t sign_y = svand_x (pg, iy, SignMask); - svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y); - svfloat32_t ax = svabs_x (pg, x); svfloat32_t ay = svabs_x (pg, y); + svuint32_t iax = svreinterpret_u32 (ax); + svuint32_t iay = svreinterpret_u32 (ay); + + svuint32_t sign_x = sveor_x (pg, ix, iax); + svuint32_t sign_y = sveor_x (pg, iy, iay); + svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y); - svbool_t pred_xlt0 = svcmplt (pg, x, 0.0); svbool_t pred_aygtax = svcmpgt (pg, ay, ax); /* Set up z for call to atan. */ svfloat32_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay); svfloat32_t d = svsel (pred_aygtax, ay, ax); svfloat32_t z = svdiv_x (pg, n, d); /* Work out the correct shift. */ - svfloat32_t shift = svsel (pred_xlt0, sv_f32 (-2.0), sv_f32 (0.0)); - shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift); + svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1)); + shift = svsel (pred_aygtax, sv_f32 (1.0), shift); + shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift))); shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2)); - /* Use split Estrin scheme for P(z^2) with deg(P)=7. */ + /* Use pure Estrin scheme for P(z^2) with deg(P)=7. */ svfloat32_t z2 = svmul_x (pg, z, z); svfloat32_t z4 = svmul_x (pg, z2, z2); svfloat32_t z8 = svmul_x (pg, z4, z4); svfloat32_t ret = sv_estrin_7_f32_x (pg, z2, z4, z8, data_ptr->poly); /* ret = shift + z + z^3 * P(z^2). */ svfloat32_t z3 = svmul_x (pg, z2, z); ret = svmla_x (pg, z, z3, ret); ret = svadd_m (pg, ret, shift); /* Account for the sign of x and y. */ - ret = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)); if (unlikely (svptest_any (pg, cmp_xy))) - return special_case (y, x, ret, cmp_xy); + return special_case ( + y, x, + svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)), + cmp_xy); - return ret; + return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)); } /* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. 
*/ -PL_SIG (SV, F, 2, atan2) -PL_TEST_ULP (SV_NAME_F2 (atan2), 2.45) -PL_TEST_INTERVAL (SV_NAME_F2 (atan2), 0.0, 1.0, 40000) -PL_TEST_INTERVAL (SV_NAME_F2 (atan2), 1.0, 100.0, 40000) -PL_TEST_INTERVAL (SV_NAME_F2 (atan2), 100, inf, 40000) -PL_TEST_INTERVAL (SV_NAME_F2 (atan2), -0, -inf, 40000) +TEST_SIG (SV, F, 2, atan2) +TEST_ULP (SV_NAME_F2 (atan2), 2.45) +TEST_DISABLE_FENV (SV_NAME_F2 (atan2)) +TEST_INTERVAL (SV_NAME_F2 (atan2), 0.0, 1.0, 40000) +TEST_INTERVAL (SV_NAME_F2 (atan2), 1.0, 100.0, 40000) +TEST_INTERVAL (SV_NAME_F2 (atan2), 100, inf, 40000) +TEST_INTERVAL (SV_NAME_F2 (atan2), -0, -inf, 40000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_atanf_2u9.c b/contrib/arm-optimized-routines/math/aarch64/sve/atanf.c similarity index 83% rename from contrib/arm-optimized-routines/pl/math/sv_atanf_2u9.c rename to contrib/arm-optimized-routines/math/aarch64/sve/atanf.c index 4defb356e7f9..a2cd37b12744 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_atanf_2u9.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/atanf.c @@ -1,76 +1,78 @@ /* * Single-precision vector atan(x) function. * - * Copyright (c) 2021-2023, Arm Limited. + * Copyright (c) 2021-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f32.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_poly_f32.h" static const struct data { float32_t poly[8]; float32_t pi_over_2; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. */ .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f }, .pi_over_2 = 0x1.921fb6p+0f, }; #define SignMask (0x80000000) /* Fast implementation of SVE atanf based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=-1/x and shift = pi/2. Largest observed error is 2.9 ULP, close to +/-1.0: _ZGVsMxv_atanf (0x1.0468f6p+0) got -0x1.967f06p-1 want -0x1.967fp-1. */ svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); /* No need to trigger special case. Small cases, infs and nans are supported by our approximation technique. */ svuint32_t ix = svreinterpret_u32 (x); svuint32_t sign = svand_x (pg, ix, SignMask); /* Argument reduction: y := arctan(x) for x < 1 y := pi/2 + arctan(-1/x) for x > 1 Hence, use z=-1/a if x>=1, otherwise z=a. */ svbool_t red = svacgt (pg, x, 1.0f); /* Avoid dependency in abs(x) in division (and comparison). */ svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (1.0f), x), x); /* Use absolute value only when needed (odd powers of z). */ svfloat32_t az = svabs_x (pg, z); az = svneg_m (az, red, az); /* Use split Estrin scheme for P(z^2) with deg(P)=7. */ svfloat32_t z2 = svmul_x (pg, z, z); svfloat32_t z4 = svmul_x (pg, z2, z2); svfloat32_t z8 = svmul_x (pg, z4, z4); svfloat32_t y = sv_estrin_7_f32_x (pg, z2, z4, z8, d->poly); /* y = shift + z + z^3 * P(z^2). */ svfloat32_t z3 = svmul_x (pg, z2, az); y = svmla_x (pg, az, z3, y); /* Apply shift as indicated by 'red' predicate. */ y = svadd_m (red, y, sv_f32 (d->pi_over_2)); /* y = atan(x) if x>0, -atan(-x) otherwise. 
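     atan is odd, so the sign bit saved from x is simply XOR-ed back into
     the result.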
*/ return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign)); } -PL_SIG (SV, F, 1, atan, -3.1, 3.1) -PL_TEST_ULP (SV_NAME_F1 (atan), 2.9) -PL_TEST_INTERVAL (SV_NAME_F1 (atan), 0.0, 1.0, 40000) -PL_TEST_INTERVAL (SV_NAME_F1 (atan), 1.0, 100.0, 40000) -PL_TEST_INTERVAL (SV_NAME_F1 (atan), 100, inf, 40000) -PL_TEST_INTERVAL (SV_NAME_F1 (atan), -0, -inf, 40000) +TEST_SIG (SV, F, 1, atan, -3.1, 3.1) +TEST_ULP (SV_NAME_F1 (atan), 2.9) +TEST_DISABLE_FENV (SV_NAME_F1 (atan)) +TEST_INTERVAL (SV_NAME_F1 (atan), 0.0, 1.0, 40000) +TEST_INTERVAL (SV_NAME_F1 (atan), 1.0, 100.0, 40000) +TEST_INTERVAL (SV_NAME_F1 (atan), 100, inf, 40000) +TEST_INTERVAL (SV_NAME_F1 (atan), -0, -inf, 40000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_atanh_3u3.c b/contrib/arm-optimized-routines/math/aarch64/sve/atanh.c similarity index 72% rename from contrib/arm-optimized-routines/pl/math/sv_atanh_3u3.c rename to contrib/arm-optimized-routines/math/aarch64/sve/atanh.c index dcc9350b4962..b404df56fd75 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_atanh_3u3.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/atanh.c @@ -1,60 +1,62 @@ /* * Double-precision SVE atanh(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define WANT_SV_LOG1P_K0_SHORTCUT 0 #include "sv_log1p_inline.h" #define One (0x3ff0000000000000) #define Half (0x3fe0000000000000) static svfloat64_t NOINLINE special_case (svfloat64_t x, svfloat64_t y, svbool_t special) { return sv_call_f64 (atanh, x, y, special); } /* SVE approximation for double-precision atanh, based on log1p. The greatest observed error is 2.81 ULP: _ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6 want 0x1.ffd8ff31b501cp-6. */ svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg) { svfloat64_t ax = svabs_x (pg, x); svuint64_t iax = svreinterpret_u64 (ax); svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), iax); svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, Half)); /* It is special if iax >= 1. */ -// svbool_t special = svcmpge (pg, iax, One); svbool_t special = svacge (pg, x, 1.0); /* Computation is performed based on the following sequence of equality: (1+x)/(1-x) = 1 + 2x/(1-x). */ svfloat64_t y; y = svadd_x (pg, ax, ax); y = svdiv_x (pg, y, svsub_x (pg, sv_f64 (1), ax)); /* ln((1+x)/(1-x)) = ln(1+2x/(1-x)) = ln(1 + y). */ y = sv_log1p_inline (y, pg); if (unlikely (svptest_any (pg, special))) return special_case (x, svmul_x (pg, halfsign, y), special); return svmul_x (pg, halfsign, y); } -PL_SIG (SV, D, 1, atanh, -1.0, 1.0) -PL_TEST_ULP (SV_NAME_D1 (atanh), 3.32) +TEST_SIG (SV, D, 1, atanh, -1.0, 1.0) +TEST_ULP (SV_NAME_D1 (atanh), 3.32) +TEST_DISABLE_FENV (SV_NAME_D1 (atanh)) +TEST_SYM_INTERVAL (SV_NAME_D1 (atanh), 0, 0x1p-23, 10000) +TEST_SYM_INTERVAL (SV_NAME_D1 (atanh), 0x1p-23, 1, 90000) +TEST_SYM_INTERVAL (SV_NAME_D1 (atanh), 1, inf, 100) /* atanh is asymptotic at 1, which is the default control value - have to set - -c 0 specially to ensure fp exceptions are triggered correctly (choice of - control lane is irrelevant if fp exceptions are disabled). 
*/ -PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (atanh), 0, 0x1p-23, 10000, 0) -PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (atanh), 0x1p-23, 1, 90000, 0) -PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (atanh), 1, inf, 100, 0) + -c 0 specially to ensure fp exceptions are triggered correctly (choice of + control lane is irrelevant if fp exceptions are disabled). */ +TEST_CONTROL_VALUE (SV_NAME_D1 (atanh), 0) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_atanhf_2u8.c b/contrib/arm-optimized-routines/math/aarch64/sve/atanhf.c similarity index 61% rename from contrib/arm-optimized-routines/pl/math/sv_atanhf_2u8.c rename to contrib/arm-optimized-routines/math/aarch64/sve/atanhf.c index 413c60ce05da..2e10a8cd22f7 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_atanhf_2u8.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/atanhf.c @@ -1,56 +1,61 @@ /* * Single-precision vector atanh(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "mathlib.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #include "sv_log1pf_inline.h" #define One (0x3f800000) #define Half (0x3f000000) static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svuint32_t iax, svuint32_t sign, svfloat32_t halfsign, + svfloat32_t y, svbool_t special) { + svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign)); + y = svmul_x (svptrue_b32 (), halfsign, y); return sv_call_f32 (atanhf, x, y, special); } /* Approximation for vector single-precision atanh(x) using modified log1p. - The maximum error is 2.28 ULP: - _ZGVsMxv_atanhf(0x1.ff1194p-5) got 0x1.ffbbbcp-5 - want 0x1.ffbbb6p-5. */ + The maximum error is 1.99 ULP: + _ZGVsMxv_atanhf(0x1.f1583p-5) got 0x1.f1f4fap-5 + want 0x1.f1f4f6p-5. */ svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg) { svfloat32_t ax = svabs_x (pg, x); svuint32_t iax = svreinterpret_u32 (ax); svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax); svfloat32_t halfsign = svreinterpret_f32 (svorr_x (pg, sign, Half)); svbool_t special = svcmpge (pg, iax, One); /* Computation is performed based on the following sequence of equality: * (1+x)/(1-x) = 1 + 2x/(1-x). */ svfloat32_t y = svadd_x (pg, ax, ax); y = svdiv_x (pg, y, svsub_x (pg, sv_f32 (1), ax)); /* ln((1+x)/(1-x)) = ln(1+2x/(1-x)) = ln(1 + y). */ y = sv_log1pf_inline (y, pg); if (unlikely (svptest_any (pg, special))) - return special_case (x, svmul_x (pg, halfsign, y), special); + return special_case (iax, sign, halfsign, y, special); return svmul_x (pg, halfsign, y); } -PL_SIG (SV, F, 1, atanh, -1.0, 1.0) -PL_TEST_ULP (SV_NAME_F1 (atanh), 2.59) +TEST_SIG (SV, F, 1, atanh, -1.0, 1.0) +TEST_ULP (SV_NAME_F1 (atanh), 1.50) +TEST_DISABLE_FENV (SV_NAME_F1 (atanh)) +TEST_SYM_INTERVAL (SV_NAME_F1 (atanh), 0, 0x1p-12, 1000) +TEST_SYM_INTERVAL (SV_NAME_F1 (atanh), 0x1p-12, 1, 20000) +TEST_SYM_INTERVAL (SV_NAME_F1 (atanh), 1, inf, 1000) /* atanh is asymptotic at 1, which is the default control value - have to set -c 0 specially to ensure fp exceptions are triggered correctly (choice of control lane is irrelevant if fp exceptions are disabled). 
*/ -PL_TEST_SYM_INTERVAL_C (SV_NAME_F1 (atanh), 0, 0x1p-12, 1000, 0) -PL_TEST_SYM_INTERVAL_C (SV_NAME_F1 (atanh), 0x1p-12, 1, 20000, 0) -PL_TEST_SYM_INTERVAL_C (SV_NAME_F1 (atanh), 1, inf, 1000, 0) +TEST_CONTROL_VALUE (SV_NAME_F1 (atanh), 0) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_cbrt_2u.c b/contrib/arm-optimized-routines/math/aarch64/sve/cbrt.c similarity index 77% rename from contrib/arm-optimized-routines/pl/math/sv_cbrt_2u.c rename to contrib/arm-optimized-routines/math/aarch64/sve/cbrt.c index 192f1cd80d59..3e6a972463f0 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_cbrt_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/cbrt.c @@ -1,122 +1,135 @@ /* * Double-precision SVE cbrt(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f64.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_poly_f64.h" const static struct data { float64_t poly[4]; float64_t table[5]; float64_t one_third, two_thirds, shift; int64_t exp_bias; uint64_t tiny_bound, thresh; } data = { /* Generated with FPMinimax in [0.5, 1]. */ .poly = { 0x1.c14e8ee44767p-2, 0x1.dd2d3f99e4c0ep-1, -0x1.08e83026b7e74p-1, 0x1.2c74eaa3ba428p-3, }, /* table[i] = 2^((i - 2) / 3). */ .table = { 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0, }, .one_third = 0x1.5555555555555p-2, .two_thirds = 0x1.5555555555555p-1, .shift = 0x1.8p52, .exp_bias = 1022, .tiny_bound = 0x0010000000000000, /* Smallest normal. */ .thresh = 0x7fe0000000000000, /* asuint64 (infinity) - tiny_bound. */ }; #define MantissaMask 0x000fffffffffffff #define HalfExp 0x3fe0000000000000 static svfloat64_t NOINLINE special_case (svfloat64_t x, svfloat64_t y, svbool_t special) { return sv_call_f64 (cbrt, x, y, special); } static inline svfloat64_t shifted_lookup (const svbool_t pg, const float64_t *table, svint64_t i) { return svld1_gather_index (pg, table, svadd_x (pg, i, 2)); } /* Approximation for double-precision vector cbrt(x), using low-order - polynomial and two Newton iterations. Greatest observed error is 1.79 ULP. - Errors repeat according to the exponent, for instance an error observed for - double value m * 2^e will be observed for any input m * 2^(e + 3*i), where i - is an integer. + polynomial and two Newton iterations. + + The vector version of frexp does not handle subnormals + correctly. As a result these need to be handled by the scalar + fallback, where accuracy may be worse than that of the vector code + path. + + Greatest observed error in the normal range is 1.79 ULP. Errors repeat + according to the exponent, for instance an error observed for double value m + * 2^e will be observed for any input m * 2^(e + 3*i), where i is an integer. _ZGVsMxv_cbrt (0x0.3fffb8d4413f3p-1022) got 0x1.965f53b0e5d97p-342 want 0x1.965f53b0e5d95p-342. */ svfloat64_t SV_NAME_D1 (cbrt) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svfloat64_t ax = svabs_x (pg, x); svuint64_t iax = svreinterpret_u64 (ax); svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), iax); /* Subnormal, +/-0 and special values. */ svbool_t special = svcmpge (pg, svsub_x (pg, iax, d->tiny_bound), d->thresh); /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. 
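     m is formed by masking off the sign and exponent bits of x and OR-ing in
     the exponent of 0.5; e is the biased exponent of |x| minus 1022.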
   This is a vector version of frexp, which gets subnormal values wrong - these
   have to be special-cased as a result. */
  svfloat64_t m = svreinterpret_f64 (svorr_x (
      pg, svand_x (pg, svreinterpret_u64 (x), MantissaMask), HalfExp));
  svint64_t e
      = svsub_x (pg, svreinterpret_s64 (svlsr_x (pg, iax, 52)), d->exp_bias);

  /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point
     for Newton iterations. */
  svfloat64_t p
      = sv_pairwise_poly_3_f64_x (pg, m, svmul_x (pg, m, m), d->poly);

  /* Two iterations of Newton's method for iteratively approximating cbrt. */
  svfloat64_t m_by_3 = svmul_x (pg, m, d->one_third);
  svfloat64_t a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, p, p)), p,
			   d->two_thirds);
  a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, a, a)), a,
	       d->two_thirds);

  /* Assemble the result by the following:

     cbrt(x) = cbrt(m) * 2 ^ (e / 3).

     We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e
     is not necessarily a multiple of 3 we lose some information.

     Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q.

     Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which
     is an integer in [-2, 2], and can be looked up in the table T. Hence the
     result is assembled as:

     cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */
  svfloat64_t eb3f = svmul_x (pg, svcvt_f64_x (pg, e), d->one_third);
  svint64_t ey = svcvt_s64_x (pg, eb3f);
  svint64_t em3 = svmls_x (pg, e, ey, 3);

  svfloat64_t my = shifted_lookup (pg, d->table, em3);
  my = svmul_x (pg, my, a);

  /* Vector version of ldexp. */
  svfloat64_t y = svscale_x (pg, my, ey);

  if (unlikely (svptest_any (pg, special)))
    return special_case (
	x, svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)),
	special);

  /* Copy sign. */
  return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign));
}

-PL_SIG (SV, D, 1, cbrt, -10.0, 10.0)
-PL_TEST_ULP (SV_NAME_D1 (cbrt), 1.30)
-PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cbrt), 0, inf, 1000000)
+/* Worst-case ULP error assumes that scalar fallback is GLIBC 2.40 cbrt, which
+   has ULP error of 3.67 at 0x1.7a337e1ba1ec2p-257 [1]. Largest observed error
+   in the vector path is 1.79 ULP.
+   [1] Innocente, V., & Zimmermann, P. (2024). Accuracy of Mathematical
+   Functions in Single, Double, Double Extended, and Quadruple Precision. */
+TEST_SIG (SV, D, 1, cbrt, -10.0, 10.0)
+TEST_ULP (SV_NAME_D1 (cbrt), 3.17)
+TEST_DISABLE_FENV (SV_NAME_D1 (cbrt))
+TEST_SYM_INTERVAL (SV_NAME_D1 (cbrt), 0, inf, 1000000)
+CLOSE_SVE_ATTR
diff --git a/contrib/arm-optimized-routines/pl/math/sv_cbrtf_1u7.c b/contrib/arm-optimized-routines/math/aarch64/sve/cbrtf.c
similarity index 92%
rename from contrib/arm-optimized-routines/pl/math/sv_cbrtf_1u7.c
rename to contrib/arm-optimized-routines/math/aarch64/sve/cbrtf.c
index 5b625f308827..afdace7865f1 100644
--- a/contrib/arm-optimized-routines/pl/math/sv_cbrtf_1u7.c
+++ b/contrib/arm-optimized-routines/math/aarch64/sve/cbrtf.c
@@ -1,116 +1,118 @@
 /*
  * Single-precision SVE cbrt(x) function.
  *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */

 #include "sv_math.h"
-#include "pl_sig.h"
-#include "pl_test.h"
-#include "poly_sve_f32.h"
+#include "test_sig.h"
+#include "test_defs.h"
+#include "sv_poly_f32.h"

 const static struct data
 {
   float32_t poly[4];
   float32_t table[5];
   float32_t one_third, two_thirds;
 } data = {
   /* Very rough approximation of cbrt(x) in [0.5, 1], generated with
      FPMinimax.
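      The seed only needs enough accuracy to feed Newton's method: one step
      for y^3 = m is y' = (2/3) y + (1/3) m / y^2, which is exactly the
      m_by_3 / (p * p) plus (2/3) p update used in the function body.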
*/ .poly = { 0x1.c14e96p-2, 0x1.dd2d3p-1, -0x1.08e81ap-1, 0x1.2c74c2p-3, }, /* table[i] = 2^((i - 2) / 3). */ .table = { 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0 }, .one_third = 0x1.555556p-2f, .two_thirds = 0x1.555556p-1f, }; #define SmallestNormal 0x00800000 #define Thresh 0x7f000000 /* asuint(INFINITY) - SmallestNormal. */ #define MantissaMask 0x007fffff #define HalfExp 0x3f000000 static svfloat32_t NOINLINE special_case (svfloat32_t x, svfloat32_t y, svbool_t special) { return sv_call_f32 (cbrtf, x, y, special); } static inline svfloat32_t shifted_lookup (const svbool_t pg, const float32_t *table, svint32_t i) { return svld1_gather_index (pg, table, svadd_x (pg, i, 2)); } /* Approximation for vector single-precision cbrt(x) using Newton iteration with initial guess obtained by a low-order polynomial. Greatest error is 1.64 ULP. This is observed for every value where the mantissa is 0x1.85a2aa and the exponent is a multiple of 3, for example: _ZGVsMxv_cbrtf (0x1.85a2aap+3) got 0x1.267936p+1 want 0x1.267932p+1. */ svfloat32_t SV_NAME_F1 (cbrt) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svfloat32_t ax = svabs_x (pg, x); svuint32_t iax = svreinterpret_u32 (ax); svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax); /* Subnormal, +/-0 and special values. */ svbool_t special = svcmpge (pg, svsub_x (pg, iax, SmallestNormal), Thresh); /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector version of frexpf, which gets subnormal values wrong - these have to be special-cased as a result. */ svfloat32_t m = svreinterpret_f32 (svorr_x ( pg, svand_x (pg, svreinterpret_u32 (x), MantissaMask), HalfExp)); svint32_t e = svsub_x (pg, svreinterpret_s32 (svlsr_x (pg, iax, 23)), 126); /* p is a rough approximation for cbrt(m) in [0.5, 1.0]. The better this is, the less accurate the next stage of the algorithm needs to be. An order-4 polynomial is enough for one Newton iteration. */ svfloat32_t p = sv_pairwise_poly_3_f32_x (pg, m, svmul_x (pg, m, m), d->poly); /* One iteration of Newton's method for iteratively approximating cbrt. */ svfloat32_t m_by_3 = svmul_x (pg, m, d->one_third); svfloat32_t a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, p, p)), p, d->two_thirds); /* Assemble the result by the following: cbrt(x) = cbrt(m) * 2 ^ (e / 3). We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is not necessarily a multiple of 3 we lose some information. Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q. Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which is an integer in [-2, 2], and can be looked up in the table T. Hence the result is assembled as: cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */ svfloat32_t ef = svmul_x (pg, svcvt_f32_x (pg, e), d->one_third); svint32_t ey = svcvt_s32_x (pg, ef); svint32_t em3 = svmls_x (pg, e, ey, 3); svfloat32_t my = shifted_lookup (pg, d->table, em3); my = svmul_x (pg, my, a); /* Vector version of ldexpf. */ svfloat32_t y = svscale_x (pg, my, ey); if (unlikely (svptest_any (pg, special))) return special_case ( x, svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign)), special); /* Copy sign. 
*/ return svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign)); } -PL_SIG (SV, F, 1, cbrt, -10.0, 10.0) -PL_TEST_ULP (SV_NAME_F1 (cbrt), 1.15) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cbrt), 0, inf, 1000000) +TEST_SIG (SV, F, 1, cbrt, -10.0, 10.0) +TEST_ULP (SV_NAME_F1 (cbrt), 1.15) +TEST_DISABLE_FENV (SV_NAME_F1 (cbrt)) +TEST_SYM_INTERVAL (SV_NAME_F1 (cbrt), 0, inf, 1000000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_cexpi_3u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/cexpi.c similarity index 79% rename from contrib/arm-optimized-routines/pl/math/sv_cexpi_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/cexpi.c index 920acfea5da0..0ccd110484c8 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_cexpi_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/cexpi.c @@ -1,45 +1,48 @@ /* * Double-precision vector cexpi function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "sv_sincos_common.h" #include "sv_math.h" -#include "pl_test.h" +#include "sv_sincos_common.h" +#include "test_defs.h" static svfloat64x2_t NOINLINE special_case (svfloat64_t x, svbool_t special, svfloat64x2_t y) { return svcreate2 (sv_call_f64 (sin, x, svget2 (y, 0), special), sv_call_f64 (cos, x, svget2 (y, 1), special)); } /* Double-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate polynomials. Largest observed error is for sin, 3.22 ULP: sv_cexpi_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3 want -0x1.ffe9537d5dbb4p-3. */ svfloat64x2_t _ZGVsMxv_cexpi (svfloat64_t x, svbool_t pg) { const struct sv_sincos_data *d = ptr_barrier (&sv_sincos_data); svbool_t special = check_ge_rangeval (pg, x, d); svfloat64x2_t sc = sv_sincos_inline (pg, x, d); if (unlikely (svptest_any (pg, special))) return special_case (x, special, sc); return sc; } -PL_TEST_ULP (_ZGVsMxv_cexpi_sin, 2.73) -PL_TEST_ULP (_ZGVsMxv_cexpi_cos, 2.73) +TEST_DISABLE_FENV (_ZGVsMxv_cexpi_sin) +TEST_DISABLE_FENV (_ZGVsMxv_cexpi_cos) +TEST_ULP (_ZGVsMxv_cexpi_sin, 2.73) +TEST_ULP (_ZGVsMxv_cexpi_cos, 2.73) #define SV_CEXPI_INTERVAL(lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVsMxv_cexpi_sin, lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVsMxv_cexpi_cos, lo, hi, n) + TEST_INTERVAL (_ZGVsMxv_cexpi_sin, lo, hi, n) \ + TEST_INTERVAL (_ZGVsMxv_cexpi_cos, lo, hi, n) SV_CEXPI_INTERVAL (0, 0x1p23, 500000) SV_CEXPI_INTERVAL (-0, -0x1p23, 500000) SV_CEXPI_INTERVAL (0x1p23, inf, 10000) SV_CEXPI_INTERVAL (-0x1p23, -inf, 10000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_cexpif_1u8.c b/contrib/arm-optimized-routines/math/aarch64/sve/cexpif.c similarity index 80% rename from contrib/arm-optimized-routines/pl/math/sv_cexpif_1u8.c rename to contrib/arm-optimized-routines/math/aarch64/sve/cexpif.c index 93f2f998cb38..fd07ce553cd8 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_cexpif_1u8.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/cexpif.c @@ -1,47 +1,50 @@ /* * Single-precision vector cexpi function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "sv_sincosf_common.h" #include "sv_math.h" -#include "pl_test.h" +#include "sv_sincosf_common.h" +#include "test_defs.h" static svfloat32x2_t NOINLINE special_case (svfloat32_t x, svbool_t special, svfloat32x2_t y) { return svcreate2 (sv_call_f32 (sinf, x, svget2 (y, 0), special), sv_call_f32 (cosf, x, svget2 (y, 1), special)); } /* Single-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate low-order polynomials. Worst-case error for sin is 1.67 ULP: v_cexpif_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5 Worst-case error for cos is 1.81 ULP: v_cexpif_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */ svfloat32x2_t _ZGVsMxv_cexpif (svfloat32_t x, svbool_t pg) { const struct sv_sincosf_data *d = ptr_barrier (&sv_sincosf_data); svbool_t special = check_ge_rangeval (pg, x, d); svfloat32x2_t sc = sv_sincosf_inline (pg, x, d); if (unlikely (svptest_any (pg, special))) return special_case (x, special, sc); return sc; } -PL_TEST_ULP (_ZGVsMxv_cexpif_sin, 1.17) -PL_TEST_ULP (_ZGVsMxv_cexpif_cos, 1.31) +TEST_DISABLE_FENV (_ZGVsMxv_cexpif_sin) +TEST_DISABLE_FENV (_ZGVsMxv_cexpif_cos) +TEST_ULP (_ZGVsMxv_cexpif_sin, 1.17) +TEST_ULP (_ZGVsMxv_cexpif_cos, 1.31) #define SV_CEXPIF_INTERVAL(lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVsMxv_cexpif_sin, lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVsMxv_cexpif_cos, lo, hi, n) + TEST_INTERVAL (_ZGVsMxv_cexpif_sin, lo, hi, n) \ + TEST_INTERVAL (_ZGVsMxv_cexpif_cos, lo, hi, n) SV_CEXPIF_INTERVAL (0, 0x1p20, 500000) SV_CEXPIF_INTERVAL (-0, -0x1p20, 500000) SV_CEXPIF_INTERVAL (0x1p20, inf, 10000) SV_CEXPIF_INTERVAL (-0x1p20, -inf, 10000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_cos_2u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/cos.c similarity index 88% rename from contrib/arm-optimized-routines/pl/math/sv_cos_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/cos.c index 76af3459b3f2..93e93674a98a 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_cos_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/cos.c @@ -1,86 +1,88 @@ /* * Double-precision SVE cos(x) function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { double inv_pio2, pio2_1, pio2_2, pio2_3, shift; } data = { /* Polynomial coefficients are hardwired in FTMAD instructions. */ .inv_pio2 = 0x1.45f306dc9c882p-1, .pio2_1 = 0x1.921fb50000000p+0, .pio2_2 = 0x1.110b460000000p-26, .pio2_3 = 0x1.1a62633145c07p-54, /* Original shift used in AdvSIMD cos, plus a contribution to set the bit #0 of q as expected by trigonometric instructions. */ .shift = 0x1.8000000000001p52 }; #define RangeVal 0x4160000000000000 /* asuint64 (0x1p23). */ static svfloat64_t NOINLINE special_case (svfloat64_t x, svfloat64_t y, svbool_t oob) { return sv_call_f64 (cos, x, y, oob); } /* A fast SVE implementation of cos based on trigonometric instructions (FTMAD, FTSSEL, FTSMUL). Maximum measured error: 2.108 ULPs. SV_NAME_D1 (cos)(0x1.9b0ba158c98f3p+7) got -0x1.fddd4c65c7f07p-3 want -0x1.fddd4c65c7f05p-3. 
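   Roughly, FTSMUL squares the reduced argument and attaches the quadrant
   parity taken from q, FTMAD steps through hard-wired sin/cos polynomial
   coefficients, and FTSSEL selects 1.0 or r as the final factor, so one
   polynomial evaluation serves all quadrants.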
*/ svfloat64_t SV_NAME_D1 (cos) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svfloat64_t r = svabs_x (pg, x); svbool_t oob = svcmpge (pg, svreinterpret_u64 (r), RangeVal); /* Load some constants in quad-word chunks to minimise memory access. */ svbool_t ptrue = svptrue_b64 (); svfloat64_t invpio2_and_pio2_1 = svld1rq (ptrue, &d->inv_pio2); svfloat64_t pio2_23 = svld1rq (ptrue, &d->pio2_2); /* n = rint(|x|/(pi/2)). */ svfloat64_t q = svmla_lane (sv_f64 (d->shift), r, invpio2_and_pio2_1, 0); svfloat64_t n = svsub_x (pg, q, d->shift); /* r = |x| - n*(pi/2) (range reduction into -pi/4 .. pi/4). */ r = svmls_lane (r, n, invpio2_and_pio2_1, 1); r = svmls_lane (r, n, pio2_23, 0); r = svmls_lane (r, n, pio2_23, 1); /* cos(r) poly approx. */ svfloat64_t r2 = svtsmul (r, svreinterpret_u64 (q)); svfloat64_t y = sv_f64 (0.0); y = svtmad (y, r2, 7); y = svtmad (y, r2, 6); y = svtmad (y, r2, 5); y = svtmad (y, r2, 4); y = svtmad (y, r2, 3); y = svtmad (y, r2, 2); y = svtmad (y, r2, 1); y = svtmad (y, r2, 0); /* Final multiplicative factor: 1.0 or x depending on bit #0 of q. */ svfloat64_t f = svtssel (r, svreinterpret_u64 (q)); if (unlikely (svptest_any (pg, oob))) return special_case (x, svmul_x (svnot_z (pg, oob), y, f), oob); /* Apply factor. */ return svmul_x (pg, f, y); } -PL_SIG (SV, D, 1, cos, -3.1, 3.1) -PL_TEST_ULP (SV_NAME_D1 (cos), 1.61) -PL_TEST_INTERVAL (SV_NAME_D1 (cos), 0, 0xffff0000, 10000) -PL_TEST_INTERVAL (SV_NAME_D1 (cos), 0x1p-4, 0x1p4, 500000) +TEST_SIG (SV, D, 1, cos, -3.1, 3.1) +TEST_ULP (SV_NAME_D1 (cos), 1.61) +TEST_DISABLE_FENV (SV_NAME_D1 (cos)) +TEST_INTERVAL (SV_NAME_D1 (cos), 0, 0xffff0000, 10000) +TEST_INTERVAL (SV_NAME_D1 (cos), 0x1p-4, 0x1p4, 500000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_cosf_2u1.c b/contrib/arm-optimized-routines/math/aarch64/sve/cosf.c similarity index 87% rename from contrib/arm-optimized-routines/pl/math/sv_cosf_2u1.c rename to contrib/arm-optimized-routines/math/aarch64/sve/cosf.c index 4bdb0dd146bb..7d18f8c2ad21 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_cosf_2u1.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/cosf.c @@ -1,80 +1,82 @@ /* * Single-precision SVE cos(x) function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float neg_pio2_1, neg_pio2_2, neg_pio2_3, inv_pio2, shift; } data = { /* Polynomial coefficients are hard-wired in FTMAD instructions. */ .neg_pio2_1 = -0x1.921fb6p+0f, .neg_pio2_2 = 0x1.777a5cp-25f, .neg_pio2_3 = 0x1.ee59dap-50f, .inv_pio2 = 0x1.45f306p-1f, /* Original shift used in AdvSIMD cosf, plus a contribution to set the bit #0 of q as expected by trigonometric instructions. */ .shift = 0x1.800002p+23f }; #define RangeVal 0x49800000 /* asuint32(0x1p20f). */ static svfloat32_t NOINLINE special_case (svfloat32_t x, svfloat32_t y, svbool_t oob) { return sv_call_f32 (cosf, x, y, oob); } /* A fast SVE implementation of cosf based on trigonometric instructions (FTMAD, FTSSEL, FTSMUL). Maximum measured error: 2.06 ULPs. SV_NAME_F1 (cos)(0x1.dea2f2p+19) got 0x1.fffe7ap-6 want 0x1.fffe76p-6. 
*/ svfloat32_t SV_NAME_F1 (cos) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svfloat32_t r = svabs_x (pg, x); svbool_t oob = svcmpge (pg, svreinterpret_u32 (r), RangeVal); /* Load some constants in quad-word chunks to minimise memory access. */ svfloat32_t negpio2_and_invpio2 = svld1rq (svptrue_b32 (), &d->neg_pio2_1); /* n = rint(|x|/(pi/2)). */ svfloat32_t q = svmla_lane (sv_f32 (d->shift), r, negpio2_and_invpio2, 3); svfloat32_t n = svsub_x (pg, q, d->shift); /* r = |x| - n*(pi/2) (range reduction into -pi/4 .. pi/4). */ r = svmla_lane (r, n, negpio2_and_invpio2, 0); r = svmla_lane (r, n, negpio2_and_invpio2, 1); r = svmla_lane (r, n, negpio2_and_invpio2, 2); /* Final multiplicative factor: 1.0 or x depending on bit #0 of q. */ svfloat32_t f = svtssel (r, svreinterpret_u32 (q)); /* cos(r) poly approx. */ svfloat32_t r2 = svtsmul (r, svreinterpret_u32 (q)); svfloat32_t y = sv_f32 (0.0f); y = svtmad (y, r2, 4); y = svtmad (y, r2, 3); y = svtmad (y, r2, 2); y = svtmad (y, r2, 1); y = svtmad (y, r2, 0); if (unlikely (svptest_any (pg, oob))) return special_case (x, svmul_x (svnot_z (pg, oob), f, y), oob); /* Apply factor. */ return svmul_x (pg, f, y); } -PL_SIG (SV, F, 1, cos, -3.1, 3.1) -PL_TEST_ULP (SV_NAME_F1 (cos), 1.57) -PL_TEST_INTERVAL (SV_NAME_F1 (cos), 0, 0xffff0000, 10000) -PL_TEST_INTERVAL (SV_NAME_F1 (cos), 0x1p-4, 0x1p4, 500000) +TEST_SIG (SV, F, 1, cos, -3.1, 3.1) +TEST_ULP (SV_NAME_F1 (cos), 1.57) +TEST_DISABLE_FENV (SV_NAME_F1 (cos)) +TEST_INTERVAL (SV_NAME_F1 (cos), 0, 0xffff0000, 10000) +TEST_INTERVAL (SV_NAME_F1 (cos), 0x1p-4, 0x1p4, 500000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_cosh_2u.c b/contrib/arm-optimized-routines/math/aarch64/sve/cosh.c similarity index 77% rename from contrib/arm-optimized-routines/pl/math/sv_cosh_2u.c rename to contrib/arm-optimized-routines/math/aarch64/sve/cosh.c index a6d743fb9b96..775854cfbe5a 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_cosh_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/cosh.c @@ -1,100 +1,104 @@ /* * Double-precision SVE cosh(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2025, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float64_t poly[3]; float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres; - uint64_t index_mask, special_bound; + uint64_t special_bound; } data = { .poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3, 0x1.5555576a59599p-5, }, .inv_ln2 = 0x1.71547652b82fep8, /* N/ln2. */ /* -ln2/N. */ .ln2_hi = -0x1.62e42fefa39efp-9, .ln2_lo = -0x1.abc9e3b39803f3p-64, .shift = 0x1.8p+52, .thres = 704.0, - .index_mask = 0xff, /* 0x1.6p9, above which exp overflows. */ .special_bound = 0x4086000000000000, }; static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +special_case (svfloat64_t x, svbool_t pg, svfloat64_t t, svbool_t special) { + svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5); + svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); + svfloat64_t y = svadd_x (pg, half_t, half_over_t); return sv_call_f64 (cosh, x, y, special); } /* Helper for approximating exp(x). Copied from sv_exp_tail, with no special-case handling or tail. */ static inline svfloat64_t exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d) { /* Calculate exp(x). 
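     Standard reduction: exp(x) = 2^(n/N) * exp(r) with N = 256,
     n = round(x * N / ln2) and r = x - n * ln2 / N. Writing n = 256 * k + i,
     2^(i/N) comes from the lookup table and 2^k is folded into the exponent
     bits.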
*/ svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2); svfloat64_t n = svsub_x (pg, z, d->shift); svfloat64_t r = svmla_x (pg, x, n, d->ln2_hi); r = svmla_x (pg, r, n, d->ln2_lo); svuint64_t u = svreinterpret_u64 (z); svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS); - svuint64_t i = svand_x (pg, u, d->index_mask); + svuint64_t i = svand_x (svptrue_b64 (), u, 0xff); svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]); y = svmla_x (pg, sv_f64 (d->poly[0]), r, y); y = svmla_x (pg, sv_f64 (1.0), r, y); - y = svmul_x (pg, r, y); + y = svmul_x (svptrue_b64 (), r, y); /* s = 2^(n/N). */ u = svld1_gather_index (pg, __v_exp_tail_data, i); svfloat64_t s = svreinterpret_f64 (svadd_x (pg, u, e)); return svmla_x (pg, s, s, y); } /* Approximation for SVE double-precision cosh(x) using exp_inline. cosh(x) = (exp(x) + exp(-x)) / 2. The greatest observed error is in the scalar fall-back region, so is the same as the scalar routine, 1.93 ULP: _ZGVsMxv_cosh (0x1.628ad45039d2fp+9) got 0x1.fd774e958236dp+1021 want 0x1.fd774e958236fp+1021. The greatest observed error in the non-special region is 1.54 ULP: _ZGVsMxv_cosh (0x1.ba5651dd4486bp+2) got 0x1.f5e2bb8d5c98fp+8 want 0x1.f5e2bb8d5c991p+8. */ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svfloat64_t ax = svabs_x (pg, x); svbool_t special = svcmpgt (pg, svreinterpret_u64 (ax), d->special_bound); /* Up to the point that exp overflows, we can use it to calculate cosh by exp(|x|) / 2 + 1 / (2 * exp(|x|)). */ svfloat64_t t = exp_inline (ax, pg, d); - svfloat64_t half_t = svmul_x (pg, t, 0.5); - svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); /* Fall back to scalar for any special cases. */ if (unlikely (svptest_any (pg, special))) - return special_case (x, svadd_x (pg, half_t, half_over_t), special); + return special_case (x, pg, t, special); + svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5); + svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); return svadd_x (pg, half_t, half_over_t); } -PL_SIG (SV, D, 1, cosh, -10.0, 10.0) -PL_TEST_ULP (SV_NAME_D1 (cosh), 1.43) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cosh), 0, 0x1.6p9, 100000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cosh), 0x1.6p9, inf, 1000) +TEST_SIG (SV, D, 1, cosh, -10.0, 10.0) +TEST_ULP (SV_NAME_D1 (cosh), 1.43) +TEST_DISABLE_FENV (SV_NAME_D1 (cosh)) +TEST_SYM_INTERVAL (SV_NAME_D1 (cosh), 0, 0x1.6p9, 100000) +TEST_SYM_INTERVAL (SV_NAME_D1 (cosh), 0x1.6p9, inf, 1000) +CLOSE_SVE_ATTR \ No newline at end of file diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/coshf.c b/contrib/arm-optimized-routines/math/aarch64/sve/coshf.c new file mode 100644 index 000000000000..b79fed2374b5 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/coshf.c @@ -0,0 +1,62 @@ +/* + * Single-precision SVE cosh(x) function. + * + * Copyright (c) 2023-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_expf_inline.h" + +static const struct data +{ + struct sv_expf_data expf_consts; + float special_bound; +} data = { + .expf_consts = SV_EXPF_DATA, + /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. 
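+     Below that bound cosh(x) = exp(x) / 2 + 0.5 / exp(x); note that
+     svdivr_x (pg, e, 0.5) is a reversed divide and computes 0.5 / e,
+     not e / 0.5.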
*/ + .special_bound = 0x1.5a92d8p+6, +}; + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svfloat32_t half_e, svfloat32_t half_over_e, + svbool_t pg) +{ + return sv_call_f32 (coshf, x, svadd_x (svptrue_b32 (), half_e, half_over_e), + pg); +} + +/* Single-precision vector cosh, using vector expf. + Maximum error is 2.77 ULP: + _ZGVsMxv_coshf(-0x1.5b38f4p+1) got 0x1.e45946p+2 + want 0x1.e4594cp+2. */ +svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svbool_t special = svacge (pg, x, d->special_bound); + + /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. + Note that x is passed to exp here, rather than |x|. This is to avoid using + destructive unary ABS for better register usage. However it means the + routine is not exactly symmetrical, as the exp helper is slightly less + accurate in the negative range. */ + svfloat32_t e = expf_inline (x, pg, &d->expf_consts); + svfloat32_t half_e = svmul_x (svptrue_b32 (), e, 0.5); + svfloat32_t half_over_e = svdivr_x (pg, e, 0.5); + + if (unlikely (svptest_any (pg, special))) + return special_case (x, half_e, half_over_e, special); + + return svadd_x (svptrue_b32 (), half_e, half_over_e); +} + +TEST_SIG (SV, F, 1, cosh, -10.0, 10.0) +TEST_ULP (SV_NAME_F1 (cosh), 2.28) +TEST_DISABLE_FENV (SV_NAME_F1 (cosh)) +TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0, 0x1p-63, 100) +TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0, 0x1.5a92d8p+6, 80000) +TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0x1.5a92d8p+6, inf, 2000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_cospi_3u2.c b/contrib/arm-optimized-routines/math/aarch64/sve/cospi.c similarity index 78% rename from contrib/arm-optimized-routines/pl/math/sv_cospi_3u2.c rename to contrib/arm-optimized-routines/math/aarch64/sve/cospi.c index d80f899c41e4..9859dbe7a44c 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_cospi_3u2.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/cospi.c @@ -1,63 +1,66 @@ /* * Double-precision SVE cospi(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "mathlib.h" #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f64.h" +#include "mathlib.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_poly_f64.h" static const struct data { double poly[10]; double range_val; } data = { /* Polynomial coefficients generated using Remez algorithm, see sinpi.sollya for details. */ .poly = { 0x1.921fb54442d184p1, -0x1.4abbce625be53p2, 0x1.466bc6775ab16p1, -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8, 0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16, 0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 }, .range_val = 0x1p53, }; /* A fast SVE implementation of cospi. Maximum error 3.20 ULP: _ZGVsMxv_cospi(0x1.f18ba32c63159p-6) got 0x1.fdabf595f9763p-1 want 0x1.fdabf595f9766p-1. */ svfloat64_t SV_NAME_D1 (cospi) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); /* Using cospi(x) = sinpi(0.5 - x) range reduction and offset into sinpi range -1/2 .. 1/2 r = 0.5 - |x - rint(x)|. */ svfloat64_t n = svrinta_x (pg, x); svfloat64_t r = svsub_x (pg, x, n); r = svsub_x (pg, sv_f64 (0.5), svabs_x (pg, r)); /* Result should be negated based on if n is odd or not. If ax >= 2^53, the result will always be positive. 
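     The parity of n is moved into the sign position by shifting the
     converted integer left by 63; XOR-ing this into y negates the result
     for odd n only.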
*/ svbool_t cmp = svaclt (pg, x, d->range_val); svuint64_t intn = svreinterpret_u64 (svcvt_s64_z (pg, n)); svuint64_t sign = svlsl_z (cmp, intn, 63); /* y = sin(r). */ svfloat64_t r2 = svmul_x (pg, r, r); svfloat64_t r4 = svmul_x (pg, r2, r2); svfloat64_t y = sv_pw_horner_9_f64_x (pg, r2, r4, d->poly); y = svmul_x (pg, y, r); return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)); } -PL_SIG (SV, D, 1, cospi, -0.9, 0.9) -PL_TEST_ULP (SV_NAME_D1 (cospi), 2.71) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0, 0x1p-63, 5000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0x1p-63, 0.5, 10000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0.5, 0x1p51, 10000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0x1p51, inf, 100000) +#if WANT_TRIGPI_TESTS +TEST_ULP (SV_NAME_D1 (cospi), 2.71) +TEST_DISABLE_FENV (SV_NAME_D1 (cospi)) +TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0, 0x1p-63, 5000) +TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0x1p-63, 0.5, 10000) +TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0.5, 0x1p51, 10000) +TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0x1p51, inf, 100000) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_cospif_2u6.c b/contrib/arm-optimized-routines/math/aarch64/sve/cospif.c similarity index 75% rename from contrib/arm-optimized-routines/pl/math/sv_cospif_2u6.c rename to contrib/arm-optimized-routines/math/aarch64/sve/cospif.c index fb2922d0533a..d65a2b619023 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_cospif_2u6.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/cospif.c @@ -1,59 +1,62 @@ /* * Single-precision SVE cospi(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "mathlib.h" #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f32.h" +#include "mathlib.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_poly_f32.h" static const struct data { float poly[6]; float range_val; } data = { /* Taylor series coefficents for sin(pi * x). */ .poly = { 0x1.921fb6p1f, -0x1.4abbcep2f, 0x1.466bc6p1f, -0x1.32d2ccp-1f, 0x1.50783p-4f, -0x1.e30750p-8f }, .range_val = 0x1p31f, }; /* A fast SVE implementation of cospif. Maximum error: 2.60 ULP: _ZGVsMxv_cospif(+/-0x1.cae664p-4) got 0x1.e09c9ep-1 want 0x1.e09c98p-1. */ svfloat32_t SV_NAME_F1 (cospi) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); /* Using cospi(x) = sinpi(0.5 - x) range reduction and offset into sinpi range -1/2 .. 1/2 r = 0.5 - |x - rint(x)|. */ svfloat32_t n = svrinta_x (pg, x); svfloat32_t r = svsub_x (pg, x, n); r = svsub_x (pg, sv_f32 (0.5f), svabs_x (pg, r)); /* Result should be negated based on if n is odd or not. If ax >= 2^31, the result will always be positive. */ svbool_t cmp = svaclt (pg, x, d->range_val); svuint32_t intn = svreinterpret_u32 (svcvt_s32_x (pg, n)); svuint32_t sign = svlsl_z (cmp, intn, 31); /* y = sin(r). 
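   More precisely y = sin(pi * r), evaluated as r * P(r^2) with the six
   coefficients above; a scalar model of the Horner evaluation:

     static float sinpif_poly (float r, const float c[6])
     {
       float r2 = r * r;
       float p = c[5];
       for (int i = 4; i >= 0; i--)
         p = p * r2 + c[i]; /* Horner in r^2. */
       return p * r;        /* sin(pi*r) ~ r * (c0 + c1 r^2 + ... + c5 r^10). */
     }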
*/ svfloat32_t r2 = svmul_x (pg, r, r); svfloat32_t y = sv_horner_5_f32_x (pg, r2, d->poly); y = svmul_x (pg, y, r); return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign)); } -PL_SIG (SV, F, 1, cospi, -0.9, 0.9) -PL_TEST_ULP (SV_NAME_F1 (cospi), 2.08) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0, 0x1p-31, 5000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0x1p-31, 0.5, 10000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0.5, 0x1p31f, 10000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0x1p31f, inf, 10000) +#if WANT_TRIGPI_TESTS +TEST_ULP (SV_NAME_F1 (cospi), 2.08) +TEST_DISABLE_FENV (SV_NAME_F1 (cospi)) +TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0, 0x1p-31, 5000) +TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0x1p-31, 0.5, 10000) +TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0.5, 0x1p31f, 10000) +TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0x1p31f, inf, 10000) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_erf_2u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/erf.c similarity index 83% rename from contrib/arm-optimized-routines/pl/math/sv_erf_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/erf.c index cbf9718e5bb0..ccade93e1033 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_erf_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/erf.c @@ -1,111 +1,115 @@ /* * Double-precision vector erf(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { double third; double tenth, two_over_five, two_over_fifteen; double two_over_nine, two_over_fortyfive; double max, shift; } data = { .third = 0x1.5555555555556p-2, /* used to compute 2/3 and 1/6 too. */ .two_over_fifteen = 0x1.1111111111111p-3, .tenth = -0x1.999999999999ap-4, .two_over_five = -0x1.999999999999ap-2, .two_over_nine = -0x1.c71c71c71c71cp-3, .two_over_fortyfive = 0x1.6c16c16c16c17p-5, .max = 5.9921875, /* 6 - 1/128. */ .shift = 0x1p45, }; #define SignMask (0x8000000000000000) /* Double-precision implementation of vector erf(x). Approximation based on series expansion near x rounded to nearest multiple of 1/128. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erf(x) ~ erf(r) + scale * d * [ + 1 - r d + 1/3 (2 r^2 - 1) d^2 - 1/6 (r (2 r^2 - 3)) d^3 + 1/30 (4 r^4 - 12 r^2 + 3) d^4 - 1/90 (4 r^4 - 20 r^2 + 15) d^5 ] Maximum measure error: 2.29 ULP _ZGVsMxv_erf(-0x1.00003c924e5d1p-8) got -0x1.20dd59132ebadp-8 want -0x1.20dd59132ebafp-8. */ svfloat64_t SV_NAME_D1 (erf) (svfloat64_t x, const svbool_t pg) { const struct data *dat = ptr_barrier (&data); /* |x| >= 6.0 - 1/128. Opposite conditions except none of them catch NaNs so they can be used in lookup and BSLs to yield the expected results. */ svbool_t a_ge_max = svacge (pg, x, dat->max); svbool_t a_lt_max = svaclt (pg, x, dat->max); /* Set r to multiple of 1/128 nearest to |x|. */ svfloat64_t a = svabs_x (pg, x); svfloat64_t shift = sv_f64 (dat->shift); svfloat64_t z = svadd_x (pg, a, shift); - svuint64_t i - = svsub_x (pg, svreinterpret_u64 (z), svreinterpret_u64 (shift)); + svuint64_t i = svand_x (pg, svreinterpret_u64 (z), 0xfff); + i = svadd_x (pg, i, i); /* Lookup without shortcut for small values but with predicate to avoid segfault for large values and NaNs. 
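   Per lane this amounts to the following sketch, with the interleaved table
   layout struct { double erf, scale; } tab[] and i pre-doubled as above:

     double erfr  = lane_in_range ? tab[i / 2].erf   : 0.0;
     double scale = lane_in_range ? tab[i / 2].scale : 0.0;

   since predicated-off lanes of svld1_gather_index yield zero instead of
   dereferencing an out-of-range index.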
*/ svfloat64_t r = svsub_x (pg, z, shift); - svfloat64_t erfr = svld1_gather_index (a_lt_max, __sv_erf_data.erf, i); - svfloat64_t scale = svld1_gather_index (a_lt_max, __sv_erf_data.scale, i); + svfloat64_t erfr + = svld1_gather_index (a_lt_max, &__v_erf_data.tab[0].erf, i); + svfloat64_t scale + = svld1_gather_index (a_lt_max, &__v_erf_data.tab[0].scale, i); /* erf(x) ~ erf(r) + scale * d * poly (r, d). */ svfloat64_t d = svsub_x (pg, a, r); svfloat64_t d2 = svmul_x (pg, d, d); svfloat64_t r2 = svmul_x (pg, r, r); /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p5(r) * d^5. */ svfloat64_t p1 = r; svfloat64_t third = sv_f64 (dat->third); svfloat64_t twothird = svmul_x (pg, third, 2.0); svfloat64_t sixth = svmul_x (pg, third, 0.5); svfloat64_t p2 = svmls_x (pg, third, r2, twothird); svfloat64_t p3 = svmad_x (pg, r2, third, -0.5); p3 = svmul_x (pg, r, p3); svfloat64_t p4 = svmla_x (pg, sv_f64 (dat->two_over_five), r2, dat->two_over_fifteen); p4 = svmls_x (pg, sv_f64 (dat->tenth), r2, p4); svfloat64_t p5 = svmla_x (pg, sv_f64 (dat->two_over_nine), r2, dat->two_over_fortyfive); p5 = svmla_x (pg, sixth, r2, p5); p5 = svmul_x (pg, r, p5); svfloat64_t p34 = svmla_x (pg, p3, d, p4); svfloat64_t p12 = svmla_x (pg, p1, d, p2); svfloat64_t y = svmla_x (pg, p34, d2, p5); y = svmla_x (pg, p12, d2, y); y = svmla_x (pg, erfr, scale, svmls_x (pg, d, d2, y)); /* Solves the |x| = inf and NaN cases. */ y = svsel (a_ge_max, sv_f64 (1.0), y); /* Copy sign. */ svuint64_t ix = svreinterpret_u64 (x); svuint64_t iy = svreinterpret_u64 (y); svuint64_t sign = svand_x (pg, ix, SignMask); return svreinterpret_f64 (svorr_x (pg, sign, iy)); } -PL_SIG (SV, D, 1, erf, -6.0, 6.0) -PL_TEST_ULP (SV_NAME_D1 (erf), 1.79) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 0, 5.9921875, 40000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 5.9921875, inf, 40000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 0, inf, 4000) +TEST_SIG (SV, D, 1, erf, -6.0, 6.0) +TEST_ULP (SV_NAME_D1 (erf), 1.79) +TEST_DISABLE_FENV (SV_NAME_D1 (erf)) +TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 0, 5.9921875, 40000) +TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 5.9921875, inf, 40000) +TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 0, inf, 4000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_erfc_1u8.c b/contrib/arm-optimized-routines/math/aarch64/sve/erfc.c similarity index 91% rename from contrib/arm-optimized-routines/pl/math/sv_erfc_1u8.c rename to contrib/arm-optimized-routines/math/aarch64/sve/erfc.c index a91bef96f2e7..a85cacb1ae62 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_erfc_1u8.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/erfc.c @@ -1,164 +1,166 @@ /* * Double-precision vector erfc(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { uint64_t off_idx, off_arr; double max, shift; double p20, p40, p41, p42; double p51, p52; double q5, r5; double q6, r6; double q7, r7; double q8, r8; double q9, r9; uint64_t table_scale; } data = { /* Set an offset so the range of the index used for lookup is 3487, and it can be clamped using a saturated add on an offset index. Index offset is 0xffffffffffffffff - asuint64(shift) - 3487. */ .off_idx = 0xbd3ffffffffff260, .off_arr = 0xfffffffffffff260, /* 0xffffffffffffffff - 3487. */ .max = 0x1.b3ep+4, /* 3487/128. 
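   = 27.2421875. |x| is clamped to this bound before indexing, so after the
   biased add only NaN payloads can exceed the valid range, and svqadd
   saturates those to the last table entry. A scalar sketch of the UQADD
   semantics that svqadd provides:

     #include <stdint.h>
     static uint64_t sat_add_u64 (uint64_t a, uint64_t b)
     {
       uint64_t s = a + b;
       return s < a ? UINT64_MAX : s; /* saturate instead of wrapping. */
     }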
*/ .shift = 0x1p45, .table_scale = 0x37f0000000000000, /* asuint64(0x1p-128). */ .p20 = 0x1.5555555555555p-2, /* 1/3, used to compute 2/3 and 1/6. */ .p40 = -0x1.999999999999ap-4, /* 1/10. */ .p41 = -0x1.999999999999ap-2, /* 2/5. */ .p42 = 0x1.1111111111111p-3, /* 2/15. */ .p51 = -0x1.c71c71c71c71cp-3, /* 2/9. */ .p52 = 0x1.6c16c16c16c17p-5, /* 2/45. */ /* Qi = (i+1) / i, for i = 5, ..., 9. */ .q5 = 0x1.3333333333333p0, .q6 = 0x1.2aaaaaaaaaaabp0, .q7 = 0x1.2492492492492p0, .q8 = 0x1.2p0, .q9 = 0x1.1c71c71c71c72p0, /* Ri = -2 * i / ((i+1)*(i+2)), for i = 5, ..., 9. */ .r5 = -0x1.e79e79e79e79ep-3, .r6 = -0x1.b6db6db6db6dbp-3, .r7 = -0x1.8e38e38e38e39p-3, .r8 = -0x1.6c16c16c16c17p-3, .r9 = -0x1.4f2094f2094f2p-3, }; /* Optimized double-precision vector erfc(x). Approximation based on series expansion near x rounded to nearest multiple of 1/128. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erfc(x) ~ erfc(r) - scale * d * poly(r, d), with poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3 + (2/15 r^4 - 2/5 r^2 + 1/10) d^4 - r * (2/45 r^4 - 2/9 r^2 + 1/6) d^5 + p6(r) d^6 + ... + p10(r) d^10 Polynomials p6(r) to p10(r) are computed using recurrence relation 2(i+1)p_i + 2r(i+2)p_{i+1} + (i+2)(i+3)p_{i+2} = 0, with p0 = 1, and p1(r) = -r. Values of erfc(r) and scale are read from lookup tables. Stored values are scaled to avoid hitting the subnormal range. Note that for x < 0, erfc(x) = 2.0 - erfc(-x). Maximum measured error: 1.71 ULP _ZGVsMxv_erfc(0x1.46cfe976733p+4) got 0x1.e15fcbea3e7afp-608 want 0x1.e15fcbea3e7adp-608. */ svfloat64_t SV_NAME_D1 (erfc) (svfloat64_t x, const svbool_t pg) { const struct data *dat = ptr_barrier (&data); svfloat64_t a = svabs_x (pg, x); /* Clamp input at |x| <= 3487/128. */ a = svmin_x (pg, a, dat->max); /* Reduce x to the nearest multiple of 1/128. */ svfloat64_t shift = sv_f64 (dat->shift); svfloat64_t z = svadd_x (pg, a, shift); /* Saturate index for the NaN case. */ svuint64_t i = svqadd (svreinterpret_u64 (z), dat->off_idx); /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. */ i = svadd_x (pg, i, i); - const float64_t *p = &__erfc_data.tab[0].erfc - 2 * dat->off_arr; + const float64_t *p = &__v_erfc_data.tab[0].erfc - 2 * dat->off_arr; svfloat64_t erfcr = svld1_gather_index (pg, p, i); svfloat64_t scale = svld1_gather_index (pg, p + 1, i); /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). */ svfloat64_t r = svsub_x (pg, z, shift); svfloat64_t d = svsub_x (pg, a, r); svfloat64_t d2 = svmul_x (pg, d, d); svfloat64_t r2 = svmul_x (pg, r, r); /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p9(r) * d^9. */ svfloat64_t p1 = r; svfloat64_t third = sv_f64 (dat->p20); svfloat64_t twothird = svmul_x (pg, third, 2.0); svfloat64_t sixth = svmul_x (pg, third, 0.5); svfloat64_t p2 = svmls_x (pg, third, r2, twothird); svfloat64_t p3 = svmad_x (pg, r2, third, -0.5); p3 = svmul_x (pg, r, p3); svfloat64_t p4 = svmla_x (pg, sv_f64 (dat->p41), r2, dat->p42); p4 = svmls_x (pg, sv_f64 (dat->p40), r2, p4); svfloat64_t p5 = svmla_x (pg, sv_f64 (dat->p51), r2, dat->p52); p5 = svmla_x (pg, sixth, r2, p5); p5 = svmul_x (pg, r, p5); /* Compute p_i using recurrence relation: p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. 
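   A scalar model of the five steps below, assuming the constants are
   gathered into arrays q[] = {Q5..Q9} and rr[] = {R5..R9}:

     static void erfc_tail (double r, double p4, double p5,
                            const double q[5], const double rr[5],
                            double p[5] /* receives p6..p10. */)
     {
       double pi = p4, pj = p5;
       for (int k = 0; k < 5; k++)
         {
           double pk = (pi + r * q[k] * pj) * rr[k];
           p[k] = pk;
           pi = pj;
           pj = pk;
         }
     }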
*/ svfloat64_t qr5 = svld1rq (svptrue_b64 (), &dat->q5); svfloat64_t qr6 = svld1rq (svptrue_b64 (), &dat->q6); svfloat64_t qr7 = svld1rq (svptrue_b64 (), &dat->q7); svfloat64_t qr8 = svld1rq (svptrue_b64 (), &dat->q8); svfloat64_t qr9 = svld1rq (svptrue_b64 (), &dat->q9); svfloat64_t p6 = svmla_x (pg, p4, p5, svmul_lane (r, qr5, 0)); p6 = svmul_lane (p6, qr5, 1); svfloat64_t p7 = svmla_x (pg, p5, p6, svmul_lane (r, qr6, 0)); p7 = svmul_lane (p7, qr6, 1); svfloat64_t p8 = svmla_x (pg, p6, p7, svmul_lane (r, qr7, 0)); p8 = svmul_lane (p8, qr7, 1); svfloat64_t p9 = svmla_x (pg, p7, p8, svmul_lane (r, qr8, 0)); p9 = svmul_lane (p9, qr8, 1); svfloat64_t p10 = svmla_x (pg, p8, p9, svmul_lane (r, qr9, 0)); p10 = svmul_lane (p10, qr9, 1); /* Compute polynomial in d using pairwise Horner scheme. */ svfloat64_t p90 = svmla_x (pg, p9, d, p10); svfloat64_t p78 = svmla_x (pg, p7, d, p8); svfloat64_t p56 = svmla_x (pg, p5, d, p6); svfloat64_t p34 = svmla_x (pg, p3, d, p4); svfloat64_t p12 = svmla_x (pg, p1, d, p2); svfloat64_t y = svmla_x (pg, p78, d2, p90); y = svmla_x (pg, p56, d2, y); y = svmla_x (pg, p34, d2, y); y = svmla_x (pg, p12, d2, y); y = svmls_x (pg, erfcr, scale, svmls_x (pg, d, d2, y)); /* Offset equals 2.0 if sign, else 0.0. */ svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000); svfloat64_t off = svreinterpret_f64 (svlsr_x (pg, sign, 1)); /* Handle sign and scale back in a single fma. */ svfloat64_t fac = svreinterpret_f64 (svorr_x (pg, sign, dat->table_scale)); return svmla_x (pg, off, fac, y); } -PL_SIG (SV, D, 1, erfc, -6.0, 28.0) -PL_TEST_ULP (SV_NAME_D1 (erfc), 1.21) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erfc), 0.0, 0x1p-26, 40000) -PL_TEST_INTERVAL (SV_NAME_D1 (erfc), 0x1p-26, 28.0, 40000) -PL_TEST_INTERVAL (SV_NAME_D1 (erfc), -0x1p-26, -6.0, 40000) -PL_TEST_INTERVAL (SV_NAME_D1 (erfc), 28.0, inf, 40000) -PL_TEST_INTERVAL (SV_NAME_D1 (erfc), 6.0, -inf, 40000) +TEST_SIG (SV, D, 1, erfc, -6.0, 28.0) +TEST_ULP (SV_NAME_D1 (erfc), 1.21) +TEST_DISABLE_FENV (SV_NAME_D1 (erfc)) +TEST_SYM_INTERVAL (SV_NAME_D1 (erfc), 0.0, 0x1p-26, 40000) +TEST_INTERVAL (SV_NAME_D1 (erfc), 0x1p-26, 28.0, 40000) +TEST_INTERVAL (SV_NAME_D1 (erfc), -0x1p-26, -6.0, 40000) +TEST_INTERVAL (SV_NAME_D1 (erfc), 28.0, inf, 40000) +TEST_INTERVAL (SV_NAME_D1 (erfc), 6.0, -inf, 40000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_erfcf_1u7.c b/contrib/arm-optimized-routines/math/aarch64/sve/erfcf.c similarity index 77% rename from contrib/arm-optimized-routines/pl/math/sv_erfcf_1u7.c rename to contrib/arm-optimized-routines/math/aarch64/sve/erfcf.c index cda8f0b3752e..936881332291 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_erfcf_1u7.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/erfcf.c @@ -1,111 +1,113 @@ /* * Single-precision vector erfc(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { uint32_t off_idx, off_arr; float max, shift; float third, two_thirds, two_over_fifteen, two_over_five, tenth; } data = { /* Set an offset so the range of the index used for lookup is 644, and it can be clamped using a saturated add. */ .off_idx = 0xb7fffd7b, /* 0xffffffff - asuint(shift) - 644. */ .off_arr = 0xfffffd7b, /* 0xffffffff - 644. */ .max = 10.0625f, /* 644/64. 
*/ .shift = 0x1p17f, .third = 0x1.555556p-2f, .two_thirds = 0x1.555556p-1f, .two_over_fifteen = 0x1.111112p-3f, .two_over_five = -0x1.99999ap-2f, .tenth = -0x1.99999ap-4f, }; #define SignMask 0x80000000 #define TableScale 0x28000000 /* 0x1p-47. */ /* Optimized single-precision vector erfcf(x). Approximation based on series expansion near x rounded to nearest multiple of 1/64. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erfc(x) ~ erfc(r) - scale * d * poly(r, d), with poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3 + (2/15 r^4 - 2/5 r^2 + 1/10) d^4 Values of erfc(r) and scale are read from lookup tables. Stored values are scaled to avoid hitting the subnormal range. Note that for x < 0, erfc(x) = 2.0 - erfc(-x). Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0). _ZGVsMxv_erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120 want 0x1.f51216p-120. */ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg) { const struct data *dat = ptr_barrier (&data); svfloat32_t a = svabs_x (pg, x); /* Clamp input at |x| <= 10.0 + 4/64. */ a = svmin_x (pg, a, dat->max); /* Reduce x to the nearest multiple of 1/64. */ svfloat32_t shift = sv_f32 (dat->shift); svfloat32_t z = svadd_x (pg, a, shift); /* Saturate index for the NaN case. */ svuint32_t i = svqadd (svreinterpret_u32 (z), dat->off_idx); /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. */ - i = svmul_x (pg, i, 2); - const float32_t *p = &__erfcf_data.tab[0].erfc - 2 * dat->off_arr; + i = svlsl_x (svptrue_b32 (), i, 1); + const float32_t *p = &__v_erfcf_data.tab[0].erfc - 2 * dat->off_arr; svfloat32_t erfcr = svld1_gather_index (pg, p, i); svfloat32_t scale = svld1_gather_index (pg, p + 1, i); /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). */ svfloat32_t r = svsub_x (pg, z, shift); svfloat32_t d = svsub_x (pg, a, r); - svfloat32_t d2 = svmul_x (pg, d, d); - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t d2 = svmul_x (svptrue_b32 (), d, d); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); svfloat32_t coeffs = svld1rq (svptrue_b32 (), &dat->third); - svfloat32_t third = svdup_lane (coeffs, 0); svfloat32_t p1 = r; - svfloat32_t p2 = svmls_lane (third, r2, coeffs, 1); - svfloat32_t p3 = svmul_x (pg, r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0)); + svfloat32_t p2 = svmls_lane (sv_f32 (dat->third), r2, coeffs, 1); + svfloat32_t p3 + = svmul_x (svptrue_b32 (), r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0)); svfloat32_t p4 = svmla_lane (sv_f32 (dat->two_over_five), r2, coeffs, 2); p4 = svmls_x (pg, sv_f32 (dat->tenth), r2, p4); svfloat32_t y = svmla_x (pg, p3, d, p4); y = svmla_x (pg, p2, d, y); y = svmla_x (pg, p1, d, y); /* Solves the |x| = inf/nan case. */ y = svmls_x (pg, erfcr, scale, svmls_x (pg, d, d2, y)); /* Offset equals 2.0f if sign, else 0.0f. */ svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), SignMask); svfloat32_t off = svreinterpret_f32 (svlsr_x (pg, sign, 1)); /* Handle sign and scale back in a single fma. 
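   A scalar model of this step (y here is the table-scaled erfc(|x|) value;
   0x1p-47f undoes TableScale, and erfc(-a) = 2 - erfc(a) supplies the
   offset):

     #include <math.h>
     static float erfcf_finish (float x, float y)
     {
       float off = signbit (x) ? 2.0f : 0.0f;
       float fac = copysignf (0x1p-47f, x);
       return fmaf (fac, y, off); /* off + fac * y. */
     }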
*/ svfloat32_t fac = svreinterpret_f32 (svorr_x (pg, sign, TableScale)); return svmla_x (pg, off, fac, y); } -PL_SIG (SV, F, 1, erfc, -4.0, 10.0) -PL_TEST_ULP (SV_NAME_F1 (erfc), 1.14) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erfc), 0.0, 0x1p-26, 40000) -PL_TEST_INTERVAL (SV_NAME_F1 (erfc), 0x1p-26, 10.0625, 40000) -PL_TEST_INTERVAL (SV_NAME_F1 (erfc), -0x1p-26, -4.0, 40000) -PL_TEST_INTERVAL (SV_NAME_F1 (erfc), 10.0625, inf, 40000) -PL_TEST_INTERVAL (SV_NAME_F1 (erfc), -4.0, -inf, 40000) +TEST_SIG (SV, F, 1, erfc, -4.0, 10.0) +TEST_ULP (SV_NAME_F1 (erfc), 1.14) +TEST_DISABLE_FENV (SV_NAME_F1 (erfc)) +TEST_SYM_INTERVAL (SV_NAME_F1 (erfc), 0.0, 0x1p-26, 40000) +TEST_INTERVAL (SV_NAME_F1 (erfc), 0x1p-26, 10.0625, 40000) +TEST_INTERVAL (SV_NAME_F1 (erfc), -0x1p-26, -4.0, 40000) +TEST_INTERVAL (SV_NAME_F1 (erfc), 10.0625, inf, 40000) +TEST_INTERVAL (SV_NAME_F1 (erfc), -4.0, -inf, 40000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_erff_2u.c b/contrib/arm-optimized-routines/math/aarch64/sve/erff.c similarity index 77% rename from contrib/arm-optimized-routines/pl/math/sv_erff_2u.c rename to contrib/arm-optimized-routines/math/aarch64/sve/erff.c index adeee798ee2e..c8c87499a63f 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_erff_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/erff.c @@ -1,90 +1,91 @@ /* * Single-precision vector erf(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float min, max, scale, shift, third; } data = { .min = 0x1.cp-7f, /* 1/64 - 1/512. */ .max = 3.9375, /* 4 - 8/128. */ .scale = 0x1.20dd76p+0f, /* 2/sqrt(pi). */ .shift = 0x1p16f, .third = 0x1.555556p-2f, /* 1/3. */ }; #define SignMask (0x80000000) /* Single-precision implementation of vector erf(x). Approximation based on series expansion near x rounded to nearest multiple of 1/128. Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, erf(x) ~ erf(r) + scale * d * [1 - r * d - 1/3 * d^2] Values of erf(r) and scale are read from lookup tables. For |x| < 0x1.cp-7, the algorithm sets r = 0, erf(r) = 0, and scale = 2 / sqrt(pi), so it simply boils down to a Taylor series expansion near 0. For |x| > 3.9375, erf(|x|) rounds to 1.0f. Maximum error on each interval: - [0, 0x1.cp-7]: 1.93 ULP _ZGVsMxv_erff(0x1.c373e6p-9) got 0x1.fd686cp-9 want 0x1.fd6868p-9 - [0x1.cp-7, 4.0]: 1.26 ULP _ZGVsMxv_erff(0x1.1d002ep+0) got 0x1.c4eb9ap-1 want 0x1.c4eb98p-1. */ svfloat32_t SV_NAME_F1 (erf) (svfloat32_t x, const svbool_t pg) { const struct data *dat = ptr_barrier (&data); /* |x| > 1/64 - 1/512. */ svbool_t a_gt_min = svacgt (pg, x, dat->min); /* |x| >= 4.0 - 8/128. */ svbool_t a_ge_max = svacge (pg, x, dat->max); svfloat32_t a = svabs_x (pg, x); svfloat32_t shift = sv_f32 (dat->shift); svfloat32_t z = svadd_x (pg, a, shift); - svuint32_t i - = svsub_x (pg, svreinterpret_u32 (z), svreinterpret_u32 (shift)); - - /* Saturate lookup index. */ - i = svsel (a_ge_max, sv_u32 (512), i); + svuint32_t i = svand_x (pg, svreinterpret_u32 (z), 0xfff); + i = svadd_x (pg, i, i); /* r and erf(r) set to 0 for |x| below min. 
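   In that branch the expression below collapses to the Taylor series
   erf(x) ~ (2/sqrt(pi)) * (x - x^3/3); a scalar model of the small-input
   path:

     static float erff_small (float x)
     {
       const float two_over_sqrt_pi = 0x1.20dd76p+0f;
       return two_over_sqrt_pi * (x - (x * x) * (x / 3.0f));
     }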
*/ svfloat32_t r = svsub_z (a_gt_min, z, shift); - svfloat32_t erfr = svld1_gather_index (a_gt_min, __sv_erff_data.erf, i); + svfloat32_t erfr + = svld1_gather_index (a_gt_min, &__v_erff_data.tab[0].erf, i); /* scale set to 2/sqrt(pi) for |x| below min. */ - svfloat32_t scale = svld1_gather_index (a_gt_min, __sv_erff_data.scale, i); + svfloat32_t scale + = svld1_gather_index (a_gt_min, &__v_erff_data.tab[0].scale, i); scale = svsel (a_gt_min, scale, sv_f32 (dat->scale)); /* erf(x) ~ erf(r) + scale * d * (1 - r * d + 1/3 * d^2). */ svfloat32_t d = svsub_x (pg, a, r); svfloat32_t d2 = svmul_x (pg, d, d); svfloat32_t y = svmla_x (pg, r, d, dat->third); y = svmla_x (pg, erfr, scale, svmls_x (pg, d, d2, y)); /* Solves the |x| = inf case. */ y = svsel (a_ge_max, sv_f32 (1.0f), y); /* Copy sign. */ svuint32_t ix = svreinterpret_u32 (x); svuint32_t iy = svreinterpret_u32 (y); svuint32_t sign = svand_x (pg, ix, SignMask); return svreinterpret_f32 (svorr_x (pg, sign, iy)); } -PL_SIG (SV, F, 1, erf, -4.0, 4.0) -PL_TEST_ULP (SV_NAME_F1 (erf), 1.43) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0, 0x1.cp-7, 40000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0x1.cp-7, 3.9375, 40000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 3.9375, inf, 40000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0, inf, 4000) +TEST_SIG (SV, F, 1, erf, -4.0, 4.0) +TEST_ULP (SV_NAME_F1 (erf), 1.43) +TEST_DISABLE_FENV (SV_NAME_F1 (erf)) +TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0, 0x1.cp-7, 40000) +TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0x1.cp-7, 3.9375, 40000) +TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 3.9375, inf, 40000) +TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0, inf, 4000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_exp_1u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/exp.c similarity index 79% rename from contrib/arm-optimized-routines/pl/math/sv_exp_1u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/exp.c index c187def9e625..b021e64ffedf 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_exp_1u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/exp.c @@ -1,137 +1,141 @@ /* * Double-precision vector e^x function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2025, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { - double poly[4]; + double c0, c2; + double c1, c3; double ln2_hi, ln2_lo, inv_ln2, shift, thres; + } data = { - .poly = { /* ulp error: 0.53. */ - 0x1.fffffffffdbcdp-2, 0x1.555555555444cp-3, 0x1.555573c6a9f7dp-5, - 0x1.1111266d28935p-7 }, + .c0 = 0x1.fffffffffdbcdp-2, + .c1 = 0x1.555555555444cp-3, + .c2 = 0x1.555573c6a9f7dp-5, + .c3 = 0x1.1111266d28935p-7, .ln2_hi = 0x1.62e42fefa3800p-1, .ln2_lo = 0x1.ef35793c76730p-45, /* 1/ln2. */ .inv_ln2 = 0x1.71547652b82fep+0, /* 1.5*2^46+1023. This value is further explained below. */ .shift = 0x1.800000000ffc0p+46, .thres = 704.0, }; -#define C(i) sv_f64 (d->poly[i]) #define SpecialOffset 0x6000000000000000 /* 0x1p513. */ /* SpecialBias1 + SpecialBias1 = asuint(1.0). */ #define SpecialBias1 0x7000000000000000 /* 0x1p769. */ #define SpecialBias2 0x3010000000000000 /* 0x1p-254. */ /* Update of both special and non-special cases, if any special case is detected. 
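   The underlying idea, in scalar form: rather than computing s * (1 + y)
   with s = 2^n directly (intermediates can overflow or underflow), split
   s = s1 * s2 and evaluate s1 * (s2 + s2 * y), so that only the final
   multiply can overflow, and only when the true result does. A model using
   ldexp (the vector code below instead bakes the split into the fixed
   bit-pattern offsets defined above):

     #include <math.h>
     static double exp_scaled (double y, int n)
     {
       double s1 = ldexp (1.0, n / 2);
       double s2 = ldexp (1.0, n - n / 2);
       return s1 * (s2 + s2 * y); /* == 2^n * (1 + y). */
     }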
*/ static inline svfloat64_t special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n) { /* s=2^n may overflow, break it up into s=s1*s2, such that exp = s + s*y can be computed as s1*(s2+s2*y) and s1*s1 overflows only if n>0. */ /* If n<=0 then set b to 0x6, 0 otherwise. */ svbool_t p_sign = svcmple (pg, n, 0.0); /* n <= 0. */ svuint64_t b = svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0. */ - /* Set s1 to generate overflow depending on sign of exponent n. */ - svfloat64_t s1 = svreinterpret_f64 ( - svsubr_x (pg, b, SpecialBias1)); /* 0x70...0 - b. */ - /* Offset s to avoid overflow in final result if n is below threshold. */ + /* Set s1 to generate overflow depending on sign of exponent n, + ie. s1 = 0x70...0 - b. */ + svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1)); + /* Offset s to avoid overflow in final result if n is below threshold. + ie. s2 = as_u64 (s) - 0x3010...0 + b. */ svfloat64_t s2 = svreinterpret_f64 ( - svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), - b)); /* as_u64 (s) - 0x3010...0 + b. */ + svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b)); /* |n| > 1280 => 2^(n) overflows. */ svbool_t p_cmp = svacgt (pg, n, 1280.0); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); return svsel (p_cmp, r1, r0); } /* SVE exp algorithm. Maximum measured error is 1.01ulps: SV_NAME_D1 (exp)(0x1.4619d7b04da41p+6) got 0x1.885d9acc41da7p+117 want 0x1.885d9acc41da6p+117. */ svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svbool_t special = svacgt (pg, x, d->thres); /* Use a modifed version of the shift used for flooring, such that x/ln2 is rounded to a multiple of 2^-6=1/64, shift = 1.5 * 2^52 * 2^-6 = 1.5 * 2^46. n is not an integer but can be written as n = m + i/64, with i and m integer, 0 <= i < 64 and m <= n. Bits 5:0 of z will be null every time x/ln2 reaches a new integer value (n=m, i=0), and is incremented every time z (or n) is incremented by 1/64. FEXPA expects i in bits 5:0 of the input so it can be used as index into FEXPA hardwired table T[i] = 2^(i/64) for i = 0:63, that will in turn populate the mantissa of the output. Therefore, we use u=asuint(z) as input to FEXPA. We add 1023 to the modified shift value in order to set bits 16:6 of u to 1, such that once these bits are moved to the exponent of the output of FEXPA, we get the exponent of 2^n right, i.e. we get 2^m. */ svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2); svuint64_t u = svreinterpret_u64 (z); svfloat64_t n = svsub_x (pg, z, d->shift); - + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); /* r = x - n * ln2, r is in [-ln2/(2N), ln2/(2N)]. */ svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); svfloat64_t r = svmls_lane (x, n, ln2, 0); r = svmls_lane (r, n, ln2, 1); /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t p01 = svmla_x (pg, C (0), C (1), r); - svfloat64_t p23 = svmla_x (pg, C (2), C (3), r); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1); svfloat64_t p04 = svmla_x (pg, p01, p23, r2); svfloat64_t y = svmla_x (pg, r, p04, r2); /* s = 2^n, computed using FEXPA. 
FEXPA does not propagate NaNs, so for consistent NaN handling we have to manually propagate them. This comes at significant performance cost. */ svfloat64_t s = svexpa (u); /* Assemble result as exp(x) = 2^n * exp(r). If |x| > Thresh the multiplication may overflow, so use special case routine. */ if (unlikely (svptest_any (pg, special))) { /* FEXPA zeroes the sign bit, however the sign is meaningful to the special case function so needs to be copied. e = sign bit of u << 46. */ svuint64_t e = svand_x (pg, svlsl_x (pg, u, 46), 0x8000000000000000); /* Copy sign to s. */ s = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (s))); return special_case (pg, s, y, n); } /* No special case. */ return svmla_x (pg, s, s, y); } -PL_SIG (SV, D, 1, exp, -9.9, 9.9) -PL_TEST_ULP (SV_NAME_D1 (exp), 1.46) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0, 0x1p-23, 40000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0x1p-23, 1, 50000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 1, 0x1p23, 50000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0x1p23, inf, 50000) +TEST_SIG (SV, D, 1, exp, -9.9, 9.9) +TEST_ULP (SV_NAME_D1 (exp), 1.46) +TEST_DISABLE_FENV (SV_NAME_D1 (exp)) +TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0, 0x1p-23, 40000) +TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0x1p-23, 1, 50000) +TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 1, 0x1p23, 50000) +TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0x1p23, inf, 50000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_exp10_1u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/exp10.c similarity index 79% rename from contrib/arm-optimized-routines/pl/math/sv_exp10_1u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/exp10.c index 519693afcab0..3d6af334e155 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_exp10_1u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/exp10.c @@ -1,122 +1,131 @@ /* * Double-precision SVE 10^x function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2025, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f64.h" +#include "test_sig.h" +#include "test_defs.h" #define SpecialBound 307.0 /* floor (log10 (2^1023)). */ static const struct data { - double poly[5]; + double c1, c3, c2, c4, c0; double shift, log10_2, log2_10_hi, log2_10_lo, scale_thres, special_bound; } data = { /* Coefficients generated using Remez algorithm. rel error: 0x1.9fcb9b3p-60 abs error: 0x1.a20d9598p-60 in [ -log10(2)/128, log10(2)/128 ] max ulp err 0.52 +0.5. */ - .poly = { 0x1.26bb1bbb55516p1, 0x1.53524c73cd32ap1, 0x1.0470591daeafbp1, - 0x1.2bd77b1361ef6p0, 0x1.142b5d54e9621p-1 }, + .c0 = 0x1.26bb1bbb55516p1, + .c1 = 0x1.53524c73cd32ap1, + .c2 = 0x1.0470591daeafbp1, + .c3 = 0x1.2bd77b1361ef6p0, + .c4 = 0x1.142b5d54e9621p-1, /* 1.5*2^46+1023. This value is further explained below. */ .shift = 0x1.800000000ffc0p+46, .log10_2 = 0x1.a934f0979a371p1, /* 1/log2(10). */ .log2_10_hi = 0x1.34413509f79ffp-2, /* log2(10). */ .log2_10_lo = -0x1.9dc1da994fd21p-59, .scale_thres = 1280.0, .special_bound = SpecialBound, }; #define SpecialOffset 0x6000000000000000 /* 0x1p513. */ /* SpecialBias1 + SpecialBias1 = asuint(1.0). */ #define SpecialBias1 0x7000000000000000 /* 0x1p769. */ #define SpecialBias2 0x3010000000000000 /* 0x1p-254. */ /* Update of both special and non-special cases, if any special case is detected. 
*/ static inline svfloat64_t special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n, const struct data *d) { /* s=2^n may overflow, break it up into s=s1*s2, such that exp = s + s*y can be computed as s1*(s2+s2*y) and s1*s1 overflows only if n>0. */ /* If n<=0 then set b to 0x6, 0 otherwise. */ svbool_t p_sign = svcmple (pg, n, 0.0); /* n <= 0. */ svuint64_t b = svdup_u64_z (p_sign, SpecialOffset); /* Set s1 to generate overflow depending on sign of exponent n. */ svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1)); /* Offset s to avoid overflow in final result if n is below threshold. */ svfloat64_t s2 = svreinterpret_f64 ( svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b)); /* |n| > 1280 => 2^(n) overflows. */ svbool_t p_cmp = svacgt (pg, n, d->scale_thres); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); return svsel (p_cmp, r1, r0); } /* Fast vector implementation of exp10 using FEXPA instruction. Maximum measured error is 1.02 ulp. SV_NAME_D1 (exp10)(-0x1.2862fec805e58p+2) got 0x1.885a89551d782p-16 want 0x1.885a89551d781p-16. */ svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); svbool_t no_big_scale = svacle (pg, x, d->special_bound); svbool_t special = svnot_z (pg, no_big_scale); /* n = round(x/(log10(2)/N)). */ svfloat64_t shift = sv_f64 (d->shift); svfloat64_t z = svmla_x (pg, shift, x, d->log10_2); svfloat64_t n = svsub_x (pg, z, shift); /* r = x - n*log10(2)/N. */ svfloat64_t log2_10 = svld1rq (svptrue_b64 (), &d->log2_10_hi); svfloat64_t r = x; r = svmls_lane (r, n, log2_10, 0); r = svmls_lane (r, n, log2_10, 1); /* scale = 2^(n/N), computed using FEXPA. FEXPA does not propagate NaNs, so for consistent NaN handling we have to manually propagate them. This comes at significant performance cost. */ svuint64_t u = svreinterpret_u64 (z); svfloat64_t scale = svexpa (u); - + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); /* Approximate exp10(r) using polynomial. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = svmla_x (pg, svmul_x (pg, r, d->poly[0]), r2, - sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly + 1)); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + svfloat64_t p14 = svmla_x (pg, p12, p34, r2); + + svfloat64_t y = svmla_x (pg, svmul_x (svptrue_b64 (), r, d->c0), r2, p14); /* Assemble result as exp10(x) = 2^n * exp10(r). If |x| > SpecialBound multiplication may overflow, so use special case routine. */ if (unlikely (svptest_any (pg, special))) { /* FEXPA zeroes the sign bit, however the sign is meaningful to the special case function so needs to be copied. e = sign bit of u << 46. */ svuint64_t e = svand_x (pg, svlsl_x (pg, u, 46), 0x8000000000000000); /* Copy sign to scale. */ scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale))); return special_case (pg, scale, y, n, d); } /* No special case. 
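   Here scale = 2^n and y ~ 10^r - 1, so the final fma returns
   2^n * 10^r = 10^(n*log10(2) + r) = 10^x. The whole scheme in scalar form
   (a sketch; the real code rounds n to 1/64 steps and splits log10(2) into
   hi/lo parts):

     #include <math.h>
     static double exp10_model (double x)
     {
       double n = round (x / log10 (2.0));
       double r = x - n * log10 (2.0);
       return exp2 (n) * pow (10.0, r); /* 2^n * 10^r. */
     }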
*/ return svmla_x (pg, scale, scale, y); } -PL_SIG (SV, D, 1, exp10, -9.9, 9.9) -PL_TEST_ULP (SV_NAME_D1 (exp10), 0.52) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp10), 0, 307, 10000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp10), 307, inf, 1000) +#if WANT_EXP10_TESTS +TEST_SIG (SV, D, 1, exp10, -9.9, 9.9) +TEST_ULP (SV_NAME_D1 (exp10), 0.52) +TEST_DISABLE_FENV (SV_NAME_D1 (exp10)) +TEST_SYM_INTERVAL (SV_NAME_D1 (exp10), 0, SpecialBound, 10000) +TEST_SYM_INTERVAL (SV_NAME_D1 (exp10), SpecialBound, inf, 1000) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/exp10f.c b/contrib/arm-optimized-routines/math/aarch64/sve/exp10f.c new file mode 100644 index 000000000000..8679df87702f --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/exp10f.c @@ -0,0 +1,101 @@ +/* + * Single-precision SVE 10^x function. + * + * Copyright (c) 2023-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#define _GNU_SOURCE +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_poly_f32.h" + +/* For x < -Thres, the result is subnormal and not handled correctly by + FEXPA. */ +#define Thres 37.9 + +static const struct data +{ + float log2_10_lo, c0, c2, c4; + float c1, c3, log10_2; + float shift, log2_10_hi, thres; +} data = { + /* Coefficients generated using Remez algorithm with minimisation of relative + error. + rel error: 0x1.89dafa3p-24 + abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2] + maxerr: 0.52 +0.5 ulp. */ + .c0 = 0x1.26bb16p+1f, + .c1 = 0x1.5350d2p+1f, + .c2 = 0x1.04744ap+1f, + .c3 = 0x1.2d8176p+0f, + .c4 = 0x1.12b41ap-1f, + /* 1.5*2^17 + 127, a shift value suitable for FEXPA. */ + .shift = 0x1.803f8p17f, + .log10_2 = 0x1.a934fp+1, + .log2_10_hi = 0x1.344136p-2, + .log2_10_lo = -0x1.ec10cp-27, + .thres = Thres, +}; + +static inline svfloat32_t +sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) +{ + /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)), + with poly(r) in [1/sqrt(2), sqrt(2)] and + x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N]. */ + + svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log2_10_lo); + + /* n = round(x/(log10(2)/N)). */ + svfloat32_t shift = sv_f32 (d->shift); + svfloat32_t z = svmad_x (pg, sv_f32 (d->log10_2), x, shift); + svfloat32_t n = svsub_x (svptrue_b32 (), z, shift); + + /* r = x - n*log10(2)/N. */ + svfloat32_t r = svmsb_x (pg, sv_f32 (d->log2_10_hi), n, x); + r = svmls_lane (r, n, lane_consts, 0); + + svfloat32_t scale = svexpa (svreinterpret_u32 (z)); + + /* Polynomial evaluation: poly(r) ~ exp10(r)-1. */ + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t p14 = svmla_x (pg, p12, p34, r2); + svfloat32_t p0 = svmul_lane (r, lane_consts, 1); + svfloat32_t poly = svmla_x (pg, p0, r2, p14); + + return svmla_x (pg, scale, scale, poly); +} + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct data *d) +{ + return sv_call_f32 (exp10f, x, sv_exp10f_inline (x, svptrue_b32 (), d), + special); +} + +/* Single-precision SVE exp10f routine. Implements the same algorithm + as AdvSIMD exp10f. + Worst case error is 1.02 ULPs. + _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1 + want 0x1.ba5f9cp-1. 
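   A sketch of a strip-mined caller, assuming an SVE-enabled toolchain and
   the vector-ABI symbol shown above:

     #include <arm_sve.h>
     #include <stdint.h>
     svfloat32_t _ZGVsMxv_exp10f (svfloat32_t, svbool_t);
     void exp10f_array (float *dst, const float *src, int64_t n)
     {
       for (int64_t i = 0; i < n; i += svcntw ())
         {
           svbool_t pg = svwhilelt_b32 (i, n);
           svst1 (pg, dst + i, _ZGVsMxv_exp10f (svld1 (pg, src + i), pg));
         }
     }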
*/ +svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t special = svacgt (pg, x, d->thres); + if (unlikely (svptest_any (special, special))) + return special_case (x, special, d); + return sv_exp10f_inline (x, pg, d); +} + +#if WANT_EXP10_TESTS +TEST_SIG (SV, F, 1, exp10, -9.9, 9.9) +TEST_ULP (SV_NAME_F1 (exp10), 0.52) +TEST_DISABLE_FENV (SV_NAME_F1 (exp10)) +TEST_SYM_INTERVAL (SV_NAME_F1 (exp10), 0, Thres, 50000) +TEST_SYM_INTERVAL (SV_NAME_F1 (exp10), Thres, inf, 50000) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_exp2_2u.c b/contrib/arm-optimized-routines/math/aarch64/sve/exp2.c similarity index 72% rename from contrib/arm-optimized-routines/pl/math/sv_exp2_2u.c rename to contrib/arm-optimized-routines/math/aarch64/sve/exp2.c index dcbca8adddd1..adbe40c648ac 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_exp2_2u.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/exp2.c @@ -1,107 +1,111 @@ /* * Double-precision SVE 2^x function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2025, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "poly_sve_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define N (1 << V_EXP_TABLE_BITS) #define BigBound 1022 #define UOFlowBound 1280 static const struct data { - double poly[4]; + double c0, c2; + double c1, c3; double shift, big_bound, uoflow_bound; } data = { /* Coefficients are computed using Remez algorithm with minimisation of the absolute error. */ - .poly = { 0x1.62e42fefa3686p-1, 0x1.ebfbdff82c241p-3, 0x1.c6b09b16de99ap-5, - 0x1.3b2abf5571ad8p-7 }, - .shift = 0x1.8p52 / N, - .uoflow_bound = UOFlowBound, + .c0 = 0x1.62e42fefa3686p-1, .c1 = 0x1.ebfbdff82c241p-3, + .c2 = 0x1.c6b09b16de99ap-5, .c3 = 0x1.3b2abf5571ad8p-7, + .shift = 0x1.8p52 / N, .uoflow_bound = UOFlowBound, .big_bound = BigBound, }; #define SpecialOffset 0x6000000000000000 /* 0x1p513. */ /* SpecialBias1 + SpecialBias1 = asuint(1.0). */ #define SpecialBias1 0x7000000000000000 /* 0x1p769. */ #define SpecialBias2 0x3010000000000000 /* 0x1p-254. */ /* Update of both special and non-special cases, if any special case is detected. */ static inline svfloat64_t special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n, const struct data *d) { /* s=2^n may overflow, break it up into s=s1*s2, such that exp = s + s*y can be computed as s1*(s2+s2*y) and s1*s1 overflows only if n>0. */ /* If n<=0 then set b to 0x6, 0 otherwise. */ svbool_t p_sign = svcmple (pg, n, 0.0); /* n <= 0. */ svuint64_t b = svdup_u64_z (p_sign, SpecialOffset); /* Set s1 to generate overflow depending on sign of exponent n. */ svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1)); /* Offset s to avoid overflow in final result if n is below threshold. */ svfloat64_t s2 = svreinterpret_f64 ( svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b)); /* |n| > 1280 => 2^(n) overflows. */ svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); return svsel (p_cmp, r1, r0); } /* Fast vector implementation of exp2. Maximum measured error is 1.65 ulp. 
_ZGVsMxv_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1 want 0x1.f8db0d4df721dp-1. */ svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); svbool_t no_big_scale = svacle (pg, x, d->big_bound); svbool_t special = svnot_z (pg, no_big_scale); /* Reduce x to k/N + r, where k is integer and r in [-1/2N, 1/2N]. */ svfloat64_t shift = sv_f64 (d->shift); svfloat64_t kd = svadd_x (pg, x, shift); svuint64_t ki = svreinterpret_u64 (kd); /* kd = k/N. */ kd = svsub_x (pg, kd, shift); svfloat64_t r = svsub_x (pg, x, kd); /* scale ~= 2^(k/N). */ svuint64_t idx = svand_x (pg, ki, N - 1); svuint64_t sbits = svld1_gather_index (pg, __v_exp_data, idx); /* This is only a valid scale when -1023*N < k < 1024*N. */ svuint64_t top = svlsl_x (pg, ki, 52 - V_EXP_TABLE_BITS); svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top)); + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); /* Approximate exp2(r) using polynomial. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t p = sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly); - svfloat64_t y = svmul_x (pg, r, p); - + /* y = exp2(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4. */ + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1); + svfloat64_t p = svmla_x (pg, p01, p23, r2); + svfloat64_t y = svmul_x (svptrue_b64 (), r, p); /* Assemble exp2(x) = exp2(r) * scale. */ if (unlikely (svptest_any (pg, special))) return special_case (pg, scale, y, kd, d); return svmla_x (pg, scale, scale, y); } -PL_SIG (SV, D, 1, exp2, -9.9, 9.9) -PL_TEST_ULP (SV_NAME_D1 (exp2), 1.15) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), 0, BigBound, 1000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), BigBound, UOFlowBound, 100000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), UOFlowBound, inf, 1000) +TEST_SIG (SV, D, 1, exp2, -9.9, 9.9) +TEST_ULP (SV_NAME_D1 (exp2), 1.15) +TEST_DISABLE_FENV (SV_NAME_D1 (exp2)) +TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), 0, BigBound, 1000) +TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), BigBound, UOFlowBound, 100000) +TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), UOFlowBound, inf, 1000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/exp2f.c b/contrib/arm-optimized-routines/math/aarch64/sve/exp2f.c new file mode 100644 index 000000000000..f4c1d0ae607e --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/exp2f.c @@ -0,0 +1,83 @@ +/* + * Single-precision SVE 2^x function. + * + * Copyright (c) 2023-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +#define Thres 0x1.5d5e2ap+6f + +static const struct data +{ + float c0, c2, c4, c1, c3; + float shift, thres; +} data = { + /* Coefficients copied from the polynomial in AdvSIMD variant. */ + .c0 = 0x1.62e422p-1f, + .c1 = 0x1.ebf9bcp-3f, + .c2 = 0x1.c6bd32p-5f, + .c3 = 0x1.3ce9e4p-7f, + .c4 = 0x1.59977ap-10f, + /* 1.5*2^17 + 127. */ + .shift = 0x1.803f8p17f, + /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled + correctly by FEXPA. */ + .thres = Thres, +}; + +static inline svfloat32_t +sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) +{ + /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + x = n + r, with r in [-1/2, 1/2]. 
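   n is produced with the classic add-a-big-constant trick: for |x| well
   below 2^17, x + 1.5*2^17 has ulp 2^-6, so subtracting the shift again
   leaves x rounded to a multiple of 1/64 (the extra +127 folded into the
   shift pre-biases the exponent field that FEXPA reads). A scalar model of
   the rounding step, assuming the default round-to-nearest mode:

     static float round_to_1_64 (float x)
     {
       const float big = 0x1.8p17f; /* 1.5 * 2^17; +127 bias omitted here. */
       float z = x + big;
       return z - big;
     }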
*/ + svfloat32_t z = svadd_x (svptrue_b32 (), x, d->shift); + svfloat32_t n = svsub_x (svptrue_b32 (), z, d->shift); + svfloat32_t r = svsub_x (svptrue_b32 (), x, n); + + svfloat32_t scale = svexpa (svreinterpret_u32 (z)); + + /* Polynomial evaluation: poly(r) ~ exp2(r)-1. + Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for + coefficients 1 to 4, and apply most significant coefficient directly. */ + svfloat32_t even_coeffs = svld1rq (svptrue_b32 (), &d->c0); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, even_coeffs, 1); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, even_coeffs, 2); + svfloat32_t p14 = svmla_x (pg, p12, r2, p34); + svfloat32_t p0 = svmul_lane (r, even_coeffs, 0); + svfloat32_t poly = svmla_x (pg, p0, r2, p14); + + return svmla_x (pg, scale, scale, poly); +} + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct data *d) +{ + return sv_call_f32 (exp2f, x, sv_exp2f_inline (x, svptrue_b32 (), d), + special); +} + +/* Single-precision SVE exp2f routine. Implements the same algorithm + as AdvSIMD exp2f. + Worst case error is 1.04 ULPs. + _ZGVsMxv_exp2f(-0x1.af994ap-3) got 0x1.ba6a66p-1 + want 0x1.ba6a64p-1. */ +svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t special = svacgt (pg, x, d->thres); + if (unlikely (svptest_any (special, special))) + return special_case (x, special, d); + return sv_exp2f_inline (x, pg, d); +} + +TEST_SIG (SV, F, 1, exp2, -9.9, 9.9) +TEST_ULP (SV_NAME_F1 (exp2), 0.54) +TEST_DISABLE_FENV (SV_NAME_F1 (exp2)) +TEST_SYM_INTERVAL (SV_NAME_F1 (exp2), 0, Thres, 50000) +TEST_SYM_INTERVAL (SV_NAME_F1 (exp2), Thres, inf, 50000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/expf.c b/contrib/arm-optimized-routines/math/aarch64/sve/expf.c new file mode 100644 index 000000000000..11528abdbbaf --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/expf.c @@ -0,0 +1,50 @@ +/* + * Single-precision vector e^x function. + * + * Copyright (c) 2019-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_expf_inline.h" + +/* Roughly 87.3. For x < -Thres, the result is subnormal and not handled + correctly by FEXPA. */ +#define Thres 0x1.5d5e2ap+6f + +static const struct data +{ + struct sv_expf_data d; + float thres; +} data = { + .d = SV_EXPF_DATA, + .thres = Thres, +}; + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d) +{ + return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special); +} + +/* Optimised single-precision SVE exp function. + Worst-case error is 1.04 ulp: + SV_NAME_F1 (exp)(0x1.a8eda4p+1) got 0x1.ba74bcp+4 + want 0x1.ba74bap+4. 
*/ +svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t is_special_case = svacgt (pg, x, d->thres); + if (unlikely (svptest_any (pg, is_special_case))) + return special_case (x, is_special_case, &d->d); + return expf_inline (x, pg, &d->d); +} + +TEST_SIG (SV, F, 1, exp, -9.9, 9.9) +TEST_ULP (SV_NAME_F1 (exp), 0.55) +TEST_DISABLE_FENV (SV_NAME_F1 (exp)) +TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0, Thres, 50000) +TEST_SYM_INTERVAL (SV_NAME_F1 (exp), Thres, inf, 50000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_expm1_2u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/expm1.c similarity index 86% rename from contrib/arm-optimized-routines/pl/math/sv_expm1_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/expm1.c index 82a31f6d9c0e..f4fb8cb982f0 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_expm1_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/expm1.c @@ -1,95 +1,97 @@ /* * Double-precision vector exp(x) - 1 function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "poly_sve_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "sv_poly_f64.h" +#include "test_sig.h" +#include "test_defs.h" #define SpecialBound 0x1.62b7d369a5aa9p+9 #define ExponentBias 0x3ff0000000000000 static const struct data { double poly[11]; double shift, inv_ln2, special_bound; /* To be loaded in one quad-word. */ double ln2_hi, ln2_lo; } data = { /* Generated using fpminimax. */ .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5, 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16, 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22, 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, }, .special_bound = SpecialBound, .inv_ln2 = 0x1.71547652b82fep0, .ln2_hi = 0x1.62e42fefa39efp-1, .ln2_lo = 0x1.abc9e3b39803fp-56, .shift = 0x1.8p52, }; static svfloat64_t NOINLINE special_case (svfloat64_t x, svfloat64_t y, svbool_t pg) { return sv_call_f64 (expm1, x, y, pg); } /* Double-precision vector exp(x) - 1 function. The maximum error observed error is 2.18 ULP: _ZGVsMxv_expm1(0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2 want 0x1.a8b9ea8d66e2p-2. */ svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); /* Large, Nan/Inf. */ svbool_t special = svnot_z (pg, svaclt (pg, x, d->special_bound)); /* Reduce argument to smaller range: Let i = round(x / ln2) and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where 2^i is exact because i is an integer. */ svfloat64_t shift = sv_f64 (d->shift); svfloat64_t n = svsub_x (pg, svmla_x (pg, shift, x, d->inv_ln2), shift); svint64_t i = svcvt_s64_x (pg, n); svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); svfloat64_t f = svmls_lane (x, n, ln2, 0); f = svmls_lane (f, n, ln2, 1); /* Approximate expm1(f) using polynomial. Taylor expansion for expm1(x) has the form: x + ax^2 + bx^3 + cx^4 .... So we calculate the polynomial P(f) = a + bf + cf^2 + ... and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ svfloat64_t f2 = svmul_x (pg, f, f); svfloat64_t f4 = svmul_x (pg, f2, f2); svfloat64_t f8 = svmul_x (pg, f4, f4); svfloat64_t p = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly)); /* Assemble the result. expm1(x) ~= 2^i * (p + 1) - 1 Let t = 2^i. 
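   t is built by integer arithmetic on the exponent field, as in this scalar
   model (valid while i keeps t in the normal range, which the special-bound
   test guarantees; negative i works via modular wraparound):

     #include <stdint.h>
     #include <string.h>
     static double pow2i (int64_t i)
     {
       uint64_t u = ((uint64_t) i << 52) + 0x3ff0000000000000ULL; /* + 1.0. */
       double t;
       memcpy (&t, &u, sizeof t);
       return t;
     }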
*/ svint64_t u = svadd_x (pg, svlsl_x (pg, i, 52), ExponentBias); svfloat64_t t = svreinterpret_f64 (u); /* expm1(x) ~= p * t + (t - 1). */ svfloat64_t y = svmla_x (pg, svsub_x (pg, t, 1), p, t); if (unlikely (svptest_any (pg, special))) return special_case (x, y, special); return y; } -PL_SIG (SV, D, 1, expm1, -9.9, 9.9) -PL_TEST_ULP (SV_NAME_D1 (expm1), 1.68) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), 0, 0x1p-23, 1000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), 0x1p-23, SpecialBound, 200000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), SpecialBound, inf, 1000) +TEST_SIG (SV, D, 1, expm1, -9.9, 9.9) +TEST_ULP (SV_NAME_D1 (expm1), 1.68) +TEST_DISABLE_FENV (SV_NAME_D1 (expm1)) +TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), 0, 0x1p-23, 1000) +TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), 0x1p-23, SpecialBound, 200000) +TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), SpecialBound, inf, 1000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_expm1f_1u6.c b/contrib/arm-optimized-routines/math/aarch64/sve/expm1f.c similarity index 67% rename from contrib/arm-optimized-routines/pl/math/sv_expm1f_1u6.c rename to contrib/arm-optimized-routines/math/aarch64/sve/expm1f.c index 0ec7c00f5300..95f7c09a403d 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_expm1f_1u6.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/expm1f.c @@ -1,93 +1,91 @@ /* * Single-precision vector exp(x) - 1 function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" /* Largest value of x for which expm1(x) should round to -1. */ #define SpecialBound 0x1.5ebc4p+6f static const struct data { /* These 4 are grouped together so they can be loaded as one quadword, then used with _lane forms of svmla/svmls. */ float c2, c4, ln2_hi, ln2_lo; - float c0, c1, c3, inv_ln2, special_bound, shift; + float c0, inv_ln2, c1, c3, special_bound; } data = { /* Generated using fpminimax. */ .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .c2 = 0x1.555736p-5, .c3 = 0x1.12287cp-7, - .c4 = 0x1.6b55a2p-10, + .c4 = 0x1.6b55a2p-10, .inv_ln2 = 0x1.715476p+0f, + .special_bound = SpecialBound, .ln2_lo = 0x1.7f7d1cp-20f, + .ln2_hi = 0x1.62e4p-1f, - .special_bound = SpecialBound, .shift = 0x1.8p23f, - .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, - .ln2_lo = 0x1.7f7d1cp-20f, }; -#define C(i) sv_f32 (d->c##i) - static svfloat32_t NOINLINE special_case (svfloat32_t x, svbool_t pg) { return sv_call_f32 (expm1f, x, x, pg); } /* Single-precision SVE exp(x) - 1. Maximum error is 1.52 ULP: _ZGVsMxv_expm1f(0x1.8f4ebcp-2) got 0x1.e859dp-2 want 0x1.e859d4p-2. */ svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); /* Large, NaN/Inf. */ svbool_t special = svnot_z (pg, svaclt (pg, x, d->special_bound)); if (unlikely (svptest_any (pg, special))) return special_case (x, pg); /* This vector is reliant on layout of data - it contains constants that can be used with _lane forms of svmla/svmls. Values are: [ coeff_2, coeff_4, ln2_hi, ln2_lo ]. */ svfloat32_t lane_constants = svld1rq (svptrue_b32 (), &d->c2); /* Reduce argument to smaller range: Let i = round(x / ln2) and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where 2^i is exact because i is an integer. 
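   The patch swaps the old add-shift rounding for an explicit svrinta and
   rebuilds 2^i with svscale; in scalar terms, using the constants from the
   data struct above:

     #include <math.h>
     static float expm1f_reduce (float x, float *f)
     {
       float j = nearbyintf (x * 0x1.715476p+0f);         /* round(x/ln2). */
       *f = (x - j * 0x1.62e4p-1f) - j * 0x1.7f7d1cp-20f; /* hi/lo ln2. */
       return j; /* t = 2^j, matching scalbnf (1.0f, (int) j). */
     }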
*/ - svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2); - j = svsub_x (pg, j, d->shift); - svint32_t i = svcvt_s32_x (pg, j); + svfloat32_t j = svmul_x (svptrue_b32 (), x, d->inv_ln2); + j = svrinta_x (pg, j); svfloat32_t f = svmls_lane (x, j, lane_constants, 2); f = svmls_lane (f, j, lane_constants, 3); /* Approximate expm1(f) using polynomial. Taylor expansion for expm1(x) has the form: x + ax^2 + bx^3 + cx^4 .... So we calculate the polynomial P(f) = a + bf + cf^2 + ... and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0); - svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1); - svfloat32_t f2 = svmul_x (pg, f, f); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), f, lane_constants, 0); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), f, lane_constants, 1); + svfloat32_t f2 = svmul_x (svptrue_b32 (), f, f); svfloat32_t p = svmla_x (pg, p12, f2, p34); - p = svmla_x (pg, C (0), f, p); + + p = svmla_x (pg, sv_f32 (d->c0), f, p); p = svmla_x (pg, f, f2, p); /* Assemble the result. expm1(x) ~= 2^i * (p + 1) - 1 Let t = 2^i. */ - svfloat32_t t = svreinterpret_f32 ( - svadd_x (pg, svreinterpret_u32 (svlsl_x (pg, i, 23)), 0x3f800000)); - return svmla_x (pg, svsub_x (pg, t, 1), p, t); + svfloat32_t t = svscale_x (pg, sv_f32 (1.0f), svcvt_s32_x (pg, j)); + return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t); } -PL_SIG (SV, F, 1, expm1, -9.9, 9.9) -PL_TEST_ULP (SV_NAME_F1 (expm1), 1.02) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (expm1), 0, SpecialBound, 100000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (expm1), SpecialBound, inf, 1000) +TEST_SIG (SV, F, 1, expm1, -9.9, 9.9) +TEST_ULP (SV_NAME_F1 (expm1), 1.02) +TEST_DISABLE_FENV (SV_NAME_F1 (expm1)) +TEST_SYM_INTERVAL (SV_NAME_F1 (expm1), 0, SpecialBound, 100000) +TEST_SYM_INTERVAL (SV_NAME_F1 (expm1), SpecialBound, inf, 1000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_hypot_1u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/hypot.c similarity index 72% rename from contrib/arm-optimized-routines/pl/math/sv_hypot_1u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/hypot.c index cf1590e4b9ab..2ed298623acc 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_hypot_1u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/hypot.c @@ -1,51 +1,53 @@ /* * Double-precision SVE hypot(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { uint64_t tiny_bound, thres; } data = { .tiny_bound = 0x0c80000000000000, /* asuint (0x1p-102). */ .thres = 0x7300000000000000, /* asuint (inf) - tiny_bound. */ }; static svfloat64_t NOINLINE special_case (svfloat64_t sqsum, svfloat64_t x, svfloat64_t y, svbool_t pg, svbool_t special) { return sv_call2_f64 (hypot, x, y, svsqrt_x (pg, sqsum), special); } /* SVE implementation of double-precision hypot. Maximum error observed is 1.21 ULP: _ZGVsMxvv_hypot (-0x1.6a22d0412cdd3p+352, 0x1.d3d89bd66fb1ap+330) got 0x1.6a22d0412cfp+352 want 0x1.6a22d0412cf01p+352. 
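   The fast path is a single fused multiply-add followed by a square root;
   lanes whose squared sum is tiny or close to overflow are rerouted to the
   scalar fallback, since x*x + y*y can under- or overflow there even when
   hypot itself would not. A scalar model of the fast path:

     #include <math.h>
     static double hypot_fast (double x, double y)
     {
       return sqrt (fma (x, x, y * y));
     }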
*/ svfloat64_t SV_NAME_D2 (hypot) (svfloat64_t x, svfloat64_t y, svbool_t pg) { const struct data *d = ptr_barrier (&data); svfloat64_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y); svbool_t special = svcmpge ( pg, svsub_x (pg, svreinterpret_u64 (sqsum), d->tiny_bound), d->thres); if (unlikely (svptest_any (pg, special))) return special_case (sqsum, x, y, pg, special); return svsqrt_x (pg, sqsum); } -PL_SIG (SV, D, 2, hypot, -10.0, 10.0) -PL_TEST_ULP (SV_NAME_D2 (hypot), 0.71) -PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, 0, inf, 10000) -PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, -0, -inf, 10000) -PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, 0, inf, 10000) -PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000) +TEST_SIG (SV, D, 2, hypot, -10.0, 10.0) +TEST_ULP (SV_NAME_D2 (hypot), 0.71) +TEST_DISABLE_FENV (SV_NAME_D2 (hypot)) +TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, 0, inf, 10000) +TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, -0, -inf, 10000) +TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, 0, inf, 10000) +TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_hypotf_1u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/hypotf.c similarity index 69% rename from contrib/arm-optimized-routines/pl/math/sv_hypotf_1u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/hypotf.c index f428832b3dbc..b977b998986b 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_hypotf_1u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/hypotf.c @@ -1,45 +1,47 @@ /* * Single-precision SVE hypot(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" #define TinyBound 0x0c800000 /* asuint (0x1p-102). */ #define Thres 0x73000000 /* 0x70000000 - TinyBound. */ static svfloat32_t NOINLINE special_case (svfloat32_t sqsum, svfloat32_t x, svfloat32_t y, svbool_t pg, svbool_t special) { return sv_call2_f32 (hypotf, x, y, svsqrt_x (pg, sqsum), special); } /* SVE implementation of single-precision hypot. Maximum error observed is 1.21 ULP: _ZGVsMxvv_hypotf (0x1.6a213cp-19, -0x1.32b982p-26) got 0x1.6a2346p-19 want 0x1.6a2344p-19. 
*/ svfloat32_t SV_NAME_F2 (hypot) (svfloat32_t x, svfloat32_t y, const svbool_t pg) { svfloat32_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y); svbool_t special = svcmpge ( pg, svsub_x (pg, svreinterpret_u32 (sqsum), TinyBound), Thres); if (unlikely (svptest_any (pg, special))) return special_case (sqsum, x, y, pg, special); return svsqrt_x (pg, sqsum); } -PL_SIG (SV, F, 2, hypot, -10.0, 10.0) -PL_TEST_ULP (SV_NAME_F2 (hypot), 0.71) -PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, 0, inf, 10000) -PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, -0, -inf, 10000) -PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, 0, inf, 10000) -PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000) +TEST_SIG (SV, F, 2, hypot, -10.0, 10.0) +TEST_ULP (SV_NAME_F2 (hypot), 0.71) +TEST_DISABLE_FENV (SV_NAME_F2 (hypot)) +TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, 0, inf, 10000) +TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, -0, -inf, 10000) +TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, 0, inf, 10000) +TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/log.c b/contrib/arm-optimized-routines/math/aarch64/sve/log.c new file mode 100644 index 000000000000..c612df48c1fd --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/log.c @@ -0,0 +1,97 @@ +/* + * Double-precision SVE log(x) function. + * + * Copyright (c) 2020-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +#define N (1 << V_LOG_TABLE_BITS) +#define Max (0x7ff0000000000000) +#define Min (0x0010000000000000) +#define Thresh (0x7fe0000000000000) /* Max - Min. */ + +static const struct data +{ + double c0, c2; + double c1, c3; + double ln2, c4; + uint64_t off; +} data = { + .c0 = -0x1.ffffffffffff7p-2, + .c1 = 0x1.55555555170d4p-2, + .c2 = -0x1.0000000399c27p-2, + .c3 = 0x1.999b2e90e94cap-3, + .c4 = -0x1.554e550bd501ep-3, + .ln2 = 0x1.62e42fefa39efp-1, + .off = 0x3fe6900900000000, +}; + +static svfloat64_t NOINLINE +special_case (svfloat64_t hi, svuint64_t tmp, svfloat64_t y, svfloat64_t r2, + svbool_t special, const struct data *d) +{ + svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off)); + return sv_call_f64 (log, x, svmla_x (svptrue_b64 (), hi, r2, y), special); +} + +/* Double-precision SVE log routine. + Maximum measured error is 2.64 ulp: + SV_NAME_D1 (log)(0x1.95e54bc91a5e2p+184) got 0x1.fffffffe88cacp+6 + want 0x1.fffffffe88cafp+6. */ +svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svuint64_t ix = svreinterpret_u64 (x); + svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh); + + /* x = 2^k z; where z is in range [Off,2*Off) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + svuint64_t tmp = svsub_x (pg, ix, d->off); + /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N. + The actual value of i is double this due to table layout. */ + svuint64_t i + = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1); + svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)); + svfloat64_t z = svreinterpret_f64 (iz); + /* Lookup in 2 global lists (length N). 
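/* A scalar sketch of the index and mantissa extraction above; illustrative
   only, with V_LOG_TABLE_BITS assumed to be 7 (a 128-entry table). */
#include <stdint.h>

static inline void
log_reduce_sketch (uint64_t ix, int64_t *k, uint64_t *iz, uint64_t *i)
{
  uint64_t tmp = ix - 0x3fe6900900000000; /* subtract the off bias. */
  *k = (int64_t) tmp >> 52;               /* arithmetic shift: exponent k. */
  *iz = ix - (tmp & (0xfffULL << 52));    /* z = x * 2^-k, still exact. */
  /* Shifting by 51 instead of 52 doubles the index, matching the
     interleaved { invc, logc } layout of the vector table. */
  *i = (tmp >> (51 - 7)) & ((128 - 1) << 1);
}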
*/ + svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i); + svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i); + + /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ + svfloat64_t kd = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); + /* hi = r + log(c) + k*Ln2. */ + svfloat64_t ln2_and_c4 = svld1rq_f64 (svptrue_b64 (), &d->ln2); + svfloat64_t r = svmad_x (pg, invc, z, -1); + svfloat64_t hi = svmla_lane_f64 (logc, kd, ln2_and_c4, 0); + hi = svadd_x (pg, r, hi); + + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ + svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1); + svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0); + y = svmla_lane_f64 (y, r2, ln2_and_c4, 1); + y = svmla_x (pg, p, r2, y); + + if (unlikely (svptest_any (pg, special))) + return special_case (hi, tmp, y, r2, special, d); + return svmla_x (pg, hi, r2, y); +} + +TEST_SIG (SV, D, 1, log, 0.01, 11.1) +TEST_ULP (SV_NAME_D1 (log), 2.15) +TEST_DISABLE_FENV (SV_NAME_D1 (log)) +TEST_INTERVAL (SV_NAME_D1 (log), -0.0, -inf, 1000) +TEST_INTERVAL (SV_NAME_D1 (log), 0, 0x1p-149, 1000) +TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-149, 0x1p-126, 4000) +TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-126, 0x1p-23, 50000) +TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-23, 1.0, 50000) +TEST_INTERVAL (SV_NAME_D1 (log), 1.0, 100, 50000) +TEST_INTERVAL (SV_NAME_D1 (log), 100, inf, 50000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/log10.c b/contrib/arm-optimized-routines/math/aarch64/sve/log10.c new file mode 100644 index 000000000000..5af142d79f55 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/log10.c @@ -0,0 +1,101 @@ +/* + * Double-precision SVE log10(x) function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +#define Min 0x0010000000000000 +#define Max 0x7ff0000000000000 +#define Thres 0x7fe0000000000000 /* Max - Min. */ +#define N (1 << V_LOG10_TABLE_BITS) + +static const struct data +{ + double c0, c2; + double c1, c3; + double invln10, log10_2; + double c4; + uint64_t off; +} data = { + .c0 = -0x1.bcb7b1526e506p-3, + .c1 = 0x1.287a7636be1d1p-3, + .c2 = -0x1.bcb7b158af938p-4, + .c3 = 0x1.63c78734e6d07p-4, + .c4 = -0x1.287461742fee4p-4, + .invln10 = 0x1.bcb7b1526e50ep-2, + .log10_2 = 0x1.34413509f79ffp-2, + .off = 0x3fe6900900000000, +}; + +static svfloat64_t NOINLINE +special_case (svfloat64_t hi, svuint64_t tmp, svfloat64_t y, svfloat64_t r2, + svbool_t special, const struct data *d) +{ + svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off)); + return sv_call_f64 (log10, x, svmla_x (svptrue_b64 (), hi, r2, y), special); +} + +/* Double-precision SVE log10 routine. + Maximum measured error is 2.46 ulps. + SV_NAME_D1 (log10)(0x1.131956cd4b627p+0) got 0x1.fffbdf6eaa669p-6 + want 0x1.fffbdf6eaa667p-6. */ +svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svuint64_t ix = svreinterpret_u64 (x); + svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres); + + /* x = 2^k z; where z is in range [Off,2*Off) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. 
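/* Written out in scalar form, the assembly in the lines that follow keeps
   every term in the log10 domain; a sketch under the constants in the data
   struct above, not part of the patch. */
static inline double
log10_assemble_sketch (double log10c, double r, double k, double poly)
{
  double w = log10c + r * 0x1.bcb7b1526e50ep-2; /* + r / ln(10). */
  double hi = w + k * 0x1.34413509f79ffp-2;     /* + k * log10(2). */
  return hi + r * r * poly;                     /* + r^2 * P(r). */
}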
*/ + svuint64_t tmp = svsub_x (pg, ix, d->off); + svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG10_TABLE_BITS); + i = svand_x (pg, i, (N - 1) << 1); + svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); + svfloat64_t z = svreinterpret_f64 ( + svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52))); + + /* log(x) = k*log(2) + log(c) + log(z/c). */ + svfloat64_t invc = svld1_gather_index (pg, &__v_log10_data.table[0].invc, i); + svfloat64_t logc + = svld1_gather_index (pg, &__v_log10_data.table[0].log10c, i); + + /* We approximate log(z/c) with a polynomial P(x) ~= log(x + 1): + r = z/c - 1 (we look up precomputed 1/c) + log(z/c) ~= P(r). */ + svfloat64_t r = svmad_x (pg, invc, z, -1.0); + + /* hi = log(c) + k*log(2). */ + svfloat64_t invln10_log10_2 = svld1rq_f64 (svptrue_b64 (), &d->invln10); + svfloat64_t w = svmla_lane_f64 (logc, r, invln10_log10_2, 0); + svfloat64_t hi = svmla_lane_f64 (w, k, invln10_log10_2, 1); + + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ + svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1); + svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0); + y = svmla_x (pg, y, r2, d->c4); + y = svmla_x (pg, p, r2, y); + + if (unlikely (svptest_any (pg, special))) + return special_case (hi, tmp, y, r2, special, d); + return svmla_x (pg, hi, r2, y); +} + +TEST_SIG (SV, D, 1, log10, 0.01, 11.1) +TEST_ULP (SV_NAME_D1 (log10), 1.97) +TEST_DISABLE_FENV (SV_NAME_D1 (log10)) +TEST_INTERVAL (SV_NAME_D1 (log10), -0.0, -0x1p126, 100) +TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-149, 0x1p-126, 4000) +TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-126, 0x1p-23, 50000) +TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-23, 1.0, 50000) +TEST_INTERVAL (SV_NAME_D1 (log10), 1.0, 100, 50000) +TEST_INTERVAL (SV_NAME_D1 (log10), 100, inf, 50000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_log10f_3u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/log10f.c similarity index 56% rename from contrib/arm-optimized-routines/pl/math/sv_log10f_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/log10f.c index a685b23e5de5..6c3add451761 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_log10f_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/log10f.c @@ -1,93 +1,102 @@ /* * Single-precision SVE log10 function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float poly_0246[4]; float poly_1357[4]; float ln2, inv_ln10; + uint32_t off, lower; } data = { .poly_1357 = { /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs 1, 3, 5 and 7 can be loaded as a single quad-word, hence used with _lane variant of MLA intrinsic. */ 0x1.2879c8p-3f, 0x1.6408f8p-4f, 0x1.f0e514p-5f, 0x1.f5f76ap-5f }, .poly_0246 = { -0x1.bcb79cp-3f, -0x1.bcd472p-4f, -0x1.246f8p-4f, -0x1.0fc92cp-4f }, .ln2 = 0x1.62e43p-1f, .inv_ln10 = 0x1.bcb7b2p-2f, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x0080000 - offset (which wraps around). 
*/ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min 0x00800000 -#define Max 0x7f800000 -#define Thres 0x7f000000 /* Max - Min. */ -#define Offset 0x3f2aaaab /* 0.666667. */ +#define Thres 0x7f000000 /* asuint32(inf) - 0x00800000. */ #define MantissaMask 0x007fffff static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (log10f, x, y, special); + return sv_call_f32 ( + log10f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE log10f using the same algorithm and polynomial as AdvSIMD log10f. Maximum error is 3.31ulps: SV_NAME_F1 (log10)(0x1.555c16p+0) got 0x1.ffe2fap-4 want 0x1.ffe2f4p-4. */ svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t ix = svreinterpret_u32 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres); + + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thres); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - ix = svsub_x (pg, ix, Offset); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (ix), 23)); /* signextend. */ - ix = svand_x (pg, ix, MantissaMask); - ix = svadd_x (pg, ix, Offset); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* signextend. */ + svuint32_t ix = svand_x (pg, u_off, MantissaMask); + ix = svadd_x (pg, ix, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (ix), 1.0f); /* y = log10(1+r) + n*log10(2) log10(1+r) ~ r * InvLn(10) + P(r) where P(r) is a polynomial. Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). */ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t r4 = svmul_x (pg, r2, r2); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t r4 = svmul_x (svptrue_b32 (), r2, r2); svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]); svfloat32_t q_01 = svmla_lane (sv_f32 (d->poly_0246[0]), r, p_1357, 0); svfloat32_t q_23 = svmla_lane (sv_f32 (d->poly_0246[1]), r, p_1357, 1); svfloat32_t q_45 = svmla_lane (sv_f32 (d->poly_0246[2]), r, p_1357, 2); svfloat32_t q_67 = svmla_lane (sv_f32 (d->poly_0246[3]), r, p_1357, 3); svfloat32_t q_47 = svmla_x (pg, q_45, r2, q_67); svfloat32_t q_03 = svmla_x (pg, q_01, r2, q_23); svfloat32_t y = svmla_x (pg, q_03, r4, q_47); /* Using hi = Log10(2)*n + r*InvLn(10) is faster but less accurate. 
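/* The wrapped lower bound set up in the data above can be made concrete
   with a scalar sketch (helper name illustrative; the bound proper is the
   smallest positive normal, 0x00800000). After u_off = asuint (x) - off,
   one unsigned compare against lower = 0x00800000 - off catches zero,
   subnormals, negative inputs, inf and NaN in a single test. */
#include <stdint.h>

static inline int
log10f_special_sketch (uint32_t u /* asuint (x) */)
{
  uint32_t off = 0x3f2aaaab;  /* asuint (2/3), rounded. */
  uint32_t u_off = u - off;
  return u_off - (0x00800000 - off) >= 0x7f000000; /* Thres. */
}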
*/ svfloat32_t hi = svmla_x (pg, r, n, d->ln2); hi = svmul_x (pg, hi, d->inv_ln10); if (unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y), - special); - return svmla_x (pg, hi, r2, y); + return special_case (u_off, hi, r2, y, special); + return svmla_x (svptrue_b32 (), hi, r2, y); } -PL_SIG (SV, F, 1, log10, 0.01, 11.1) -PL_TEST_ULP (SV_NAME_F1 (log10), 2.82) -PL_TEST_INTERVAL (SV_NAME_F1 (log10), -0.0, -0x1p126, 100) -PL_TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-149, 0x1p-126, 4000) -PL_TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-126, 0x1p-23, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-23, 1.0, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (log10), 1.0, 100, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (log10), 100, inf, 50000) +TEST_SIG (SV, F, 1, log10, 0.01, 11.1) +TEST_ULP (SV_NAME_F1 (log10), 2.82) +TEST_DISABLE_FENV (SV_NAME_F1 (log10)) +TEST_INTERVAL (SV_NAME_F1 (log10), -0.0, -0x1p126, 100) +TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-149, 0x1p-126, 4000) +TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-126, 0x1p-23, 50000) +TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-23, 1.0, 50000) +TEST_INTERVAL (SV_NAME_F1 (log10), 1.0, 100, 50000) +TEST_INTERVAL (SV_NAME_F1 (log10), 100, inf, 50000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_log1p_2u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/log1p.c similarity index 88% rename from contrib/arm-optimized-routines/pl/math/sv_log1p_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/log1p.c index f178ab16238a..e6b895b52908 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_log1p_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/log1p.c @@ -1,116 +1,118 @@ /* * Double-precision SVE log(1+x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "poly_sve_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "sv_poly_f64.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { double poly[19]; double ln2_hi, ln2_lo; uint64_t hfrt2_top, onemhfrt2_top, inf, mone; } data = { /* Generated using Remez in [ sqrt(2)/2 - 1, sqrt(2) - 1]. Order 20 polynomial, however first 2 coefficients are 0 and 1 so are not stored. */ .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2, 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3, -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4, 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4, -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5, 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4, -0x1.cfa7385bdb37ep-6, }, .ln2_hi = 0x1.62e42fefa3800p-1, .ln2_lo = 0x1.ef35793c76730p-45, /* top32(asuint64(sqrt(2)/2)) << 32. */ .hfrt2_top = 0x3fe6a09e00000000, /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32. */ .onemhfrt2_top = 0x00095f6200000000, .inf = 0x7ff0000000000000, .mone = 0xbff0000000000000, }; #define AbsMask 0x7fffffffffffffff #define BottomMask 0xffffffff static svfloat64_t NOINLINE special_case (svbool_t special, svfloat64_t x, svfloat64_t y) { return sv_call_f64 (log1p, x, y, special); } /* Vector approximation for log1p using polynomial on reduced interval. Maximum observed error is 2.46 ULP: _ZGVsMxv_log1p(0x1.654a1307242a4p+11) got 0x1.fd5565fb590f4p+2 want 0x1.fd5565fb590f6p+2. 
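/* The correction term c/m used by the double-precision log1p below has a
   compact scalar shape; illustrative only, and the subtractions are exact
   by Sterbenz-style cancellation in the ranges where the term matters. */
static inline double
log1p_correction_sketch (double x)
{
  double m = x + 1.0;       /* rounded 1 + x. */
  double c = x - (m - 1.0); /* rounding error of that addition. */
  return c / m;             /* log(1+x) - log(m) ~ c/m. */
}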
*/ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); svuint64_t ix = svreinterpret_u64 (x); svuint64_t ax = svand_x (pg, ix, AbsMask); svbool_t special = svorr_z (pg, svcmpge (pg, ax, d->inf), svcmpge (pg, ix, d->mone)); /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f is in [sqrt(2)/2, sqrt(2)]): log1p(x) = k*log(2) + log1p(f). f may not be representable exactly, so we need a correction term: let m = round(1 + x), c = (1 + x) - m. c << m: at very small x, log1p(x) ~ x, hence: log(1+x) - log(m) ~ c/m. We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */ /* Obtain correctly scaled k by manipulation in the exponent. The scalar algorithm casts down to 32-bit at this point to calculate k and u_red. We stay in double-width to obtain f and k, using the same constants as the scalar algorithm but shifted left by 32. */ svfloat64_t m = svadd_x (pg, x, 1); svuint64_t mi = svreinterpret_u64 (m); svuint64_t u = svadd_x (pg, mi, d->onemhfrt2_top); svint64_t ki = svsub_x (pg, svreinterpret_s64 (svlsr_x (pg, u, 52)), 0x3ff); svfloat64_t k = svcvt_f64_x (pg, ki); /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ svuint64_t utop = svadd_x (pg, svand_x (pg, u, 0x000fffff00000000), d->hfrt2_top); svuint64_t u_red = svorr_x (pg, utop, svand_x (pg, mi, BottomMask)); svfloat64_t f = svsub_x (pg, svreinterpret_f64 (u_red), 1); /* Correction term c/m. */ svfloat64_t cm = svdiv_x (pg, svsub_x (pg, x, svsub_x (pg, m, 1)), m); /* Approximate log1p(x) on the reduced input using a polynomial. Because log1p(0)=0 we choose an approximation of the form: x + C0*x^2 + C1*x^3 + C2x^4 + ... Hence approximation has the form f + f^2 * P(f) where P(x) = C0 + C1*x + C2x^2 + ... Assembling this all correctly is dealt with at the final step. */ svfloat64_t f2 = svmul_x (pg, f, f), f4 = svmul_x (pg, f2, f2), f8 = svmul_x (pg, f4, f4), f16 = svmul_x (pg, f8, f8); svfloat64_t p = sv_estrin_18_f64_x (pg, f, f2, f4, f8, f16, d->poly); svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2_lo); svfloat64_t yhi = svmla_x (pg, f, k, d->ln2_hi); svfloat64_t y = svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p); if (unlikely (svptest_any (pg, special))) return special_case (special, x, y); return y; } -PL_SIG (SV, D, 1, log1p, -0.9, 10.0) -PL_TEST_ULP (SV_NAME_D1 (log1p), 1.97) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0.0, 0x1p-23, 50000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0x1p-23, 0.001, 50000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0.001, 1.0, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (log1p), 1, inf, 10000) -PL_TEST_INTERVAL (SV_NAME_D1 (log1p), -1, -inf, 10) +TEST_SIG (SV, D, 1, log1p, -0.9, 10.0) +TEST_ULP (SV_NAME_D1 (log1p), 1.97) +TEST_DISABLE_FENV (SV_NAME_D1 (log1p)) +TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0.0, 0x1p-23, 50000) +TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0x1p-23, 0.001, 50000) +TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0.001, 1.0, 50000) +TEST_INTERVAL (SV_NAME_D1 (log1p), 1, inf, 10000) +TEST_INTERVAL (SV_NAME_D1 (log1p), -1, -inf, 10) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/log1pf.c b/contrib/arm-optimized-routines/math/aarch64/sve/log1pf.c new file mode 100644 index 000000000000..77ae6218f931 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/log1pf.c @@ -0,0 +1,43 @@ +/* + * Single-precision vector log(x + 1) function. + * + * Copyright (c) 2023-2024, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_log1pf_inline.h" + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special) +{ + return sv_call_f32 (log1pf, x, sv_log1pf_inline (x, svptrue_b32 ()), + special); +} + +/* Vector log1pf approximation using polynomial on reduced interval. Worst-case + error is 1.27 ULP very close to 0.5. + _ZGVsMxv_log1pf(0x1.fffffep-2) got 0x1.9f324p-2 + want 0x1.9f323ep-2. */ +svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg) +{ + /* x < -1, Inf/Nan. */ + svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000); + special = svorn_z (pg, special, svcmpge (pg, x, -1)); + + if (unlikely (svptest_any (pg, special))) + return special_case (x, special); + + return sv_log1pf_inline (x, pg); +} + +TEST_SIG (SV, F, 1, log1p, -0.9, 10.0) +TEST_ULP (SV_NAME_F1 (log1p), 0.77) +TEST_DISABLE_FENV (SV_NAME_F1 (log1p)) +TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0, 0x1p-23, 5000) +TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0x1p-23, 1, 5000) +TEST_INTERVAL (SV_NAME_F1 (log1p), 1, inf, 10000) +TEST_INTERVAL (SV_NAME_F1 (log1p), -1, -inf, 10) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/log2.c b/contrib/arm-optimized-routines/math/aarch64/sve/log2.c new file mode 100644 index 000000000000..11c65c1b2963 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/log2.c @@ -0,0 +1,96 @@ +/* + * Double-precision SVE log2 function. + * + * Copyright (c) 2022-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +#define N (1 << V_LOG2_TABLE_BITS) +#define Max (0x7ff0000000000000) +#define Min (0x0010000000000000) +#define Thresh (0x7fe0000000000000) /* Max - Min. */ + +static const struct data +{ + double c0, c2; + double c1, c3; + double invln2, c4; + uint64_t off; +} data = { + .c0 = -0x1.71547652b83p-1, + .c1 = 0x1.ec709dc340953p-2, + .c2 = -0x1.71547651c8f35p-2, + .c3 = 0x1.2777ebe12dda5p-2, + .c4 = -0x1.ec738d616fe26p-3, + .invln2 = 0x1.71547652b82fep0, + .off = 0x3fe6900900000000, +}; + +static svfloat64_t NOINLINE +special_case (svfloat64_t w, svuint64_t tmp, svfloat64_t y, svfloat64_t r2, + svbool_t special, const struct data *d) +{ + svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off)); + return sv_call_f64 (log2, x, svmla_x (svptrue_b64 (), w, r2, y), special); +} + +/* Double-precision SVE log2 routine. + Implements the same algorithm as AdvSIMD log10, with coefficients and table + entries scaled in extended precision. + The maximum observed error is 2.58 ULP: + SV_NAME_D1 (log2)(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5 + want 0x1.fffb34198d9ddp-5. */ +svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svuint64_t ix = svreinterpret_u64 (x); + svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh); + + /* x = 2^k z; where z is in range [Off,2*Off) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. 
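/* As in the log10 case, the routine below assembles its result directly in
   the log2 domain; a scalar sketch of the final combination further down
   (illustrative, not part of the patch). */
static inline double
log2_assemble_sketch (double log2c, double r, double k, double poly)
{
  double w = k + log2c + r * 0x1.71547652b82fep0; /* k + log2(c) + r/ln(2). */
  return w + r * r * poly;                        /* + r^2 * P(r). */
}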
*/ + svuint64_t tmp = svsub_x (pg, ix, d->off); + svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG2_TABLE_BITS); + i = svand_x (pg, i, (N - 1) << 1); + svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); + svfloat64_t z = svreinterpret_f64 ( + svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52))); + + svfloat64_t invc = svld1_gather_index (pg, &__v_log2_data.table[0].invc, i); + svfloat64_t log2c + = svld1_gather_index (pg, &__v_log2_data.table[0].log2c, i); + + /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. */ + + svfloat64_t invln2_and_c4 = svld1rq_f64 (svptrue_b64 (), &d->invln2); + svfloat64_t r = svmad_x (pg, invc, z, -1.0); + svfloat64_t w = svmla_lane_f64 (log2c, r, invln2_and_c4, 0); + w = svadd_x (pg, k, w); + + svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1); + svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0); + y = svmla_lane_f64 (y, r2, invln2_and_c4, 1); + y = svmla_x (pg, p, r2, y); + + if (unlikely (svptest_any (pg, special))) + return special_case (w, tmp, y, r2, special, d); + return svmla_x (pg, w, r2, y); +} + +TEST_SIG (SV, D, 1, log2, 0.01, 11.1) +TEST_ULP (SV_NAME_D1 (log2), 2.09) +TEST_DISABLE_FENV (SV_NAME_D1 (log2)) +TEST_INTERVAL (SV_NAME_D1 (log2), -0.0, -0x1p126, 1000) +TEST_INTERVAL (SV_NAME_D1 (log2), 0.0, 0x1p-126, 4000) +TEST_INTERVAL (SV_NAME_D1 (log2), 0x1p-126, 0x1p-23, 50000) +TEST_INTERVAL (SV_NAME_D1 (log2), 0x1p-23, 1.0, 50000) +TEST_INTERVAL (SV_NAME_D1 (log2), 1.0, 100, 50000) +TEST_INTERVAL (SV_NAME_D1 (log2), 100, inf, 50000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_log2f_2u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/log2f.c similarity index 53% rename from contrib/arm-optimized-routines/pl/math/sv_log2f_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/log2f.c index 9e96c62bbcc6..312fd448226b 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_log2f_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/log2f.c @@ -1,86 +1,94 @@ /* * Single-precision vector/SVE log2 function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float poly_02468[5]; float poly_1357[4]; + uint32_t off, lower; } data = { .poly_1357 = { /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs 1, 3, 5 and 7 can be loaded as a single quad-word, hence used with _lane variant of MLA intrinsic. */ -0x1.715458p-1f, -0x1.7171a4p-2f, -0x1.e5143ep-3f, -0x1.c675bp-3f }, .poly_02468 = { 0x1.715476p0f, 0x1.ec701cp-2f, 0x1.27a0b8p-2f, 0x1.9d8ecap-3f, 0x1.9e495p-3f }, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x0080000 - offset (which wraps around). */ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min (0x00800000) -#define Max (0x7f800000) -#define Thres (0x7f000000) /* Max - Min. */ +#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */ #define MantissaMask (0x007fffff) -#define Off (0x3f2aaaab) /* 0.666667. 
*/ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (log2f, x, y, cmp); + return sv_call_f32 ( + log2f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE log2f, using the same algorithm and polynomial as AdvSIMD log2f. Maximum error is 2.48 ULPs: SV_NAME_F1 (log2)(0x1.558174p+0) got 0x1.a9be84p-2 want 0x1.a9be8p-2. */ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t u = svreinterpret_u32 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, u, Min), Thres); + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = svsub_x (pg, u, Off); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */ - u = svand_x (pg, u, MantissaMask); - u = svadd_x (pg, u, Off); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */ + svuint32_t u = svand_x (pg, u_off, MantissaMask); + u = svadd_x (pg, u, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f); /* y = log2(1+r) + n. */ - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); /* Evaluate polynomial using pairwise Horner scheme. */ svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]); svfloat32_t q_01 = svmla_lane (sv_f32 (d->poly_02468[0]), r, p_1357, 0); svfloat32_t q_23 = svmla_lane (sv_f32 (d->poly_02468[1]), r, p_1357, 1); svfloat32_t q_45 = svmla_lane (sv_f32 (d->poly_02468[2]), r, p_1357, 2); svfloat32_t q_67 = svmla_lane (sv_f32 (d->poly_02468[3]), r, p_1357, 3); svfloat32_t y = svmla_x (pg, q_67, r2, sv_f32 (d->poly_02468[4])); y = svmla_x (pg, q_45, r2, y); y = svmla_x (pg, q_23, r2, y); y = svmla_x (pg, q_01, r2, y); if (unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), n, r, y), special); - return svmla_x (pg, n, r, y); + return special_case (u_off, n, r, y, special); + return svmla_x (svptrue_b32 (), n, r, y); } -PL_SIG (SV, F, 1, log2, 0.01, 11.1) -PL_TEST_ULP (SV_NAME_F1 (log2), 1.99) -PL_TEST_EXPECT_FENV_ALWAYS (SV_NAME_F1 (log2)) -PL_TEST_INTERVAL (SV_NAME_F1 (log2), -0.0, -0x1p126, 4000) -PL_TEST_INTERVAL (SV_NAME_F1 (log2), 0.0, 0x1p-126, 4000) -PL_TEST_INTERVAL (SV_NAME_F1 (log2), 0x1p-126, 0x1p-23, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (log2), 0x1p-23, 1.0, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (log2), 1.0, 100, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (log2), 100, inf, 50000) +TEST_SIG (SV, F, 1, log2, 0.01, 11.1) +TEST_ULP (SV_NAME_F1 (log2), 1.99) +TEST_DISABLE_FENV (SV_NAME_F1 (log2)) +TEST_INTERVAL (SV_NAME_F1 (log2), -0.0, -0x1p126, 4000) +TEST_INTERVAL (SV_NAME_F1 (log2), 0.0, 0x1p-126, 4000) +TEST_INTERVAL (SV_NAME_F1 (log2), 0x1p-126, 0x1p-23, 50000) +TEST_INTERVAL (SV_NAME_F1 (log2), 0x1p-23, 1.0, 50000) +TEST_INTERVAL (SV_NAME_F1 (log2), 1.0, 100, 50000) +TEST_INTERVAL (SV_NAME_F1 (log2), 100, inf, 50000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_logf_3u4.c b/contrib/arm-optimized-routines/math/aarch64/sve/logf.c similarity index 52% rename from contrib/arm-optimized-routines/pl/math/sv_logf_3u4.c rename to contrib/arm-optimized-routines/math/aarch64/sve/logf.c index 
967355247036..2898e36974d6 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_logf_3u4.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/logf.c @@ -1,86 +1,94 @@ /* * Single-precision vector log function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float poly_0135[4]; float poly_246[3]; float ln2; + uint32_t off, lower; } data = { .poly_0135 = { /* Coefficients copied from the AdvSIMD routine in math/, then rearranged so that coeffs 0, 1, 3 and 5 can be loaded as a single quad-word, hence used with _lane variant of MLA intrinsic. */ -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f }, .poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f }, - .ln2 = 0x1.62e43p-1f + .ln2 = 0x1.62e43p-1f, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x0080000 - offset (which wraps around). */ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min (0x00800000) -#define Max (0x7f800000) -#define Thresh (0x7f000000) /* Max - Min. */ +#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */ #define Mask (0x007fffff) -#define Off (0x3f2aaaab) /* 0.666667. */ - -float optr_aor_log_f32 (float); static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (optr_aor_log_f32, x, y, cmp); + return sv_call_f32 ( + logf, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE logf, using the same algorithm and polynomial as the AdvSIMD routine. Maximum error is 3.34 ULPs: SV_NAME_F1 (log)(0x1.557298p+0) got 0x1.26edecp-2 want 0x1.26ede6p-2. */ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t u = svreinterpret_u32 (x); - svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh); + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t cmp = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = svsub_x (pg, u, Off); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */ - u = svand_x (pg, u, Mask); - u = svadd_x (pg, u, Off); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */ + + svuint32_t u = svand_x (pg, u_off, Mask); + u = svadd_x (pg, u, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f); /* y = log(1+r) + n*ln2. */ - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))). 
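/* The nested form in the comment above can be written as a scalar sketch;
   P0..P6 name the seven coefficients as in the struct, and the helper is
   illustrative only. */
static inline float
logf_poly_sketch (float n, float r, const float P[7])
{
  float r2 = r * r;
  float q2 = P[2] + r * P[1] + r2 * P[0];
  float q1 = P[4] + r * P[3] + r2 * q2;
  float q0 = P[6] + r * P[5] + r2 * q1;
  return (r + n * 0x1.62e43p-1f) + r2 * q0; /* n*ln2 + r + r2*poly. */
}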
*/ svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[0]); svfloat32_t p = svmla_lane (sv_f32 (d->poly_246[0]), r, p_0135, 1); svfloat32_t q = svmla_lane (sv_f32 (d->poly_246[1]), r, p_0135, 2); svfloat32_t y = svmla_lane (sv_f32 (d->poly_246[2]), r, p_0135, 3); p = svmla_lane (p, r2, p_0135, 0); q = svmla_x (pg, q, r2, p); y = svmla_x (pg, y, r2, q); p = svmla_x (pg, r, n, d->ln2); if (unlikely (svptest_any (pg, cmp))) - return special_case (x, svmla_x (svnot_z (pg, cmp), p, r2, y), cmp); + return special_case (u_off, p, r2, y, cmp); return svmla_x (pg, p, r2, y); } -PL_SIG (SV, F, 1, log, 0.01, 11.1) -PL_TEST_ULP (SV_NAME_F1 (log), 2.85) -PL_TEST_INTERVAL (SV_NAME_F1 (log), -0.0, -inf, 100) -PL_TEST_INTERVAL (SV_NAME_F1 (log), 0, 0x1p-126, 100) -PL_TEST_INTERVAL (SV_NAME_F1 (log), 0x1p-126, 0x1p-23, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (log), 0x1p-23, 1.0, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (log), 1.0, 100, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (log), 100, inf, 50000) +TEST_SIG (SV, F, 1, log, 0.01, 11.1) +TEST_ULP (SV_NAME_F1 (log), 2.85) +TEST_DISABLE_FENV (SV_NAME_F1 (log)) +TEST_INTERVAL (SV_NAME_F1 (log), -0.0, -inf, 100) +TEST_INTERVAL (SV_NAME_F1 (log), 0, 0x1p-126, 100) +TEST_INTERVAL (SV_NAME_F1 (log), 0x1p-126, 0x1p-23, 50000) +TEST_INTERVAL (SV_NAME_F1 (log), 0x1p-23, 1.0, 50000) +TEST_INTERVAL (SV_NAME_F1 (log), 1.0, 100, 50000) +TEST_INTERVAL (SV_NAME_F1 (log), 100, inf, 50000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/modf.c b/contrib/arm-optimized-routines/math/aarch64/sve/modf.c new file mode 100644 index 000000000000..5944c7d37c4c --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/modf.c @@ -0,0 +1,36 @@ +/* + * Double-precision SVE modf(x, *y) function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +/* Modf algorithm. Produces exact values in all rounding modes. */ +svfloat64_t SV_NAME_D1_L1 (modf) (svfloat64_t x, double *out_int, + const svbool_t pg) +{ + /* Get integer component of x. */ + svfloat64_t fint_comp = svrintz_x (pg, x); + + svst1_f64 (pg, out_int, fint_comp); + + /* Subtract integer component from input. */ + svfloat64_t remaining = svsub_f64_x (svptrue_b64 (), x, fint_comp); + + /* Return +0 for integer x. */ + svbool_t is_integer = svcmpeq (pg, x, fint_comp); + return svsel (is_integer, sv_f64 (0), remaining); +} + +TEST_ULP (_ZGVsMxvl8_modf_frac, 0.0) +TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_frac, 0, 1, 20000) +TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_frac, 1, inf, 20000) + +TEST_ULP (_ZGVsMxvl8_modf_int, 0.0) +TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_int, 0, 1, 20000) +TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_int, 1, inf, 20000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/modff.c b/contrib/arm-optimized-routines/math/aarch64/sve/modff.c new file mode 100644 index 000000000000..ad7ce4e2c88f --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/modff.c @@ -0,0 +1,36 @@ +/* + * Single-precision SVE modff(x, *y) function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +/* Modff algorithm. Produces exact values in all rounding modes. */ +svfloat32_t SV_NAME_F1_L1 (modf) (svfloat32_t x, float *out_int, + const svbool_t pg) +{ + /* Get integer component of x. 
*/ + svfloat32_t fint_comp = svrintz_x (pg, x); + + svst1_f32 (pg, out_int, fint_comp); + + /* Subtract integer component from input. */ + svfloat32_t remaining = svsub_f32_x (svptrue_b32 (), x, fint_comp); + + /* Return +0 for integer x. */ + svbool_t is_integer = svcmpeq (pg, x, fint_comp); + return svsel (is_integer, sv_f32 (0), remaining); +} + +TEST_ULP (_ZGVsMxvl4_modff_frac, 0.0) +TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_frac, 0, 1, 20000) +TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_frac, 1, inf, 20000) + +TEST_ULP (_ZGVsMxvl4_modff_int, 0.0) +TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_int, 0, 1, 20000) +TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_int, 1, inf, 20000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_pow_1u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/pow.c similarity index 64% rename from contrib/arm-optimized-routines/pl/math/sv_pow_1u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/pow.c index 0838810206a1..12b2fb42b2cb 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_pow_1u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/pow.c @@ -1,444 +1,483 @@ /* * Double-precision SVE pow(x, y) function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2025, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" /* This version shares a similar algorithm with AOR scalar pow. The core computation consists in computing pow(x, y) as exp (y * log (x)). The algorithms for exp and log are very similar to scalar exp and log. The log relies on table lookup for 3 variables and an order 8 polynomial. It returns a high and a low contribution that are then passed to the exp, to minimise the loss of accuracy in both routines. The exp is based on 8-bit table lookup for scale and order-4 polynomial. The SVE algorithm drops the tail in the exp computation at the price of a lower accuracy, slightly above 1 ULP. The SVE algorithm also drops the special treatment of small (< 2^-65) and - large (> 2^63) finite values of |y|, as they only affect non-round to nearest - modes. + large (> 2^63) finite values of |y|, as they only affect non-round to + nearest modes. Maximum measured error is 1.04 ULPs: SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12) got 0x1.f7116284221fcp-1 want 0x1.f7116284221fdp-1. */ /* Data is defined in v_pow_log_data.c. */ #define N_LOG (1 << V_POW_LOG_TABLE_BITS) -#define A __v_pow_log_data.poly #define Off 0x3fe6955500000000 /* Data is defined in v_pow_exp_data.c. */ #define N_EXP (1 << V_POW_EXP_TABLE_BITS) #define SignBias (0x800 << V_POW_EXP_TABLE_BITS) -#define C __v_pow_exp_data.poly #define SmallExp 0x3c9 /* top12(0x1p-54). */ #define BigExp 0x408 /* top12(512.). */ #define ThresExp 0x03f /* BigExp - SmallExp. */ #define HugeExp 0x409 /* top12(1024.). */ /* Constants associated with pow. */ +#define SmallBoundX 0x1p-126 #define SmallPowX 0x001 /* top12(0x1p-126). */ #define BigPowX 0x7ff /* top12(INFINITY). */ #define ThresPowX 0x7fe /* BigPowX - SmallPowX. */ #define SmallPowY 0x3be /* top12(0x1.e7b6p-65). */ #define BigPowY 0x43e /* top12(0x1.749p62). */ #define ThresPowY 0x080 /* BigPowY - SmallPowY.
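/* The top12 thresholds defined above turn each range check into a single
   unsigned compare; a scalar sketch of the exp-path filter (helper names
   illustrative, not part of the patch). True when |x| >= 512, so
   scale*(1+tmp) may overflow, or |x| <= 0x1p-54, the tiny inputs that need
   the spurious-underflow fix-up. */
#include <stdint.h>
#include <string.h>

static inline int
exp_uoflow_sketch (double x)
{
  uint64_t u;
  memcpy (&u, &x, sizeof u);
  uint32_t abstop = (uint32_t) (u >> 52) & 0x7ff;
  return abstop - 0x3c9 /* SmallExp */ >= 0x03f /* ThresExp */;
}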
*/ +static const struct data +{ + double log_c0, log_c2, log_c4, log_c6, ln2_hi, ln2_lo; + double log_c1, log_c3, log_c5, off; + double n_over_ln2, exp_c2, ln2_over_n_hi, ln2_over_n_lo; + double exp_c0, exp_c1; +} data = { + .log_c0 = -0x1p-1, + .log_c1 = -0x1.555555555556p-1, + .log_c2 = 0x1.0000000000006p-1, + .log_c3 = 0x1.999999959554ep-1, + .log_c4 = -0x1.555555529a47ap-1, + .log_c5 = -0x1.2495b9b4845e9p0, + .log_c6 = 0x1.0002b8b263fc3p0, + .off = Off, + .exp_c0 = 0x1.fffffffffffd4p-2, + .exp_c1 = 0x1.5555571d6ef9p-3, + .exp_c2 = 0x1.5555576a5adcep-5, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + .n_over_ln2 = 0x1.71547652b82fep0 * N_EXP, + .ln2_over_n_hi = 0x1.62e42fefc0000p-9, + .ln2_over_n_lo = -0x1.c610ca86c3899p-45, +}; + /* Check if x is an integer. */ static inline svbool_t sv_isint (svbool_t pg, svfloat64_t x) { return svcmpeq (pg, svrintz_z (pg, x), x); } /* Check if x is real not integer valued. */ static inline svbool_t sv_isnotint (svbool_t pg, svfloat64_t x) { return svcmpne (pg, svrintz_z (pg, x), x); } /* Check if x is an odd integer. */ static inline svbool_t sv_isodd (svbool_t pg, svfloat64_t x) { - svfloat64_t y = svmul_x (pg, x, 0.5); + svfloat64_t y = svmul_x (svptrue_b64 (), x, 0.5); return sv_isnotint (pg, y); } /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is the bit representation of a non-zero finite floating-point value. */ static inline int checkint (uint64_t iy) { int e = iy >> 52 & 0x7ff; if (e < 0x3ff) return 0; if (e > 0x3ff + 52) return 2; if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) return 0; if (iy & (1ULL << (0x3ff + 52 - e))) return 1; return 2; } /* Top 12 bits (sign and exponent of each double float lane). */ static inline svuint64_t sv_top12 (svfloat64_t x) { return svlsr_x (svptrue_b64 (), svreinterpret_u64 (x), 52); } /* Returns 1 if input is the bit representation of 0, infinity or nan. */ static inline int zeroinfnan (uint64_t i) { return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1; } /* Returns 1 if input is the bit representation of 0, infinity or nan. */ static inline svbool_t sv_zeroinfnan (svbool_t pg, svuint64_t i) { - return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2), 1), + return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1), 2 * asuint64 (INFINITY) - 1); } /* Handle cases that may overflow or underflow when computing the result that is scale*(1+TMP) without intermediate rounding. The bit representation of scale is in SBITS, however it has a computed exponent that may have overflown into the sign bit so that needs to be adjusted before using it as a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ static inline double specialcase (double tmp, uint64_t sbits, uint64_t ki) { double scale; if ((ki & 0x80000000) == 0) { /* k > 0, the exponent of scale might have overflowed by <= 460. */ sbits -= 1009ull << 52; scale = asdouble (sbits); return 0x1p1009 * (scale + scale * tmp); } /* k < 0, need special care in the subnormal range. */ sbits += 1022ull << 52; /* Note: sbits is signed scale. */ scale = asdouble (sbits); double y = scale + scale * tmp; return 0x1p-1022 * y; } /* Scalar fallback for special cases of SVE pow's exp. 
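/* The SignBias constant defined earlier carries the result's sign through
   the exponent arithmetic of the exp path rather than a separate negation;
   a scalar sketch, with V_POW_EXP_TABLE_BITS assumed to be 8 (a 256-entry
   exp table). */
#include <stdint.h>

static inline uint64_t
scale_top_sketch (uint64_t ki, int negate_result)
{
  uint64_t sign_bias = negate_result ? 0x800ULL << 8 : 0; /* SignBias. */
  /* (0x800 << 8) << (52 - 8) == 1ULL << 63, so after the shift the bias
     lands exactly on the IEEE sign bit of the scale. */
  return (ki + sign_bias) << (52 - 8);
}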
*/ static inline svfloat64_t sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2, svfloat64_t y, svbool_t cmp) { svbool_t p = svpfirst (cmp, svpfalse ()); while (svptest_any (cmp, p)) { double sx1 = svclastb (p, 0, x1); uint64_t su1 = svclastb (p, 0, u1); uint64_t su2 = svclastb (p, 0, u2); double elem = specialcase (sx1, su1, su2); svfloat64_t y2 = sv_f64 (elem); y = svsel (p, y2, y); p = svpnext_b64 (cmp, p); } return y; } /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about additional 15 bits precision. IX is the bit representation of x, but normalized in the subnormal range using the sign bit for the exponent. */ static inline svfloat64_t -sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail) +sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail, + const struct data *d) { /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - svuint64_t tmp = svsub_x (pg, ix, Off); + svuint64_t tmp = svsub_x (pg, ix, d->off); svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, 52 - V_POW_LOG_TABLE_BITS), sv_u64 (N_LOG - 1)); svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52); - svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, sv_u64 (0xfffULL << 52))); + svuint64_t iz = svsub_x (pg, ix, svlsl_x (pg, svreinterpret_u64 (k), 52)); svfloat64_t z = svreinterpret_f64 (iz); svfloat64_t kd = svcvt_f64_x (pg, k); /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ /* SVE lookup requires 3 separate lookup tables, as opposed to scalar version - that uses array of structures. We also do the lookup earlier in the code to - make sure it finishes as early as possible. */ + that uses array of structures. We also do the lookup earlier in the code + to make sure it finishes as early as possible. */ svfloat64_t invc = svld1_gather_index (pg, __v_pow_log_data.invc, i); svfloat64_t logc = svld1_gather_index (pg, __v_pow_log_data.logc, i); svfloat64_t logctail = svld1_gather_index (pg, __v_pow_log_data.logctail, i); /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ svfloat64_t r = svmad_x (pg, z, invc, -1.0); /* k*Ln2 + log(c) + r. */ - svfloat64_t t1 = svmla_x (pg, logc, kd, __v_pow_log_data.ln2_hi); + + svfloat64_t ln2_hilo = svld1rq_f64 (svptrue_b64 (), &d->ln2_hi); + svfloat64_t t1 = svmla_lane_f64 (logc, kd, ln2_hilo, 0); svfloat64_t t2 = svadd_x (pg, t1, r); - svfloat64_t lo1 = svmla_x (pg, logctail, kd, __v_pow_log_data.ln2_lo); + svfloat64_t lo1 = svmla_lane_f64 (logctail, kd, ln2_hilo, 1); svfloat64_t lo2 = svadd_x (pg, svsub_x (pg, t1, t2), r); /* Evaluation is optimized assuming superscalar pipelined execution. */ - svfloat64_t ar = svmul_x (pg, r, -0.5); /* A[0] = -0.5. */ - svfloat64_t ar2 = svmul_x (pg, r, ar); - svfloat64_t ar3 = svmul_x (pg, r, ar2); + + svfloat64_t log_c02 = svld1rq_f64 (svptrue_b64 (), &d->log_c0); + svfloat64_t ar = svmul_lane_f64 (r, log_c02, 0); + svfloat64_t ar2 = svmul_x (svptrue_b64 (), r, ar); + svfloat64_t ar3 = svmul_x (svptrue_b64 (), r, ar2); /* k*Ln2 + log(c) + r + A[0]*r*r. */ svfloat64_t hi = svadd_x (pg, t2, ar2); - svfloat64_t lo3 = svmla_x (pg, svneg_x (pg, ar2), ar, r); + svfloat64_t lo3 = svmls_x (pg, ar2, ar, r); svfloat64_t lo4 = svadd_x (pg, svsub_x (pg, t2, hi), ar2); /* p = log1p(r) - r - A[0]*r*r. */ /* p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6])))). 
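/* The hi/lo bookkeeping in sv_log_inline above is a chain of error-free
   transforms; the basic step in scalar form (valid when |a| >= |b|;
   illustrative only). */
static inline double
fast_two_sum_sketch (double a, double b, double *err)
{
  double s = a + b;
  *err = (a - s) + b; /* exact rounding error of a + b. */
  return s;
}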
*/ - svfloat64_t a56 = svmla_x (pg, sv_f64 (A[5]), r, A[6]); - svfloat64_t a34 = svmla_x (pg, sv_f64 (A[3]), r, A[4]); - svfloat64_t a12 = svmla_x (pg, sv_f64 (A[1]), r, A[2]); + + svfloat64_t log_c46 = svld1rq_f64 (svptrue_b64 (), &d->log_c4); + svfloat64_t a56 = svmla_lane_f64 (sv_f64 (d->log_c5), r, log_c46, 1); + svfloat64_t a34 = svmla_lane_f64 (sv_f64 (d->log_c3), r, log_c46, 0); + svfloat64_t a12 = svmla_lane_f64 (sv_f64 (d->log_c1), r, log_c02, 1); svfloat64_t p = svmla_x (pg, a34, ar2, a56); p = svmla_x (pg, a12, ar2, p); - p = svmul_x (pg, ar3, p); + p = svmul_x (svptrue_b64 (), ar3, p); svfloat64_t lo = svadd_x ( - pg, svadd_x (pg, svadd_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p); + pg, svadd_x (pg, svsub_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p); svfloat64_t y = svadd_x (pg, hi, lo); *tail = svadd_x (pg, svsub_x (pg, hi, y), lo); return y; } +static inline svfloat64_t +sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail, + svuint64_t sign_bias, svfloat64_t *tmp, svuint64_t *sbits, + svuint64_t *ki, const struct data *d) +{ + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ + svfloat64_t n_over_ln2_and_c2 = svld1rq_f64 (svptrue_b64 (), &d->n_over_ln2); + svfloat64_t z = svmul_lane_f64 (x, n_over_ln2_and_c2, 0); + /* z - kd is in [-1, 1] in non-nearest rounding modes. */ + svfloat64_t kd = svrinta_x (pg, z); + *ki = svreinterpret_u64 (svcvt_s64_x (pg, kd)); + + svfloat64_t ln2_over_n_hilo + = svld1rq_f64 (svptrue_b64 (), &d->ln2_over_n_hi); + svfloat64_t r = x; + r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 0); + r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 1); + /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ + r = svadd_x (pg, r, xtail); + /* 2^(k/N) ~= scale. */ + svuint64_t idx = svand_x (pg, *ki, N_EXP - 1); + svuint64_t top + = svlsl_x (pg, svadd_x (pg, *ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + *sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx); + *sbits = svadd_x (pg, *sbits, top); + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + *tmp = svmla_lane_f64 (sv_f64 (d->exp_c1), r, n_over_ln2_and_c2, 1); + *tmp = svmla_x (pg, sv_f64 (d->exp_c0), r, *tmp); + *tmp = svmla_x (pg, r, r2, *tmp); + svfloat64_t scale = svreinterpret_f64 (*sbits); + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + z = svmla_x (pg, scale, scale, *tmp); + return z; +} + /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */ static inline svfloat64_t sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail, - svuint64_t sign_bias) + svuint64_t sign_bias, const struct data *d) { /* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow) and other cases of large values of x (scale * (1 + TMP) oflow). */ svuint64_t abstop = svand_x (pg, sv_top12 (x), 0x7ff); /* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54). */ svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp); - /* Conditions special, uflow and oflow are all expressed as uoflow && - something, hence do not bother computing anything if no lane in uoflow is - true. 
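/* sv_exp_core above follows the standard table-driven exp scheme; a scalar
   sketch, assuming N_EXP == 256 and with exp2/exp standing in for the sbits
   table and the cubic polynomial (illustrative only). */
#include <math.h>

static inline double
exp_core_sketch (double x)
{
  const double N = 256.0;                           /* N_EXP. */
  double kd = rint (x * (0x1.71547652b82fep0 * N)); /* round(x * N/ln2). */
  double r = x - kd * 0x1.62e42fefc0000p-9;         /* ln2_over_n_hi. */
  r -= kd * -0x1.c610ca86c3899p-45;                 /* ln2_over_n_lo. */
  /* exp(x) = 2^(kd/N) * exp(r); the vector code reads 2^(kd/N) from the
     sbits table and approximates exp(r) - 1 with a cubic in r. */
  return exp2 (kd / N) * exp (r);
}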
*/ - svbool_t special = svpfalse_b (); - svbool_t uflow = svpfalse_b (); - svbool_t oflow = svpfalse_b (); + svfloat64_t tmp; + svuint64_t sbits, ki; if (unlikely (svptest_any (pg, uoflow))) { + svfloat64_t z + = sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d); + /* |x| is tiny (|x| <= 0x1p-54). */ - uflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000); + svbool_t uflow + = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000); uflow = svand_z (pg, uoflow, uflow); /* |x| is huge (|x| >= 1024). */ - oflow = svcmpge (pg, abstop, HugeExp); + svbool_t oflow = svcmpge (pg, abstop, HugeExp); oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow)); + /* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow - or underflow. */ - special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); + or underflow. */ + svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); + + /* Update result with special and large cases. */ + z = sv_call_specialcase (tmp, sbits, ki, z, special); + + /* Handle underflow and overflow. */ + svbool_t x_is_neg = svcmplt (pg, x, 0); + svuint64_t sign_mask + = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS); + svfloat64_t res_uoflow + = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY)); + res_uoflow = svreinterpret_f64 ( + svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask)); + /* Avoid spurious underflow for tiny x. */ + svfloat64_t res_spurious_uflow + = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); + + z = svsel (oflow, res_uoflow, z); + z = svsel (uflow, res_spurious_uflow, z); + return z; } - /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ - /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ - svfloat64_t z = svmul_x (pg, x, __v_pow_exp_data.n_over_ln2); - /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - svfloat64_t shift = sv_f64 (__v_pow_exp_data.shift); - svfloat64_t kd = svadd_x (pg, z, shift); - svuint64_t ki = svreinterpret_u64 (kd); - kd = svsub_x (pg, kd, shift); - svfloat64_t r = x; - r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_hi); - r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_lo); - /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ - r = svadd_x (pg, r, xtail); - /* 2^(k/N) ~= scale. */ - svuint64_t idx = svand_x (pg, ki, N_EXP - 1); - svuint64_t top - = svlsl_x (pg, svadd_x (pg, ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS); - /* This is only a valid scale when -1023*N < k < 1024*N. */ - svuint64_t sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx); - sbits = svadd_x (pg, sbits, top); - /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t tmp = svmla_x (pg, sv_f64 (C[1]), r, C[2]); - tmp = svmla_x (pg, sv_f64 (C[0]), r, tmp); - tmp = svmla_x (pg, r, r2, tmp); - svfloat64_t scale = svreinterpret_f64 (sbits); - /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there - is no spurious underflow here even without fma. */ - z = svmla_x (pg, scale, scale, tmp); - - /* Update result with special and large cases. */ - if (unlikely (svptest_any (pg, special))) - z = sv_call_specialcase (tmp, sbits, ki, z, special); - - /* Handle underflow and overflow. 
*/ - svuint64_t sign_bit = svlsr_x (pg, svreinterpret_u64 (x), 63); - svbool_t x_is_neg = svcmpne (pg, sign_bit, 0); - svuint64_t sign_mask = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS); - svfloat64_t res_uoflow = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY)); - res_uoflow = svreinterpret_f64 ( - svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask)); - z = svsel (oflow, res_uoflow, z); - /* Avoid spurious underflow for tiny x. */ - svfloat64_t res_spurious_uflow - = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); - z = svsel (uflow, res_spurious_uflow, z); - - return z; + return sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d); } static inline double pow_sc (double x, double y) { uint64_t ix = asuint64 (x); uint64_t iy = asuint64 (y); /* Special cases: |x| or |y| is 0, inf or nan. */ if (unlikely (zeroinfnan (iy))) { if (2 * iy == 0) return issignaling_inline (x) ? x + y : 1.0; if (ix == asuint64 (1.0)) return issignaling_inline (y) ? x + y : 1.0; if (2 * ix > 2 * asuint64 (INFINITY) || 2 * iy > 2 * asuint64 (INFINITY)) return x + y; if (2 * ix == 2 * asuint64 (1.0)) return 1.0; if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63)) return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ return y * y; } if (unlikely (zeroinfnan (ix))) { double_t x2 = x * x; if (ix >> 63 && checkint (iy) == 1) x2 = -x2; - /* Without the barrier some versions of clang hoist the 1/x2 and - thus division by zero exception can be signaled spuriously. */ - return (iy >> 63) ? opt_barrier_double (1 / x2) : x2; + return (iy >> 63) ? 1 / x2 : x2; } return x; } svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) { + const struct data *d = ptr_barrier (&data); + /* This preamble handles special case conditions used in the final scalar fallbacks. It also updates ix and sign_bias, that are used in the core computation too, i.e., exp( y * log (x) ). */ svuint64_t vix0 = svreinterpret_u64 (x); svuint64_t viy0 = svreinterpret_u64 (y); - svuint64_t vtopx0 = svlsr_x (svptrue_b64 (), vix0, 52); /* Negative x cases. */ - svuint64_t sign_bit = svlsr_m (pg, vix0, 63); - svbool_t xisneg = svcmpeq (pg, sign_bit, 1); + svbool_t xisneg = svcmplt (pg, x, 0); /* Set sign_bias and ix depending on sign of x and nature of y. */ - svbool_t yisnotint_xisneg = svpfalse_b (); + svbool_t yint_or_xpos = pg; svuint64_t sign_bias = sv_u64 (0); svuint64_t vix = vix0; - svuint64_t vtopx1 = vtopx0; if (unlikely (svptest_any (pg, xisneg))) { /* Determine nature of y. */ - yisnotint_xisneg = sv_isnotint (xisneg, y); - svbool_t yisint_xisneg = sv_isint (xisneg, y); + yint_or_xpos = sv_isint (xisneg, y); svbool_t yisodd_xisneg = sv_isodd (xisneg, y); /* ix set to abs(ix) if y is integer. */ - vix = svand_m (yisint_xisneg, vix0, 0x7fffffffffffffff); - vtopx1 = svand_m (yisint_xisneg, vtopx0, 0x7ff); + vix = svand_m (yint_or_xpos, vix0, 0x7fffffffffffffff); /* Set to SignBias if x is negative and y is odd. */ sign_bias = svsel (yisodd_xisneg, sv_u64 (SignBias), sv_u64 (0)); } - /* Special cases of x or y: zero, inf and nan. */ - svbool_t xspecial = sv_zeroinfnan (pg, vix0); - svbool_t yspecial = sv_zeroinfnan (pg, viy0); - svbool_t special = svorr_z (pg, xspecial, yspecial); - /* Small cases of x: |x| < 0x1p-126. 
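/* Subnormal x is renormalised before the log so that its exponent field
   becomes a proper negative k; a scalar sketch of the fix-up that follows
   (illustrative only). */
#include <stdint.h>
#include <string.h>

static inline uint64_t
normalize_subnormal_sketch (double x)
{
  double nx = x * 0x1p52;    /* now a normal number. */
  uint64_t ix;
  memcpy (&ix, &nx, sizeof ix);
  ix &= 0x7fffffffffffffff;  /* clear the sign bit. */
  return ix - (52ULL << 52); /* compensate in the biased exponent. */
}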
*/ - svuint64_t vabstopx0 = svand_x (pg, vtopx0, 0x7ff); - svbool_t xsmall = svcmplt (pg, vabstopx0, SmallPowX); - if (unlikely (svptest_any (pg, xsmall))) + svbool_t xsmall = svaclt (yint_or_xpos, x, SmallBoundX); + if (unlikely (svptest_any (yint_or_xpos, xsmall))) { /* Normalize subnormal x so exponent becomes negative. */ - svbool_t topx_is_null = svcmpeq (xsmall, vtopx1, 0); + svuint64_t vtopx = svlsr_x (svptrue_b64 (), vix, 52); + svbool_t topx_is_null = svcmpeq (xsmall, vtopx, 0); svuint64_t vix_norm = svreinterpret_u64 (svmul_m (xsmall, x, 0x1p52)); vix_norm = svand_m (xsmall, vix_norm, 0x7fffffffffffffff); vix_norm = svsub_m (xsmall, vix_norm, 52ULL << 52); vix = svsel (topx_is_null, vix_norm, vix); } /* y_hi = log(ix, &y_lo). */ svfloat64_t vlo; - svfloat64_t vhi = sv_log_inline (pg, vix, &vlo); + svfloat64_t vhi = sv_log_inline (yint_or_xpos, vix, &vlo, d); /* z = exp(y_hi, y_lo, sign_bias). */ - svfloat64_t vehi = svmul_x (pg, y, vhi); - svfloat64_t velo = svmul_x (pg, y, vlo); - svfloat64_t vemi = svmls_x (pg, vehi, y, vhi); - velo = svsub_x (pg, velo, vemi); - svfloat64_t vz = sv_exp_inline (pg, vehi, velo, sign_bias); + svfloat64_t vehi = svmul_x (svptrue_b64 (), y, vhi); + svfloat64_t vemi = svmls_x (yint_or_xpos, vehi, y, vhi); + svfloat64_t velo = svnmls_x (yint_or_xpos, vemi, y, vlo); + svfloat64_t vz = sv_exp_inline (yint_or_xpos, vehi, velo, sign_bias, d); /* Cases of finite y and finite negative x. */ - vz = svsel (yisnotint_xisneg, sv_f64 (__builtin_nan ("")), vz); + vz = svsel (yint_or_xpos, vz, sv_f64 (__builtin_nan (""))); + + /* Special cases of x or y: zero, inf and nan. */ + svbool_t xspecial = sv_zeroinfnan (svptrue_b64 (), vix0); + svbool_t yspecial = sv_zeroinfnan (svptrue_b64 (), viy0); + svbool_t special = svorr_z (svptrue_b64 (), xspecial, yspecial); /* Cases of zero/inf/nan x or y. */ - if (unlikely (svptest_any (pg, special))) + if (unlikely (svptest_any (svptrue_b64 (), special))) vz = sv_call2_f64 (pow_sc, x, y, vz, special); return vz; } -PL_SIG (SV, D, 2, pow) -PL_TEST_ULP (SV_NAME_D2 (pow), 0.55) +TEST_SIG (SV, D, 2, pow) +TEST_ULP (SV_NAME_D2 (pow), 0.55) +TEST_DISABLE_FENV (SV_NAME_D2 (pow)) /* Wide intervals spanning the whole domain but shared between x and y. */ -#define SV_POW_INTERVAL2(xlo, xhi, ylo, yhi, n) \ - PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), xlo, xhi, ylo, yhi, n) \ - PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), xlo, xhi, -ylo, -yhi, n) \ - PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -xlo, -xhi, ylo, yhi, n) \ - PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -xlo, -xhi, -ylo, -yhi, n) +#define SV_POW_INTERVAL2(xlo, xhi, ylo, yhi, n) \ + TEST_INTERVAL2 (SV_NAME_D2 (pow), xlo, xhi, ylo, yhi, n) \ + TEST_INTERVAL2 (SV_NAME_D2 (pow), xlo, xhi, -ylo, -yhi, n) \ + TEST_INTERVAL2 (SV_NAME_D2 (pow), -xlo, -xhi, ylo, yhi, n) \ + TEST_INTERVAL2 (SV_NAME_D2 (pow), -xlo, -xhi, -ylo, -yhi, n) #define EXPAND(str) str##000000000 #define SHL52(str) EXPAND (str) SV_POW_INTERVAL2 (0, SHL52 (SmallPowX), 0, inf, 40000) SV_POW_INTERVAL2 (SHL52 (SmallPowX), SHL52 (BigPowX), 0, inf, 40000) SV_POW_INTERVAL2 (SHL52 (BigPowX), inf, 0, inf, 40000) SV_POW_INTERVAL2 (0, inf, 0, SHL52 (SmallPowY), 40000) SV_POW_INTERVAL2 (0, inf, SHL52 (SmallPowY), SHL52 (BigPowY), 40000) SV_POW_INTERVAL2 (0, inf, SHL52 (BigPowY), inf, 40000) SV_POW_INTERVAL2 (0, inf, 0, inf, 1000) /* x~1 or y~1. 
*/ SV_POW_INTERVAL2 (0x1p-1, 0x1p1, 0x1p-10, 0x1p10, 10000) SV_POW_INTERVAL2 (0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p16, 10000) SV_POW_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000) /* around estimated argmaxs of ULP error. */ SV_POW_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000) SV_POW_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000) /* x is negative, y is odd or even integer, or y is real not integer. */ -PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 3.0, 3.0, 10000) -PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 4.0, 4.0, 10000) -PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 0.0, 10.0, 10000) -PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 0.0, 10.0, -0.0, -10.0, 10000) +TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 3.0, 3.0, 10000) +TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 4.0, 4.0, 10000) +TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 0.0, 10.0, 10000) +TEST_INTERVAL2 (SV_NAME_D2 (pow), 0.0, 10.0, -0.0, -10.0, 10000) /* |x| is inf, y is odd or even integer, or y is real not integer. */ SV_POW_INTERVAL2 (inf, inf, 0.5, 0.5, 1) SV_POW_INTERVAL2 (inf, inf, 1.0, 1.0, 1) SV_POW_INTERVAL2 (inf, inf, 2.0, 2.0, 1) SV_POW_INTERVAL2 (inf, inf, 3.0, 3.0, 1) /* 0.0^y. */ SV_POW_INTERVAL2 (0.0, 0.0, 0.0, 0x1p120, 1000) /* 1.0^y. */ -PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000) -PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000) -PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000) -PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000) +TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000) +TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000) +TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000) +TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_powf_2u6.c b/contrib/arm-optimized-routines/math/aarch64/sve/powf.c similarity index 69% rename from contrib/arm-optimized-routines/pl/math/sv_powf_2u6.c rename to contrib/arm-optimized-routines/math/aarch64/sve/powf.c index 2db0636aea62..8457e83e7495 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_powf_2u6.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/powf.c @@ -1,360 +1,363 @@ /* * Single-precision SVE powf function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2025, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" /* The following data is used in the SVE pow core computation and special case detection. */ #define Tinvc __v_powf_data.invc #define Tlogc __v_powf_data.logc #define Texp __v_powf_data.scale #define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11)) -#define Shift 0x1.8p52 #define Norm 0x1p23f /* 0x4b000000. */ /* Overall ULP error bound for pow is 2.6 ulp ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2). */ static const struct data { double log_poly[4]; double exp_poly[3]; float uflow_bound, oflow_bound, small_bound; - uint32_t sign_bias, sign_mask, subnormal_bias, off; + uint32_t sign_bias, subnormal_bias, off; } data = { /* rel err: 1.5 * 2^-30. Each coefficient is multiplied by the value of V_POWF_EXP2_N. */ .log_poly = { -0x1.6ff5daa3b3d7cp+3, 0x1.ec81d03c01aebp+3, -0x1.71547bb43f101p+4, 0x1.7154764a815cbp+5 }, /* rel err: 1.69 * 2^-34. */ .exp_poly = { 0x1.c6af84b912394p-20, /* A0 / V_POWF_EXP2_N^3. */ 0x1.ebfce50fac4f3p-13, /* A1 / V_POWF_EXP2_N^2.
*/ 0x1.62e42ff0c52d6p-6, /* A3 / V_POWF_EXP2_N. */ }, .uflow_bound = -0x1.2cp+12f, /* -150.0 * V_POWF_EXP2_N. */ .oflow_bound = 0x1p+12f, /* 128.0 * V_POWF_EXP2_N. */ .small_bound = 0x1p-126f, .off = 0x3f35d000, .sign_bias = SignBias, - .sign_mask = 0x80000000, .subnormal_bias = 0x0b800000, /* 23 << 23. */ }; #define A(i) sv_f64 (d->log_poly[i]) #define C(i) sv_f64 (d->exp_poly[i]) /* Check if x is an integer. */ static inline svbool_t svisint (svbool_t pg, svfloat32_t x) { return svcmpeq (pg, svrintz_z (pg, x), x); } /* Check if x is real not integer valued. */ static inline svbool_t svisnotint (svbool_t pg, svfloat32_t x) { return svcmpne (pg, svrintz_z (pg, x), x); } /* Check if x is an odd integer. */ static inline svbool_t svisodd (svbool_t pg, svfloat32_t x) { svfloat32_t y = svmul_x (pg, x, 0.5f); return svisnotint (pg, y); } /* Check if zero, inf or nan. */ static inline svbool_t sv_zeroinfnan (svbool_t pg, svuint32_t i) { - return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2u), 1), + return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1), 2u * 0x7f800000 - 1); } /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is the bit representation of a non-zero finite floating-point value. */ static inline int checkint (uint32_t iy) { int e = iy >> 23 & 0xff; if (e < 0x7f) return 0; if (e > 0x7f + 23) return 2; if (iy & ((1 << (0x7f + 23 - e)) - 1)) return 0; if (iy & (1 << (0x7f + 23 - e))) return 1; return 2; } /* Check if zero, inf or nan. */ static inline int zeroinfnan (uint32_t ix) { return 2 * ix - 1 >= 2u * 0x7f800000 - 1; } /* A scalar subroutine used to fix main power special cases. Similar to the - preamble of finite_powf except that we do not update ix and sign_bias. This + preamble of scalar powf except that we do not update ix and sign_bias. This is done in the preamble of the SVE powf. */ static inline float powf_specialcase (float x, float y, float z) { uint32_t ix = asuint (x); uint32_t iy = asuint (y); /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */ if (unlikely (zeroinfnan (iy))) { if (2 * iy == 0) return issignalingf_inline (x) ? x + y : 1.0f; if (ix == 0x3f800000) return issignalingf_inline (y) ? x + y : 1.0f; if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000) return x + y; if (2 * ix == 2 * 0x3f800000) return 1.0f; if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000)) return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ return y * y; } if (unlikely (zeroinfnan (ix))) { float_t x2 = x * x; if (ix & 0x80000000 && checkint (iy) == 1) x2 = -x2; return iy & 0x80000000 ? 1 / x2 : x2; } /* We need a return here in case x<0 and y is integer, but all other tests need to be run. */ return z; } /* Scalar fallback for special case routines with custom signature. */ -static inline svfloat32_t -sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp) +static svfloat32_t NOINLINE +sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y) { + /* Special cases of x or y: zero, inf and nan. 
*/ + svbool_t xspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x1)); + svbool_t yspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x2)); + svbool_t cmp = svorr_z (svptrue_b32 (), xspecial, yspecial); + svbool_t p = svpfirst (cmp, svpfalse ()); while (svptest_any (cmp, p)) { float sx1 = svclastb (p, 0, x1); float sx2 = svclastb (p, 0, x2); float elem = svclastb (p, 0, y); elem = powf_specialcase (sx1, sx2, elem); svfloat32_t y2 = sv_f32 (elem); y = svsel (p, y2, y); p = svpnext_b32 (cmp, p); } return y; } /* Compute core for half of the lanes in double precision. */ static inline svfloat64_t sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k, svfloat64_t y, svuint64_t sign_bias, svfloat64_t *pylogx, const struct data *d) { svfloat64_t invc = svld1_gather_index (pg, Tinvc, i); svfloat64_t logc = svld1_gather_index (pg, Tlogc, i); /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */ svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), z, invc); svfloat64_t y0 = svadd_x (pg, logc, svcvt_f64_x (pg, k)); /* Polynomial to approximate log1p(r)/ln2. */ svfloat64_t logx = A (0); - logx = svmla_x (pg, A (1), r, logx); - logx = svmla_x (pg, A (2), r, logx); - logx = svmla_x (pg, A (3), r, logx); - logx = svmla_x (pg, y0, r, logx); + logx = svmad_x (pg, r, logx, A (1)); + logx = svmad_x (pg, r, logx, A (2)); + logx = svmad_x (pg, r, logx, A (3)); + logx = svmad_x (pg, r, logx, y0); *pylogx = svmul_x (pg, y, logx); /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - svfloat64_t kd = svadd_x (pg, *pylogx, Shift); - svuint64_t ki = svreinterpret_u64 (kd); - kd = svsub_x (pg, kd, Shift); + svfloat64_t kd = svrinta_x (svptrue_b64 (), *pylogx); + svuint64_t ki = svreinterpret_u64 (svcvt_s64_x (svptrue_b64 (), kd)); r = svsub_x (pg, *pylogx, kd); /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */ - svuint64_t t - = svld1_gather_index (pg, Texp, svand_x (pg, ki, V_POWF_EXP2_N - 1)); - svuint64_t ski = svadd_x (pg, ki, sign_bias); - t = svadd_x (pg, t, svlsl_x (pg, ski, 52 - V_POWF_EXP2_TABLE_BITS)); + svuint64_t t = svld1_gather_index ( + svptrue_b64 (), Texp, svand_x (svptrue_b64 (), ki, V_POWF_EXP2_N - 1)); + svuint64_t ski = svadd_x (svptrue_b64 (), ki, sign_bias); + t = svadd_x (svptrue_b64 (), t, + svlsl_x (svptrue_b64 (), ski, 52 - V_POWF_EXP2_TABLE_BITS)); svfloat64_t s = svreinterpret_f64 (t); svfloat64_t p = C (0); p = svmla_x (pg, C (1), p, r); p = svmla_x (pg, C (2), p, r); - p = svmla_x (pg, s, p, svmul_x (pg, s, r)); + p = svmla_x (pg, s, p, svmul_x (svptrue_b64 (), s, r)); return p; } /* Widen vector to double precision and compute core on both halves of the vector. Lower cost of promotion by considering all lanes active. */ static inline svfloat32_t sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, svfloat32_t y, svuint32_t sign_bias, svfloat32_t *pylogx, const struct data *d) { const svbool_t ptrue = svptrue_b64 (); - /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two in - order to perform core computation in double precision. */ + /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two + in order to perform core computation in double precision. 
*/ const svbool_t pg_lo = svunpklo (pg); const svbool_t pg_hi = svunpkhi (pg); - svfloat64_t y_lo = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); - svfloat64_t y_hi = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); - svfloat32_t z = svreinterpret_f32 (iz); - svfloat64_t z_lo = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (z)))); - svfloat64_t z_hi = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (z)))); + svfloat64_t y_lo + = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); + svfloat64_t y_hi + = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); + svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz))); + svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz))); svuint64_t i_lo = svunpklo (i); svuint64_t i_hi = svunpkhi (i); svint64_t k_lo = svunpklo (k); svint64_t k_hi = svunpkhi (k); svuint64_t sign_bias_lo = svunpklo (sign_bias); svuint64_t sign_bias_hi = svunpkhi (sign_bias); /* Compute each part in double precision. */ svfloat64_t ylogx_lo, ylogx_hi; svfloat64_t lo = sv_powf_core_ext (pg_lo, i_lo, z_lo, k_lo, y_lo, sign_bias_lo, &ylogx_lo, d); svfloat64_t hi = sv_powf_core_ext (pg_hi, i_hi, z_hi, k_hi, y_hi, sign_bias_hi, &ylogx_hi, d); /* Convert back to single-precision and interleave. */ svfloat32_t ylogx_lo_32 = svcvt_f32_x (ptrue, ylogx_lo); svfloat32_t ylogx_hi_32 = svcvt_f32_x (ptrue, ylogx_hi); *pylogx = svuzp1 (ylogx_lo_32, ylogx_hi_32); svfloat32_t lo_32 = svcvt_f32_x (ptrue, lo); svfloat32_t hi_32 = svcvt_f32_x (ptrue, hi); return svuzp1 (lo_32, hi_32); } /* Implementation of SVE powf. Provides the same accuracy as AdvSIMD powf, since it relies on the same algorithm. The theoretical maximum error is under 2.60 ULPs. - Maximum measured error is 2.56 ULPs: - SV_NAME_F2 (pow) (0x1.004118p+0, 0x1.5d14a4p+16) got 0x1.fd4bp+127 - want 0x1.fd4b06p+127. */ + Maximum measured error is 2.57 ULPs: + SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127 + want 0x1.fff862p+127. */ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svuint32_t vix0 = svreinterpret_u32 (x); svuint32_t viy0 = svreinterpret_u32 (y); /* Negative x cases. */ - svuint32_t sign_bit = svand_m (pg, vix0, d->sign_mask); - svbool_t xisneg = svcmpeq (pg, sign_bit, d->sign_mask); + svbool_t xisneg = svcmplt (pg, x, sv_f32 (0)); /* Set sign_bias and ix depending on sign of x and nature of y. */ - svbool_t yisnotint_xisneg = svpfalse_b (); + svbool_t yint_or_xpos = pg; svuint32_t sign_bias = sv_u32 (0); svuint32_t vix = vix0; if (unlikely (svptest_any (pg, xisneg))) { /* Determine nature of y. */ - yisnotint_xisneg = svisnotint (xisneg, y); - svbool_t yisint_xisneg = svisint (xisneg, y); + yint_or_xpos = svisint (xisneg, y); svbool_t yisodd_xisneg = svisodd (xisneg, y); /* ix set to abs(ix) if y is integer. */ - vix = svand_m (yisint_xisneg, vix0, 0x7fffffff); + vix = svand_m (yint_or_xpos, vix0, 0x7fffffff); /* Set to SignBias if x is negative and y is odd. */ sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0)); } /* Special cases of x or y: zero, inf and nan. */ svbool_t xspecial = sv_zeroinfnan (pg, vix0); svbool_t yspecial = sv_zeroinfnan (pg, viy0); svbool_t cmp = svorr_z (pg, xspecial, yspecial); /* Small cases of x: |x| < 0x1p-126. 
*/ - svbool_t xsmall = svaclt (pg, x, d->small_bound); - if (unlikely (svptest_any (pg, xsmall))) + svbool_t xsmall = svaclt (yint_or_xpos, x, d->small_bound); + if (unlikely (svptest_any (yint_or_xpos, xsmall))) { /* Normalize subnormal x so exponent becomes negative. */ svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm)); vix_norm = svand_x (xsmall, vix_norm, 0x7fffffff); vix_norm = svsub_x (xsmall, vix_norm, d->subnormal_bias); vix = svsel (xsmall, vix_norm, vix); } /* Part of core computation carried in working precision. */ - svuint32_t tmp = svsub_x (pg, vix, d->off); - svuint32_t i = svand_x (pg, svlsr_x (pg, tmp, (23 - V_POWF_LOG2_TABLE_BITS)), - V_POWF_LOG2_N - 1); - svuint32_t top = svand_x (pg, tmp, 0xff800000); - svuint32_t iz = svsub_x (pg, vix, top); - svint32_t k - = svasr_x (pg, svreinterpret_s32 (top), (23 - V_POWF_EXP2_TABLE_BITS)); - - /* Compute core in extended precision and return intermediate ylogx results to - handle cases of underflow and underflow in exp. */ + svuint32_t tmp = svsub_x (yint_or_xpos, vix, d->off); + svuint32_t i = svand_x ( + yint_or_xpos, svlsr_x (yint_or_xpos, tmp, (23 - V_POWF_LOG2_TABLE_BITS)), + V_POWF_LOG2_N - 1); + svuint32_t top = svand_x (yint_or_xpos, tmp, 0xff800000); + svuint32_t iz = svsub_x (yint_or_xpos, vix, top); + svint32_t k = svasr_x (yint_or_xpos, svreinterpret_s32 (top), + (23 - V_POWF_EXP2_TABLE_BITS)); + + /* Compute core in extended precision and return intermediate ylogx results + to handle cases of underflow and overflow in exp. */ svfloat32_t ylogx; - svfloat32_t ret = sv_powf_core (pg, i, iz, k, y, sign_bias, &ylogx, d); + svfloat32_t ret + = sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d); /* Handle exp special cases of underflow and overflow. */ - svuint32_t sign = svlsl_x (pg, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS); + svuint32_t sign + = svlsl_x (yint_or_xpos, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS); svfloat32_t ret_oflow - = svreinterpret_f32 (svorr_x (pg, sign, asuint (INFINITY))); + = svreinterpret_f32 (svorr_x (yint_or_xpos, sign, asuint (INFINITY))); svfloat32_t ret_uflow = svreinterpret_f32 (sign); - ret = svsel (svcmple (pg, ylogx, d->uflow_bound), ret_uflow, ret); - ret = svsel (svcmpgt (pg, ylogx, d->oflow_bound), ret_oflow, ret); + ret = svsel (svcmple (yint_or_xpos, ylogx, d->uflow_bound), ret_uflow, ret); + ret = svsel (svcmpgt (yint_or_xpos, ylogx, d->oflow_bound), ret_oflow, ret); /* Cases of finite y and finite negative x. */ - ret = svsel (yisnotint_xisneg, sv_f32 (__builtin_nanf ("")), ret); + ret = svsel (yint_or_xpos, ret, sv_f32 (__builtin_nanf (""))); - if (unlikely (svptest_any (pg, cmp))) - return sv_call_powf_sc (x, y, ret, cmp); + if (unlikely (svptest_any (cmp, cmp))) + return sv_call_powf_sc (x, y, ret); return ret; } -PL_SIG (SV, F, 2, pow) -PL_TEST_ULP (SV_NAME_F2 (pow), 2.06) +TEST_SIG (SV, F, 2, pow) +TEST_ULP (SV_NAME_F2 (pow), 2.08) +TEST_DISABLE_FENV (SV_NAME_F2 (pow)) /* Wide intervals spanning the whole domain but shared between x and y.
*/ -#define SV_POWF_INTERVAL2(xlo, xhi, ylo, yhi, n) \ - PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, ylo, yhi, n) \ - PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, -ylo, -yhi, n) \ - PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, ylo, yhi, n) \ - PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, -ylo, -yhi, n) +#define SV_POWF_INTERVAL2(xlo, xhi, ylo, yhi, n) \ + TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, ylo, yhi, n) \ + TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, -ylo, -yhi, n) \ + TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, ylo, yhi, n) \ + TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, -ylo, -yhi, n) SV_POWF_INTERVAL2 (0, 0x1p-126, 0, inf, 40000) SV_POWF_INTERVAL2 (0x1p-126, 1, 0, inf, 50000) SV_POWF_INTERVAL2 (1, inf, 0, inf, 50000) /* x~1 or y~1. */ SV_POWF_INTERVAL2 (0x1p-1, 0x1p1, 0x1p-10, 0x1p10, 10000) SV_POWF_INTERVAL2 (0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p16, 10000) SV_POWF_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000) /* around estimated argmaxs of ULP error. */ SV_POWF_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000) SV_POWF_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000) /* x is negative, y is odd or even integer, or y is real not integer. */ -PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 3.0, 3.0, 10000) -PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 4.0, 4.0, 10000) -PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 0.0, 10.0, 10000) -PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 0.0, 10.0, -0.0, -10.0, 10000) +TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 3.0, 3.0, 10000) +TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 4.0, 4.0, 10000) +TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 0.0, 10.0, 10000) +TEST_INTERVAL2 (SV_NAME_F2 (pow), 0.0, 10.0, -0.0, -10.0, 10000) /* |x| is inf, y is odd or even integer, or y is real not integer. */ SV_POWF_INTERVAL2 (inf, inf, 0.5, 0.5, 1) SV_POWF_INTERVAL2 (inf, inf, 1.0, 1.0, 1) SV_POWF_INTERVAL2 (inf, inf, 2.0, 2.0, 1) SV_POWF_INTERVAL2 (inf, inf, 3.0, 3.0, 1) /* 0.0^y. */ SV_POWF_INTERVAL2 (0.0, 0.0, 0.0, 0x1p120, 1000) /* 1.0^y. */ -PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000) -PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000) -PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000) -PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000) +TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000) +TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000) +TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000) +TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_sin_3u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/sin.c similarity index 89% rename from contrib/arm-optimized-routines/pl/math/sv_sin_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/sin.c index a81f3fc80f3d..7e22515ceb79 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_sin_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sin.c @@ -1,96 +1,98 @@ /* * Double-precision SVE sin(x) function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { double inv_pi, pi_1, pi_2, pi_3, shift, range_val; double poly[7]; } data = { .poly = { -0x1.555555555547bp-3, 0x1.1111111108a4dp-7, -0x1.a01a019936f27p-13, 0x1.71de37a97d93ep-19, -0x1.ae633919987c6p-26, 0x1.60e277ae07cecp-33, -0x1.9e9540300a1p-41, }, .inv_pi = 0x1.45f306dc9c883p-2, .pi_1 = 0x1.921fb54442d18p+1, .pi_2 = 0x1.1a62633145c06p-53, .pi_3 = 0x1.c1cd129024e09p-106, .shift = 0x1.8p52, .range_val = 0x1p23, }; #define C(i) sv_f64 (d->poly[i]) static svfloat64_t NOINLINE special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp) { return sv_call_f64 (sin, x, y, cmp); } /* A fast SVE implementation of sin. Maximum observed error in [-pi/2, pi/2], where argument is not reduced, is 2.87 ULP: _ZGVsMxv_sin (0x1.921d5c6a07142p+0) got 0x1.fffffffa7dc02p-1 want 0x1.fffffffa7dc05p-1 Maximum observed error in the entire non-special domain ([-2^23, 2^23]) is 3.22 ULP: _ZGVsMxv_sin (0x1.5702447b6f17bp+22) got 0x1.ffdcd125c84fbp-3 want 0x1.ffdcd125c84f8p-3. */ svfloat64_t SV_NAME_D1 (sin) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); /* Load some values in quad-word chunks to minimise memory access. */ const svbool_t ptrue = svptrue_b64 (); svfloat64_t shift = sv_f64 (d->shift); svfloat64_t inv_pi_and_pi1 = svld1rq (ptrue, &d->inv_pi); svfloat64_t pi2_and_pi3 = svld1rq (ptrue, &d->pi_2); /* n = rint(|x|/pi). */ svfloat64_t n = svmla_lane (shift, x, inv_pi_and_pi1, 0); svuint64_t odd = svlsl_x (pg, svreinterpret_u64 (n), 63); n = svsub_x (pg, n, shift); /* r = |x| - n*(pi/2) (range reduction into -pi/2 .. pi/2). */ svfloat64_t r = x; r = svmls_lane (r, n, inv_pi_and_pi1, 1); r = svmls_lane (r, n, pi2_and_pi3, 0); r = svmls_lane (r, n, pi2_and_pi3, 1); /* sin(r) poly approx. */ svfloat64_t r2 = svmul_x (pg, r, r); svfloat64_t r3 = svmul_x (pg, r2, r); svfloat64_t r4 = svmul_x (pg, r2, r2); svfloat64_t t1 = svmla_x (pg, C (4), C (5), r2); svfloat64_t t2 = svmla_x (pg, C (2), C (3), r2); svfloat64_t t3 = svmla_x (pg, C (0), C (1), r2); svfloat64_t y = svmla_x (pg, t1, C (6), r4); y = svmla_x (pg, t2, y, r4); y = svmla_x (pg, t3, y, r4); y = svmla_x (pg, r, y, r3); svbool_t cmp = svacle (pg, x, d->range_val); cmp = svnot_z (pg, cmp); if (unlikely (svptest_any (pg, cmp))) return special_case (x, svreinterpret_f64 (sveor_z ( svnot_z (pg, cmp), svreinterpret_u64 (y), odd)), cmp); /* Copy sign. 
*/ return svreinterpret_f64 (sveor_z (pg, svreinterpret_u64 (y), odd)); } -PL_SIG (SV, D, 1, sin, -3.1, 3.1) -PL_TEST_ULP (SV_NAME_D1 (sin), 2.73) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sin), 0, 0x1p23, 1000000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sin), 0x1p23, inf, 10000) +TEST_SIG (SV, D, 1, sin, -3.1, 3.1) +TEST_ULP (SV_NAME_D1 (sin), 2.73) +TEST_DISABLE_FENV (SV_NAME_D1 (sin)) +TEST_SYM_INTERVAL (SV_NAME_D1 (sin), 0, 0x1p23, 1000000) +TEST_SYM_INTERVAL (SV_NAME_D1 (sin), 0x1p23, inf, 10000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_sincos_3u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/sincos.c similarity index 72% rename from contrib/arm-optimized-routines/pl/math/sv_sincos_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/sincos.c index f73550082d5b..26b8bb3c6a5a 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_sincos_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sincos.c @@ -1,61 +1,73 @@ /* * Double-precision vector sincos function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Define _GNU_SOURCE in order to include sincos declaration. If building pre-GLIBC 2.1, or on a non-GNU conforming system, this routine will need to be linked against the scalar sincos from math/. */ #define _GNU_SOURCE -#include <math.h> -#undef _GNU_SOURCE -#include "sv_sincos_common.h" #include "sv_math.h" -#include "pl_test.h" +#include "sv_sincos_common.h" +#include "test_defs.h" + +#include <math.h> + +/* sincos not available for all scalar libm implementations. */ +#ifndef __GLIBC__ +static void +sincos (double x, double *out_sin, double *out_cos) +{ + *out_sin = sin (x); + *out_cos = cos (x); +} +#endif static void NOINLINE special_case (svfloat64_t x, svbool_t special, double *out_sin, double *out_cos) { svbool_t p = svptrue_pat_b64 (SV_VL1); for (int i = 0; i < svcntd (); i++) { if (svptest_any (special, p)) sincos (svlastb (p, x), out_sin + i, out_cos + i); p = svpnext_b64 (svptrue_b64 (), p); } } /* Double-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate polynomials. Largest observed error is for sin, 3.22 ULP: sv_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3 want -0x1.ffe9537d5dbb4p-3. */
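
The vector sincos stores both results through pointers under control of the governing predicate, so a caller simply strides its buffers by the hardware vector length. A minimal caller sketch under that convention (the buffer and loop names are illustrative, not part of this patch):

#include <arm_sve.h>

/* Illustrative only: apply the SVE sincos defined below to n doubles.  */
static void
sincos_buffer (const double *x, double *out_sin, double *out_cos, int n)
{
  for (int i = 0; i < n; i += svcntd ())
    {
      /* Predicate covers the (possibly partial) tail vector.  */
      svbool_t pg = svwhilelt_b64 (i, n);
      svfloat64_t vx = svld1 (pg, x + i);
      _ZGVsMxvl8l8_sincos (vx, out_sin + i, out_cos + i, pg);
    }
}
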
void _ZGVsMxvl8l8_sincos (svfloat64_t x, double *out_sin, double *out_cos, svbool_t pg) { const struct sv_sincos_data *d = ptr_barrier (&sv_sincos_data); svbool_t special = check_ge_rangeval (pg, x, d); svfloat64x2_t sc = sv_sincos_inline (pg, x, d); svst1 (pg, out_sin, svget2 (sc, 0)); svst1 (pg, out_cos, svget2 (sc, 1)); if (unlikely (svptest_any (pg, special))) special_case (x, special, out_sin, out_cos); } -PL_TEST_ULP (_ZGVsMxv_sincos_sin, 2.73) -PL_TEST_ULP (_ZGVsMxv_sincos_cos, 2.73) +TEST_DISABLE_FENV (_ZGVsMxv_sincos_sin) +TEST_DISABLE_FENV (_ZGVsMxv_sincos_cos) +TEST_ULP (_ZGVsMxv_sincos_sin, 2.73) +TEST_ULP (_ZGVsMxv_sincos_cos, 2.73) #define SV_SINCOS_INTERVAL(lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVsMxv_sincos_sin, lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVsMxv_sincos_cos, lo, hi, n) -SV_SINCOS_INTERVAL (0, 0x1p23, 500000) -SV_SINCOS_INTERVAL (-0, -0x1p23, 500000) + TEST_SYM_INTERVAL (_ZGVsMxv_sincos_sin, lo, hi, n) \ + TEST_SYM_INTERVAL (_ZGVsMxv_sincos_cos, lo, hi, n) +SV_SINCOS_INTERVAL (0, 0x1p-63, 50000) +SV_SINCOS_INTERVAL (0x1p-63, 0x1p23, 500000) SV_SINCOS_INTERVAL (0x1p23, inf, 10000) -SV_SINCOS_INTERVAL (-0x1p23, -inf, 10000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_sincosf_1u8.c b/contrib/arm-optimized-routines/math/aarch64/sve/sincosf.c similarity index 72% rename from contrib/arm-optimized-routines/pl/math/sv_sincosf_1u8.c rename to contrib/arm-optimized-routines/math/aarch64/sve/sincosf.c index c335de8d3dbb..f3e956ee62e2 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_sincosf_1u8.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sincosf.c @@ -1,62 +1,74 @@ /* * Single-precision vector sincos function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Define _GNU_SOURCE in order to include sincosf declaration. If building pre-GLIBC 2.1, or on a non-GNU conforming system, this routine will need to be linked against the scalar sincosf from math/. */ #define _GNU_SOURCE -#include <math.h> -#undef _GNU_SOURCE -#include "sv_sincosf_common.h" #include "sv_math.h" -#include "pl_test.h" +#include "sv_sincosf_common.h" +#include "test_defs.h" + +#include <math.h> + +/* sincosf not available for all scalar libm implementations. */ +#ifndef __GLIBC__ +static void +sincosf (float x, float *out_sin, float *out_cos) +{ + *out_sin = sinf (x); + *out_cos = cosf (x); +} +#endif static void NOINLINE special_case (svfloat32_t x, svbool_t special, float *out_sin, float *out_cos) { svbool_t p = svptrue_pat_b32 (SV_VL1); for (int i = 0; i < svcntw (); i++) { if (svptest_any (special, p)) sincosf (svlastb (p, x), out_sin + i, out_cos + i); p = svpnext_b32 (svptrue_b32 (), p); } } /* Single-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate low-order polynomials. Worst-case error for sin is 1.67 ULP: sv_sincosf_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5 Worst-case error for cos is 1.81 ULP: sv_sincosf_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6.
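
The special-case helpers in both sincos files use the same lane-walk idiom: a single-lane predicate starts at lane 0, svlastb extracts that lane's element under the one-lane predicate, and svpnext advances to the following lane. In isolation the pattern looks like this (a sketch for a generic unary fallback; the function name is hypothetical):

#include <arm_sve.h>
#include <math.h>

/* Illustrative: recompute the flagged lanes of y with a scalar routine.  */
static svfloat32_t
fixup_lanes (svfloat32_t x, svfloat32_t y, svbool_t special)
{
  svbool_t p = svptrue_pat_b32 (SV_VL1);	/* lane 0 only.  */
  for (uint32_t i = 0; i < svcntw (); i++)
    {
      if (svptest_any (special, p))
	{
	  /* svlastb under a one-lane predicate reads that lane.  */
	  float elem = sinf (svlastb (p, x));
	  y = svsel (p, svdup_f32 (elem), y);
	}
      p = svpnext_b32 (svptrue_b32 (), p);	/* step to the next lane.  */
    }
  return y;
}
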
*/ void _ZGVsMxvl4l4_sincosf (svfloat32_t x, float *out_sin, float *out_cos, svbool_t pg) { const struct sv_sincosf_data *d = ptr_barrier (&sv_sincosf_data); svbool_t special = check_ge_rangeval (pg, x, d); svfloat32x2_t sc = sv_sincosf_inline (pg, x, d); svst1_f32 (pg, out_sin, svget2 (sc, 0)); svst1_f32 (pg, out_cos, svget2 (sc, 1)); if (unlikely (svptest_any (pg, special))) special_case (x, special, out_sin, out_cos); } -PL_TEST_ULP (_ZGVsMxv_sincosf_sin, 1.17) -PL_TEST_ULP (_ZGVsMxv_sincosf_cos, 1.31) +TEST_DISABLE_FENV (_ZGVsMxv_sincosf_sin) +TEST_DISABLE_FENV (_ZGVsMxv_sincosf_cos) +TEST_ULP (_ZGVsMxv_sincosf_sin, 1.17) +TEST_ULP (_ZGVsMxv_sincosf_cos, 1.31) #define SV_SINCOSF_INTERVAL(lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVsMxv_sincosf_sin, lo, hi, n) \ - PL_TEST_INTERVAL (_ZGVsMxv_sincosf_cos, lo, hi, n) -SV_SINCOSF_INTERVAL (0, 0x1p20, 500000) -SV_SINCOSF_INTERVAL (-0, -0x1p20, 500000) + TEST_SYM_INTERVAL (_ZGVsMxv_sincosf_sin, lo, hi, n) \ + TEST_SYM_INTERVAL (_ZGVsMxv_sincosf_cos, lo, hi, n) +SV_SINCOSF_INTERVAL (0, 0x1p-31, 50000) +SV_SINCOSF_INTERVAL (0x1p-31, 0x1p20, 500000) SV_SINCOSF_INTERVAL (0x1p20, inf, 10000) -SV_SINCOSF_INTERVAL (-0x1p20, -inf, 10000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/sincospi.c b/contrib/arm-optimized-routines/math/aarch64/sve/sincospi.c new file mode 100644 index 000000000000..d06ca8cc4165 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sincospi.c @@ -0,0 +1,47 @@ +/* + * Double-precision SVE sincospi(x, *y, *z) function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_defs.h" +#include "mathlib.h" +#include "sv_sincospi_common.h" + +/* Double-precision vector function allowing calculation of both sinpi and + cospi in one function call, using shared argument reduction and polynomials. + Worst-case error for sin is 3.09 ULP: + _ZGVsMxvl8l8_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1 + want 0x1.fd54d0b327cf4p-1. + Worst-case error for sin is 3.16 ULP: + _ZGVsMxvl8l8_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1 + want 0x1.fd2da484ff402p-1. + */ +void +_ZGVsMxvl8l8_sincospi (svfloat64_t x, double *out_sin, double *out_cos, + svbool_t pg) +{ + const struct sv_sincospi_data *d = ptr_barrier (&sv_sincospi_data); + + svfloat64x2_t sc = sv_sincospi_inline (pg, x, d); + + svst1 (pg, out_sin, svget2 (sc, 0)); + svst1 (pg, out_cos, svget2 (sc, 1)); +} + +#if WANT_TRIGPI_TESTS +TEST_DISABLE_FENV (_ZGVsMxvl8l8_sincospi_sin) +TEST_DISABLE_FENV (_ZGVsMxvl8l8_sincospi_cos) +TEST_ULP (_ZGVsMxvl8l8_sincospi_sin, 2.59) +TEST_ULP (_ZGVsMxvl8l8_sincospi_cos, 2.66) +# define SV_SINCOSPI_INTERVAL(lo, hi, n) \ + TEST_SYM_INTERVAL (_ZGVsMxvl8l8_sincospi_sin, lo, hi, n) \ + TEST_SYM_INTERVAL (_ZGVsMxvl8l8_sincospi_cos, lo, hi, n) +SV_SINCOSPI_INTERVAL (0, 0x1p-63, 10000) +SV_SINCOSPI_INTERVAL (0x1p-63, 0.5, 50000) +SV_SINCOSPI_INTERVAL (0.5, 0x1p53, 50000) +SV_SINCOSPI_INTERVAL (0x1p53, inf, 10000) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/sincospif.c b/contrib/arm-optimized-routines/math/aarch64/sve/sincospif.c new file mode 100644 index 000000000000..20476f9346e9 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sincospif.c @@ -0,0 +1,46 @@ +/* + * Single-precision SVE sincospi(x, *y, *z) function. + * + * Copyright (c) 2024, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_defs.h" +#include "mathlib.h" +#include "sv_sincospif_common.h" + +/* Single-precision vector function allowing calculation of both sinpi and + cospi in one function call, using shared argument reduction and polynomials. + Worst-case error for sin is 3.04 ULP: + _ZGVsMxvl4l4_sincospif_sin(0x1.b51b8p-2) got 0x1.f28b5ep-1 want + 0x1.f28b58p-1. + Worst-case error for cos is 3.18 ULP: + _ZGVsMxvl4l4_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want + 0x1.f7cd5p-1. */ +void +_ZGVsMxvl4l4_sincospif (svfloat32_t x, float *out_sin, float *out_cos, + svbool_t pg) +{ + const struct sv_sincospif_data *d = ptr_barrier (&sv_sincospif_data); + + svfloat32x2_t sc = sv_sincospif_inline (pg, x, d); + + svst1 (pg, out_sin, svget2 (sc, 0)); + svst1 (pg, out_cos, svget2 (sc, 1)); +} + +#if WANT_TRIGPI_TESTS +TEST_DISABLE_FENV (_ZGVsMxvl4l4_sincospif_sin) +TEST_DISABLE_FENV (_ZGVsMxvl4l4_sincospif_cos) +TEST_ULP (_ZGVsMxvl4l4_sincospif_sin, 2.54) +TEST_ULP (_ZGVsMxvl4l4_sincospif_cos, 2.68) +# define SV_SINCOSPIF_INTERVAL(lo, hi, n) \ + TEST_SYM_INTERVAL (_ZGVsMxvl4l4_sincospif_sin, lo, hi, n) \ + TEST_SYM_INTERVAL (_ZGVsMxvl4l4_sincospif_cos, lo, hi, n) +SV_SINCOSPIF_INTERVAL (0, 0x1p-31, 10000) +SV_SINCOSPIF_INTERVAL (0x1p-31, 0.5, 50000) +SV_SINCOSPIF_INTERVAL (0.5, 0x1p31, 50000) +SV_SINCOSPIF_INTERVAL (0x1p31, inf, 10000) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_sinf_1u9.c b/contrib/arm-optimized-routines/math/aarch64/sve/sinf.c similarity index 89% rename from contrib/arm-optimized-routines/pl/math/sv_sinf_1u9.c rename to contrib/arm-optimized-routines/math/aarch64/sve/sinf.c index 675d7b2480f7..62127194d60f 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_sinf_1u9.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sinf.c @@ -1,93 +1,95 @@ /* * Single-precision SVE sin(x) function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float poly[4]; /* Pi-related values to be loaded as one quad-word and used with svmla_lane. */ float negpi1, negpi2, negpi3, invpi; float shift; } data = { .poly = { /* Non-zero coefficients from the degree 9 Taylor series expansion of sin. */ -0x1.555548p-3f, 0x1.110df4p-7f, -0x1.9f42eap-13f, 0x1.5b2e76p-19f }, .negpi1 = -0x1.921fb6p+1f, .negpi2 = 0x1.777a5cp-24f, .negpi3 = 0x1.ee59dap-49f, .invpi = 0x1.45f306p-2f, .shift = 0x1.8p+23f }; #define RangeVal 0x49800000 /* asuint32 (0x1p20f). */ #define C(i) sv_f32 (d->poly[i]) static svfloat32_t NOINLINE special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) { return sv_call_f32 (sinf, x, y, cmp); } /* A fast SVE implementation of sinf. Maximum error: 1.89 ULPs. This maximum error is achieved at multiple values in [-2^18, 2^18] but one example is: SV_NAME_F1 (sin)(0x1.9247a4p+0) got 0x1.fffff6p-1 want 0x1.fffffap-1. */ svfloat32_t SV_NAME_F1 (sin) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svfloat32_t ax = svabs_x (pg, x); svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (ax)); svbool_t cmp = svcmpge (pg, svreinterpret_u32 (ax), RangeVal); /* pi_vals are a quad-word of helper values - the first 3 elements contain -pi in extended precision, the last contains 1 / pi. 
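
This quad-word trick pairs svld1rq, which replicates four consecutive constants into every 128-bit segment of a vector, with the _lane forms of svmla/svmls, which multiply by one chosen element of that segment. Reduced to its essentials (a standalone sketch with arbitrary constants, not code from this patch):

#include <arm_sve.h>

/* Illustrative: one quad-word load of four constants feeds several
   lane-indexed multiply-accumulates.  */
static svfloat32_t
lane_mla_sketch (svfloat32_t x)
{
  const float consts[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
  svfloat32_t k = svld1rq (svptrue_b32 (), consts);
  svfloat32_t y = svmla_lane (x, x, k, 0);	/* y = x + x*consts[0].  */
  y = svmla_lane (y, x, k, 3);			/* y += x*consts[3].  */
  return y;
}
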
*/ svfloat32_t pi_vals = svld1rq (svptrue_b32 (), &d->negpi1); /* n = rint(|x|/pi). */ svfloat32_t n = svmla_lane (sv_f32 (d->shift), ax, pi_vals, 3); svuint32_t odd = svlsl_x (pg, svreinterpret_u32 (n), 31); n = svsub_x (pg, n, d->shift); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ svfloat32_t r; r = svmla_lane (ax, n, pi_vals, 0); r = svmla_lane (r, n, pi_vals, 1); r = svmla_lane (r, n, pi_vals, 2); /* sin(r) approx using a degree 9 polynomial from the Taylor series expansion. Note that only the odd terms of this are non-zero. */ svfloat32_t r2 = svmul_x (pg, r, r); svfloat32_t y; y = svmla_x (pg, C (2), r2, C (3)); y = svmla_x (pg, C (1), r2, y); y = svmla_x (pg, C (0), r2, y); y = svmla_x (pg, r, r, svmul_x (pg, y, r2)); /* sign = y^sign^odd. */ sign = sveor_x (pg, sign, odd); if (unlikely (svptest_any (pg, cmp))) return special_case (x, svreinterpret_f32 (sveor_x ( svnot_z (pg, cmp), svreinterpret_u32 (y), sign)), cmp); return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign)); } -PL_SIG (SV, F, 1, sin, -3.1, 3.1) -PL_TEST_ULP (SV_NAME_F1 (sin), 1.40) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sin), 0, 0x1p23, 1000000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sin), 0x1p23, inf, 10000) +TEST_SIG (SV, F, 1, sin, -3.1, 3.1) +TEST_ULP (SV_NAME_F1 (sin), 1.40) +TEST_DISABLE_FENV (SV_NAME_F1 (sin)) +TEST_SYM_INTERVAL (SV_NAME_F1 (sin), 0, 0x1p23, 1000000) +TEST_SYM_INTERVAL (SV_NAME_F1 (sin), 0x1p23, inf, 10000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_sinh_3u.c b/contrib/arm-optimized-routines/math/aarch64/sve/sinh.c similarity index 88% rename from contrib/arm-optimized-routines/pl/math/sv_sinh_3u.c rename to contrib/arm-optimized-routines/math/aarch64/sve/sinh.c index a01e19caecda..8a35c1c38525 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_sinh_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sinh.c @@ -1,103 +1,105 @@ /* * Double-precision SVE sinh(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "poly_sve_f64.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "sv_poly_f64.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float64_t poly[11]; float64_t inv_ln2, m_ln2_hi, m_ln2_lo, shift; uint64_t halff; int64_t onef; uint64_t large_bound; } data = { /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */ .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5, 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16, 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22, 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, }, .inv_ln2 = 0x1.71547652b82fep0, .m_ln2_hi = -0x1.62e42fefa39efp-1, .m_ln2_lo = -0x1.abc9e3b39803fp-56, .shift = 0x1.8p52, .halff = 0x3fe0000000000000, .onef = 0x3ff0000000000000, /* 2^9. expm1 helper overflows for large input. */ .large_bound = 0x4080000000000000, }; static inline svfloat64_t expm1_inline (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); /* Reduce argument: exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where i = round(x / ln2) and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */ svfloat64_t j = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift); svint64_t i = svcvt_s64_x (pg, j); svfloat64_t f = svmla_x (pg, x, j, d->m_ln2_hi); f = svmla_x (pg, f, j, d->m_ln2_lo); /* Approximate expm1(f) using polynomial. 
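
The reduction above is easier to see in scalar form. A sketch of the same steps, assuming round-to-nearest and with a truncated Taylor polynomial standing in for the Remez coefficients used here:

#include <math.h>

/* Illustrative scalar analogue of expm1_inline:
   exp(x) - 1 = 2^i * (expm1(f) + 1) - 1, with i = round(x/ln2).  */
static double
expm1_sketch (double x)
{
  double j = round (x * 0x1.71547652b82fep0);	/* x / ln2.  */
  double f = x - j * 0x1.62e42fefa39efp-1;	/* hi part of ln2.  */
  f -= j * 0x1.abc9e3b39803fp-56;		/* lo part of ln2.  */
  double p = f + f * f * (0.5 + f / 6);		/* ~ expm1(f).  */
  double t = ldexp (1.0, (int) j);		/* t = 2^i, exact.  */
  return p * t + (t - 1.0);			/* t*(p + 1) - 1.  */
}
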
*/ svfloat64_t f2 = svmul_x (pg, f, f); svfloat64_t f4 = svmul_x (pg, f2, f2); svfloat64_t f8 = svmul_x (pg, f4, f4); svfloat64_t p = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly)); /* t = 2^i. */ svfloat64_t t = svscale_x (pg, sv_f64 (1), i); /* expm1(x) ~= p * t + (t - 1). */ return svmla_x (pg, svsub_x (pg, t, 1.0), p, t); } static svfloat64_t NOINLINE special_case (svfloat64_t x, svbool_t pg) { return sv_call_f64 (sinh, x, x, pg); } /* Approximation for SVE double-precision sinh(x) using expm1. sinh(x) = (exp(x) - exp(-x)) / 2. The greatest observed error is 2.57 ULP: _ZGVsMxv_sinh (0x1.a008538399931p-2) got 0x1.ab929fc64bd66p-2 want 0x1.ab929fc64bd63p-2. */ svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); svfloat64_t ax = svabs_x (pg, x); svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax)); svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, d->halff)); svbool_t special = svcmpge (pg, svreinterpret_u64 (ax), d->large_bound); /* Fall back to scalar variant for all lanes if any are special. */ if (unlikely (svptest_any (pg, special))) return special_case (x, pg); /* Up to the point that expm1 overflows, we can use it to calculate sinh using a slight rearrangement of the definition of sinh. This allows us to retain acceptable accuracy for very small inputs. */ svfloat64_t t = expm1_inline (ax, pg); t = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0))); return svmul_x (pg, t, halfsign); } -PL_SIG (SV, D, 1, sinh, -10.0, 10.0) -PL_TEST_ULP (SV_NAME_D1 (sinh), 2.08) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0, 0x1p-26, 1000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0x1p9, inf, 1000) +TEST_SIG (SV, D, 1, sinh, -10.0, 10.0) +TEST_ULP (SV_NAME_D1 (sinh), 2.08) +TEST_DISABLE_FENV (SV_NAME_D1 (sinh)) +TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0, 0x1p-26, 1000) +TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000) +TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0x1p9, inf, 1000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_sinhf_2u3.c b/contrib/arm-optimized-routines/math/aarch64/sve/sinhf.c similarity index 78% rename from contrib/arm-optimized-routines/pl/math/sv_sinhf_2u3.c rename to contrib/arm-optimized-routines/math/aarch64/sve/sinhf.c index e34ecf378ad3..82b7ee442780 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_sinhf_2u3.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sinhf.c @@ -1,64 +1,65 @@ /* * Single-precision SVE sinh(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" - +#include "test_sig.h" +#include "test_defs.h" #include "sv_expm1f_inline.h" static const struct data { struct sv_expm1f_data expm1f_consts; uint32_t halff, large_bound; } data = { .expm1f_consts = SV_EXPM1F_DATA, .halff = 0x3f000000, /* 0x1.61814ep+6, above which expm1f helper overflows. */ .large_bound = 0x42b0c0a7, }; static svfloat32_t NOINLINE special_case (svfloat32_t x, svfloat32_t y, svbool_t pg) { return sv_call_f32 (sinhf, x, y, pg); } /* Approximation for SVE single-precision sinh(x) using expm1. sinh(x) = (exp(x) - exp(-x)) / 2. The maximum error is 2.26 ULP: _ZGVsMxv_sinhf (0x1.e34a9ep-4) got 0x1.e469ep-4 want 0x1.e469e4p-4. 
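
The slight rearrangement used below follows from t = expm1(|x|): since e^-|x| = 1/(t + 1), we get e^|x| - e^-|x| = t + t/(t + 1), which stays accurate when t is tiny. As a scalar sketch (valid below the expm1f overflow bound that the special-case path handles):

#include <math.h>

/* Illustrative: sinh(x) = 0.5 * (t + t/(t + 1)) with t = expm1(|x|).  */
static float
sinhf_sketch (float x)
{
  float t = expm1f (fabsf (x));
  return copysignf (0.5f * (t + t / (t + 1.0f)), x);
}
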
*/ svfloat32_t SV_NAME_F1 (sinh) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); svfloat32_t ax = svabs_x (pg, x); svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (ax)); svfloat32_t halfsign = svreinterpret_f32 (svorr_x (pg, sign, d->halff)); svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->large_bound); /* Up to the point that expm1f overflows, we can use it to calculate sinhf using a slight rearrangement of the definition of sinh. This allows us to retain acceptable accuracy for very small inputs. */ svfloat32_t t = expm1f_inline (ax, pg, &d->expm1f_consts); t = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0))); /* Fall back to the scalar variant for any lanes which would cause expm1f to overflow. */ if (unlikely (svptest_any (pg, special))) return special_case (x, svmul_x (pg, t, halfsign), special); - return svmul_x (pg, t, halfsign); + return svmul_x (svptrue_b32 (), t, halfsign); } -PL_SIG (SV, F, 1, sinh, -10.0, 10.0) -PL_TEST_ULP (SV_NAME_F1 (sinh), 1.76) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0, 0x1.6a09e8p-32, 1000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0x1.6a09e8p-32, 0x42b0c0a7, 100000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0x42b0c0a7, inf, 1000) +TEST_SIG (SV, F, 1, sinh, -10.0, 10.0) +TEST_ULP (SV_NAME_F1 (sinh), 1.76) +TEST_DISABLE_FENV (SV_NAME_F1 (sinh)) +TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0, 0x1.6a09e8p-32, 1000) +TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0x1.6a09e8p-32, 0x42b0c0a7, 100000) +TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0x42b0c0a7, inf, 1000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_sinpi_3u1.c b/contrib/arm-optimized-routines/math/aarch64/sve/sinpi.c similarity index 66% rename from contrib/arm-optimized-routines/pl/math/sv_sinpi_3u1.c rename to contrib/arm-optimized-routines/math/aarch64/sve/sinpi.c index c9f23da1b19b..8fad3678b172 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_sinpi_3u1.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sinpi.c @@ -1,57 +1,62 @@ /* * Double-precision SVE sinpi(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "mathlib.h" #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f64.h" +#include "mathlib.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_poly_f64.h" static const struct data { - double poly[10]; + double poly[10], range_val; } data = { /* Polynomial coefficients generated using Remez algorithm, see sinpi.sollya for details. */ .poly = { 0x1.921fb54442d184p1, -0x1.4abbce625be53p2, 0x1.466bc6775ab16p1, -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8, 0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16, 0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 }, + .range_val = 0x1p63, }; /* A fast SVE implementation of sinpi. Maximum error 3.10 ULP: _ZGVsMxv_sinpi(0x1.df1a14f1b235p-2) got 0x1.fd64f541606cp-1 want 0x1.fd64f541606c3p-1. */ svfloat64_t SV_NAME_D1 (sinpi) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); /* range reduction into [-1/2, 1/2] with n = rint(x) and r = x - n. */ svfloat64_t n = svrinta_x (pg, x); svfloat64_t r = svsub_x (pg, x, n); /* Result should be negated based on whether n is odd. */
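
The negation is carried out entirely in the sign bit: converting n to an integer and shifting its low bit up to bit 63 yields a mask that XOR applies as a conditional negation. In scalar terms (for |x| below the new 2^63 range_val bound, where the conversion is exact):

#include <math.h>
#include <stdint.h>
#include <string.h>

/* Illustrative: negate y ~ sin(pi*r) exactly when n = round(x) is odd.  */
static double
apply_sinpi_sign (double y, double n)
{
  uint64_t sign = (uint64_t) (int64_t) n << 63;	/* odd n -> bit 63 set.  */
  uint64_t yi;
  memcpy (&yi, &y, sizeof yi);
  yi ^= sign;					/* flip sign bit if odd.  */
  memcpy (&y, &yi, sizeof y);
  return y;
}
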
- svuint64_t intn = svreinterpret_u64 (svcvt_s64_x (pg, n)); - svuint64_t sign = svlsl_z (pg, intn, 63); + svbool_t cmp = svaclt (pg, x, d->range_val); + svuint64_t intn = svreinterpret_u64 (svcvt_s64_z (pg, n)); + svuint64_t sign = svlsl_z (cmp, intn, 63); /* y = sin(r). */ svfloat64_t r2 = svmul_x (pg, r, r); svfloat64_t r4 = svmul_x (pg, r2, r2); svfloat64_t y = sv_pw_horner_9_f64_x (pg, r2, r4, d->poly); y = svmul_x (pg, y, r); return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)); } -PL_SIG (SV, D, 1, sinpi, -0.9, 0.9) -PL_TEST_ULP (SV_NAME_D1 (sinpi), 2.61) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0, 0x1p-63, 5000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0x1p-63, 0.5, 10000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0.5, 0x1p51, 10000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0x1p51, inf, 10000) +#if WANT_TRIGPI_TESTS +TEST_ULP (SV_NAME_D1 (sinpi), 2.61) +TEST_DISABLE_FENV (SV_NAME_D1 (sinpi)) +TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0, 0x1p-63, 5000) +TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0x1p-63, 0.5, 10000) +TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0.5, 0x1p51, 10000) +TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0x1p51, inf, 10000) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_sinpif_2u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/sinpif.c similarity index 61% rename from contrib/arm-optimized-routines/pl/math/sv_sinpif_2u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/sinpif.c index ac3f924bed68..b91768a29cb6 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_sinpif_2u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sinpif.c @@ -1,53 +1,58 @@ /* * Single-precision SVE sinpi(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "mathlib.h" #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f32.h" +#include "mathlib.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_poly_f32.h" static const struct data { - float poly[6]; + float poly[6], range_val; } data = { /* Taylor series coefficients for sin(pi * x). */ .poly = { 0x1.921fb6p1f, -0x1.4abbcep2f, 0x1.466bc6p1f, -0x1.32d2ccp-1f, 0x1.50783p-4f, -0x1.e30750p-8f }, + .range_val = 0x1p31, }; /* A fast SVE implementation of sinpif. Maximum error 2.48 ULP: _ZGVsMxv_sinpif(0x1.d062b6p-2) got 0x1.fa8c06p-1 want 0x1.fa8c02p-1. */ svfloat32_t SV_NAME_F1 (sinpi) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); /* range reduction into [-1/2, 1/2] with n = rint(x) and r = x - n. */ svfloat32_t n = svrinta_x (pg, x); svfloat32_t r = svsub_x (pg, x, n); /* Result should be negated based on whether n is odd. */ - svuint32_t intn = svreinterpret_u32 (svcvt_s32_x (pg, n)); - svuint32_t sign = svlsl_z (pg, intn, 31); + svbool_t cmp = svaclt (pg, x, d->range_val); + svuint32_t intn = svreinterpret_u32 (svcvt_s32_z (pg, n)); + svuint32_t sign = svlsl_z (cmp, intn, 31); /* y = sin(r).
*/ svfloat32_t r2 = svmul_x (pg, r, r); svfloat32_t y = sv_horner_5_f32_x (pg, r2, d->poly); y = svmul_x (pg, y, r); return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign)); } -PL_SIG (SV, F, 1, sinpi, -0.9, 0.9) -PL_TEST_ULP (SV_NAME_F1 (sinpi), 1.99) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0, 0x1p-31, 5000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0x1p-31, 0.5, 10000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0.5, 0x1p22f, 10000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0x1p22f, inf, 10000) +#if WANT_TRIGPI_TESTS +TEST_ULP (SV_NAME_F1 (sinpi), 1.99) +TEST_DISABLE_FENV (SV_NAME_F1 (sinpi)) +TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0, 0x1p-31, 5000) +TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0x1p-31, 0.5, 10000) +TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0.5, 0x1p22f, 10000) +TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0x1p22f, inf, 10000) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/sv_expf_inline.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_expf_inline.h new file mode 100644 index 000000000000..6054e65bb202 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_expf_inline.h @@ -0,0 +1,66 @@ +/* + * SVE helper for single-precision routines which calculate exp(x) and do + * not need special-case handling + * + * Copyright (c) 2023-2025, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#ifndef MATH_SV_EXPF_INLINE_H +#define MATH_SV_EXPF_INLINE_H + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +struct sv_expf_data +{ + float c1, c3, inv_ln2; + float ln2_lo, c0, c2, c4; + float ln2_hi, shift; +}; + +/* Coefficients copied from the polynomial in AdvSIMD variant, reversed for + compatibility with polynomial helpers. Shift is 1.5*2^17 + 127. */ +#define SV_EXPF_DATA \ + { \ + /* Coefficients copied from the polynomial in AdvSIMD variant. */ \ + .c0 = 0x1.ffffecp-1f, .c1 = 0x1.fffdb6p-2f, .c2 = 0x1.555e66p-3f, \ + .c3 = 0x1.573e2ep-5f, .c4 = 0x1.0e4020p-7f, .inv_ln2 = 0x1.715476p+0f, \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ + .shift = 0x1.803f8p17f, \ + } + +#define C(i) sv_f32 (d->poly[i]) + +static inline svfloat32_t +expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) +{ + /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ + + svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_lo); + + /* n = round(x/(ln2/N)). */ + svfloat32_t z = svmad_x (pg, sv_f32 (d->inv_ln2), x, d->shift); + svfloat32_t n = svsub_x (pg, z, d->shift); + + /* r = x - n*ln2/N. */ + svfloat32_t r = svmsb_x (pg, sv_f32 (d->ln2_hi), n, x); + r = svmls_lane (r, n, lane_consts, 0); + + /* scale = 2^(n/N). */ + svfloat32_t scale = svexpa (svreinterpret_u32 (z)); + + /* poly(r) = exp(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4 + C4 r^5. 
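
Two tricks above deserve a note: adding the large shift constant forces x/ln2 to round into the low mantissa bits of z, and svexpa then builds the 2^(n/64) scale directly from those bits. The routine's actual shift, 1.5*2^17 + 127, additionally folds in the bias that FEXPA expects; the sketch below shows only the rounding half of the trick, assuming the default round-to-nearest mode:

/* Illustrative: adding and subtracting a large shift snaps v to the
   nearest integer (ties-to-even) for |v| well below 2^23.  */
static float
shift_round (float v)
{
  volatile float z = v + 0x1.8p23f;	/* 1.5 * 2^23 forces rounding.  */
  return z - 0x1.8p23f;
}
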
*/ + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t p14 = svmla_x (pg, p12, p34, r2); + svfloat32_t p0 = svmul_lane (r, lane_consts, 1); + svfloat32_t poly = svmla_x (pg, p0, r2, p14); + + return svmla_x (pg, scale, scale, poly); +} + +#endif // MATH_SV_EXPF_INLINE_H diff --git a/contrib/arm-optimized-routines/pl/math/sv_expm1f_inline.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_expm1f_inline.h similarity index 65% rename from contrib/arm-optimized-routines/pl/math/sv_expm1f_inline.h rename to contrib/arm-optimized-routines/math/aarch64/sve/sv_expm1f_inline.h index a6e2050ff4a6..35892f519690 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_expm1f_inline.h +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_expm1f_inline.h @@ -1,73 +1,69 @@ /* * SVE helper for single-precision routines which calculate exp(x) - 1 and do * not need special-case handling * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#ifndef PL_MATH_SV_EXPM1F_INLINE_H -#define PL_MATH_SV_EXPM1F_INLINE_H +#ifndef MATH_SV_EXPM1F_INLINE_H +#define MATH_SV_EXPM1F_INLINE_H #include "sv_math.h" struct sv_expm1f_data { /* These 4 are grouped together so they can be loaded as one quadword, then used with _lane forms of svmla/svmls. */ float32_t c2, c4, ln2_hi, ln2_lo; - float32_t c0, c1, c3, inv_ln2, shift; + float c0, inv_ln2, c1, c3, special_bound; }; /* Coefficients generated using fpminimax. */ #define SV_EXPM1F_DATA \ { \ - .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .c2 = 0x1.555736p-5, \ - .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \ + .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .inv_ln2 = 0x1.715476p+0f, \ + .c2 = 0x1.555736p-5, .c3 = 0x1.12287cp-7, \ \ - .shift = 0x1.8p23f, .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \ - .ln2_lo = 0x1.7f7d1cp-20f, \ + .c4 = 0x1.6b55a2p-10, .ln2_lo = 0x1.7f7d1cp-20f, .ln2_hi = 0x1.62e4p-1f, \ } -#define C(i) sv_f32 (d->c##i) - static inline svfloat32_t expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d) { /* This vector is reliant on layout of data - it contains constants that can be used with _lane forms of svmla/svmls. Values are: [ coeff_2, coeff_4, ln2_hi, ln2_lo ]. */ svfloat32_t lane_constants = svld1rq (svptrue_b32 (), &d->c2); /* Reduce argument to smaller range: Let i = round(x / ln2) and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where 2^i is exact because i is an integer. */ - svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2); - j = svsub_x (pg, j, d->shift); - svint32_t i = svcvt_s32_x (pg, j); + svfloat32_t j = svmul_x (svptrue_b32 (), x, d->inv_ln2); + j = svrinta_x (pg, j); svfloat32_t f = svmls_lane (x, j, lane_constants, 2); f = svmls_lane (f, j, lane_constants, 3); /* Approximate expm1(f) using polynomial. Taylor expansion for expm1(x) has the form: x + ax^2 + bx^3 + cx^4 .... So we calculate the polynomial P(f) = a + bf + cf^2 + ... and assemble the approximation expm1(f) ~= f + f^2 * P(f). 
*/ - svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0); - svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1); - svfloat32_t f2 = svmul_x (pg, f, f); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), f, lane_constants, 0); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), f, lane_constants, 1); + svfloat32_t f2 = svmul_x (svptrue_b32 (), f, f); svfloat32_t p = svmla_x (pg, p12, f2, p34); - p = svmla_x (pg, C (0), f, p); + p = svmla_x (pg, sv_f32 (d->c0), f, p); p = svmla_x (pg, f, f2, p); /* Assemble the result. expm1(x) ~= 2^i * (p + 1) - 1 Let t = 2^i. */ - svfloat32_t t = svscale_x (pg, sv_f32 (1), i); - return svmla_x (pg, svsub_x (pg, t, 1), p, t); + svfloat32_t t = svscale_x (pg, sv_f32 (1.0f), svcvt_s32_x (pg, j)); + return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t); } -#endif // PL_MATH_SV_EXPM1F_INLINE_H \ No newline at end of file +#endif // MATH_SV_EXPM1F_INLINE_H diff --git a/contrib/arm-optimized-routines/pl/math/sv_log1p_inline.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_log1p_inline.h similarity index 90% rename from contrib/arm-optimized-routines/pl/math/sv_log1p_inline.h rename to contrib/arm-optimized-routines/math/aarch64/sve/sv_log1p_inline.h index 983f8e1b0413..86a5bb1456f6 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_log1p_inline.h +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_log1p_inline.h @@ -1,96 +1,96 @@ /* * Helper for SVE double-precision routines which calculate log(1 + x) and do * not need special-case handling * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#ifndef PL_MATH_SV_LOG1P_INLINE_H -#define PL_MATH_SV_LOG1P_INLINE_H +#ifndef MATH_SV_LOG1P_INLINE_H +#define MATH_SV_LOG1P_INLINE_H #include "sv_math.h" -#include "poly_sve_f64.h" +#include "sv_poly_f64.h" static const struct sv_log1p_data { double poly[19], ln2[2]; uint64_t hf_rt2_top; uint64_t one_m_hf_rt2_top; uint32_t bottom_mask; int64_t one_top; } sv_log1p_data = { /* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. */ .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2, 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3, -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4, 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4, -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5, 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4, -0x1.cfa7385bdb37ep-6 }, .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 }, .hf_rt2_top = 0x3fe6a09e00000000, .one_m_hf_rt2_top = 0x00095f6200000000, .bottom_mask = 0xffffffff, .one_top = 0x3ff }; static inline svfloat64_t sv_log1p_inline (svfloat64_t x, const svbool_t pg) { /* Helper for calculating log(x + 1). Adapted from v_log1p_inline.h, which differs from v_log1p_2u5.c by: - No special-case handling - this should be dealt with by the caller. - Pairwise Horner polynomial evaluation for improved accuracy. - Optionally simulate the shortcut for k=0, used in the scalar routine, using svsel, for improved accuracy when the argument to log1p is close to 0. This feature is enabled by defining WANT_SV_LOG1P_K0_SHORTCUT as 1 in the source of the caller before including this file. See sv_log1p_2u1.c for details of the algorithm. 
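A scalar model of this decomposition shows where the correction term comes from: m = 1 + x loses low bits of x to rounding, c = x - (m - 1) recovers them, and c/m converts the loss into an additive fix-up for the logarithm (log1p_model is an illustrative name; log(f) stands in for the polynomial):

#include <math.h>

/* Scalar model of sv_log1p_inline: 1 + x = 2^k * f, f in [sqrt(2)/2, sqrt(2)),
   log1p(x) ~= k*ln2 + log(f) + c/m with c = x - (m - 1).  */
static double
log1p_model (double x)
{
  double m = 1.0 + x;
  double c = x - (m - 1.0);       /* Rounding error of the addition.  */
  int k;
  double f = frexp (m, &k);       /* m = f * 2^k, f in [0.5, 1).  */
  if (f < 0x1.6a09e667f3bcdp-1)   /* Renormalise into [sqrt2/2, sqrt2).  */
    {
      f *= 2.0;
      k--;
    }
  return k * 0x1.62e42fefa39efp-1 + log (f) + c / m;
}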
*/ const struct sv_log1p_data *d = ptr_barrier (&sv_log1p_data); svfloat64_t m = svadd_x (pg, x, 1); svuint64_t mi = svreinterpret_u64 (m); svuint64_t u = svadd_x (pg, mi, d->one_m_hf_rt2_top); svint64_t ki = svsub_x (pg, svreinterpret_s64 (svlsr_x (pg, u, 52)), d->one_top); svfloat64_t k = svcvt_f64_x (pg, ki); /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ svuint64_t utop = svadd_x (pg, svand_x (pg, u, 0x000fffff00000000), d->hf_rt2_top); svuint64_t u_red = svorr_x (pg, utop, svand_x (pg, mi, d->bottom_mask)); svfloat64_t f = svsub_x (pg, svreinterpret_f64 (u_red), 1); /* Correction term c/m. */ svfloat64_t c = svsub_x (pg, x, svsub_x (pg, m, 1)); svfloat64_t cm; #ifndef WANT_SV_LOG1P_K0_SHORTCUT -#error \ - "Cannot use sv_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0" +# error \ + "Cannot use sv_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0" #elif WANT_SV_LOG1P_K0_SHORTCUT /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is that the approximation is solely the polynomial. */ svbool_t knot0 = svcmpne (pg, k, 0); cm = svdiv_z (knot0, c, m); if (likely (!svptest_any (pg, knot0))) { f = svsel (knot0, f, x); } #else /* No shortcut. */ cm = svdiv_x (pg, c, m); #endif /* Approximate log1p(f) on the reduced input using a polynomial. */ svfloat64_t f2 = svmul_x (pg, f, f); svfloat64_t p = sv_pw_horner_18_f64_x (pg, f, f2, d->poly); /* Assemble log1p(x) = k * log2 + log1p(f) + c/m. */ svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2[0]); svfloat64_t yhi = svmla_x (pg, f, k, d->ln2[1]); return svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p); } -#endif // PL_MATH_SV_LOG1P_INLINE_H +#endif // MATH_SV_LOG1P_INLINE_H diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/sv_log1pf_inline.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_log1pf_inline.h new file mode 100644 index 000000000000..238079c61a5b --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_log1pf_inline.h @@ -0,0 +1,83 @@ +/* + * Helper for SVE routines which calculate log(1 + x) and do not + * need special-case handling + * + * Copyright (c) 2023-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#ifndef MATH_SV_LOG1PF_INLINE_H +#define MATH_SV_LOG1PF_INLINE_H + +#define SignExponentMask 0xff800000 + +static const struct sv_log1pf_data +{ + float c0, c2, c4, c6; + float c1, c3, c5, c7; + float ln2, exp_bias, quarter; + uint32_t four, three_quarters; +} sv_log1pf_data = { + /* Do not store first term of polynomial, which is -0.5, as + this can be fmov-ed directly instead of including it in + the main load-and-mla polynomial schedule. */ + .c0 = 0x1.5555aap-2f, .c1 = -0x1.000038p-2f, .c2 = 0x1.99675cp-3f, + .c3 = -0x1.54ef78p-3f, .c4 = 0x1.28a1f4p-3f, .c5 = -0x1.0da91p-3f, + .c6 = 0x1.abcb6p-4f, .c7 = -0x1.6f0d5ep-5f, .ln2 = 0x1.62e43p-1f, + .exp_bias = 0x1p-23f, .quarter = 0x1p-2f, .four = 0x40800000, + .three_quarters = 0x3f400000, +}; + +static inline svfloat32_t +sv_log1pf_inline (svfloat32_t x, svbool_t pg) +{ + const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data); + + /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m + is in [-0.25, 0.5]): + log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). + + We approximate log1p(m) with a polynomial, then scale by + k*log(2). 
Instead of doing this directly, we use an intermediate + scale factor s = 4*k*log(2) to ensure the scale is representable + as a normalised fp32 number. */ + svfloat32_t m = svadd_x (pg, x, 1); + + /* Choose k to scale x to the range [-1/4, 1/2]. */ + svint32_t k + = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters), + sv_s32 (SignExponentMask)); + + /* Scale x by exponent manipulation. */ + svfloat32_t m_scale = svreinterpret_f32 ( + svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k))); + + /* Scale up to ensure that the scale factor is representable as a normalised + fp32 number, and scale m down accordingly. */ + svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four)); + svfloat32_t fconst = svld1rq_f32 (svptrue_b32 (), &d->ln2); + m_scale = svadd_x (pg, m_scale, svmla_lane_f32 (sv_f32 (-1), s, fconst, 2)); + + /* Evaluate polynomial on reduced interval. */ + svfloat32_t ms2 = svmul_x (svptrue_b32 (), m_scale, m_scale); + + svfloat32_t c1357 = svld1rq_f32 (svptrue_b32 (), &d->c1); + svfloat32_t p01 = svmla_lane_f32 (sv_f32 (d->c0), m_scale, c1357, 0); + svfloat32_t p23 = svmla_lane_f32 (sv_f32 (d->c2), m_scale, c1357, 1); + svfloat32_t p45 = svmla_lane_f32 (sv_f32 (d->c4), m_scale, c1357, 2); + svfloat32_t p67 = svmla_lane_f32 (sv_f32 (d->c6), m_scale, c1357, 3); + + svfloat32_t p = svmla_x (pg, p45, p67, ms2); + p = svmla_x (pg, p23, p, ms2); + p = svmla_x (pg, p01, p, ms2); + + p = svmad_x (pg, m_scale, p, -0.5); + p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p)); + + /* The scale factor to be applied back at the end - by multiplying float(k) + by 2^-23 we get the unbiased exponent of k. */ + svfloat32_t scale_back = svmul_lane_f32 (svcvt_f32_x (pg, k), fconst, 1); + return svmla_lane_f32 (p, scale_back, fconst, 0); +} + +#endif // MATH_SV_LOG1PF_INLINE_H diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/sv_log_inline.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_log_inline.h new file mode 100644 index 000000000000..a1b169a0b727 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_log_inline.h @@ -0,0 +1,83 @@ +/* + * Double-precision vector log(x) function - inline version + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "math_config.h" + +#ifndef SV_LOG_INLINE_POLY_ORDER +# error Cannot use inline log helper without specifying poly order (options are 4 or 5) +#endif + +#if SV_LOG_INLINE_POLY_ORDER == 4 +# define POLY \ + { \ + -0x1.ffffffffcbad3p-2, 0x1.555555578ed68p-2, -0x1.0000d3a1e7055p-2, \ + 0x1.999392d02a63ep-3 \ + } +#elif SV_LOG_INLINE_POLY_ORDER == 5 +# define POLY \ + { \ + -0x1.ffffffffffff7p-2, 0x1.55555555170d4p-2, -0x1.0000000399c27p-2, \ + 0x1.999b2e90e94cap-3, -0x1.554e550bd501ep-3 \ + } +#else +# error Can only choose order 4 or 5 for log poly +#endif + +struct sv_log_inline_data +{ + double poly[SV_LOG_INLINE_POLY_ORDER]; + double ln2; + uint64_t off, sign_exp_mask; +}; + +#define SV_LOG_CONSTANTS \ + { \ + .poly = POLY, .ln2 = 0x1.62e42fefa39efp-1, \ + .sign_exp_mask = 0xfff0000000000000, .off = 0x3fe6900900000000 \ + } + +#define P(i) sv_f64 (d->poly[i]) +#define N (1 << V_LOG_TABLE_BITS) + +static inline svfloat64_t +sv_log_inline (svbool_t pg, svfloat64_t x, const struct sv_log_inline_data *d) +{ + svuint64_t ix = svreinterpret_u64 (x); + + /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */ svuint64_t tmp = svsub_x (pg, ix, d->off); /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N. The actual value of i is double this due to table layout. */ svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1); svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52); /* Arithmetic shift. */ svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)); svfloat64_t z = svreinterpret_f64 (iz); /* Lookup in 2 global lists (length N). */ svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i); svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i); /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ svfloat64_t r = svmad_x (pg, invc, z, -1); svfloat64_t kd = svcvt_f64_x (pg, k); /* hi = r + log(c) + k*Ln2. */ svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, __v_log_data.ln2); /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ svfloat64_t r2 = svmul_x (pg, r, r); svfloat64_t y = svmla_x (pg, P (2), r, P (3)); svfloat64_t p = svmla_x (pg, P (0), r, P (1)); +#if SV_LOG_INLINE_POLY_ORDER == 5 + y = svmla_x (pg, y, r2, P (4)); +#endif + y = svmla_x (pg, p, r2, y); + return svmla_x (pg, hi, r2, y); +} diff --git a/contrib/arm-optimized-routines/pl/math/sv_math.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_math.h similarity index 72% rename from contrib/arm-optimized-routines/pl/math/sv_math.h rename to contrib/arm-optimized-routines/math/aarch64/sve/sv_math.h index f67fe91803ba..db688a893032 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_math.h +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_math.h @@ -1,133 +1,145 @@ /* * Wrapper functions for SVE ACLE. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef SV_MATH_H #define SV_MATH_H -#ifndef WANT_VMATH -/* Enable the build of vector math code. */ -# define WANT_VMATH 1 +/* Enable SVE in this translation unit. Note, because this is 'pushed' in + clang, any file including sv_math.h will have to pop it back off again by + ending the source file with CLOSE_SVE_ATTR. It is important that sv_math.h + is included first so that all functions have the target attribute. */ +#ifdef __clang__ +# pragma clang attribute push(__attribute__((target("sve"))), \ + apply_to = any(function)) +# define CLOSE_SVE_ATTR _Pragma("clang attribute pop") +#else +# pragma GCC target("+sve") +# define CLOSE_SVE_ATTR #endif -#if WANT_VMATH +#include +#include -# include -# include +#include "math_config.h" -# include "math_config.h" +#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f +#define SV_NAME_D1(fun) _ZGVsMxv_##fun +#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f +#define SV_NAME_D2(fun) _ZGVsMxvv_##fun +#define SV_NAME_F1_L1(fun) _ZGVsMxvl4_##fun##f +#define SV_NAME_D1_L1(fun) _ZGVsMxvl8_##fun +#define SV_NAME_F1_L2(fun) _ZGVsMxvl4l4_##fun##f /* Double precision.
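These SV_NAME_* macros produce AArch64 vector-function-ABI symbols: in _ZGVsMxv_tan, _ZGV introduces a vector ABI name, s selects SVE, M marks the masked (predicated) variant, x means vector-length agnostic, and each v is one vector argument; l4/l8 denote linear pointer arguments with the given byte stride, as in the _ZGVsMxvl8l8_sincospi entry points referenced by the tests. For illustration only (declarations assume arm_sve.h types):

/* What the macros expand to.  */
svfloat64_t _ZGVsMxv_tan (svfloat64_t x, svbool_t pg);  /* SV_NAME_D1 (tan) */
svfloat32_t _ZGVsMxv_tanf (svfloat32_t x, svbool_t pg); /* SV_NAME_F1 (tan) */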
*/ static inline svint64_t sv_s64 (int64_t x) { return svdup_s64 (x); } static inline svuint64_t sv_u64 (uint64_t x) { return svdup_u64 (x); } static inline svfloat64_t sv_f64 (double x) { return svdup_f64 (x); } static inline svfloat64_t sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp) { svbool_t p = svpfirst (cmp, svpfalse ()); while (svptest_any (cmp, p)) { double elem = svclastb (p, 0, x); elem = (*f) (elem); svfloat64_t y2 = sv_f64 (elem); y = svsel (p, y2, y); p = svpnext_b64 (cmp, p); } return y; } static inline svfloat64_t sv_call2_f64 (double (*f) (double, double), svfloat64_t x1, svfloat64_t x2, svfloat64_t y, svbool_t cmp) { svbool_t p = svpfirst (cmp, svpfalse ()); while (svptest_any (cmp, p)) { double elem1 = svclastb (p, 0, x1); double elem2 = svclastb (p, 0, x2); double ret = (*f) (elem1, elem2); svfloat64_t y2 = sv_f64 (ret); y = svsel (p, y2, y); p = svpnext_b64 (cmp, p); } return y; } static inline svuint64_t sv_mod_n_u64_x (svbool_t pg, svuint64_t x, uint64_t y) { svuint64_t q = svdiv_x (pg, x, y); return svmls_x (pg, x, q, y); } /* Single precision. */ static inline svint32_t sv_s32 (int32_t x) { return svdup_s32 (x); } static inline svuint32_t sv_u32 (uint32_t x) { return svdup_u32 (x); } static inline svfloat32_t sv_f32 (float x) { return svdup_f32 (x); } static inline svfloat32_t sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp) { svbool_t p = svpfirst (cmp, svpfalse ()); while (svptest_any (cmp, p)) { float elem = svclastb (p, 0, x); elem = (*f) (elem); svfloat32_t y2 = sv_f32 (elem); y = svsel (p, y2, y); p = svpnext_b32 (cmp, p); } return y; } static inline svfloat32_t sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp) { svbool_t p = svpfirst (cmp, svpfalse ()); while (svptest_any (cmp, p)) { float elem1 = svclastb (p, 0, x1); float elem2 = svclastb (p, 0, x2); float ret = (*f) (elem1, elem2); svfloat32_t y2 = sv_f32 (ret); y = svsel (p, y2, y); p = svpnext_b32 (cmp, p); } return y; } #endif - -#endif diff --git a/contrib/arm-optimized-routines/pl/math/poly_sve_f32.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_poly_f32.h similarity index 78% rename from contrib/arm-optimized-routines/pl/math/poly_sve_f32.h rename to contrib/arm-optimized-routines/math/aarch64/sve/sv_poly_f32.h index a97e2ced027a..2d73014a4b45 100644 --- a/contrib/arm-optimized-routines/pl/math/poly_sve_f32.h +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_poly_f32.h @@ -1,26 +1,26 @@ /* * Helpers for evaluating polynomials on single-precision SVE input, using * various schemes. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#ifndef PL_MATH_POLY_SVE_F32_H -#define PL_MATH_POLY_SVE_F32_H +#ifndef MATH_POLY_SVE_F32_H +#define MATH_POLY_SVE_F32_H #include /* Wrap SVE f32 helpers: evaluation of some scheme/order has form: sv_[scheme]_[order]_f32_x. 
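The sv_call_* helpers defined above serialise special-case lanes: svpfirst picks the first active lane, svclastb extracts that element, the scalar routine recomputes it, and svpnext_b32/b64 advances to the next active lane. A usage sketch for a routine's slow path (finish_with_fallback and the use of sinf here are illustrative, assuming sv_math.h is on the include path):

#include <math.h>
#include "sv_math.h"

/* Recompute only the lanes flagged in 'special' with the scalar libm
   routine; all other lanes keep the fast-path result.  */
static inline svfloat32_t
finish_with_fallback (svfloat32_t x, svfloat32_t fast, svbool_t special)
{
  return sv_call_f32 (sinf, x, fast, special);
}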
*/ #define VTYPE svfloat32_t #define STYPE float #define VWRAP(f) sv_##f##_f32_x #define DUP svdup_f32 -#include "poly_sve_generic.h" +#include "sv_poly_generic.h" #undef DUP #undef VWRAP #undef STYPE #undef VTYPE #endif diff --git a/contrib/arm-optimized-routines/pl/math/poly_sve_f64.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_poly_f64.h similarity index 78% rename from contrib/arm-optimized-routines/pl/math/poly_sve_f64.h rename to contrib/arm-optimized-routines/math/aarch64/sve/sv_poly_f64.h index 5fb14b3c1700..f92be9bf8e9c 100644 --- a/contrib/arm-optimized-routines/pl/math/poly_sve_f64.h +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_poly_f64.h @@ -1,26 +1,26 @@ /* * Helpers for evaluating polynomials on double-precision SVE input, using * various schemes. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#ifndef PL_MATH_POLY_SVE_F64_H -#define PL_MATH_POLY_SVE_F64_H +#ifndef MATH_POLY_SVE_F64_H +#define MATH_POLY_SVE_F64_H #include /* Wrap SVE f64 helpers: evaluation of some scheme/order has form: sv_[scheme]_[order]_f64_x. */ #define VTYPE svfloat64_t #define STYPE double #define VWRAP(f) sv_##f##_f64_x #define DUP svdup_f64 -#include "poly_sve_generic.h" +#include "sv_poly_generic.h" #undef DUP #undef VWRAP #undef STYPE #undef VTYPE #endif diff --git a/contrib/arm-optimized-routines/pl/math/poly_sve_generic.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_poly_generic.h similarity index 91% rename from contrib/arm-optimized-routines/pl/math/poly_sve_generic.h rename to contrib/arm-optimized-routines/math/aarch64/sve/sv_poly_generic.h index b568e4cddff3..a1fc59baa8d3 100644 --- a/contrib/arm-optimized-routines/pl/math/poly_sve_generic.h +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_poly_generic.h @@ -1,301 +1,331 @@ /* * Helpers for evaluating polynomials with various schemes - specific to SVE * but precision-agnostic. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef VTYPE # error Cannot use poly_generic without defining VTYPE #endif #ifndef STYPE # error Cannot use poly_generic without defining STYPE #endif #ifndef VWRAP # error Cannot use poly_generic without defining VWRAP #endif #ifndef DUP # error Cannot use poly_generic without defining DUP #endif static inline VTYPE VWRAP (pairwise_poly_3) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { /* At order 3, Estrin and Pairwise Horner are identical. 
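In scalar terms the scheme is (p0 + p1*x) + (p2 + p3*x)*x2: the two inner fused multiply-adds are independent, so the dependency chain is half that of plain Horner. A scalar model (illustrative only):

/* Order-3 pairwise Horner / Estrin: both reduce to the same three FMAs.  */
static double
pairwise_poly_3_model (double x, double x2, const double *poly)
{
  double p01 = poly[0] + poly[1] * x;
  double p23 = poly[2] + poly[3] * x;
  return p01 + p23 * x2;
}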
*/ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); VTYPE p23 = svmla_x (pg, DUP (poly[2]), x, poly[3]); return svmla_x (pg, p01, p23, x2); } static inline VTYPE VWRAP (estrin_4) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, const STYPE *poly) { VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly); return svmla_x (pg, p03, x4, poly[4]); } static inline VTYPE VWRAP (estrin_5) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, const STYPE *poly) { VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly); VTYPE p45 = svmla_x (pg, DUP (poly[4]), x, poly[5]); return svmla_x (pg, p03, p45, x4); } static inline VTYPE VWRAP (estrin_6) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, const STYPE *poly) { VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly); VTYPE p45 = svmla_x (pg, DUP (poly[4]), x, poly[5]); VTYPE p46 = svmla_x (pg, p45, x, poly[6]); return svmla_x (pg, p03, p46, x4); } static inline VTYPE VWRAP (estrin_7) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, const STYPE *poly) { VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly); VTYPE p47 = VWRAP (pairwise_poly_3) (pg, x, x2, poly + 4); return svmla_x (pg, p03, p47, x4); } static inline VTYPE VWRAP (estrin_8) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const STYPE *poly) { return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), x8, poly[8]); } static inline VTYPE VWRAP (estrin_9) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const STYPE *poly) { VTYPE p89 = svmla_x (pg, DUP (poly[8]), x, poly[9]); return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), p89, x8); } static inline VTYPE VWRAP (estrin_10) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const STYPE *poly) { VTYPE p89 = svmla_x (pg, DUP (poly[8]), x, poly[9]); VTYPE p8_10 = svmla_x (pg, p89, x2, poly[10]); return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), p8_10, x8); } static inline VTYPE VWRAP (estrin_11) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const STYPE *poly) { VTYPE p8_11 = VWRAP (pairwise_poly_3) (pg, x, x2, poly + 8); return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), p8_11, x8); } static inline VTYPE VWRAP (estrin_12) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const STYPE *poly) { return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), VWRAP (estrin_4) (pg, x, x2, x4, poly + 8), x8); } static inline VTYPE VWRAP (estrin_13) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const STYPE *poly) { return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), VWRAP (estrin_5) (pg, x, x2, x4, poly + 8), x8); } static inline VTYPE VWRAP (estrin_14) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const STYPE *poly) { return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), VWRAP (estrin_6) (pg, x, x2, x4, poly + 8), x8); } static inline VTYPE VWRAP (estrin_15) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const STYPE *poly) { return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), VWRAP (estrin_7) (pg, x, x2, x4, poly + 8), x8); } static inline VTYPE VWRAP (estrin_16) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, VTYPE x16, const STYPE *poly) { return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly), x16, poly[16]); } static inline VTYPE VWRAP (estrin_17) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, VTYPE x16, const STYPE *poly) { VTYPE p16_17 = svmla_x (pg, DUP (poly[16]), x, poly[17]); return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly), p16_17, x16); } static inline VTYPE VWRAP (estrin_18) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, VTYPE x16, const 
STYPE *poly) { VTYPE p16_17 = svmla_x (pg, DUP (poly[16]), x, poly[17]); VTYPE p16_18 = svmla_x (pg, p16_17, x2, poly[18]); return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly), p16_18, x16); } static inline VTYPE VWRAP (estrin_19) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, VTYPE x16, const STYPE *poly) { return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly), VWRAP (pairwise_poly_3) (pg, x, x2, poly + 16), x16); } static inline VTYPE VWRAP (horner_3) (svbool_t pg, VTYPE x, const STYPE *poly) { VTYPE p = svmla_x (pg, DUP (poly[2]), x, poly[3]); p = svmad_x (pg, x, p, poly[1]); p = svmad_x (pg, x, p, poly[0]); return p; } static inline VTYPE VWRAP (horner_4) (svbool_t pg, VTYPE x, const STYPE *poly) { VTYPE p = svmla_x (pg, DUP (poly[3]), x, poly[4]); p = svmad_x (pg, x, p, poly[2]); p = svmad_x (pg, x, p, poly[1]); p = svmad_x (pg, x, p, poly[0]); return p; } static inline VTYPE VWRAP (horner_5) (svbool_t pg, VTYPE x, const STYPE *poly) { return svmad_x (pg, x, VWRAP (horner_4) (pg, x, poly + 1), poly[0]); } static inline VTYPE VWRAP (horner_6) (svbool_t pg, VTYPE x, const STYPE *poly) { return svmad_x (pg, x, VWRAP (horner_5) (pg, x, poly + 1), poly[0]); } static inline VTYPE VWRAP (horner_7) (svbool_t pg, VTYPE x, const STYPE *poly) { return svmad_x (pg, x, VWRAP (horner_6) (pg, x, poly + 1), poly[0]); } static inline VTYPE VWRAP (horner_8) (svbool_t pg, VTYPE x, const STYPE *poly) { return svmad_x (pg, x, VWRAP (horner_7) (pg, x, poly + 1), poly[0]); } static inline VTYPE VWRAP (horner_9) (svbool_t pg, VTYPE x, const STYPE *poly) { return svmad_x (pg, x, VWRAP (horner_8) (pg, x, poly + 1), poly[0]); } static inline VTYPE sv_horner_10_f32_x (svbool_t pg, VTYPE x, const STYPE *poly) { return svmad_x (pg, x, VWRAP (horner_9) (pg, x, poly + 1), poly[0]); } static inline VTYPE sv_horner_11_f32_x (svbool_t pg, VTYPE x, const STYPE *poly) { return svmad_x (pg, x, sv_horner_10_f32_x (pg, x, poly + 1), poly[0]); } static inline VTYPE sv_horner_12_f32_x (svbool_t pg, VTYPE x, const STYPE *poly) { return svmad_x (pg, x, sv_horner_11_f32_x (pg, x, poly + 1), poly[0]); } static inline VTYPE VWRAP (pw_horner_4) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); VTYPE p23 = svmla_x (pg, DUP (poly[2]), x, poly[3]); VTYPE p; p = svmla_x (pg, p23, x2, poly[4]); p = svmla_x (pg, p01, x2, p); return p; } static inline VTYPE VWRAP (pw_horner_5) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); VTYPE p23 = svmla_x (pg, DUP (poly[2]), x, poly[3]); VTYPE p45 = svmla_x (pg, DUP (poly[4]), x, poly[5]); VTYPE p; p = svmla_x (pg, p23, x2, p45); p = svmla_x (pg, p01, x2, p); return p; } static inline VTYPE VWRAP (pw_horner_6) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p26 = VWRAP (pw_horner_4) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p26); } static inline VTYPE VWRAP (pw_horner_7) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p27 = VWRAP (pw_horner_5) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p27); } static inline VTYPE VWRAP (pw_horner_8) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p28 = VWRAP (pw_horner_6) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p28); } static inline VTYPE VWRAP (pw_horner_9) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE 
*poly) { VTYPE p29 = VWRAP (pw_horner_7) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p29); } static inline VTYPE VWRAP (pw_horner_10) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p2_10 = VWRAP (pw_horner_8) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p2_10); } static inline VTYPE VWRAP (pw_horner_11) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p2_11 = VWRAP (pw_horner_9) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p2_11); } static inline VTYPE VWRAP (pw_horner_12) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p2_12 = VWRAP (pw_horner_10) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p2_12); } static inline VTYPE VWRAP (pw_horner_13) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p2_13 = VWRAP (pw_horner_11) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p2_13); } static inline VTYPE VWRAP (pw_horner_14) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p2_14 = VWRAP (pw_horner_12) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p2_14); } static inline VTYPE VWRAP (pw_horner_15) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p2_15 = VWRAP (pw_horner_13) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p2_15); } static inline VTYPE VWRAP (pw_horner_16) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p2_16 = VWRAP (pw_horner_14) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p2_16); } static inline VTYPE VWRAP (pw_horner_17) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p2_17 = VWRAP (pw_horner_15) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p2_17); } static inline VTYPE VWRAP (pw_horner_18) (svbool_t pg, VTYPE x, VTYPE x2, const STYPE *poly) { VTYPE p2_18 = VWRAP (pw_horner_16) (pg, x, x2, poly + 2); VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]); return svmla_x (pg, p01, x2, p2_18); } + +static inline VTYPE VWRAP (lw_pw_horner_5) (svbool_t pg, VTYPE x, VTYPE x2, + const STYPE *poly_even, + const STYPE *poly_odd) +{ + VTYPE c13 = svld1rq (pg, poly_odd); + + VTYPE p01 = svmla_lane (DUP (poly_even[0]), x, c13, 0); + VTYPE p23 = svmla_lane (DUP (poly_even[1]), x, c13, 1); + VTYPE p45 = svmla_x (pg, DUP (poly_even[2]), x, poly_odd[2]); + + VTYPE p; + p = svmla_x (pg, p23, x2, p45); + p = svmla_x (pg, p01, x2, p); + return p; +} +static inline VTYPE VWRAP (lw_pw_horner_9) (svbool_t pg, VTYPE x, VTYPE x2, + const STYPE *poly_even, + const STYPE *poly_odd) +{ + VTYPE c13 = svld1rq (pg, poly_odd); + + VTYPE p49 = VWRAP (lw_pw_horner_5) (pg, x, x2, poly_even + 2, poly_odd + 2); + VTYPE p23 = svmla_lane (DUP (poly_even[1]), x, c13, 1); + + VTYPE p29 = svmla_x (pg, p23, x2, p49); + VTYPE p01 = svmla_lane (DUP (poly_even[0]), x, c13, 0); + + return svmla_x (pg, p01, x2, p29); +} diff --git a/contrib/arm-optimized-routines/pl/math/sv_sincos_common.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_sincos_common.h similarity index 97% rename from contrib/arm-optimized-routines/pl/math/sv_sincos_common.h rename to 
contrib/arm-optimized-routines/math/aarch64/sve/sv_sincos_common.h index f7b58deb90bd..2a537da157b0 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_sincos_common.h +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_sincos_common.h @@ -1,85 +1,85 @@ /* * Core approximation for double-precision vector sincos * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "poly_sve_f64.h" +#include "sv_poly_f64.h" static const struct sv_sincos_data { double sin_poly[7], cos_poly[6], pio2[3]; double inv_pio2, shift, range_val; } sv_sincos_data = { .inv_pio2 = 0x1.45f306dc9c882p-1, .pio2 = { 0x1.921fb50000000p+0, 0x1.110b460000000p-26, 0x1.1a62633145c07p-54 }, .shift = 0x1.8p52, .sin_poly = { /* Computed using Remez in [-pi/2, pi/2]. */ -0x1.555555555547bp-3, 0x1.1111111108a4dp-7, -0x1.a01a019936f27p-13, 0x1.71de37a97d93ep-19, -0x1.ae633919987c6p-26, 0x1.60e277ae07cecp-33, -0x1.9e9540300a1p-41 }, .cos_poly = { /* Computed using Remez in [-pi/4, pi/4]. */ 0x1.555555555554cp-5, -0x1.6c16c16c1521fp-10, 0x1.a01a019cbf62ap-16, -0x1.27e4f812b681ep-22, 0x1.1ee9f152a57cdp-29, -0x1.8fb131098404bp-37 }, .range_val = 0x1p23, }; static inline svbool_t check_ge_rangeval (svbool_t pg, svfloat64_t x, const struct sv_sincos_data *d) { svbool_t in_bounds = svaclt (pg, x, d->range_val); return svnot_z (pg, in_bounds); } /* Double-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate polynomials. Largest observed error is for sin, 3.22 ULP: v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3 want -0x1.ffe9537d5dbb4p-3. */ static inline svfloat64x2_t sv_sincos_inline (svbool_t pg, svfloat64_t x, const struct sv_sincos_data *d) { /* q = nearest integer to 2 * x / pi. */ svfloat64_t q = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_pio2), d->shift); svint64_t n = svcvt_s64_x (pg, q); /* Reduce x such that r is in [ -pi/4, pi/4 ]. */ svfloat64_t r = x; r = svmls_x (pg, r, q, d->pio2[0]); r = svmls_x (pg, r, q, d->pio2[1]); r = svmls_x (pg, r, q, d->pio2[2]); svfloat64_t r2 = svmul_x (pg, r, r), r3 = svmul_x (pg, r2, r), r4 = svmul_x (pg, r2, r2); /* Approximate sin(r) ~= r + r^3 * poly_sin(r^2). */ svfloat64_t s = sv_pw_horner_6_f64_x (pg, r2, r4, d->sin_poly); s = svmla_x (pg, r, r3, s); /* Approximate cos(r) ~= 1 - (r^2)/2 + r^4 * poly_cos(r^2). */ svfloat64_t c = sv_pw_horner_5_f64_x (pg, r2, r4, d->cos_poly); c = svmad_x (pg, c, r2, -0.5); c = svmad_x (pg, c, r2, 1); svuint64_t un = svreinterpret_u64 (n); /* If odd quadrant, swap cos and sin. */ svbool_t swap = svcmpeq (pg, svlsl_x (pg, un, 63), 0); svfloat64_t ss = svsel (swap, s, c); svfloat64_t cc = svsel (swap, c, s); /* Fix signs according to quadrant. ss = asdouble(asuint64(ss) ^ ((n & 2) << 62)) cc = asdouble(asuint64(cc) & (((n + 1) & 2) << 62)). 
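In scalar terms the quadrant fix-up reads as follows (apply_quadrant is an illustrative name; s and c are the polynomial results for sin(r) and cos(r)):

#include <stdint.h>

/* q = round(2x/pi), n = (int) q.  Bit 0 of n swaps sin/cos, bit 1 of n
   gives the sign of sin, and bit 1 of n + 1 the sign of cos.  */
static void
apply_quadrant (int64_t n, double s, double c, double *sinx, double *cosx)
{
  double ss = (n & 1) ? c : s;
  double cc = (n & 1) ? s : c;
  *sinx = (n & 2) ? -ss : ss;
  *cosx = ((n + 1) & 2) ? -cc : cc;
}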
*/ svuint64_t sin_sign = svlsl_x (pg, svand_x (pg, un, 2), 62); svuint64_t cos_sign = svlsl_x ( pg, svand_x (pg, svreinterpret_u64 (svadd_x (pg, n, 1)), 2), 62); ss = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ss), sin_sign)); cc = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (cc), cos_sign)); return svcreate2 (ss, cc); } diff --git a/contrib/arm-optimized-routines/pl/math/sv_sincosf_common.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_sincosf_common.h similarity index 98% rename from contrib/arm-optimized-routines/pl/math/sv_sincosf_common.h rename to contrib/arm-optimized-routines/math/aarch64/sve/sv_sincosf_common.h index 714e996443b3..bda89ed24680 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_sincosf_common.h +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_sincosf_common.h @@ -1,81 +1,81 @@ /* * Core approximation for single-precision vector sincos * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" const static struct sv_sincosf_data { float poly_sin[3], poly_cos[3], pio2[3], inv_pio2, shift, range_val; } sv_sincosf_data = { .poly_sin = { /* Generated using Remez, odd coeffs only, in [-pi/4, pi/4]. */ -0x1.555546p-3, 0x1.11076p-7, -0x1.994eb4p-13 }, .poly_cos = { /* Generated using Remez, even coeffs only, in [-pi/4, pi/4]. */ 0x1.55554ap-5, -0x1.6c0c1ap-10, 0x1.99e0eep-16 }, .pio2 = { 0x1.921fb6p+0f, -0x1.777a5cp-25f, -0x1.ee59dap-50f }, .inv_pio2 = 0x1.45f306p-1f, .shift = 0x1.8p23, .range_val = 0x1p20 }; static inline svbool_t check_ge_rangeval (svbool_t pg, svfloat32_t x, const struct sv_sincosf_data *d) { svbool_t in_bounds = svaclt (pg, x, d->range_val); return svnot_z (pg, in_bounds); } /* Single-precision vector function allowing calculation of both sin and cos in one function call, using shared argument reduction and separate low-order polynomials. Worst-case error for sin is 1.67 ULP: sv_sincosf_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5 Worst-case error for cos is 1.81 ULP: sv_sincosf_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */ static inline svfloat32x2_t sv_sincosf_inline (svbool_t pg, svfloat32_t x, const struct sv_sincosf_data *d) { /* n = rint ( x / (pi/2) ). */ svfloat32_t q = svmla_x (pg, sv_f32 (d->shift), x, d->inv_pio2); q = svsub_x (pg, q, d->shift); svint32_t n = svcvt_s32_x (pg, q); /* Reduce x such that r is in [ -pi/4, pi/4 ]. */ svfloat32_t r = x; r = svmls_x (pg, r, q, d->pio2[0]); r = svmls_x (pg, r, q, d->pio2[1]); r = svmls_x (pg, r, q, d->pio2[2]); /* Approximate sin(r) ~= r + r^3 * poly_sin(r^2). */ svfloat32_t r2 = svmul_x (pg, r, r), r3 = svmul_x (pg, r, r2); svfloat32_t s = svmla_x (pg, sv_f32 (d->poly_sin[1]), r2, d->poly_sin[2]); s = svmad_x (pg, r2, s, d->poly_sin[0]); s = svmla_x (pg, r, r3, s); /* Approximate cos(r) ~= 1 - (r^2)/2 + r^4 * poly_cos(r^2). */ svfloat32_t r4 = svmul_x (pg, r2, r2); svfloat32_t p = svmla_x (pg, sv_f32 (d->poly_cos[1]), r2, d->poly_cos[2]); svfloat32_t c = svmad_x (pg, sv_f32 (d->poly_cos[0]), r2, -0.5); c = svmla_x (pg, c, r4, p); c = svmad_x (pg, r2, c, 1); svuint32_t un = svreinterpret_u32 (n); /* If odd quadrant, swap cos and sin. */ svbool_t swap = svcmpeq (pg, svlsl_x (pg, un, 31), 0); svfloat32_t ss = svsel (swap, s, c); svfloat32_t cc = svsel (swap, c, s); /* Fix signs according to quadrant. ss = asfloat(asuint(ss) ^ ((n & 2) << 30)) cc = asfloat(asuint(cc) & (((n + 1) & 2) << 30)). 
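The shift constant 0x1.8p23 used above implements round-to-nearest without a rounding instruction: adding 1.5*2^23 to a float of magnitude below 2^22 forces rounding at the unit place, so subtracting the shift back leaves rint(x * inv_pio2). A scalar model, valid in the default round-to-nearest mode and assuming the compiler does not reassociate the arithmetic:

/* n = rint(x / (pi/2)) via the add-a-big-constant trick.  */
static float
rint_by_shift (float x)
{
  const float shift = 0x1.8p23f;          /* 1.5 * 2^23.  */
  float q = x * 0x1.45f306p-1f + shift;   /* x * 2/pi, biased.  */
  return q - shift;
}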
*/ svuint32_t sin_sign = svlsl_x (pg, svand_x (pg, un, 2), 30); svuint32_t cos_sign = svlsl_x ( pg, svand_x (pg, svreinterpret_u32 (svadd_x (pg, n, 1)), 2), 30); ss = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ss), sin_sign)); cc = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (cc), cos_sign)); return svcreate2 (ss, cc); } diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/sv_sincospi_common.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_sincospi_common.h new file mode 100644 index 000000000000..672ebbc8e855 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_sincospi_common.h @@ -0,0 +1,76 @@ +/* + * Core approximation for double-precision SVE sincospi + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "sv_poly_f64.h" + +static const struct sv_sincospi_data +{ + double c0, c2, c4, c6, c8; + double c1, c3, c5, c7, c9; + double range_val; +} sv_sincospi_data = { + /* Polynomial coefficients generated using Remez algorithm, + see sinpi.sollya for details. */ + .c0 = 0x1.921fb54442d184p1, + .c1 = -0x1.4abbce625be53p2, + .c2 = 0x1.466bc6775ab16p1, + .c3 = -0x1.32d2cce62dc33p-1, + .c4 = 0x1.507834891188ep-4, + .c5 = -0x1.e30750a28c88ep-8, + .c6 = 0x1.e8f48308acda4p-12, + .c7 = -0x1.6fc0032b3c29fp-16, + .c8 = 0x1.af86ae521260bp-21, + .c9 = -0x1.012a9870eeb7dp-25, + /* Exclusive upper bound for a signed integer. */ + .range_val = 0x1p63 +}; + +/* Double-precision vector function allowing calculation of both sinpi and + cospi in one function call, using shared argument reduction and polynomials. + Worst-case error for sin is 3.09 ULP: + _ZGVsMxvl8l8_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1 + want 0x1.fd54d0b327cf4p-1. + Worst-case error for cos is 3.16 ULP: + _ZGVsMxvl8l8_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1 + want 0x1.fd2da484ff402p-1. + */ +static inline svfloat64x2_t +sv_sincospi_inline (svbool_t pg, svfloat64_t x, + const struct sv_sincospi_data *d) +{ + const svbool_t pt = svptrue_b64 (); + + /* r = x - rint(x). */ + /* pt hints unpredicated instruction. */ + svfloat64_t rx = svrinta_x (pg, x); + svfloat64_t sr = svsub_x (pt, x, rx); + + /* cospi(x) = sinpi(0.5 - abs(x)) for values -1/2 .. 1/2. */ + svfloat64_t cr = svsubr_x (pg, svabs_x (pg, sr), 0.5); + + /* Pairwise Horner approximation for y = sin(r * pi). */ + /* pt hints unpredicated instruction. */ + svfloat64_t sr2 = svmul_x (pt, sr, sr); + svfloat64_t cr2 = svmul_x (pt, cr, cr); + svfloat64_t sr4 = svmul_x (pt, sr2, sr2); + svfloat64_t cr4 = svmul_x (pt, cr2, cr2); + + /* If rint(x) is odd, the sign of the result should be inverted for sinpi and + re-introduced for cospi. cmp filters rxs that saturate to max sint. 
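A scalar model of the shared sincospi reduction (sincospi_model is an illustrative name; sin(pi * .) stands in for the polynomials, and the integer cast assumes |x| is below the saturation bound filtered by cmp):

#include <math.h>

/* r = x - rint(x) lies in [-0.5, 0.5]; an odd rint(x) flips both signs,
   and cospi(x) = sinpi(0.5 - |r|) reuses the sinpi polynomial.  */
static void
sincospi_model (double x, double *sinpix, double *cospix)
{
  double rx = nearbyint (x);
  double sr = x - rx;
  double cr = 0.5 - fabs (sr);
  double s = sin (0x1.921fb54442d18p+1 * sr); /* pi * sr.  */
  double c = sin (0x1.921fb54442d18p+1 * cr);
  if ((long long) rx & 1)
    {
      s = -s;
      c = -c;
    }
  *sinpix = s;
  *cospix = c;
}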
*/ + svbool_t cmp = svaclt (pg, x, d->range_val); + svuint64_t odd = svlsl_x (pt, svreinterpret_u64 (svcvt_s64_z (pg, rx)), 63); + sr = svreinterpret_f64 (sveor_x (pt, svreinterpret_u64 (sr), odd)); + cr = svreinterpret_f64 (sveor_m (cmp, svreinterpret_u64 (cr), odd)); + + svfloat64_t sinpix = svmul_x ( + pt, sv_lw_pw_horner_9_f64_x (pg, sr2, sr4, &(d->c0), &(d->c1)), sr); + svfloat64_t cospix = svmul_x ( + pt, sv_lw_pw_horner_9_f64_x (pg, cr2, cr4, &(d->c0), &(d->c1)), cr); + + return svcreate2 (sinpix, cospix); +} diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/sv_sincospif_common.h b/contrib/arm-optimized-routines/math/aarch64/sve/sv_sincospif_common.h new file mode 100644 index 000000000000..4b9101de74ed --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/sv_sincospif_common.h @@ -0,0 +1,82 @@ +/* + * Helper for single-precision SVE sincospi + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "sv_poly_f32.h" + +const static struct sv_sincospif_data +{ + float c0, c2, c4; + float c1, c3, c5; + float range_val; +} sv_sincospif_data = { + /* Taylor series coefficients for sin(pi * x). */ + .c0 = 0x1.921fb6p1f, + .c1 = -0x1.4abbcep2f, + .c2 = 0x1.466bc6p1f, + .c3 = -0x1.32d2ccp-1f, + .c4 = 0x1.50783p-4f, + .c5 = -0x1.e30750p-8f, + /* Exclusive upper bound for a signed integer. */ + .range_val = 0x1p31f, +}; + +/* Single-precision vector function allowing calculation of both sinpi and + cospi in one function call, using shared argument reduction and polynomials. + Worst-case error for sin is 3.04 ULP: + _ZGVsMxvl4l4_sincospif_sin(0x1.b51b8p-2) got 0x1.f28b5ep-1 want + 0x1.f28b58p-1. + Worst-case error for cos is 3.18 ULP: + _ZGVsMxvl4l4_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want + 0x1.f7cd5p-1. */ +static inline svfloat32x2_t +sv_sincospif_inline (svbool_t pg, svfloat32_t x, + const struct sv_sincospif_data *d) +{ + const svbool_t pt = svptrue_b32 (); + + /* r = x - rint(x). */ + svfloat32_t rx = svrinta_x (pg, x); + svfloat32_t sr = svsub_x (pt, x, rx); + + /* cospi(x) = sinpi(0.5 - abs(r)) for values -1/2 .. 1/2. */ + svfloat32_t cr = svsubr_x (pt, svabs_x (pg, sr), 0.5f); + + /* Pairwise Horner approximation for y = sin(r * pi). */ + svfloat32_t sr2 = svmul_x (pt, sr, sr); + svfloat32_t sr4 = svmul_x (pt, sr2, sr2); + svfloat32_t cr2 = svmul_x (pt, cr, cr); + svfloat32_t cr4 = svmul_x (pt, cr2, cr2); + + /* If rint(x) is odd, the sign of the result should be inverted for sinpi and + re-introduced for cospi. cmp filters rxs that saturate to max sint.
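Throughout these files, coefficients are split into even/odd struct halves so that four of them can be fetched with a single svld1rq (a replicated 128-bit load) and then consumed by the _lane forms of the multiply-add instructions, saving one svdup per coefficient. A compilable sketch of the idiom, assuming an SVE-enabled toolchain (eval_pairs is an illustrative name):

#include <arm_sve.h>

/* Load {o0,o1,o2,o3} once as a replicated quadword and pick scalars by
   lane inside the FMLAs.  Evaluates (e0 + o0*x) + (e1 + o1*x)*x^2.  */
static inline svfloat32_t
eval_pairs (svbool_t pg, svfloat32_t x, const float *even, const float *odd)
{
  svfloat32_t odd4 = svld1rq (svptrue_b32 (), odd);
  svfloat32_t p01 = svmla_lane (svdup_f32 (even[0]), x, odd4, 0);
  svfloat32_t p23 = svmla_lane (svdup_f32 (even[1]), x, odd4, 1);
  svfloat32_t x2 = svmul_x (pg, x, x);
  return svmla_x (pg, p01, p23, x2);
}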
*/ + svbool_t cmp = svaclt (pg, x, d->range_val); + svuint32_t odd = svlsl_x (pt, svreinterpret_u32 (svcvt_s32_z (pg, rx)), 31); + sr = svreinterpret_f32 (sveor_x (pt, svreinterpret_u32 (sr), odd)); + cr = svreinterpret_f32 (sveor_m (cmp, svreinterpret_u32 (cr), odd)); + + svfloat32_t c135 = svld1rq_f32 (svptrue_b32 (), &d->c1); + + svfloat32_t sp01 = svmla_lane (sv_f32 (d->c0), sr2, c135, 0); + svfloat32_t sp23 = svmla_lane (sv_f32 (d->c2), sr2, c135, 1); + svfloat32_t sp45 = svmla_lane (sv_f32 (d->c4), sr2, c135, 2); + + svfloat32_t cp01 = svmla_lane (sv_f32 (d->c0), cr2, c135, 0); + svfloat32_t cp23 = svmla_lane (sv_f32 (d->c2), cr2, c135, 1); + svfloat32_t cp45 = svmla_lane (sv_f32 (d->c4), cr2, c135, 2); + + svfloat32_t sp = svmla_x (pg, sp23, sr4, sp45); + svfloat32_t cp = svmla_x (pg, cp23, cr4, cp45); + + sp = svmla_x (pg, sp01, sr4, sp); + cp = svmla_x (pg, cp01, cr4, cp); + + svfloat32_t sinpix = svmul_x (pt, sp, sr); + svfloat32_t cospix = svmul_x (pt, cp, cr); + + return svcreate2 (sinpix, cospix); +} diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/tan.c b/contrib/arm-optimized-routines/math/aarch64/sve/tan.c new file mode 100644 index 000000000000..1dfc5c422d5e --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/tan.c @@ -0,0 +1,131 @@ +/* + * Double-precision SVE tan(x) function. + * + * Copyright (c) 2023-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +static const struct data +{ + double c2, c4, c6, c8; + double poly_1357[4]; + double c0, inv_half_pi; + double half_pi_hi, half_pi_lo, range_val; +} data = { + /* Polynomial generated with FPMinimax. */ + .c2 = 0x1.ba1ba1bb46414p-5, + .c4 = 0x1.226e5e5ecdfa3p-7, + .c6 = 0x1.7ea75d05b583ep-10, + .c8 = 0x1.4e4fd14147622p-12, + .poly_1357 = { 0x1.1111111110a63p-3, 0x1.664f47e5b5445p-6, + 0x1.d6c7ddbf87047p-9, 0x1.289f22964a03cp-11 }, + .c0 = 0x1.5555555555556p-2, + .inv_half_pi = 0x1.45f306dc9c883p-1, + .half_pi_hi = 0x1.921fb54442d18p0, + .half_pi_lo = 0x1.1a62633145c07p-54, + .range_val = 0x1p23, +}; + +static svfloat64_t NOINLINE +special_case (svfloat64_t x, svfloat64_t p, svfloat64_t q, svbool_t pg, + svbool_t special) +{ + svbool_t use_recip = svcmpeq ( + pg, svand_x (pg, svreinterpret_u64 (svcvt_s64_x (pg, q)), 1), 0); + + svfloat64_t n = svmad_x (pg, p, p, -1); + svfloat64_t d = svmul_x (svptrue_b64 (), p, 2); + svfloat64_t swap = n; + n = svneg_m (n, use_recip, d); + d = svsel (use_recip, swap, d); + svfloat64_t y = svdiv_x (svnot_z (pg, special), n, d); + return sv_call_f64 (tan, x, y, special); +} + +/* Vector approximation for double-precision tan. + Maximum measured error is 3.48 ULP: + _ZGVsMxv_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37 + want -0x1.f6ccd8ecf7deap+37. */ +svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg) +{ + const struct data *dat = ptr_barrier (&data); + svfloat64_t half_pi_c0 = svld1rq (svptrue_b64 (), &dat->c0); + /* q = nearest integer to 2 * x / pi. */ + svfloat64_t q = svmul_lane (x, half_pi_c0, 1); + q = svrinta_x (pg, q); + + /* Use q to reduce x to r in [-pi/4, pi/4], by: + r = x - q * pi/2, in extended precision. */ + svfloat64_t r = x; + svfloat64_t half_pi = svld1rq (svptrue_b64 (), &dat->half_pi_hi); + r = svmls_lane (r, q, half_pi, 0); + r = svmls_lane (r, q, half_pi, 1); + /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle + formula. 
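The reconstruction that follows can be summarised in scalar form: with t ~= tan(r/2) on [-pi/8, pi/8], the double-angle identity gives tan(r) = 2t/(1 - t^2), and for odd q the reciprocity tan(x) = -1/tan(x - pi/2) selects the inverted ratio instead (tan_reconstruct is an illustrative name):

/* Reassemble tan from t ~= tan(r/2) and the quadrant parity of q.  */
static double
tan_reconstruct (double t, long long q)
{
  double num = 2.0 * t;
  double den = 1.0 - t * t;
  if (q & 1)
    return -den / num; /* tan(x) = -1/tan(r) in odd quadrants.  */
  return num / den;
}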
*/ + r = svmul_x (svptrue_b64 (), r, 0.5); + + /* Approximate tan(r) using an order-8 polynomial. + tan(x) is odd, so polynomial has the form: + tan(x) ~= x + C0 * x^3 + C1 * x^5 + C3 * x^7 + ... + Hence we first approximate P(r) = C1 + C2 * r^2 + C3 * r^4 + ... + Then compute the approximation by: + tan(r) ~= r + r^3 * (C0 + r^2 * P(r)). */ + + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t r4 = svmul_x (svptrue_b64 (), r2, r2); + svfloat64_t r8 = svmul_x (svptrue_b64 (), r4, r4); + /* The coefficient arrays are offset by one so that evaluation runs from C1 onwards. */ + svfloat64_t C_24 = svld1rq (svptrue_b64 (), &dat->c2); + svfloat64_t C_68 = svld1rq (svptrue_b64 (), &dat->c6); + + svfloat64_t p01 = svmla_lane (sv_f64 (dat->poly_1357[0]), r2, C_24, 0); + svfloat64_t p23 = svmla_lane_f64 (sv_f64 (dat->poly_1357[1]), r2, C_24, 1); + svfloat64_t p03 = svmla_x (pg, p01, p23, r4); + + svfloat64_t p45 = svmla_lane (sv_f64 (dat->poly_1357[2]), r2, C_68, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (dat->poly_1357[3]), r2, C_68, 1); + svfloat64_t p47 = svmla_x (pg, p45, p67, r4); + + svfloat64_t p = svmla_x (pg, p03, p47, r8); + + svfloat64_t z = svmul_x (svptrue_b64 (), p, r); + z = svmul_x (svptrue_b64 (), r2, z); + z = svmla_lane (z, r, half_pi_c0, 0); + p = svmla_x (pg, r, r2, z); + + /* Recombination uses double-angle formula: + tan(2x) = 2 * tan(x) / (1 - (tan(x))^2) + and reciprocity around pi/2: + tan(x) = 1 / (tan(pi/2 - x)) + to assemble result using change-of-sign and conditional selection of + numerator/denominator dependent on odd/even-ness of q (quadrant). */ + + /* Invert condition to catch NaNs and Infs as well as large values. */ + svbool_t special = svnot_z (pg, svaclt (pg, x, dat->range_val)); + + if (unlikely (svptest_any (pg, special))) + { + return special_case (x, p, q, pg, special); + } + svbool_t use_recip = svcmpeq ( + pg, svand_x (pg, svreinterpret_u64 (svcvt_s64_x (pg, q)), 1), 0); + + svfloat64_t n = svmad_x (pg, p, p, -1); + svfloat64_t d = svmul_x (svptrue_b64 (), p, 2); + svfloat64_t swap = n; + n = svneg_m (n, use_recip, d); + d = svsel (use_recip, swap, d); + return svdiv_x (pg, n, d); +} + +TEST_SIG (SV, D, 1, tan, -3.1, 3.1) +TEST_ULP (SV_NAME_D1 (tan), 2.99) +TEST_DISABLE_FENV (SV_NAME_D1 (tan)) +TEST_SYM_INTERVAL (SV_NAME_D1 (tan), 0, 0x1p23, 500000) +TEST_SYM_INTERVAL (SV_NAME_D1 (tan), 0x1p23, inf, 5000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_tanf_3u5.c b/contrib/arm-optimized-routines/math/aarch64/sve/tanf.c similarity index 79% rename from contrib/arm-optimized-routines/pl/math/sv_tanf_3u5.c rename to contrib/arm-optimized-routines/math/aarch64/sve/tanf.c index 6b8cd1e64b44..d34fc2fc1a4e 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_tanf_3u5.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/tanf.c @@ -1,119 +1,117 @@ /* * Single-precision vector tan(x) function. * - * Copyright (c) 2020-2023, Arm Limited. + * Copyright (c) 2020-2024, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float pio2_1, pio2_2, pio2_3, invpio2; float c1, c3, c5; float c0, c2, c4, range_val, shift; } data = { /* Coefficients generated using: poly = fpminimax((tan(sqrt(x))-sqrt(x))/x^(3/2), deg, [|single ...|], [a*a;b*b]); optimize relative error final prec : 23 bits deg : 5 a : 0x1p-126 ^ 2 b : ((pi) / 0x1p2) ^ 2 dirty rel error: 0x1.f7c2e4p-25 dirty abs error: 0x1.f7c2ecp-25. */ .c0 = 0x1.55555p-2, .c1 = 0x1.11166p-3, .c2 = 0x1.b88a78p-5, .c3 = 0x1.7b5756p-6, .c4 = 0x1.4ef4cep-8, .c5 = 0x1.0e1e74p-7, .pio2_1 = 0x1.921fb6p+0f, .pio2_2 = -0x1.777a5cp-25f, .pio2_3 = -0x1.ee59dap-50f, .invpio2 = 0x1.45f306p-1f, .range_val = 0x1p15f, .shift = 0x1.8p+23f }; static svfloat32_t NOINLINE special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) { return sv_call_f32 (tanf, x, y, cmp); } /* Fast implementation of SVE tanf. Maximum error is 3.45 ULP: SV_NAME_F1 (tan)(-0x1.e5f0cap+13) got 0x1.ff9856p-1 want 0x1.ff9850p-1. */ svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - /* Determine whether input is too large to perform fast regression. */ - svbool_t cmp = svacge (pg, x, d->range_val); - svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1); svfloat32_t pi_vals = svld1rq (svptrue_b32 (), &d->pio2_1); /* n = rint(x/(pi/2)). */ - svfloat32_t q = svmla_lane (sv_f32 (d->shift), x, pi_vals, 3); - svfloat32_t n = svsub_x (pg, q, d->shift); + svfloat32_t n = svrintn_x (pg, svmul_lane (x, pi_vals, 3)); /* n is already a signed integer, simply convert it. */ svint32_t in = svcvt_s32_x (pg, n); /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */ svint32_t alt = svand_x (pg, in, 1); svbool_t pred_alt = svcmpne (pg, alt, 0); - /* r = x - n * (pi/2) (range reduction into 0 .. pi/4). */ svfloat32_t r; r = svmls_lane (x, n, pi_vals, 0); r = svmls_lane (r, n, pi_vals, 1); r = svmls_lane (r, n, pi_vals, 2); /* If x lives in an interval, where |tan(x)| - is finite, then use a polynomial approximation of the form tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2). - grows to infinity then use symmetries of tangent and the identity tan(r) = cotan(pi/2 - r) to express tan(x) as 1/tan(-r). Finally, use the same polynomial approximation of tan as above. */ /* Perform additional reduction if required. */ svfloat32_t z = svneg_m (r, pred_alt, r); /* Evaluate polynomial approximation of tangent on [-pi/4, pi/4], using Estrin on z^2. */ - svfloat32_t z2 = svmul_x (pg, z, z); + svfloat32_t z2 = svmul_x (svptrue_b32 (), r, r); svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0); svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1); svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2); svfloat32_t z4 = svmul_x (pg, z2, z2); svfloat32_t p = svmla_x (pg, p01, z4, p23); svfloat32_t z8 = svmul_x (pg, z4, z4); p = svmla_x (pg, p, z8, p45); svfloat32_t y = svmla_x (pg, z, p, svmul_x (pg, z, z2)); - /* Transform result back, if necessary. */ - svfloat32_t inv_y = svdivr_x (pg, y, 1.0f); - /* No need to pass pg to specialcase here since cmp is a strict subset, guaranteed by the cmpge above. */ + + /* Determine whether input is too large to perform fast regression. 
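The check is deliberately performed after the polynomial has been evaluated: the fast path stays branch-free, and only when svptest_any reports at least one out-of-range lane does control divert to the NOINLINE scalar fallback. A sketch of the pattern (dispatch is an illustrative name, assuming sv_math.h is on the include path):

#include <math.h>
#include "sv_math.h"

/* Late special-case test: the fast result is already computed; pay for
   the scalar fallback only if some lane is out of range.  */
static inline svfloat32_t
dispatch (svbool_t pg, svfloat32_t x, svfloat32_t fast, float bound)
{
  svbool_t special = svacge (pg, x, bound); /* |x| >= bound.  */
  if (__builtin_expect (svptest_any (pg, special), 0))
    return sv_call_f32 (tanf, x, fast, special);
  return fast;
}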
*/ + svbool_t cmp = svacge (pg, x, d->range_val); if (unlikely (svptest_any (pg, cmp))) - return special_case (x, svsel (pred_alt, inv_y, y), cmp); + return special_case (x, svdivr_x (pg, y, 1.0f), cmp); + svfloat32_t inv_y = svdivr_x (pg, y, 1.0f); return svsel (pred_alt, inv_y, y); } -PL_SIG (SV, F, 1, tan, -3.1, 3.1) -PL_TEST_ULP (SV_NAME_F1 (tan), 2.96) -PL_TEST_INTERVAL (SV_NAME_F1 (tan), -0.0, -0x1p126, 100) -PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-149, 0x1p-126, 4000) -PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-126, 0x1p-23, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-23, 0.7, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0.7, 1.5, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (tan), 1.5, 100, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (tan), 100, 0x1p17, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p17, inf, 50000) +TEST_SIG (SV, F, 1, tan, -3.1, 3.1) +TEST_ULP (SV_NAME_F1 (tan), 2.96) +TEST_DISABLE_FENV (SV_NAME_F1 (tan)) +TEST_INTERVAL (SV_NAME_F1 (tan), -0.0, -0x1p126, 100) +TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-149, 0x1p-126, 4000) +TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-126, 0x1p-23, 50000) +TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-23, 0.7, 50000) +TEST_INTERVAL (SV_NAME_F1 (tan), 0.7, 1.5, 50000) +TEST_INTERVAL (SV_NAME_F1 (tan), 1.5, 100, 50000) +TEST_INTERVAL (SV_NAME_F1 (tan), 100, 0x1p17, 50000) +TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p17, inf, 50000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/pl/math/sv_tanh_3u.c b/contrib/arm-optimized-routines/math/aarch64/sve/tanh.c similarity index 86% rename from contrib/arm-optimized-routines/pl/math/sv_tanh_3u.c rename to contrib/arm-optimized-routines/math/aarch64/sve/tanh.c index f54139f1ddbc..41f64cb4b2c7 100644 --- a/contrib/arm-optimized-routines/pl/math/sv_tanh_3u.c +++ b/contrib/arm-optimized-routines/math/aarch64/sve/tanh.c @@ -1,96 +1,98 @@ /* * Double-precision SVE tanh(x) function. - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "sv_math.h" -#include "poly_sve_f64.h" +#include "sv_poly_f64.h" #include "mathlib.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" static const struct data { float64_t poly[11]; float64_t inv_ln2, ln2_hi, ln2_lo, shift; uint64_t thresh, tiny_bound; } data = { /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */ .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5, 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16, 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22, 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, }, .inv_ln2 = 0x1.71547652b82fep0, .ln2_hi = -0x1.62e42fefa39efp-1, .ln2_lo = -0x1.abc9e3b39803fp-56, .shift = 0x1.8p52, .tiny_bound = 0x3e40000000000000, /* asuint64 (0x1p-27). */ /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound). */ .thresh = 0x01f241bf835f9d5f, }; static inline svfloat64_t expm1_inline (svfloat64_t x, const svbool_t pg, const struct data *d) { /* Helper routine for calculating exp(x) - 1. Vector port of the helper from the scalar variant of tanh. */ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ svfloat64_t j = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift); svint64_t i = svcvt_s64_x (pg, j); svfloat64_t f = svmla_x (pg, x, j, d->ln2_hi); f = svmla_x (pg, f, j, d->ln2_lo); /* Approximate expm1(f) using polynomial. 
*/ svfloat64_t f2 = svmul_x (pg, f, f); svfloat64_t f4 = svmul_x (pg, f2, f2); svfloat64_t p = svmla_x ( pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, svmul_x (pg, f4, f4), d->poly)); /* t = 2 ^ i. */ svfloat64_t t = svscale_x (pg, sv_f64 (1), i); /* expm1(x) = p * t + (t - 1). */ return svmla_x (pg, svsub_x (pg, t, 1), p, t); } static svfloat64_t NOINLINE special_case (svfloat64_t x, svfloat64_t y, svbool_t special) { return sv_call_f64 (tanh, x, y, special); } /* SVE approximation for double-precision tanh(x), using a simplified version of expm1. The greatest observed error is 2.77 ULP: _ZGVsMxv_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3 want -0x1.bd6a21a163624p-3. */ svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); svuint64_t ia = svreinterpret_u64 (svabs_x (pg, x)); /* Trigger special-cases for tiny, boring and infinity/NaN. */ svbool_t special = svcmpgt (pg, svsub_x (pg, ia, d->tiny_bound), d->thresh); svfloat64_t u = svadd_x (pg, x, x); /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ svfloat64_t q = expm1_inline (u, pg, d); svfloat64_t qp2 = svadd_x (pg, q, 2); if (unlikely (svptest_any (pg, special))) return special_case (x, svdiv_x (pg, q, qp2), special); return svdiv_x (pg, q, qp2); } -PL_SIG (SV, D, 1, tanh, -10.0, 10.0) -PL_TEST_ULP (SV_NAME_D1 (tanh), 2.27) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0, 0x1p-27, 5000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0x1p-27, 0x1.241bf835f9d5fp+4, 50000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0x1.241bf835f9d5fp+4, inf, 1000) +TEST_SIG (SV, D, 1, tanh, -10.0, 10.0) +TEST_ULP (SV_NAME_D1 (tanh), 2.27) +TEST_DISABLE_FENV (SV_NAME_D1 (tanh)) +TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0, 0x1p-27, 5000) +TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0x1p-27, 0x1.241bf835f9d5fp+4, 50000) +TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0x1.241bf835f9d5fp+4, inf, 1000) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/tanhf.c b/contrib/arm-optimized-routines/math/aarch64/sve/tanhf.c new file mode 100644 index 000000000000..9007e7badb0d --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/tanhf.c @@ -0,0 +1,68 @@ +/* + * Single-precision SVE tanh(x) function. + * + * Copyright (c) 2023-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" +#include "sv_expm1f_inline.h" + +/* Largest value of x for which tanhf(x) rounds to 1 (or -1 for negative). */ +#define BoringBound 0x1.205966p+3f + +static const struct data +{ + struct sv_expm1f_data expm1f_consts; + uint32_t onef, special_bound; + float boring_bound; +} data = { + .expm1f_consts = SV_EXPM1F_DATA, + .onef = 0x3f800000, + .special_bound = 0x7f800000, + .boring_bound = BoringBound, +}; + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t pg, svbool_t is_boring, + svfloat32_t boring, svfloat32_t q, svbool_t special) +{ + svfloat32_t y + = svsel_f32 (is_boring, boring, svdiv_x (pg, q, svadd_x (pg, q, 2.0))); + return sv_call_f32 (tanhf, x, y, special); +} + +/* Approximation for single-precision SVE tanh(x), using a simplified + version of expm1f. The maximum error is 2.57 ULP: + _ZGVsMxv_tanhf (0x1.fc1832p-5) got 0x1.fb71a4p-5 + want 0x1.fb71aap-5. 
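The identity behind both tanh implementations is worth stating in scalar form (tanhf_model is an illustrative name):

#include <math.h>

/* tanh(x) = (e^2x - 1)/(e^2x + 1) = q/(q + 2) with q = expm1(2x).  */
static float
tanhf_model (float x)
{
  float q = expm1f (2.0f * x);
  return q / (q + 2.0f);
}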
*/ +svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svfloat32_t ax = svabs_x (pg, x); + svuint32_t iax = svreinterpret_u32 (ax); + svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax); + svfloat32_t boring = svreinterpret_f32 (svorr_x (pg, sign, d->onef)); + svbool_t special = svcmpgt (pg, iax, d->special_bound); + svbool_t is_boring = svacgt (pg, x, d->boring_bound); + + /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ + svfloat32_t q = expm1f_inline (svmul_x (svptrue_b32 (), x, 2.0), pg, + &d->expm1f_consts); + + if (unlikely (svptest_any (pg, special))) + return special_case (x, pg, is_boring, boring, q, special); + svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0)); + return svsel_f32 (is_boring, boring, y); +} + +TEST_SIG (SV, F, 1, tanh, -10.0, 10.0) +TEST_ULP (SV_NAME_F1 (tanh), 2.07) +TEST_DISABLE_FENV (SV_NAME_F1 (tanh)) +TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0, 0x1p-23, 1000) +TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0x1p-23, BoringBound, 100000) +TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), BoringBound, inf, 100) +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/tanpi.c b/contrib/arm-optimized-routines/math/aarch64/sve/tanpi.c new file mode 100644 index 000000000000..d9e7d2487d53 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/tanpi.c @@ -0,0 +1,89 @@ +/* + * Double-precision vector tanpi(x) function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_sig.h" +#include "test_defs.h" + +const static struct v_tanpi_data +{ + double c0, c2, c4, c6, c8, c10, c12; + double c1, c3, c5, c7, c9, c11, c13, c14; +} tanpi_data = { + /* Coefficents for tan(pi * x) computed with fpminimax + on [ 0x1p-1022 0x1p-2 ] + approx rel error: 0x1.7eap-55 + approx abs error: 0x1.7eap-55. */ + .c0 = 0x1.921fb54442d18p1, /* pi. */ + .c1 = 0x1.4abbce625be52p3, .c2 = 0x1.466bc6775b0f9p5, + .c3 = 0x1.45fff9b426f5ep7, .c4 = 0x1.45f4730dbca5cp9, + .c5 = 0x1.45f3265994f85p11, .c6 = 0x1.45f4234b330cap13, + .c7 = 0x1.45dca11be79ebp15, .c8 = 0x1.47283fc5eea69p17, + .c9 = 0x1.3a6d958cdefaep19, .c10 = 0x1.927896baee627p21, + .c11 = -0x1.89333f6acd922p19, .c12 = 0x1.5d4e912bb8456p27, + .c13 = -0x1.a854d53ab6874p29, .c14 = 0x1.1b76de7681424p32, +}; + +/* Approximation for double-precision vector tanpi(x) + The maximum error is 3.06 ULP: + _ZGVsMxv_tanpi(0x1.0a4a07dfcca3ep-1) got -0x1.fa30112702c98p+3 + want -0x1.fa30112702c95p+3. */ +svfloat64_t SV_NAME_D1 (tanpi) (svfloat64_t x, const svbool_t pg) +{ + const struct v_tanpi_data *d = ptr_barrier (&tanpi_data); + + svfloat64_t n = svrintn_x (pg, x); + + /* inf produces nan that propagates. */ + svfloat64_t xr = svsub_x (pg, x, n); + svfloat64_t ar = svabd_x (pg, x, n); + svbool_t flip = svcmpgt (pg, ar, 0.25); + svfloat64_t r = svsel (flip, svsubr_x (pg, ar, 0.5), ar); + + /* Order-14 pairwise Horner. 
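   Recap of the reduction above (an illustrative scalar sketch, where
   poly is a hypothetical stand-in for the Horner chain below): with
   n = round(x), tan(pi * x) = tan(pi * (x - n)) by periodicity, and for
   |x - n| > 0.25 the flip rewrites the argument as 0.5 - |x - n| and
   uses tan(pi/2 - t) = 1/tan(t), so one polynomial serves both halves
   at the cost of a final reciprocal (the svdivr below):

     double r = flip ? 0.5 - fabs (x - n) : fabs (x - n); // r in [0, 0.25]
     double p = r * poly (r * r); // odd polynomial, c0 = pi
     double y = flip ? 1.0 / p : p; // sign restored afterwards by bit ops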
*/ + svfloat64_t r2 = svmul_x (pg, r, r); + svfloat64_t r4 = svmul_x (pg, r2, r2); + + svfloat64_t c_1_3 = svld1rq (pg, &d->c1); + svfloat64_t c_5_7 = svld1rq (pg, &d->c5); + svfloat64_t c_9_11 = svld1rq (pg, &d->c9); + svfloat64_t c_13_14 = svld1rq (pg, &d->c13); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r2, c_1_3, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r2, c_1_3, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), r2, c_5_7, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), r2, c_5_7, 1); + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), r2, c_9_11, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), r2, c_9_11, 1); + svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), r2, c_13_14, 0); + + svfloat64_t p = svmla_lane (p1213, r4, c_13_14, 1); + p = svmad_x (pg, p, r4, p1011); + p = svmad_x (pg, p, r4, p89); + p = svmad_x (pg, p, r4, p67); + p = svmad_x (pg, p, r4, p45); + p = svmad_x (pg, p, r4, p23); + p = svmad_x (pg, p, r4, p01); + p = svmul_x (pg, r, p); + + svfloat64_t p_recip = svdivr_x (pg, p, 1.0); + svfloat64_t y = svsel (flip, p_recip, p); + + svuint64_t sign + = sveor_x (pg, svreinterpret_u64 (xr), svreinterpret_u64 (ar)); + return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)); +} + +#if WANT_TRIGPI_TESTS +TEST_DISABLE_FENV (SV_NAME_D1 (tanpi)) +TEST_ULP (SV_NAME_D1 (tanpi), 2.57) +TEST_SYM_INTERVAL (SV_NAME_D1 (tanpi), 0, 0x1p-31, 50000) +TEST_SYM_INTERVAL (SV_NAME_D1 (tanpi), 0x1p-31, 0.5, 50000) +TEST_SYM_INTERVAL (SV_NAME_D1 (tanpi), 0.5, 1.0, 200000) +TEST_SYM_INTERVAL (SV_NAME_D1 (tanpi), 1.0, 0x1p23, 50000) +TEST_SYM_INTERVAL (SV_NAME_D1 (tanpi), 0x1p23, inf, 50000) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/sve/tanpif.c b/contrib/arm-optimized-routines/math/aarch64/sve/tanpif.c new file mode 100644 index 000000000000..2ba968a799fe --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/sve/tanpif.c @@ -0,0 +1,68 @@ +/* + * Single-precision vector tanpif(x) function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#include "test_defs.h" +#include "test_sig.h" + +const static struct v_tanpif_data +{ + float c0, c2, c4, c6; + float c1, c3, c5, c7; +} tanpif_data = { + /* Coefficients for tan(pi * x). */ + .c0 = 0x1.921fb4p1f, .c1 = 0x1.4abbcep3f, .c2 = 0x1.466b8p5f, + .c3 = 0x1.461c72p7f, .c4 = 0x1.42e9d4p9f, .c5 = 0x1.69e2c4p11f, + .c6 = 0x1.e85558p11f, .c7 = 0x1.a52e08p16f, +}; + +/* Approximation for single-precision vector tanpif(x) + The maximum error is 3.34 ULP: + _ZGVsMxv_tanpif(0x1.d6c09ap-2) got 0x1.f70aacp+2 + want 0x1.f70aa6p+2. */ +svfloat32_t SV_NAME_F1 (tanpi) (svfloat32_t x, const svbool_t pg) +{ + const struct v_tanpif_data *d = ptr_barrier (&tanpif_data); + svfloat32_t odd_coeffs = svld1rq (pg, &d->c1); + svfloat32_t n = svrintn_x (pg, x); + + /* inf produces nan that propagates. */ + svfloat32_t xr = svsub_x (pg, x, n); + svfloat32_t ar = svabd_x (pg, x, n); + svbool_t flip = svcmpgt (pg, ar, 0.25f); + svfloat32_t r = svsel (flip, svsub_x (pg, sv_f32 (0.5f), ar), ar); + + svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t r4 = svmul_x (pg, r2, r2); + + /* Order-7 Pairwise Horner. 
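   i.e. with pk = c(2k) + c(2k+1) * r2 formed from adjacent coefficient
   pairs, the code below evaluates (restated for clarity)

     p(r) = r * (((p67 * r4 + p45) * r4 + p23) * r4 + p01)

   which expands to r * (c0 + c1*r2 + c2*r2^2 + ... + c7*r2^7).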
*/ + svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), r2, odd_coeffs, 0); + svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), r2, odd_coeffs, 1); + svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), r2, odd_coeffs, 2); + svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), r2, odd_coeffs, 3); + svfloat32_t p = svmad_x (pg, p67, r4, p45); + p = svmad_x (pg, p, r4, p23); + p = svmad_x (pg, p, r4, p01); + svfloat32_t poly = svmul_x (pg, r, p); + + svfloat32_t poly_recip = svdiv_x (pg, sv_f32 (1.0), poly); + svfloat32_t y = svsel (flip, poly_recip, poly); + + svuint32_t sign + = sveor_x (pg, svreinterpret_u32 (xr), svreinterpret_u32 (ar)); + return svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign)); +} + +#if WANT_TRIGPI_TESTS +TEST_DISABLE_FENV (SV_NAME_F1 (tanpi)) +TEST_ULP (SV_NAME_F1 (tanpi), 2.84) +TEST_SYM_INTERVAL (SV_NAME_F1 (tanpi), 0, 0x1p-31, 50000) +TEST_SYM_INTERVAL (SV_NAME_F1 (tanpi), 0x1p-31, 0.5, 100000) +TEST_SYM_INTERVAL (SV_NAME_F1 (tanpi), 0.5, 0x1p23f, 100000) +TEST_SYM_INTERVAL (SV_NAME_F1 (tanpi), 0x1p23f, inf, 100000) +#endif +CLOSE_SVE_ATTR diff --git a/contrib/arm-optimized-routines/math/aarch64/tanpi_2u5.c b/contrib/arm-optimized-routines/math/aarch64/tanpi_2u5.c new file mode 100644 index 000000000000..154b9faf454d --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/tanpi_2u5.c @@ -0,0 +1,158 @@ +/* + * Double-precision scalar tanpi(x) function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "mathlib.h" +#include "math_config.h" +#include "test_sig.h" +#include "test_defs.h" +#include "poly_scalar_f64.h" + +#define SIGN_MASK 0x8000000000000000 + +const static struct tanpi_data +{ + double tan_poly[14], cot_poly[9], pi, invpi; +} tanpi_data = { + /* Coefficents for tan(pi * x). */ + .tan_poly = { + 0x1.4abbce625be52p3, + 0x1.466bc6775b0f9p5, + 0x1.45fff9b426f5ep7, + 0x1.45f4730dbca5cp9, + 0x1.45f3265994f85p11, + 0x1.45f4234b330cap13, + 0x1.45dca11be79ebp15, + 0x1.47283fc5eea69p17, + 0x1.3a6d958cdefaep19, + 0x1.927896baee627p21, + -0x1.89333f6acd922p19, + 0x1.5d4e912bb8456p27, + -0x1.a854d53ab6874p29, + 0x1.1b76de7681424p32, + }, + /* Coefficents for cot(pi * x). */ + .cot_poly = { + -0x1.0c152382d7366p0, + -0x1.60c8539c1d316p-1, + -0x1.4b9a2f3516354p-1, + -0x1.47474060b6ba8p-1, + -0x1.464633ad9dcb1p-1, + -0x1.45ff229d7edd6p-1, + -0x1.46d8dbf492923p-1, + -0x1.3873892311c6bp-1, + -0x1.b2f3d0ff96d73p-1, + }, + .pi = 0x1.921fb54442d18p1, + .invpi = 0x1.45f306dc9c883p-2, +}; + +/* Double-precision scalar tanpi(x) implementation. + Maximum error 2.19 ULP: + tanpi(0x1.68847e177a855p-2) got 0x1.fe9a0ff9bb9d7p+0 + want 0x1.fe9a0ff9bb9d5p+0. */ +double +arm_math_tanpi (double x) +{ + uint64_t xabs_12 = asuint64 (x) >> 52 & 0x7ff; + + /* x >= 0x1p54. */ + if (unlikely (xabs_12 >= 0x434)) + { + /* tanpi(+/-inf) and tanpi(+/-nan) = nan. */ + if (unlikely (xabs_12 == 0x7ff)) + { + return __math_invalid (x); + } + + uint64_t x_sign = asuint64 (x) & SIGN_MASK; + return asdouble (x_sign); + } + + const struct tanpi_data *d = ptr_barrier (&tanpi_data); + + double rounded = round (x); + if (unlikely (rounded == x)) + { + /* If x == 0, return with sign. */ + if (x == 0) + { + return x; + } + /* Otherwise, return zero with alternating sign. */ + int64_t m = (int64_t) rounded; + if (x < 0) + { + return m & 1 ? 0.0 : -0.0; + } + else + { + return m & 1 ? -0.0 : 0.0; + } + } + + double x_reduced = x - rounded; + double abs_x_reduced = 0.5 - fabs (x_reduced); + + /* Prevent underflow exceptions. x <= 0x1p-63. 
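   (0x3c0 is the biased exponent of 0x1p-63, so the test below fires
   exactly for |x| < 0x1p-63; at that magnitude tan(pi * x) agrees with
   pi * x to well within a ULP, so the polynomial can be skipped.)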
*/ + if (unlikely (xabs_12 < 0x3c0)) + { + return d->pi * x; + } + + double result, offset, scale; + + /* Test 0.25 < abs_x < 0.5 independent from abs_x_reduced. */ + double x2 = x + x; + int64_t rounded_x2 = (int64_t) round (x2); + if (rounded_x2 & 1) + { + double r_x = abs_x_reduced; + + double r_x2 = r_x * r_x; + double r_x4 = r_x2 * r_x2; + + uint64_t sign = asuint64 (x_reduced) & SIGN_MASK; + r_x = asdouble (asuint64 (r_x) ^ sign); + + // calculate sign for half-fractional inf values + uint64_t is_finite = asuint64 (abs_x_reduced); + uint64_t is_odd = (rounded_x2 & 2) << 62; + uint64_t is_neg = rounded_x2 & SIGN_MASK; + uint64_t keep_sign = is_finite | (is_odd ^ is_neg); + offset = d->invpi / (keep_sign ? r_x : -r_x); + scale = r_x; + + result = pw_horner_8_f64 (r_x2, r_x4, d->cot_poly); + } + else + { + double r_x2 = x_reduced * x_reduced; + double r_x4 = r_x2 * r_x2; + + offset = d->pi * x_reduced; + scale = x_reduced * r_x2; + + result = pw_horner_13_f64 (r_x2, r_x4, d->tan_poly); + } + + return fma (scale, result, offset); +} + +#if WANT_EXPERIMENTAL_MATH +double +tanpi (double x) +{ + return arm_math_tanpi (x); +} +#endif + +#if WANT_TRIGPI_TESTS +TEST_ULP (arm_math_tanpi, 1.69) +TEST_SYM_INTERVAL (arm_math_tanpi, 0, 0x1p-63, 50000) +TEST_SYM_INTERVAL (arm_math_tanpi, 0x1p-63, 0.5, 100000) +TEST_SYM_INTERVAL (arm_math_tanpi, 0.5, 0x1p53, 100000) +TEST_SYM_INTERVAL (arm_math_tanpi, 0x1p53, inf, 100000) +#endif diff --git a/contrib/arm-optimized-routines/math/aarch64/tanpif_3u1.c b/contrib/arm-optimized-routines/math/aarch64/tanpif_3u1.c new file mode 100644 index 000000000000..8cd66594c290 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/tanpif_3u1.c @@ -0,0 +1,145 @@ +/* + * Single-precision scalar tanpi(x) function. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "mathlib.h" +#include "math_config.h" +#include "test_sig.h" +#include "test_defs.h" +#include "poly_scalar_f32.h" + +const static struct tanpif_data +{ + float tan_poly[6], cot_poly[4], pi, invpi; +} tanpif_data = { + /* Coefficents for tan(pi * x). */ + .tan_poly = { + 0x1.4abbc8p3, + 0x1.467284p5, + 0x1.44cf12p7, + 0x1.596b5p9, + 0x1.753858p10, + 0x1.76ff52p14, + }, + /* Coefficents for cot(pi * x). */ + .cot_poly = { + -0x1.0c1522p0, + -0x1.60ce32p-1, + -0x1.49cd42p-1, + -0x1.73f786p-1, + }, + .pi = 0x1.921fb6p1f, + .invpi = 0x1.45f308p-2f, +}; + +/* Single-precision scalar tanpi(x) implementation. + Maximum error 2.56 ULP: + tanpif(0x1.4bf948p-1) got -0x1.fcc9ep+0 + want -0x1.fcc9e6p+0. */ +float +arm_math_tanpif (float x) +{ + uint32_t xabs_12 = asuint (x) >> 20 & 0x7f8; + + /* x >= 0x1p24f. */ + if (unlikely (xabs_12 >= 0x4b1)) + { + /* tanpif(+/-inf) and tanpif(+/-nan) = nan. */ + if (unlikely (xabs_12 == 0x7f8)) + { + return __math_invalidf (x); + } + + uint32_t x_sign = asuint (x) & 0x80000000; + return asfloat (x_sign); + } + + const struct tanpif_data *d = ptr_barrier (&tanpif_data); + + /* Prevent underflow exceptions. x <= 0x1p-31. */ + if (unlikely (xabs_12 < 0x300)) + { + return d->pi * x; + } + + float rounded = roundf (x); + if (unlikely (rounded == x)) + { + /* If x == 0, return with sign. */ + if (x == 0) + { + return x; + } + /* Otherwise, return zero with alternating sign. */ + int32_t m = (int32_t) rounded; + if (x < 0) + { + return m & 1 ? 0.0f : -0.0f; + } + else + { + return m & 1 ? 
-0.0f : 0.0f; + } + } + + float x_reduced = x - rounded; + float abs_x_reduced = 0.5f - asfloat (asuint (x_reduced) & 0x7fffffff); + + float result, offset, scale; + + /* Test 0.25 < abs_x < 0.5 independent from abs_x_reduced. */ + float x2 = x + x; + int32_t rounded_x2 = (int32_t) roundf (x2); + if (rounded_x2 & 1) + { + float r_x = abs_x_reduced; + + float r_x2 = r_x * r_x; + float r_x4 = r_x2 * r_x2; + + uint32_t sign = asuint (x_reduced) & 0x80000000; + r_x = asfloat (asuint (r_x) ^ sign); + + // calculate sign for half-fractional inf values + uint32_t is_finite = asuint (abs_x_reduced); + uint32_t is_odd = (rounded_x2 & 2) << 30; + uint32_t is_neg = rounded_x2 & 0x80000000; + uint32_t keep_sign = is_finite | (is_odd ^ is_neg); + offset = d->invpi / (keep_sign ? r_x : -r_x); + scale = r_x; + + result = pairwise_poly_3_f32 (r_x2, r_x4, d->cot_poly); + } + else + { + float r_x = x_reduced; + + float r_x2 = r_x * r_x; + float r_x4 = r_x2 * r_x2; + + offset = d->pi * r_x; + scale = r_x * r_x2; + + result = pw_horner_5_f32 (r_x2, r_x4, d->tan_poly); + } + + return fmaf (scale, result, offset); +} + +#if WANT_EXPERIMENTAL_MATH +float +tanpif (float x) +{ + return arm_math_tanpif (x); +} +#endif + +#if WANT_TRIGPI_TESTS +TEST_ULP (arm_math_tanpif, 2.57) +TEST_SYM_INTERVAL (arm_math_tanpif, 0, 0x1p-31f, 50000) +TEST_SYM_INTERVAL (arm_math_tanpif, 0x1p-31f, 0.5, 100000) +TEST_SYM_INTERVAL (arm_math_tanpif, 0.5, 0x1p23f, 100000) +TEST_SYM_INTERVAL (arm_math_tanpif, 0x1p23f, inf, 100000) +#endif diff --git a/contrib/arm-optimized-routines/pl/math/erf_data.c b/contrib/arm-optimized-routines/math/aarch64/v_erf_data.c similarity index 99% rename from contrib/arm-optimized-routines/pl/math/erf_data.c rename to contrib/arm-optimized-routines/math/aarch64/v_erf_data.c index 138e03578e77..5400d6b8d0e3 100644 --- a/contrib/arm-optimized-routines/pl/math/erf_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/v_erf_data.c @@ -1,788 +1,788 @@ /* * Data for approximation of erf. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -/* Lookup table used in erf. +/* Lookup table used in vector erf. For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = 6.0 (769 values): - - the first entry __erff_data.tab.erf contains the values of erf(r), - - the second entry __erff_data.tab.scale contains the values of + - the first entry __v_erff_data.tab.erf contains the values of erf(r), + - the second entry __v_erff_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the algorithm, since lookup is performed only for x >= 1/64-1/512. 
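   For context, a table of this shape supports an evaluation of the form

     erf(x) ~= erf(r) + scale(r) * d * P(d, r), where d = x - r,

   with scale(r) = 2/sqrt(pi) * exp(-r^2) being erf'(r); the correction
   polynomial P lives in the erf routines themselves, not in this data
   file (stated here as background, not as a spec of those routines).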
*/ -const struct erf_data __erf_data = { +const struct v_erf_data __v_erf_data = { .tab = { { 0x0.0000000000000p+0, 0x1.20dd750429b6dp+0 }, { 0x1.20dbf3deb1340p-7, 0x1.20d8f1975c85dp+0 }, { 0x1.20d77083f17a0p-6, 0x1.20cb67bd452c7p+0 }, { 0x1.b137e0cf584dcp-6, 0x1.20b4d8bac36c1p+0 }, { 0x1.20c5645dd2538p-5, 0x1.209546ad13ccfp+0 }, { 0x1.68e5d3bbc9526p-5, 0x1.206cb4897b148p+0 }, { 0x1.b0fafef135745p-5, 0x1.203b261cd0052p+0 }, { 0x1.f902a77bd3821p-5, 0x1.2000a00ae3804p+0 }, { 0x1.207d480e90658p-4, 0x1.1fbd27cdc72d3p+0 }, { 0x1.44703e87e8593p-4, 0x1.1f70c3b4f2cc7p+0 }, { 0x1.68591a1e83b5dp-4, 0x1.1f1b7ae44867fp+0 }, { 0x1.8c36beb8a8d23p-4, 0x1.1ebd5552f795bp+0 }, { 0x1.b0081148a873ap-4, 0x1.1e565bca400d4p+0 }, { 0x1.d3cbf7e70a4b3p-4, 0x1.1de697e413d28p+0 }, { 0x1.f78159ec8bb50p-4, 0x1.1d6e14099944ap+0 }, { 0x1.0d939005f65e5p-3, 0x1.1cecdb718d61cp+0 }, { 0x1.1f5e1a35c3b89p-3, 0x1.1c62fa1e869b6p+0 }, { 0x1.311fc15f56d14p-3, 0x1.1bd07cdd189acp+0 }, { 0x1.42d7fc2f64959p-3, 0x1.1b357141d95d5p+0 }, { 0x1.548642321d7c6p-3, 0x1.1a91e5a748165p+0 }, { 0x1.662a0bdf7a89fp-3, 0x1.19e5e92b964abp+0 }, { 0x1.77c2d2a765f9ep-3, 0x1.19318bae53a04p+0 }, { 0x1.895010fdbdbfdp-3, 0x1.1874ddcdfce24p+0 }, { 0x1.9ad142662e14dp-3, 0x1.17aff0e56ec10p+0 }, { 0x1.ac45e37fe2526p-3, 0x1.16e2d7093cd8cp+0 }, { 0x1.bdad72110a648p-3, 0x1.160da304ed92fp+0 }, { 0x1.cf076d1233237p-3, 0x1.153068581b781p+0 }, { 0x1.e05354b96ff36p-3, 0x1.144b3b337c90cp+0 }, { 0x1.f190aa85540e2p-3, 0x1.135e3075d076bp+0 }, { 0x1.015f78a3dcf3dp-2, 0x1.12695da8b5bdep+0 }, { 0x1.09eed6982b948p-2, 0x1.116cd8fd67618p+0 }, { 0x1.127631eb8de32p-2, 0x1.1068b94962e5ep+0 }, { 0x1.1af54e232d609p-2, 0x1.0f5d1602f7e41p+0 }, { 0x1.236bef825d9a2p-2, 0x1.0e4a073dc1b91p+0 }, { 0x1.2bd9db0f7827fp-2, 0x1.0d2fa5a70c168p+0 }, { 0x1.343ed6989b7d9p-2, 0x1.0c0e0a8223359p+0 }, { 0x1.3c9aa8b84bedap-2, 0x1.0ae54fa490722p+0 }, { 0x1.44ed18d9f6462p-2, 0x1.09b58f724416bp+0 }, { 0x1.4d35ef3e5372ep-2, 0x1.087ee4d9ad247p+0 }, { 0x1.5574f4ffac98ep-2, 0x1.07416b4fbfe7cp+0 }, { 0x1.5da9f415ff23fp-2, 0x1.05fd3ecbec297p+0 }, { 0x1.65d4b75b00471p-2, 0x1.04b27bc403d30p+0 }, { 0x1.6df50a8dff772p-2, 0x1.03613f2812dafp+0 }, { 0x1.760aba57a76bfp-2, 0x1.0209a65e29545p+0 }, { 0x1.7e15944d9d3e4p-2, 0x1.00abcf3e187a9p+0 }, { 0x1.861566f5fd3c0p-2, 0x1.fe8fb01a47307p-1 }, { 0x1.8e0a01cab516bp-2, 0x1.fbbbbef34b4b2p-1 }, { 0x1.95f3353cbb146p-2, 0x1.f8dc092d58ff8p-1 }, { 0x1.9dd0d2b721f39p-2, 0x1.f5f0cdaf15313p-1 }, { 0x1.a5a2aca209394p-2, 0x1.f2fa4c16c0019p-1 }, { 0x1.ad68966569a87p-2, 0x1.eff8c4b1375dbp-1 }, { 0x1.b522646bbda68p-2, 0x1.ecec7870ebca7p-1 }, { 0x1.bccfec24855b8p-2, 0x1.e9d5a8e4c934ep-1 }, { 0x1.c4710406a65fcp-2, 0x1.e6b4982f158b9p-1 }, { 0x1.cc058392a6d2dp-2, 0x1.e38988fc46e72p-1 }, { 0x1.d38d4354c3bd0p-2, 0x1.e054be79d3042p-1 }, { 0x1.db081ce6e2a48p-2, 0x1.dd167c4cf9d2ap-1 }, { 0x1.e275eaf25e458p-2, 0x1.d9cf06898cdafp-1 }, { 0x1.e9d68931ae650p-2, 0x1.d67ea1a8b5368p-1 }, { 0x1.f129d471eabb1p-2, 0x1.d325927fb9d89p-1 }, { 0x1.f86faa9428f9dp-2, 0x1.cfc41e36c7df9p-1 }, { 0x1.ffa7ea8eb5fd0p-2, 0x1.cc5a8a3fbea40p-1 }, { 0x1.03693a371519cp-1, 0x1.c8e91c4d01368p-1 }, { 0x1.06f794ab2cae7p-1, 0x1.c5701a484ef9dp-1 }, { 0x1.0a7ef5c18edd2p-1, 0x1.c1efca49a5011p-1 }, { 0x1.0dff4f247f6c6p-1, 0x1.be68728e29d5dp-1 }, { 0x1.1178930ada115p-1, 0x1.bada596f25436p-1 }, { 0x1.14eab43841b55p-1, 0x1.b745c55905bf8p-1 }, { 0x1.1855a5fd3dd50p-1, 0x1.b3aafcc27502ep-1 }, { 0x1.1bb95c3746199p-1, 0x1.b00a46237d5bep-1 }, { 0x1.1f15cb50bc4dep-1, 0x1.ac63e7ecc1411p-1 }, { 0x1.226ae840d4d70p-1, 0x1.a8b8287ec6a09p-1 }, { 
0x1.25b8a88b6dd7fp-1, 0x1.a5074e2157620p-1 }, { 0x1.28ff0240d52cdp-1, 0x1.a1519efaf889ep-1 }, { 0x1.2c3debfd7d6c1p-1, 0x1.9d97610879642p-1 }, { 0x1.2f755ce9a21f4p-1, 0x1.99d8da149c13fp-1 }, { 0x1.32a54cb8db67bp-1, 0x1.96164fafd8de3p-1 }, { 0x1.35cdb3a9a144dp-1, 0x1.925007283d7aap-1 }, { 0x1.38ee8a84beb71p-1, 0x1.8e86458169af8p-1 }, { 0x1.3c07ca9cb4f9ep-1, 0x1.8ab94f6caa71dp-1 }, { 0x1.3f196dcd0f135p-1, 0x1.86e9694134b9ep-1 }, { 0x1.42236e79a5fa6p-1, 0x1.8316d6f48133dp-1 }, { 0x1.4525c78dd5966p-1, 0x1.7f41dc12c9e89p-1 }, { 0x1.4820747ba2dc2p-1, 0x1.7b6abbb7aaf19p-1 }, { 0x1.4b13713ad3513p-1, 0x1.7791b886e7403p-1 }, { 0x1.4dfeba47f63ccp-1, 0x1.73b714a552763p-1 }, { 0x1.50e24ca35fd2cp-1, 0x1.6fdb11b1e0c34p-1 }, { 0x1.53be25d016a4fp-1, 0x1.6bfdf0beddaf5p-1 }, { 0x1.569243d2b3a9bp-1, 0x1.681ff24b4ab04p-1 }, { 0x1.595ea53035283p-1, 0x1.6441563c665d4p-1 }, { 0x1.5c2348ecc4dc3p-1, 0x1.60625bd75d07bp-1 }, { 0x1.5ee02e8a71a53p-1, 0x1.5c8341bb23767p-1 }, { 0x1.61955607dd15dp-1, 0x1.58a445da7c74cp-1 }, { 0x1.6442bfdedd397p-1, 0x1.54c5a57629db0p-1 }, { 0x1.66e86d0312e82p-1, 0x1.50e79d1749ac9p-1 }, { 0x1.69865ee075011p-1, 0x1.4d0a6889dfd9fp-1 }, { 0x1.6c1c9759d0e5fp-1, 0x1.492e42d78d2c5p-1 }, { 0x1.6eab18c74091bp-1, 0x1.4553664273d24p-1 }, { 0x1.7131e5f496a5ap-1, 0x1.417a0c4049fd0p-1 }, { 0x1.73b1021fc0cb8p-1, 0x1.3da26d759aef5p-1 }, { 0x1.762870f720c6fp-1, 0x1.39ccc1b136d5ap-1 }, { 0x1.78983697dc96fp-1, 0x1.35f93fe7d1b3dp-1 }, { 0x1.7b00578c26037p-1, 0x1.32281e2fd1a92p-1 }, { 0x1.7d60d8c979f7bp-1, 0x1.2e5991bd4cbfcp-1 }, { 0x1.7fb9bfaed8078p-1, 0x1.2a8dcede3673bp-1 }, { 0x1.820b1202f27fbp-1, 0x1.26c508f6bd0ffp-1 }, { 0x1.8454d5f25760dp-1, 0x1.22ff727dd6f7bp-1 }, { 0x1.8697120d92a4ap-1, 0x1.1f3d3cf9ffe5ap-1 }, { 0x1.88d1cd474a2e0p-1, 0x1.1b7e98fe26217p-1 }, { 0x1.8b050ef253c37p-1, 0x1.17c3b626c7a11p-1 }, { 0x1.8d30debfc572ep-1, 0x1.140cc3173f007p-1 }, { 0x1.8f5544bd00c04p-1, 0x1.1059ed7740313p-1 }, { 0x1.91724951b8fc6p-1, 0x1.0cab61f084b93p-1 }, { 0x1.9387f53df5238p-1, 0x1.09014c2ca74dap-1 }, { 0x1.959651980da31p-1, 0x1.055bd6d32e8d7p-1 }, { 0x1.979d67caa6631p-1, 0x1.01bb2b87c6968p-1 }, { 0x1.999d4192a5715p-1, 0x1.fc3ee5d1524b0p-2 }, { 0x1.9b95e8fd26abap-1, 0x1.f511a91a67d2ap-2 }, { 0x1.9d8768656cc42p-1, 0x1.edeeee0959518p-2 }, { 0x1.9f71ca72cffb6p-1, 0x1.e6d6ffaa65a25p-2 }, { 0x1.a1551a16aaeafp-1, 0x1.dfca26f5bbf88p-2 }, { 0x1.a331628a45b92p-1, 0x1.d8c8aace11e63p-2 }, { 0x1.a506af4cc00f4p-1, 0x1.d1d2cfff91594p-2 }, { 0x1.a6d50c20fa293p-1, 0x1.cae8d93f1d7b6p-2 }, { 0x1.a89c850b7d54dp-1, 0x1.c40b0729ed547p-2 }, { 0x1.aa5d265064366p-1, 0x1.bd3998457afdap-2 }, { 0x1.ac16fc7143263p-1, 0x1.b674c8ffc6283p-2 }, { 0x1.adca142b10f98p-1, 0x1.afbcd3afe8ab6p-2 }, { 0x1.af767a741088bp-1, 0x1.a911f096fbc26p-2 }, { 0x1.b11c3c79bb424p-1, 0x1.a27455e14c93cp-2 }, { 0x1.b2bb679ead19cp-1, 0x1.9be437a7de946p-2 }, { 0x1.b4540978921eep-1, 0x1.9561c7f23a47bp-2 }, { 0x1.b5e62fce16095p-1, 0x1.8eed36b886d93p-2 }, { 0x1.b771e894d602ep-1, 0x1.8886b1e5ecfd1p-2 }, { 0x1.b8f741ef54f83p-1, 0x1.822e655b417e6p-2 }, { 0x1.ba764a2af2b78p-1, 0x1.7be47af1f5d89p-2 }, { 0x1.bbef0fbde6221p-1, 0x1.75a91a7f4d2edp-2 }, { 0x1.bd61a1453ab44p-1, 0x1.6f7c69d7d3ef8p-2 }, { 0x1.bece0d82d1a5cp-1, 0x1.695e8cd31867ep-2 }, { 0x1.c034635b66e23p-1, 0x1.634fa54fa285fp-2 }, { 0x1.c194b1d49a184p-1, 0x1.5d4fd33729015p-2 }, { 0x1.c2ef0812fc1bdp-1, 0x1.575f3483021c3p-2 }, { 0x1.c443755820d64p-1, 0x1.517de540ce2a3p-2 }, { 0x1.c5920900b5fd1p-1, 0x1.4babff975a04cp-2 }, { 0x1.c6dad2829ec62p-1, 0x1.45e99bcbb7915p-2 }, { 0x1.c81de16b14cefp-1, 0x1.4036d0468a7a2p-2 }, { 
0x1.c95b455cce69dp-1, 0x1.3a93b1998736cp-2 }, { 0x1.ca930e0e2a825p-1, 0x1.35005285227f1p-2 }, { 0x1.cbc54b476248dp-1, 0x1.2f7cc3fe6f423p-2 }, { 0x1.ccf20ce0c0d27p-1, 0x1.2a09153529381p-2 }, { 0x1.ce1962c0e0d8bp-1, 0x1.24a55399ea239p-2 }, { 0x1.cf3b5cdaf0c39p-1, 0x1.1f518ae487dc8p-2 }, { 0x1.d0580b2cfd249p-1, 0x1.1a0dc51a9934dp-2 }, { 0x1.d16f7dbe41ca0p-1, 0x1.14da0a961fd14p-2 }, { 0x1.d281c49d818d0p-1, 0x1.0fb6620c550afp-2 }, { 0x1.d38eefdf64fddp-1, 0x1.0aa2d09497f2bp-2 }, { 0x1.d4970f9ce00d9p-1, 0x1.059f59af7a906p-2 }, { 0x1.d59a33f19ed42p-1, 0x1.00abff4dec7a3p-2 }, { 0x1.d6986cfa798e7p-1, 0x1.f79183b101c5bp-3 }, { 0x1.d791cad3eff01p-1, 0x1.edeb406d9c824p-3 }, { 0x1.d8865d98abe01p-1, 0x1.e4652fadcb6b2p-3 }, { 0x1.d97635600bb89p-1, 0x1.daff4969c0b04p-3 }, { 0x1.da61623cb41e0p-1, 0x1.d1b982c501370p-3 }, { 0x1.db47f43b2980dp-1, 0x1.c893ce1dcbef7p-3 }, { 0x1.dc29fb60715afp-1, 0x1.bf8e1b1ca2279p-3 }, { 0x1.dd0787a8bb39dp-1, 0x1.b6a856c3ed54fp-3 }, { 0x1.dde0a90611a0dp-1, 0x1.ade26b7fbed95p-3 }, { 0x1.deb56f5f12d28p-1, 0x1.a53c4135a6526p-3 }, { 0x1.df85ea8db188ep-1, 0x1.9cb5bd549b111p-3 }, { 0x1.e0522a5dfda73p-1, 0x1.944ec2e4f5630p-3 }, { 0x1.e11a3e8cf4eb8p-1, 0x1.8c07329874652p-3 }, { 0x1.e1de36c75ba58p-1, 0x1.83deeada4d25ap-3 }, { 0x1.e29e22a89d766p-1, 0x1.7bd5c7df3fe9cp-3 }, { 0x1.e35a11b9b61cep-1, 0x1.73eba3b5b07b7p-3 }, { 0x1.e4121370224ccp-1, 0x1.6c205655be71fp-3 }, { 0x1.e4c6372cd8927p-1, 0x1.6473b5b15a7a1p-3 }, { 0x1.e5768c3b4a3fcp-1, 0x1.5ce595c455b0ap-3 }, { 0x1.e62321d06c5e0p-1, 0x1.5575c8a468361p-3 }, { 0x1.e6cc0709c8a0dp-1, 0x1.4e241e912c305p-3 }, { 0x1.e7714aec96534p-1, 0x1.46f066040a832p-3 }, { 0x1.e812fc64db369p-1, 0x1.3fda6bc016994p-3 }, { 0x1.e8b12a44944a8p-1, 0x1.38e1fae1d6a9dp-3 }, { 0x1.e94be342e6743p-1, 0x1.3206dceef5f87p-3 }, { 0x1.e9e335fb56f87p-1, 0x1.2b48d9e5dea1cp-3 }, { 0x1.ea7730ed0bbb9p-1, 0x1.24a7b84d38971p-3 }, { 0x1.eb07e27a133aap-1, 0x1.1e233d434b813p-3 }, { 0x1.eb9558e6b42cep-1, 0x1.17bb2c8d41535p-3 }, { 0x1.ec1fa258c4beap-1, 0x1.116f48a6476ccp-3 }, { 0x1.eca6ccd709544p-1, 0x1.0b3f52ce8c383p-3 }, { 0x1.ed2ae6489ac1ep-1, 0x1.052b0b1a174eap-3 }, { 0x1.edabfc7453e63p-1, 0x1.fe6460fef4680p-4 }, { 0x1.ee2a1d004692cp-1, 0x1.f2a901ccafb37p-4 }, { 0x1.eea5557137ae0p-1, 0x1.e723726b824a9p-4 }, { 0x1.ef1db32a2277cp-1, 0x1.dbd32ac4c99b0p-4 }, { 0x1.ef93436bc2daap-1, 0x1.d0b7a0f921e7cp-4 }, { 0x1.f006135426b26p-1, 0x1.c5d0497c09e74p-4 }, { 0x1.f0762fde45ee6p-1, 0x1.bb1c972f23e50p-4 }, { 0x1.f0e3a5e1a1788p-1, 0x1.b09bfb7d11a83p-4 }, { 0x1.f14e8211e8c55p-1, 0x1.a64de673e8837p-4 }, { 0x1.f1b6d0fea5f4dp-1, 0x1.9c31c6df3b1b8p-4 }, { 0x1.f21c9f12f0677p-1, 0x1.92470a61b6965p-4 }, { 0x1.f27ff89525acfp-1, 0x1.888d1d8e510a3p-4 }, { 0x1.f2e0e9a6a8b09p-1, 0x1.7f036c0107294p-4 }, { 0x1.f33f7e43a706bp-1, 0x1.75a96077274bap-4 }, { 0x1.f39bc242e43e6p-1, 0x1.6c7e64e7281cbp-4 }, { 0x1.f3f5c1558b19ep-1, 0x1.6381e2980956bp-4 }, { 0x1.f44d870704911p-1, 0x1.5ab342383d177p-4 }, { 0x1.f4a31ebcd47dfp-1, 0x1.5211ebf41880bp-4 }, { 0x1.f4f693b67bd77p-1, 0x1.499d478bca735p-4 }, { 0x1.f547f10d60597p-1, 0x1.4154bc68d75c3p-4 }, { 0x1.f59741b4b97cfp-1, 0x1.3937b1b319259p-4 }, { 0x1.f5e4907982a07p-1, 0x1.31458e6542847p-4 }, { 0x1.f62fe80272419p-1, 0x1.297db960e4f63p-4 }, { 0x1.f67952cff6282p-1, 0x1.21df9981f8e53p-4 }, { 0x1.f6c0db3c34641p-1, 0x1.1a6a95b1e786fp-4 }, { 0x1.f7068b7b10fd9p-1, 0x1.131e14fa1625dp-4 }, { 0x1.f74a6d9a38383p-1, 0x1.0bf97e95f2a64p-4 }, { 0x1.f78c8b812d498p-1, 0x1.04fc3a0481321p-4 }, { 0x1.f7cceef15d631p-1, 0x1.fc4b5e32d6259p-5 }, { 0x1.f80ba18636f07p-1, 0x1.eeea8c1b1db93p-5 }, { 
0x1.f848acb544e95p-1, 0x1.e1d4cf1e2450ap-5 }, { 0x1.f88419ce4e184p-1, 0x1.d508f9a1ea64ep-5 }, { 0x1.f8bdf1fb78370p-1, 0x1.c885df3451a07p-5 }, { 0x1.f8f63e416ebffp-1, 0x1.bc4a54a84e834p-5 }, { 0x1.f92d077f8d56dp-1, 0x1.b055303221015p-5 }, { 0x1.f96256700da8ep-1, 0x1.a4a549829587ep-5 }, { 0x1.f99633a838a57p-1, 0x1.993979e14fffdp-5 }, { 0x1.f9c8a7989af0dp-1, 0x1.8e109c4622913p-5 }, { 0x1.f9f9ba8d3c733p-1, 0x1.83298d717210ep-5 }, { 0x1.fa2974addae45p-1, 0x1.78832c03aa2b1p-5 }, { 0x1.fa57ddfe27376p-1, 0x1.6e1c5893c380bp-5 }, { 0x1.fa84fe5e05c8dp-1, 0x1.63f3f5c4de13bp-5 }, { 0x1.fab0dd89d1309p-1, 0x1.5a08e85af27e0p-5 }, { 0x1.fadb831a9f9c3p-1, 0x1.505a174e9c929p-5 }, { 0x1.fb04f6868a944p-1, 0x1.46e66be002240p-5 }, { 0x1.fb2d3f20f9101p-1, 0x1.3dacd1a8d8ccdp-5 }, { 0x1.fb54641aebbc9p-1, 0x1.34ac36ad8dafep-5 }, { 0x1.fb7a6c834b5a2p-1, 0x1.2be38b6d92415p-5 }, { 0x1.fb9f5f4739170p-1, 0x1.2351c2f2d1449p-5 }, { 0x1.fbc3433260ca5p-1, 0x1.1af5d2e04f3f6p-5 }, { 0x1.fbe61eef4cf6ap-1, 0x1.12ceb37ff9bc3p-5 }, { 0x1.fc07f907bc794p-1, 0x1.0adb5fcfa8c75p-5 }, { 0x1.fc28d7e4f9cd0p-1, 0x1.031ad58d56279p-5 }, { 0x1.fc48c1d033c7ap-1, 0x1.f7182a851bca2p-6 }, { 0x1.fc67bcf2d7b8fp-1, 0x1.e85c449e377f2p-6 }, { 0x1.fc85cf56ecd38p-1, 0x1.da0005e5f28dfp-6 }, { 0x1.fca2fee770c79p-1, 0x1.cc0180af00a8bp-6 }, { 0x1.fcbf5170b578bp-1, 0x1.be5ecd2fcb5f9p-6 }, { 0x1.fcdacca0bfb73p-1, 0x1.b1160991ff737p-6 }, { 0x1.fcf57607a6e7cp-1, 0x1.a4255a00b9f03p-6 }, { 0x1.fd0f5317f582fp-1, 0x1.978ae8b55ce1bp-6 }, { 0x1.fd2869270a56fp-1, 0x1.8b44e6031383ep-6 }, { 0x1.fd40bd6d7a785p-1, 0x1.7f5188610ddc8p-6 }, { 0x1.fd58550773cb5p-1, 0x1.73af0c737bb45p-6 }, { 0x1.fd6f34f52013ap-1, 0x1.685bb5134ef13p-6 }, { 0x1.fd85621b0876dp-1, 0x1.5d55cb54cd53ap-6 }, { 0x1.fd9ae142795e3p-1, 0x1.529b9e8cf9a1ep-6 }, { 0x1.fdafb719e6a69p-1, 0x1.482b8455dc491p-6 }, { 0x1.fdc3e835500b3p-1, 0x1.3e03d891b37dep-6 }, { 0x1.fdd7790ea5bc0p-1, 0x1.3422fd6d12e2bp-6 }, { 0x1.fdea6e062d0c9p-1, 0x1.2a875b5ffab56p-6 }, { 0x1.fdfccb62e52d3p-1, 0x1.212f612dee7fbp-6 }, { 0x1.fe0e9552ebdd6p-1, 0x1.181983e5133ddp-6 }, { 0x1.fe1fcfebe2083p-1, 0x1.0f443edc5ce49p-6 }, { 0x1.fe307f2b503d0p-1, 0x1.06ae13b0d3255p-6 }, { 0x1.fe40a6f70af4bp-1, 0x1.fcab1483ea7fcp-7 }, { 0x1.fe504b1d9696cp-1, 0x1.ec72615a894c4p-7 }, { 0x1.fe5f6f568b301p-1, 0x1.dcaf3691fc448p-7 }, { 0x1.fe6e1742f7cf6p-1, 0x1.cd5ec93c12431p-7 }, { 0x1.fe7c466dc57a1p-1, 0x1.be7e5ac24963bp-7 }, { 0x1.fe8a004c19ae6p-1, 0x1.b00b38d6b3575p-7 }, { 0x1.fe97483db8670p-1, 0x1.a202bd6372dcep-7 }, { 0x1.fea4218d6594ap-1, 0x1.94624e78e0fafp-7 }, { 0x1.feb08f7146046p-1, 0x1.87275e3a6869dp-7 }, { 0x1.febc950b3fa75p-1, 0x1.7a4f6aca256cbp-7 }, { 0x1.fec835695932ep-1, 0x1.6dd7fe3358230p-7 }, { 0x1.fed37386190fbp-1, 0x1.61beae53b72b7p-7 }, { 0x1.fede5248e38f4p-1, 0x1.56011cc3b036dp-7 }, { 0x1.fee8d486585eep-1, 0x1.4a9cf6bda3f4cp-7 }, { 0x1.fef2fd00af31ap-1, 0x1.3f8ff5042a88ep-7 }, { 0x1.fefcce6813974p-1, 0x1.34d7dbc76d7e5p-7 }, { 0x1.ff064b5afffbep-1, 0x1.2a727a89a3f14p-7 }, { 0x1.ff0f766697c76p-1, 0x1.205dac02bd6b9p-7 }, { 0x1.ff18520700971p-1, 0x1.1697560347b25p-7 }, { 0x1.ff20e0a7ba8c2p-1, 0x1.0d1d69569b82dp-7 }, { 0x1.ff2924a3f7a83p-1, 0x1.03ede1a45bfeep-7 }, { 0x1.ff312046f2339p-1, 0x1.f60d8aa2a88f2p-8 }, { 0x1.ff38d5cc4227fp-1, 0x1.e4cc4abf7d065p-8 }, { 0x1.ff404760319b4p-1, 0x1.d4143a9dfe965p-8 }, { 0x1.ff47772010262p-1, 0x1.c3e1a5f5c077cp-8 }, { 0x1.ff4e671a85425p-1, 0x1.b430ecf4a83a8p-8 }, { 0x1.ff55194fe19dfp-1, 0x1.a4fe83fb9db25p-8 }, { 0x1.ff5b8fb26f5f6p-1, 0x1.9646f35a76623p-8 }, { 0x1.ff61cc26c1578p-1, 0x1.8806d70b2fc36p-8 }, { 
0x1.ff67d08401202p-1, 0x1.7a3ade6c8b3e4p-8 }, { 0x1.ff6d9e943c231p-1, 0x1.6cdfcbfc1e263p-8 }, { 0x1.ff733814af88cp-1, 0x1.5ff2750fe7820p-8 }, { 0x1.ff789eb6130c9p-1, 0x1.536fc18f7ce5cp-8 }, { 0x1.ff7dd41ce2b4dp-1, 0x1.4754abacdf1dcp-8 }, { 0x1.ff82d9e1a76d8p-1, 0x1.3b9e3f9d06e3fp-8 }, { 0x1.ff87b1913e853p-1, 0x1.30499b503957fp-8 }, { 0x1.ff8c5cad200a5p-1, 0x1.2553ee2a336bfp-8 }, { 0x1.ff90dcaba4096p-1, 0x1.1aba78ba3af89p-8 }, { 0x1.ff9532f846ab0p-1, 0x1.107a8c7323a6ep-8 }, { 0x1.ff9960f3eb327p-1, 0x1.06918b6355624p-8 }, { 0x1.ff9d67f51ddbap-1, 0x1.f9f9cfd9c3035p-9 }, { 0x1.ffa14948549a7p-1, 0x1.e77448fb66bb9p-9 }, { 0x1.ffa506302ebaep-1, 0x1.d58da68fd1170p-9 }, { 0x1.ffa89fe5b3625p-1, 0x1.c4412bf4b8f0bp-9 }, { 0x1.ffac17988ef4bp-1, 0x1.b38a3af2e55b4p-9 }, { 0x1.ffaf6e6f4f5c0p-1, 0x1.a3645330550ffp-9 }, { 0x1.ffb2a5879f35ep-1, 0x1.93cb11a30d765p-9 }, { 0x1.ffb5bdf67fe6fp-1, 0x1.84ba3004a50d0p-9 }, { 0x1.ffb8b8c88295fp-1, 0x1.762d84469c18fp-9 }, { 0x1.ffbb970200110p-1, 0x1.6821000795a03p-9 }, { 0x1.ffbe599f4f9d9p-1, 0x1.5a90b00981d93p-9 }, { 0x1.ffc10194fcb64p-1, 0x1.4d78bba8ca5fdp-9 }, { 0x1.ffc38fcffbb7cp-1, 0x1.40d564548fad7p-9 }, { 0x1.ffc60535dd7f5p-1, 0x1.34a305080681fp-9 }, { 0x1.ffc862a501fd7p-1, 0x1.28de11c5031ebp-9 }, { 0x1.ffcaa8f4c9beap-1, 0x1.1d83170fbf6fbp-9 }, { 0x1.ffccd8f5c66d1p-1, 0x1.128eb96be8798p-9 }, { 0x1.ffcef371ea4d7p-1, 0x1.07fdb4dafea5fp-9 }, { 0x1.ffd0f92cb6ba7p-1, 0x1.fb99b8b8279e1p-10 }, { 0x1.ffd2eae369a07p-1, 0x1.e7f232d9e2630p-10 }, { 0x1.ffd4c94d29fdbp-1, 0x1.d4fed7195d7e8p-10 }, { 0x1.ffd6951b33686p-1, 0x1.c2b9cf7f893bfp-10 }, { 0x1.ffd84ef9009eep-1, 0x1.b11d702b3deb1p-10 }, { 0x1.ffd9f78c7524ap-1, 0x1.a024365f771bdp-10 }, { 0x1.ffdb8f7605ee7p-1, 0x1.8fc8c794b03b5p-10 }, { 0x1.ffdd1750e1220p-1, 0x1.8005f08d6f1efp-10 }, { 0x1.ffde8fb314ebfp-1, 0x1.70d6a46e07ddap-10 }, { 0x1.ffdff92db56e5p-1, 0x1.6235fbd7a4345p-10 }, { 0x1.ffe1544d01ccbp-1, 0x1.541f340697987p-10 }, { 0x1.ffe2a1988857cp-1, 0x1.468dadf4080abp-10 }, { 0x1.ffe3e19349dc7p-1, 0x1.397ced7af2b15p-10 }, { 0x1.ffe514bbdc197p-1, 0x1.2ce898809244ep-10 }, { 0x1.ffe63b8c8b5f7p-1, 0x1.20cc76202c5fap-10 }, { 0x1.ffe7567b7b5e1p-1, 0x1.15246dda49d47p-10 }, { 0x1.ffe865fac722bp-1, 0x1.09ec86c75d497p-10 }, { 0x1.ffe96a78a04a9p-1, 0x1.fe41cd9bb4eeep-11 }, { 0x1.ffea645f6d6dap-1, 0x1.e97ba3b77f306p-11 }, { 0x1.ffeb5415e7c44p-1, 0x1.d57f524723822p-11 }, { 0x1.ffec39ff380b9p-1, 0x1.c245d4b998479p-11 }, { 0x1.ffed167b12ac2p-1, 0x1.afc85e0f82e12p-11 }, { 0x1.ffede9e5d3262p-1, 0x1.9e005769dbc1dp-11 }, { 0x1.ffeeb49896c6dp-1, 0x1.8ce75e9f6f8a0p-11 }, { 0x1.ffef76e956a9fp-1, 0x1.7c7744d9378f7p-11 }, { 0x1.fff0312b010b5p-1, 0x1.6caa0d3582fe9p-11 }, { 0x1.fff0e3ad91ec2p-1, 0x1.5d79eb71e893bp-11 }, { 0x1.fff18ebe2b0e1p-1, 0x1.4ee1429bf7cc0p-11 }, { 0x1.fff232a72b48ep-1, 0x1.40daa3c89f5b6p-11 }, { 0x1.fff2cfb0453d9p-1, 0x1.3360ccd23db3ap-11 }, { 0x1.fff3661e9569dp-1, 0x1.266ea71d4f71ap-11 }, { 0x1.fff3f634b79f9p-1, 0x1.19ff4663ae9dfp-11 }, { 0x1.fff48032dbe40p-1, 0x1.0e0de78654d1ep-11 }, { 0x1.fff50456dab8cp-1, 0x1.0295ef6591848p-11 }, { 0x1.fff582dc48d30p-1, 0x1.ef25d37f49fe1p-12 }, { 0x1.fff5fbfc8a439p-1, 0x1.da01102b5f851p-12 }, { 0x1.fff66feee5129p-1, 0x1.c5b5412dcafadp-12 }, { 0x1.fff6dee89352ep-1, 0x1.b23a5a23e4210p-12 }, { 0x1.fff7491cd4af6p-1, 0x1.9f8893d8fd1c1p-12 }, { 0x1.fff7aebcff755p-1, 0x1.8d986a4187285p-12 }, { 0x1.fff80ff8911fdp-1, 0x1.7c629a822bc9ep-12 }, { 0x1.fff86cfd3e657p-1, 0x1.6be02102b3520p-12 }, { 0x1.fff8c5f702ccfp-1, 0x1.5c0a378c90bcap-12 }, { 0x1.fff91b102fca8p-1, 0x1.4cda5374ea275p-12 }, { 
0x1.fff96c717b695p-1, 0x1.3e4a23d1f4702p-12 }, { 0x1.fff9ba420e834p-1, 0x1.30538fbb77ecdp-12 }, { 0x1.fffa04a7928b1p-1, 0x1.22f0b496539bdp-12 }, { 0x1.fffa4bc63ee9ap-1, 0x1.161be46ad3b50p-12 }, { 0x1.fffa8fc0e5f33p-1, 0x1.09cfa445b00ffp-12 }, { 0x1.fffad0b901755p-1, 0x1.fc0d55470cf51p-13 }, { 0x1.fffb0ecebee1bp-1, 0x1.e577bbcd49935p-13 }, { 0x1.fffb4a210b172p-1, 0x1.cfd4a5adec5bfp-13 }, { 0x1.fffb82cd9dcbfp-1, 0x1.bb1a9657ce465p-13 }, { 0x1.fffbb8f1049c6p-1, 0x1.a740684026555p-13 }, { 0x1.fffbeca6adbe9p-1, 0x1.943d4a1d1ed39p-13 }, { 0x1.fffc1e08f25f5p-1, 0x1.8208bc334a6a5p-13 }, { 0x1.fffc4d3120aa1p-1, 0x1.709a8db59f25cp-13 }, { 0x1.fffc7a37857d2p-1, 0x1.5feada379d8b7p-13 }, { 0x1.fffca53375ce3p-1, 0x1.4ff207314a102p-13 }, { 0x1.fffcce3b57bffp-1, 0x1.40a8c1949f75ep-13 }, { 0x1.fffcf564ab6b7p-1, 0x1.3207fb7420eb9p-13 }, { 0x1.fffd1ac4135f9p-1, 0x1.2408e9ba3327fp-13 }, { 0x1.fffd3e6d5cd87p-1, 0x1.16a501f0e42cap-13 }, { 0x1.fffd607387b07p-1, 0x1.09d5f819c9e29p-13 }, { 0x1.fffd80e8ce0dap-1, 0x1.fb2b792b40a22p-14 }, { 0x1.fffd9fdeabccep-1, 0x1.e3bcf436a1a95p-14 }, { 0x1.fffdbd65e5ad0p-1, 0x1.cd55277c18d05p-14 }, { 0x1.fffdd98e903b2p-1, 0x1.b7e94604479dcp-14 }, { 0x1.fffdf46816833p-1, 0x1.a36eec00926ddp-14 }, { 0x1.fffe0e0140857p-1, 0x1.8fdc1b2dcf7b9p-14 }, { 0x1.fffe26683972ap-1, 0x1.7d2737527c3f9p-14 }, { 0x1.fffe3daa95b18p-1, 0x1.6b4702d7d5849p-14 }, { 0x1.fffe53d558ae9p-1, 0x1.5a329b7d30748p-14 }, { 0x1.fffe68f4fa777p-1, 0x1.49e17724f4d41p-14 }, { 0x1.fffe7d156d244p-1, 0x1.3a4b60ba9aa4dp-14 }, { 0x1.fffe904222101p-1, 0x1.2b6875310f785p-14 }, { 0x1.fffea2860ee1ep-1, 0x1.1d312098e9dbap-14 }, { 0x1.fffeb3ebb267bp-1, 0x1.0f9e1b4dd36dfp-14 }, { 0x1.fffec47d19457p-1, 0x1.02a8673a94691p-14 }, { 0x1.fffed443e2787p-1, 0x1.ec929a665b449p-15 }, { 0x1.fffee34943b15p-1, 0x1.d4f4b4c8e09edp-15 }, { 0x1.fffef1960d85dp-1, 0x1.be6abbb10a5aap-15 }, { 0x1.fffeff32af7afp-1, 0x1.a8e8cc1fadef6p-15 }, { 0x1.ffff0c273bea2p-1, 0x1.94637d5bacfdbp-15 }, { 0x1.ffff187b6bc0ep-1, 0x1.80cfdc72220cfp-15 }, { 0x1.ffff2436a21dcp-1, 0x1.6e2367dc27f95p-15 }, { 0x1.ffff2f5fefcaap-1, 0x1.5c540b4936fd2p-15 }, { 0x1.ffff39fe16963p-1, 0x1.4b581b8d170fcp-15 }, { 0x1.ffff44178c8d2p-1, 0x1.3b2652b06c2b2p-15 }, { 0x1.ffff4db27f146p-1, 0x1.2bb5cc22e5db6p-15 }, { 0x1.ffff56d4d5e5ep-1, 0x1.1cfe010e2052dp-15 }, { 0x1.ffff5f8435efcp-1, 0x1.0ef6c4c84a0fep-15 }, { 0x1.ffff67c604180p-1, 0x1.01984165a5f36p-15 }, { 0x1.ffff6f9f67e55p-1, 0x1.e9b5e8d00ce76p-16 }, { 0x1.ffff77154e0d6p-1, 0x1.d16f5716c6c1ap-16 }, { 0x1.ffff7e2c6aea2p-1, 0x1.ba4f035d60e02p-16 }, { 0x1.ffff84e93cd75p-1, 0x1.a447b7b03f045p-16 }, { 0x1.ffff8b500e77cp-1, 0x1.8f4ccca7fc90dp-16 }, { 0x1.ffff9164f8e46p-1, 0x1.7b5223dac7336p-16 }, { 0x1.ffff972be5c59p-1, 0x1.684c227fcacefp-16 }, { 0x1.ffff9ca891572p-1, 0x1.562fac4329b48p-16 }, { 0x1.ffffa1de8c582p-1, 0x1.44f21e49054f2p-16 }, { 0x1.ffffa6d13de73p-1, 0x1.34894a5e24657p-16 }, { 0x1.ffffab83e54b8p-1, 0x1.24eb7254ccf83p-16 }, { 0x1.ffffaff99bac4p-1, 0x1.160f438c70913p-16 }, { 0x1.ffffb43555b5fp-1, 0x1.07ebd2a2d2844p-16 }, { 0x1.ffffb839e52f3p-1, 0x1.f4f12e9ab070ap-17 }, { 0x1.ffffbc09fa7cdp-1, 0x1.db5ad0b27805cp-17 }, { 0x1.ffffbfa82616bp-1, 0x1.c304efa2c6f4ep-17 }, { 0x1.ffffc316d9ed0p-1, 0x1.abe09e9144b5ep-17 }, { 0x1.ffffc6586abf6p-1, 0x1.95df988e76644p-17 }, { 0x1.ffffc96f1165ep-1, 0x1.80f439b4ee04bp-17 }, { 0x1.ffffcc5cec0c1p-1, 0x1.6d11788a69c64p-17 }, { 0x1.ffffcf23ff5fcp-1, 0x1.5a2adfa0b4bc4p-17 }, { 0x1.ffffd1c637b2bp-1, 0x1.4834877429b8fp-17 }, { 0x1.ffffd4456a10dp-1, 0x1.37231085c7d9ap-17 }, { 0x1.ffffd6a3554a1p-1, 
0x1.26eb9daed6f7ep-17 }, { 0x1.ffffd8e1a2f22p-1, 0x1.1783ceac28910p-17 }, { 0x1.ffffdb01e8546p-1, 0x1.08e1badf0fcedp-17 }, { 0x1.ffffdd05a75eap-1, 0x1.f5f7d88472604p-18 }, { 0x1.ffffdeee4f810p-1, 0x1.db92b5212fb8dp-18 }, { 0x1.ffffe0bd3e852p-1, 0x1.c282cd3957edap-18 }, { 0x1.ffffe273c15b7p-1, 0x1.aab7abace48dcp-18 }, { 0x1.ffffe41314e06p-1, 0x1.94219bfcb4928p-18 }, { 0x1.ffffe59c6698bp-1, 0x1.7eb1a2075864dp-18 }, { 0x1.ffffe710d565ep-1, 0x1.6a597219a93d9p-18 }, { 0x1.ffffe8717232dp-1, 0x1.570b69502f313p-18 }, { 0x1.ffffe9bf4098cp-1, 0x1.44ba864670882p-18 }, { 0x1.ffffeafb377d5p-1, 0x1.335a62115bce2p-18 }, { 0x1.ffffec2641a9ep-1, 0x1.22df298214423p-18 }, { 0x1.ffffed413e5b7p-1, 0x1.133d96ae7e0ddp-18 }, { 0x1.ffffee4d01cd6p-1, 0x1.046aeabcfcdecp-18 }, { 0x1.ffffef4a55bd4p-1, 0x1.ecb9cfe1d8642p-19 }, { 0x1.fffff039f9e8fp-1, 0x1.d21397ead99cbp-19 }, { 0x1.fffff11ca4876p-1, 0x1.b8d094c86d374p-19 }, { 0x1.fffff1f302bc1p-1, 0x1.a0df0f0c626dcp-19 }, { 0x1.fffff2bdb904dp-1, 0x1.8a2e269750a39p-19 }, { 0x1.fffff37d63a36p-1, 0x1.74adc8f4064d3p-19 }, { 0x1.fffff43297019p-1, 0x1.604ea819f007cp-19 }, { 0x1.fffff4dde0118p-1, 0x1.4d0231928c6f9p-19 }, { 0x1.fffff57fc4a95p-1, 0x1.3aba85fe22e1fp-19 }, { 0x1.fffff618c3da6p-1, 0x1.296a70f414053p-19 }, { 0x1.fffff6a956450p-1, 0x1.1905613b3abf2p-19 }, { 0x1.fffff731ee681p-1, 0x1.097f6156f32c5p-19 }, { 0x1.fffff7b2f8ed6p-1, 0x1.f59a20caf6695p-20 }, { 0x1.fffff82cdcf1bp-1, 0x1.d9c73698fb1dcp-20 }, { 0x1.fffff89ffc4aap-1, 0x1.bf716c6168baep-20 }, { 0x1.fffff90cb3c81p-1, 0x1.a6852c6b58392p-20 }, { 0x1.fffff9735b73bp-1, 0x1.8eefd70594a88p-20 }, { 0x1.fffff9d446cccp-1, 0x1.789fb715aae95p-20 }, { 0x1.fffffa2fc5015p-1, 0x1.6383f726a8e04p-20 }, { 0x1.fffffa8621251p-1, 0x1.4f8c96f26a26ap-20 }, { 0x1.fffffad7a2652p-1, 0x1.3caa61607f920p-20 }, { 0x1.fffffb248c39dp-1, 0x1.2acee2f5ecdb8p-20 }, { 0x1.fffffb6d1e95dp-1, 0x1.19ec60b1242edp-20 }, { 0x1.fffffbb196132p-1, 0x1.09f5cf4dd2877p-20 }, { 0x1.fffffbf22c1e2p-1, 0x1.f5bd95d8730d8p-21 }, { 0x1.fffffc2f171e3p-1, 0x1.d9371e2ff7c35p-21 }, { 0x1.fffffc688a9cfp-1, 0x1.be41de54d155ap-21 }, { 0x1.fffffc9eb76acp-1, 0x1.a4c89e08ef4f3p-21 }, { 0x1.fffffcd1cbc28p-1, 0x1.8cb738399b12cp-21 }, { 0x1.fffffd01f36afp-1, 0x1.75fa8dbc84becp-21 }, { 0x1.fffffd2f57d68p-1, 0x1.608078a70dcbcp-21 }, { 0x1.fffffd5a2041fp-1, 0x1.4c37c0394d094p-21 }, { 0x1.fffffd8271d12p-1, 0x1.39100d5687bfep-21 }, { 0x1.fffffda86faa9p-1, 0x1.26f9df8519bd6p-21 }, { 0x1.fffffdcc3b117p-1, 0x1.15e6827001f18p-21 }, { 0x1.fffffdedf37edp-1, 0x1.05c803e4831c1p-21 }, { 0x1.fffffe0db6b91p-1, 0x1.ed22548cffd35p-22 }, { 0x1.fffffe2ba0ea5p-1, 0x1.d06ad6ecdf971p-22 }, { 0x1.fffffe47ccb60p-1, 0x1.b551c847fbc96p-22 }, { 0x1.fffffe62534d4p-1, 0x1.9bc09f112b494p-22 }, { 0x1.fffffe7b4c81ep-1, 0x1.83a1ff0aa239dp-22 }, { 0x1.fffffe92ced93p-1, 0x1.6ce1aa3fd7bddp-22 }, { 0x1.fffffea8ef9cfp-1, 0x1.576c72b514859p-22 }, { 0x1.fffffebdc2ec6p-1, 0x1.43302cc4a0da8p-22 }, { 0x1.fffffed15bcbap-1, 0x1.301ba221dc9bbp-22 }, { 0x1.fffffee3cc32cp-1, 0x1.1e1e857adc568p-22 }, { 0x1.fffffef5251c2p-1, 0x1.0d2966b1746f7p-22 }, { 0x1.ffffff0576917p-1, 0x1.fa5b4f49cc6b2p-23 }, { 0x1.ffffff14cfb92p-1, 0x1.dc3ae30b55c16p-23 }, { 0x1.ffffff233ee1dp-1, 0x1.bfd7555a3bd68p-23 }, { 0x1.ffffff30d18e8p-1, 0x1.a517d9e61628ap-23 }, { 0x1.ffffff3d9480fp-1, 0x1.8be4f8f6c951fp-23 }, { 0x1.ffffff4993c46p-1, 0x1.74287ded49339p-23 }, { 0x1.ffffff54dab72p-1, 0x1.5dcd669f2cd34p-23 }, { 0x1.ffffff5f74141p-1, 0x1.48bfd38302870p-23 }, { 0x1.ffffff6969fb8p-1, 0x1.34ecf8a3c124ap-23 }, { 0x1.ffffff72c5fb6p-1, 0x1.22430f521cbcfp-23 }, { 
0x1.ffffff7b91176p-1, 0x1.10b1488aeb235p-23 }, { 0x1.ffffff83d3d07p-1, 0x1.0027c00a263a6p-23 }, { 0x1.ffffff8b962bep-1, 0x1.e12ee004efc37p-24 }, { 0x1.ffffff92dfba2p-1, 0x1.c3e44ae32b16bp-24 }, { 0x1.ffffff99b79d2p-1, 0x1.a854ea14102a8p-24 }, { 0x1.ffffffa0248e8p-1, 0x1.8e6761569f45dp-24 }, { 0x1.ffffffa62ce54p-1, 0x1.7603bac345f65p-24 }, { 0x1.ffffffabd69b4p-1, 0x1.5f1353cdad001p-24 }, { 0x1.ffffffb127525p-1, 0x1.4980cb3c80949p-24 }, { 0x1.ffffffb624592p-1, 0x1.3537f00b6ad4dp-24 }, { 0x1.ffffffbad2affp-1, 0x1.2225b12bffc68p-24 }, { 0x1.ffffffbf370cdp-1, 0x1.10380e1adb7e9p-24 }, { 0x1.ffffffc355dfdp-1, 0x1.febc107d5efaap-25 }, { 0x1.ffffffc733572p-1, 0x1.df0f2a0ee6946p-25 }, { 0x1.ffffffcad3626p-1, 0x1.c14b2188bcee4p-25 }, { 0x1.ffffffce39b67p-1, 0x1.a553644f7f07dp-25 }, { 0x1.ffffffd169d0cp-1, 0x1.8b0cfce0579dfp-25 }, { 0x1.ffffffd466fa5p-1, 0x1.725e7c5dd20f7p-25 }, { 0x1.ffffffd7344aap-1, 0x1.5b2fe547a1340p-25 }, { 0x1.ffffffd9d4aabp-1, 0x1.456a974e92e93p-25 }, { 0x1.ffffffdc4ad7ap-1, 0x1.30f93c3699078p-25 }, { 0x1.ffffffde9964ep-1, 0x1.1dc7b5b978cf8p-25 }, { 0x1.ffffffe0c2bf0p-1, 0x1.0bc30c5d52f15p-25 }, { 0x1.ffffffe2c92dbp-1, 0x1.f5b2be65a0c7fp-26 }, { 0x1.ffffffe4aed5ep-1, 0x1.d5f3a8dea7357p-26 }, { 0x1.ffffffe675bbdp-1, 0x1.b82915b03515bp-26 }, { 0x1.ffffffe81fc4ep-1, 0x1.9c3517e789488p-26 }, { 0x1.ffffffe9aeb97p-1, 0x1.81fb7df06136ep-26 }, { 0x1.ffffffeb24467p-1, 0x1.6961b8d641d06p-26 }, { 0x1.ffffffec81ff2p-1, 0x1.524ec4d916caep-26 }, { 0x1.ffffffedc95e7p-1, 0x1.3cab1343d18d1p-26 }, { 0x1.ffffffeefbc85p-1, 0x1.2860757487a01p-26 }, { 0x1.fffffff01a8b6p-1, 0x1.155a09065d4f7p-26 }, { 0x1.fffffff126e1ep-1, 0x1.0384250e4c9fcp-26 }, { 0x1.fffffff221f30p-1, 0x1.e59890b926c78p-27 }, { 0x1.fffffff30cd3fp-1, 0x1.c642116a8a9e3p-27 }, { 0x1.fffffff3e8892p-1, 0x1.a8e405e651ab6p-27 }, { 0x1.fffffff4b606fp-1, 0x1.8d5f98114f872p-27 }, { 0x1.fffffff57632dp-1, 0x1.7397c5a66e307p-27 }, { 0x1.fffffff629e44p-1, 0x1.5b71456c5a4c4p-27 }, { 0x1.fffffff6d1e56p-1, 0x1.44d26de513197p-27 }, { 0x1.fffffff76ef3fp-1, 0x1.2fa31d6371537p-27 }, { 0x1.fffffff801c1fp-1, 0x1.1bcca373b7b43p-27 }, { 0x1.fffffff88af67p-1, 0x1.0939ab853339fp-27 }, { 0x1.fffffff90b2e3p-1, 0x1.efac5187b2863p-28 }, { 0x1.fffffff982fc1p-1, 0x1.cf1e86235d0e6p-28 }, { 0x1.fffffff9f2e9fp-1, 0x1.b0a68a2128babp-28 }, { 0x1.fffffffa5b790p-1, 0x1.9423165bc4444p-28 }, { 0x1.fffffffabd229p-1, 0x1.7974e743dea3cp-28 }, { 0x1.fffffffb18582p-1, 0x1.607e9eacd1050p-28 }, { 0x1.fffffffb6d844p-1, 0x1.4924a74dec728p-28 }, { 0x1.fffffffbbd0aap-1, 0x1.334d19e0c2160p-28 }, { 0x1.fffffffc0748fp-1, 0x1.1edfa3c5f5ccap-28 }, { 0x1.fffffffc4c96cp-1, 0x1.0bc56f1b54701p-28 }, { 0x1.fffffffc8d462p-1, 0x1.f3d2185e047d9p-29 }, { 0x1.fffffffcc9a41p-1, 0x1.d26cb87945e87p-29 }, { 0x1.fffffffd01f89p-1, 0x1.b334fac4b9f99p-29 }, { 0x1.fffffffd36871p-1, 0x1.96076f7918d1cp-29 }, { 0x1.fffffffd678edp-1, 0x1.7ac2d72fc2c63p-29 }, { 0x1.fffffffd954aep-1, 0x1.614801550319ep-29 }, { 0x1.fffffffdbff2ap-1, 0x1.4979ac8b28926p-29 }, { 0x1.fffffffde7ba0p-1, 0x1.333c68e2d0548p-29 }, { 0x1.fffffffe0cd16p-1, 0x1.1e767bce37dd7p-29 }, { 0x1.fffffffe2f664p-1, 0x1.0b0fc5b6d05a0p-29 }, { 0x1.fffffffe4fa30p-1, 0x1.f1e3523b41d7dp-30 }, { 0x1.fffffffe6daf7p-1, 0x1.d00de6608effep-30 }, { 0x1.fffffffe89b0cp-1, 0x1.b0778b7b3301ap-30 }, { 0x1.fffffffea3c9ap-1, 0x1.92fb04ec0f6cfp-30 }, { 0x1.fffffffebc1a9p-1, 0x1.77756ec9f78fap-30 }, { 0x1.fffffffed2c21p-1, 0x1.5dc61922d5a06p-30 }, { 0x1.fffffffee7dc8p-1, 0x1.45ce65699ff6dp-30 }, { 0x1.fffffffefb847p-1, 0x1.2f71a5f159970p-30 }, { 0x1.ffffffff0dd2bp-1, 
0x1.1a94ff571654fp-30 }, { 0x1.ffffffff1ede9p-1, 0x1.071f4bbea09ecp-30 }, { 0x1.ffffffff2ebdap-1, 0x1.e9f1ff8ddd774p-31 }, { 0x1.ffffffff3d843p-1, 0x1.c818223a202c7p-31 }, { 0x1.ffffffff4b453p-1, 0x1.a887bd2b4404dp-31 }, { 0x1.ffffffff58126p-1, 0x1.8b1a336c5eb6bp-31 }, { 0x1.ffffffff63fc3p-1, 0x1.6fab63324088ap-31 }, { 0x1.ffffffff6f121p-1, 0x1.56197e30205bap-31 }, { 0x1.ffffffff79626p-1, 0x1.3e44e45301b92p-31 }, { 0x1.ffffffff82fabp-1, 0x1.281000bfe4c3fp-31 }, { 0x1.ffffffff8be77p-1, 0x1.135f28f2d50b4p-31 }, { 0x1.ffffffff94346p-1, 0x1.00187dded5975p-31 }, { 0x1.ffffffff9bec8p-1, 0x1.dc479de0ef001p-32 }, { 0x1.ffffffffa319fp-1, 0x1.bad4fdad3caa1p-32 }, { 0x1.ffffffffa9c63p-1, 0x1.9baed3ed27ab8p-32 }, { 0x1.ffffffffaffa4p-1, 0x1.7ead9ce4285bbp-32 }, { 0x1.ffffffffb5be5p-1, 0x1.63ac6b4edc88ep-32 }, { 0x1.ffffffffbb1a2p-1, 0x1.4a88be2a6390cp-32 }, { 0x1.ffffffffc014ep-1, 0x1.332259185f1a0p-32 }, { 0x1.ffffffffc4b56p-1, 0x1.1d5b1f3793044p-32 }, { 0x1.ffffffffc901cp-1, 0x1.0916f04b6e18bp-32 }, { 0x1.ffffffffccfffp-1, 0x1.ec77101de6926p-33 }, { 0x1.ffffffffd0b56p-1, 0x1.c960bf23153e0p-33 }, { 0x1.ffffffffd4271p-1, 0x1.a8bd20fc65ef7p-33 }, { 0x1.ffffffffd759dp-1, 0x1.8a61745ec7d1dp-33 }, { 0x1.ffffffffda520p-1, 0x1.6e25d0e756261p-33 }, { 0x1.ffffffffdd13cp-1, 0x1.53e4f7d1666cbp-33 }, { 0x1.ffffffffdfa2dp-1, 0x1.3b7c27a7ddb0ep-33 }, { 0x1.ffffffffe202dp-1, 0x1.24caf2c32af14p-33 }, { 0x1.ffffffffe4371p-1, 0x1.0fb3186804d0fp-33 }, { 0x1.ffffffffe642ap-1, 0x1.f830c0bb41fd7p-34 }, { 0x1.ffffffffe8286p-1, 0x1.d3c0f1a91c846p-34 }, { 0x1.ffffffffe9eb0p-1, 0x1.b1e5acf351d87p-34 }, { 0x1.ffffffffeb8d0p-1, 0x1.92712d259ce66p-34 }, { 0x1.ffffffffed10ap-1, 0x1.7538c60a04476p-34 }, { 0x1.ffffffffee782p-1, 0x1.5a14b04b47879p-34 }, { 0x1.ffffffffefc57p-1, 0x1.40dfd87456f4cp-34 }, { 0x1.fffffffff0fa7p-1, 0x1.2977b1172b9d5p-34 }, { 0x1.fffffffff218fp-1, 0x1.13bc07e891491p-34 }, { 0x1.fffffffff3227p-1, 0x1.ff1dbb4300811p-35 }, { 0x1.fffffffff4188p-1, 0x1.d9a880f306bd8p-35 }, { 0x1.fffffffff4fc9p-1, 0x1.b6e45220b55e0p-35 }, { 0x1.fffffffff5cfdp-1, 0x1.96a0b33f2c4dap-35 }, { 0x1.fffffffff6939p-1, 0x1.78b07e9e924acp-35 }, { 0x1.fffffffff748ep-1, 0x1.5ce9ab1670dd2p-35 }, { 0x1.fffffffff7f0dp-1, 0x1.4325167006bb0p-35 }, { 0x1.fffffffff88c5p-1, 0x1.2b3e53538ff3fp-35 }, { 0x1.fffffffff91c6p-1, 0x1.15137a7f44864p-35 }, { 0x1.fffffffff9a1bp-1, 0x1.0084ff125639dp-35 }, { 0x1.fffffffffa1d2p-1, 0x1.daeb0b7311ec7p-36 }, { 0x1.fffffffffa8f6p-1, 0x1.b7937d1c40c52p-36 }, { 0x1.fffffffffaf92p-1, 0x1.96d082f59ab06p-36 }, { 0x1.fffffffffb5b0p-1, 0x1.7872d9fa10aadp-36 }, { 0x1.fffffffffbb58p-1, 0x1.5c4e8e37bc7d0p-36 }, { 0x1.fffffffffc095p-1, 0x1.423ac0df49a40p-36 }, { 0x1.fffffffffc56dp-1, 0x1.2a117230ad284p-36 }, { 0x1.fffffffffc9e8p-1, 0x1.13af4f04f9998p-36 }, { 0x1.fffffffffce0dp-1, 0x1.fde703724e560p-37 }, { 0x1.fffffffffd1e1p-1, 0x1.d77f0c82e7641p-37 }, { 0x1.fffffffffd56cp-1, 0x1.b3ee02611d7ddp-37 }, { 0x1.fffffffffd8b3p-1, 0x1.92ff33023d5bdp-37 }, { 0x1.fffffffffdbbap-1, 0x1.7481a9e69f53fp-37 }, { 0x1.fffffffffde86p-1, 0x1.5847eda620959p-37 }, { 0x1.fffffffffe11dp-1, 0x1.3e27c1fcc74bdp-37 }, { 0x1.fffffffffe380p-1, 0x1.25f9ee0b923dcp-37 }, { 0x1.fffffffffe5b6p-1, 0x1.0f9a0686531ffp-37 }, { 0x1.fffffffffe7c0p-1, 0x1.f5cc7718082afp-38 }, { 0x1.fffffffffe9a2p-1, 0x1.cf7e53d6a2ca5p-38 }, { 0x1.fffffffffeb60p-1, 0x1.ac0f5f3229372p-38 }, { 0x1.fffffffffecfbp-1, 0x1.8b498644847eap-38 }, { 0x1.fffffffffee77p-1, 0x1.6cfa9bcca59dcp-38 }, { 0x1.fffffffffefd6p-1, 0x1.50f411d4fd2cdp-38 }, { 0x1.ffffffffff11ap-1, 0x1.370ab8327af5ep-38 }, { 
0x1.ffffffffff245p-1, 0x1.1f167f88c6b6ep-38 }, { 0x1.ffffffffff359p-1, 0x1.08f24085d4597p-38 }, { 0x1.ffffffffff457p-1, 0x1.e8f70e181d619p-39 }, { 0x1.ffffffffff542p-1, 0x1.c324c20e337dcp-39 }, { 0x1.ffffffffff61bp-1, 0x1.a03261574b54ep-39 }, { 0x1.ffffffffff6e3p-1, 0x1.7fe903cdf5855p-39 }, { 0x1.ffffffffff79bp-1, 0x1.6215c58da3450p-39 }, { 0x1.ffffffffff845p-1, 0x1.46897d4b69fc6p-39 }, { 0x1.ffffffffff8e2p-1, 0x1.2d1877d731b7bp-39 }, { 0x1.ffffffffff973p-1, 0x1.159a386b11517p-39 }, { 0x1.ffffffffff9f8p-1, 0x1.ffd27ae9393cep-40 }, { 0x1.ffffffffffa73p-1, 0x1.d7c593130dd0bp-40 }, { 0x1.ffffffffffae4p-1, 0x1.b2cd607c79bcfp-40 }, { 0x1.ffffffffffb4cp-1, 0x1.90ae4d3405651p-40 }, { 0x1.ffffffffffbadp-1, 0x1.71312dd1759e2p-40 }, { 0x1.ffffffffffc05p-1, 0x1.5422ef5d8949dp-40 }, { 0x1.ffffffffffc57p-1, 0x1.39544b0ecc957p-40 }, { 0x1.ffffffffffca2p-1, 0x1.20997f73e73ddp-40 }, { 0x1.ffffffffffce7p-1, 0x1.09ca0eaacd277p-40 }, { 0x1.ffffffffffd27p-1, 0x1.e9810295890ecp-41 }, { 0x1.ffffffffffd62p-1, 0x1.c2b45b5aa4a1dp-41 }, { 0x1.ffffffffffd98p-1, 0x1.9eee068fa7596p-41 }, { 0x1.ffffffffffdcap-1, 0x1.7df2b399c10a8p-41 }, { 0x1.ffffffffffdf8p-1, 0x1.5f8b87a31bd85p-41 }, { 0x1.ffffffffffe22p-1, 0x1.4385c96e9a2d9p-41 }, { 0x1.ffffffffffe49p-1, 0x1.29b2933ef4cbcp-41 }, { 0x1.ffffffffffe6cp-1, 0x1.11e68a6378f8ap-41 }, { 0x1.ffffffffffe8dp-1, 0x1.f7f338086a86bp-42 }, { 0x1.ffffffffffeabp-1, 0x1.cf8d7d9ce040ap-42 }, { 0x1.ffffffffffec7p-1, 0x1.aa577251ae484p-42 }, { 0x1.ffffffffffee1p-1, 0x1.8811d739efb5ep-42 }, { 0x1.ffffffffffef8p-1, 0x1.68823e52970bep-42 }, { 0x1.fffffffffff0ep-1, 0x1.4b72ae68e8b4cp-42 }, { 0x1.fffffffffff22p-1, 0x1.30b14dbe876bcp-42 }, { 0x1.fffffffffff34p-1, 0x1.181012ef86610p-42 }, { 0x1.fffffffffff45p-1, 0x1.01647ba798744p-42 }, { 0x1.fffffffffff54p-1, 0x1.d90e917701675p-43 }, { 0x1.fffffffffff62p-1, 0x1.b2a87e86d0c8ap-43 }, { 0x1.fffffffffff6fp-1, 0x1.8f53dcb377293p-43 }, { 0x1.fffffffffff7bp-1, 0x1.6ed2f2515e933p-43 }, { 0x1.fffffffffff86p-1, 0x1.50ecc9ed47f19p-43 }, { 0x1.fffffffffff90p-1, 0x1.356cd5ce7799ep-43 }, { 0x1.fffffffffff9ap-1, 0x1.1c229a587ab78p-43 }, { 0x1.fffffffffffa2p-1, 0x1.04e15ecc7f3f6p-43 }, { 0x1.fffffffffffaap-1, 0x1.deffc7e6a6017p-44 }, { 0x1.fffffffffffb1p-1, 0x1.b7b040832f310p-44 }, { 0x1.fffffffffffb8p-1, 0x1.938e021f36d76p-44 }, { 0x1.fffffffffffbep-1, 0x1.7258610b3b233p-44 }, { 0x1.fffffffffffc3p-1, 0x1.53d3bfc82a909p-44 }, { 0x1.fffffffffffc8p-1, 0x1.37c92babdc2fdp-44 }, { 0x1.fffffffffffcdp-1, 0x1.1e06010120f6ap-44 }, { 0x1.fffffffffffd1p-1, 0x1.065b9616170d4p-44 }, { 0x1.fffffffffffd5p-1, 0x1.e13dd96b3753ap-45 }, { 0x1.fffffffffffd9p-1, 0x1.b950d32467392p-45 }, { 0x1.fffffffffffdcp-1, 0x1.94a72263259a5p-45 }, { 0x1.fffffffffffdfp-1, 0x1.72fd93e036cdcp-45 }, { 0x1.fffffffffffe2p-1, 0x1.54164576929abp-45 }, { 0x1.fffffffffffe4p-1, 0x1.37b83c521fe96p-45 }, { 0x1.fffffffffffe7p-1, 0x1.1daf033182e96p-45 }, { 0x1.fffffffffffe9p-1, 0x1.05ca50205d26ap-45 }, { 0x1.fffffffffffebp-1, 0x1.dfbb6235639fap-46 }, { 0x1.fffffffffffedp-1, 0x1.b7807e294781fp-46 }, { 0x1.fffffffffffeep-1, 0x1.9298add70a734p-46 }, { 0x1.ffffffffffff0p-1, 0x1.70beaf9c7ffb6p-46 }, { 0x1.ffffffffffff1p-1, 0x1.51b2cd6709222p-46 }, { 0x1.ffffffffffff3p-1, 0x1.353a6cf7f7fffp-46 }, { 0x1.ffffffffffff4p-1, 0x1.1b1fa8cbe84a7p-46 }, { 0x1.ffffffffffff5p-1, 0x1.0330f0fd69921p-46 }, { 0x1.ffffffffffff6p-1, 0x1.da81670f96f9bp-47 }, { 0x1.ffffffffffff7p-1, 0x1.b24a16b4d09aap-47 }, { 0x1.ffffffffffff7p-1, 0x1.8d6eeb6efdbd6p-47 }, { 0x1.ffffffffffff8p-1, 0x1.6ba91ac734785p-47 }, { 0x1.ffffffffffff9p-1, 
0x1.4cb7966770ab5p-47 }, { 0x1.ffffffffffff9p-1, 0x1.305e9721d0981p-47 }, { 0x1.ffffffffffffap-1, 0x1.1667311fff70ap-47 }, { 0x1.ffffffffffffbp-1, 0x1.fd3de10d62855p-48 }, { 0x1.ffffffffffffbp-1, 0x1.d1aefbcd48d0cp-48 }, { 0x1.ffffffffffffbp-1, 0x1.a9cc93c25aca9p-48 }, { 0x1.ffffffffffffcp-1, 0x1.85487ee3ea735p-48 }, { 0x1.ffffffffffffcp-1, 0x1.63daf8b4b1e0cp-48 }, { 0x1.ffffffffffffdp-1, 0x1.45421e69a6ca1p-48 }, { 0x1.ffffffffffffdp-1, 0x1.294175802d99ap-48 }, { 0x1.ffffffffffffdp-1, 0x1.0fa17bf41068fp-48 }, { 0x1.ffffffffffffdp-1, 0x1.f05e82aae2bb9p-49 }, { 0x1.ffffffffffffep-1, 0x1.c578101b29058p-49 }, { 0x1.ffffffffffffep-1, 0x1.9e39dc5dd2f7cp-49 }, { 0x1.ffffffffffffep-1, 0x1.7a553a728bbf2p-49 }, { 0x1.ffffffffffffep-1, 0x1.5982008db1304p-49 }, { 0x1.ffffffffffffep-1, 0x1.3b7e00422e51bp-49 }, { 0x1.ffffffffffffep-1, 0x1.200c898d9ee3ep-49 }, { 0x1.fffffffffffffp-1, 0x1.06f5f7eb65a56p-49 }, { 0x1.fffffffffffffp-1, 0x1.e00e9148a1d25p-50 }, { 0x1.fffffffffffffp-1, 0x1.b623734024e92p-50 }, { 0x1.fffffffffffffp-1, 0x1.8fd4e01891bf8p-50 }, { 0x1.fffffffffffffp-1, 0x1.6cd44c7470d89p-50 }, { 0x1.fffffffffffffp-1, 0x1.4cd9c04158cd7p-50 }, { 0x1.fffffffffffffp-1, 0x1.2fa34bf5c8344p-50 }, { 0x1.fffffffffffffp-1, 0x1.14f4890ff2461p-50 }, { 0x1.fffffffffffffp-1, 0x1.f92c49dfa4df5p-51 }, { 0x1.fffffffffffffp-1, 0x1.ccaaea71ab0dfp-51 }, { 0x1.fffffffffffffp-1, 0x1.a40829f001197p-51 }, { 0x1.0000000000000p+0, 0x1.7eef13b59e96cp-51 }, { 0x1.0000000000000p+0, 0x1.5d11e1a252bf5p-51 }, { 0x1.0000000000000p+0, 0x1.3e296303b2297p-51 }, { 0x1.0000000000000p+0, 0x1.21f47009f43cep-51 }, { 0x1.0000000000000p+0, 0x1.083768c5e4541p-51 }, { 0x1.0000000000000p+0, 0x1.e1777d831265ep-52 }, { 0x1.0000000000000p+0, 0x1.b69f10b0191b5p-52 }, { 0x1.0000000000000p+0, 0x1.8f8a3a05b5b52p-52 }, { 0x1.0000000000000p+0, 0x1.6be573c40c8e7p-52 }, { 0x1.0000000000000p+0, 0x1.4b645ba991fdbp-52 }, { 0x1.0000000000000p+0, 0x1.2dc119095729fp-52 }, }, }; diff --git a/contrib/arm-optimized-routines/pl/math/erfc_data.c b/contrib/arm-optimized-routines/math/aarch64/v_erfc_data.c similarity index 99% rename from contrib/arm-optimized-routines/pl/math/erfc_data.c rename to contrib/arm-optimized-routines/math/aarch64/v_erfc_data.c index 40f72a4d6d5b..6acd96f74be5 100644 --- a/contrib/arm-optimized-routines/pl/math/erfc_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/v_erfc_data.c @@ -1,3507 +1,3507 @@ /* * Data used in double-precision erfc(x) function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -/* Lookup table used in erfc. +/* Lookup table used in vector erfc. For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = ~27.0 (3488 values): - - the first entry __erfc_data.tab.erfc contains the values of erfc(r), - - the second entry __erfc_data.tab.scale contains the values of + - the first entry __v_erfc_data.tab.erfc contains the values of erfc(r), + - the second entry __v_erfc_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Both values may go into subnormal range, therefore they are scaled by a large enough value 2^128 (fits in 8bit). 
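   Concretely, each entry holds 2^128 * erfc(r) alongside
   2^128 * 2/sqrt(pi) * exp(-r^2), which keeps both columns normal even
   where the unscaled values would be subnormal; a consumer undoes this
   by multiplying the assembled result by 0x1p-128 (the exponent 128 is
   what "fits in 8bit" above refers to).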
*/ -const struct erfc_data __erfc_data = { +const struct v_erfc_data __v_erfc_data = { .tab = { { 0x1p128, 0x1.20dd750429b6dp128 }, { 0x1.fb7c9030853b3p127, 0x1.20d8f1975c85dp128 }, { 0x1.f6f9447be0743p127, 0x1.20cb67bd452c7p128 }, { 0x1.f27640f9853d9p127, 0x1.20b4d8bac36c1p128 }, { 0x1.edf3a9ba22dadp127, 0x1.209546ad13ccfp128 }, { 0x1.e971a2c4436aep127, 0x1.206cb4897b148p128 }, { 0x1.e4f05010eca8cp127, 0x1.203b261cd0053p128 }, { 0x1.e06fd58842c7ep127, 0x1.2000a00ae3804p128 }, { 0x1.dbf056fe2df35p127, 0x1.1fbd27cdc72d3p128 }, { 0x1.d771f82f02f4ep127, 0x1.1f70c3b4f2cc8p128 }, { 0x1.d2f4dcbc2f894p127, 0x1.1f1b7ae44867fp128 }, { 0x1.ce792828eae5cp127, 0x1.1ebd5552f795bp128 }, { 0x1.c9fefdd6eaf19p127, 0x1.1e565bca400d4p128 }, { 0x1.c58681031eb6ap127, 0x1.1de697e413d29p128 }, { 0x1.c10fd4c26e896p127, 0x1.1d6e14099944ap128 }, { 0x1.bc9b1bfe82687p127, 0x1.1cecdb718d61cp128 }, { 0x1.b82879728f11ep127, 0x1.1c62fa1e869b6p128 }, { 0x1.b3b80fa82a4bbp127, 0x1.1bd07cdd189acp128 }, { 0x1.af4a00f426daap127, 0x1.1b357141d95d5p128 }, { 0x1.aade6f7378a0ep127, 0x1.1a91e5a748165p128 }, { 0x1.a6757d08215d8p127, 0x1.19e5e92b964abp128 }, { 0x1.a20f4b5626818p127, 0x1.19318bae53a04p128 }, { 0x1.9dabfbc090901p127, 0x1.1874ddcdfce24p128 }, { 0x1.994baf66747adp127, 0x1.17aff0e56ec1p128 }, { 0x1.94ee8720076b6p127, 0x1.16e2d7093cd8cp128 }, { 0x1.9094a37bbd66ep127, 0x1.160da304ed92fp128 }, { 0x1.8c3e24bb73372p127, 0x1.153068581b781p128 }, { 0x1.87eb2ad1a4032p127, 0x1.144b3b337c90cp128 }, { 0x1.839bd55eaafc8p127, 0x1.135e3075d076bp128 }, { 0x1.7f5043ae11862p127, 0x1.12695da8b5bdep128 }, { 0x1.7b0894b3ea35cp127, 0x1.116cd8fd67618p128 }, { 0x1.76c4e70a390e7p127, 0x1.1068b94962e5ep128 }, { 0x1.728558ee694fcp127, 0x1.0f5d1602f7e41p128 }, { 0x1.6e4a083ed132fp127, 0x1.0e4a073dc1b91p128 }, { 0x1.6a13127843ec1p127, 0x1.0d2fa5a70c168p128 }, { 0x1.65e094b3b2413p127, 0x1.0c0e0a8223359p128 }, { 0x1.61b2aba3da093p127, 0x1.0ae54fa490723p128 }, { 0x1.5d89739304dcfp127, 0x1.09b58f724416bp128 }, { 0x1.59650860d6469p127, 0x1.087ee4d9ad247p128 }, { 0x1.5545858029b39p127, 0x1.07416b4fbfe7cp128 }, { 0x1.512b05f5006e1p127, 0x1.05fd3ecbec298p128 }, { 0x1.4d15a4527fdc7p127, 0x1.04b27bc403d3p128 }, { 0x1.49057ab900447p127, 0x1.03613f2812dafp128 }, { 0x1.44faa2d42c4ap127, 0x1.0209a65e29545p128 }, { 0x1.40f535d93160ep127, 0x1.00abcf3e187a9p128 }, { 0x1.3cf54c850162p127, 0x1.fe8fb01a47307p127 }, { 0x1.38faff1aa574ap127, 0x1.fbbbbef34b4b2p127 }, { 0x1.35066561a275dp127, 0x1.f8dc092d58ff8p127 }, { 0x1.311796a46f064p127, 0x1.f5f0cdaf15313p127 }, { 0x1.2d2ea9aefb636p127, 0x1.f2fa4c16c0019p127 }, { 0x1.294bb4cd4b2bdp127, 0x1.eff8c4b1375dbp127 }, { 0x1.256ecdca212ccp127, 0x1.ecec7870ebca8p127 }, { 0x1.219809edbd524p127, 0x1.e9d5a8e4c934ep127 }, { 0x1.1dc77dfcacd02p127, 0x1.e6b4982f158b9p127 }, { 0x1.19fd3e36ac96ap127, 0x1.e38988fc46e72p127 }, { 0x1.16395e559e218p127, 0x1.e054be79d3042p127 }, { 0x1.127bf18c8eadcp127, 0x1.dd167c4cf9d2ap127 }, { 0x1.0ec50a86d0dd4p127, 0x1.d9cf06898cdafp127 }, { 0x1.0b14bb6728cd8p127, 0x1.d67ea1a8b5368p127 }, { 0x1.076b15c70aa28p127, 0x1.d325927fb9d89p127 }, { 0x1.03c82ab5eb831p127, 0x1.cfc41e36c7df9p127 }, { 0x1.002c0ab8a5018p127, 0x1.cc5a8a3fbea4p127 }, { 0x1.f92d8b91d5cc7p126, 0x1.c8e91c4d01368p127 }, { 0x1.f210d6a9a6a31p126, 0x1.c5701a484ef9dp127 }, { 0x1.eb02147ce245cp126, 0x1.c1efca49a5011p127 }, { 0x1.e40161b701275p126, 0x1.be68728e29d5ep127 }, { 0x1.dd0ed9ea4bdd6p126, 0x1.bada596f25436p127 }, { 0x1.d62a978f7c957p126, 0x1.b745c55905bf8p127 }, { 0x1.cf54b4058455fp126, 0x1.b3aafcc27502ep127 }, { 0x1.c88d479173ccep126, 
0x1.b00a46237d5bep127 }, { 0x1.c1d4695e87644p126, 0x1.ac63e7ecc1411p127 }, { 0x1.bb2a2f7e5652p126, 0x1.a8b8287ec6a09p127 }, { 0x1.b48eaee924501p126, 0x1.a5074e215762p127 }, { 0x1.ae01fb7e55a66p126, 0x1.a1519efaf889ep127 }, { 0x1.a78428050527ep126, 0x1.9d97610879642p127 }, { 0x1.a115462cbbc17p126, 0x1.99d8da149c13fp127 }, { 0x1.9ab5668e4930ap126, 0x1.96164fafd8de3p127 }, { 0x1.946498acbd766p126, 0x1.925007283d7aap127 }, { 0x1.8e22eaf68291ep126, 0x1.8e86458169af8p127 }, { 0x1.87f06ac6960c4p126, 0x1.8ab94f6caa71dp127 }, { 0x1.81cd2465e1d96p126, 0x1.86e9694134b9ep127 }, { 0x1.7bb9230cb40b4p126, 0x1.8316d6f48133dp127 }, { 0x1.75b470e454d35p126, 0x1.7f41dc12c9e89p127 }, { 0x1.6fbf1708ba47cp126, 0x1.7b6abbb7aaf19p127 }, { 0x1.69d91d8a595dap126, 0x1.7791b886e7403p127 }, { 0x1.64028b7013867p126, 0x1.73b714a552763p127 }, { 0x1.5e3b66b9405a9p126, 0x1.6fdb11b1e0c34p127 }, { 0x1.5883b45fd2b63p126, 0x1.6bfdf0beddaf5p127 }, { 0x1.52db785a98acap126, 0x1.681ff24b4ab04p127 }, { 0x1.4d42b59f95afap126, 0x1.6441563c665d4p127 }, { 0x1.47b96e267647ap126, 0x1.60625bd75d07bp127 }, { 0x1.423fa2eb1cb59p126, 0x1.5c8341bb23767p127 }, { 0x1.3cd553f045d45p126, 0x1.58a445da7c74cp127 }, { 0x1.377a8042458d1p126, 0x1.54c5a57629dbp127 }, { 0x1.322f25f9da2fdp126, 0x1.50e79d1749ac9p127 }, { 0x1.2cf3423f15fdfp126, 0x1.4d0a6889dfd9fp127 }, { 0x1.27c6d14c5e341p126, 0x1.492e42d78d2c5p127 }, { 0x1.22a9ce717edcbp126, 0x1.4553664273d24p127 }, { 0x1.1d9c3416d2b4bp126, 0x1.417a0c4049fdp127 }, { 0x1.189dfbc07e69p126, 0x1.3da26d759aef5p127 }, { 0x1.13af1e11be721p126, 0x1.39ccc1b136d5ap127 }, { 0x1.0ecf92d046d22p126, 0x1.35f93fe7d1b3dp127 }, { 0x1.09ff50e7b3f93p126, 0x1.32281e2fd1a92p127 }, { 0x1.053e4e6d0c10bp126, 0x1.2e5991bd4cbfcp127 }, { 0x1.008c80a24ff1p126, 0x1.2a8dcede3673bp127 }, { 0x1.f7d3b7f436013p125, 0x1.26c508f6bd0ffp127 }, { 0x1.eeaca836a27ccp125, 0x1.22ff727dd6f7bp127 }, { 0x1.e5a3b7c9b56dap125, 0x1.1f3d3cf9ffe5ap127 }, { 0x1.dcb8cae2d747fp125, 0x1.1b7e98fe26217p127 }, { 0x1.d3ebc436b0f26p125, 0x1.17c3b626c7a12p127 }, { 0x1.cb3c8500ea349p125, 0x1.140cc3173f007p127 }, { 0x1.c2aaed0bfcfeep125, 0x1.1059ed7740313p127 }, { 0x1.ba36dab91c0e9p125, 0x1.0cab61f084b93p127 }, { 0x1.b1e02b082b72p125, 0x1.09014c2ca74dap127 }, { 0x1.a9a6b99fc973bp125, 0x1.055bd6d32e8d7p127 }, { 0x1.a18a60d56673ep125, 0x1.01bb2b87c6968p127 }, { 0x1.998af9b56a3aep125, 0x1.fc3ee5d1524bp126 }, { 0x1.91a85c0b65519p125, 0x1.f511a91a67d2ap126 }, { 0x1.89e25e6a4cef9p125, 0x1.edeeee0959518p126 }, { 0x1.8238d634c0127p125, 0x1.e6d6ffaa65a25p126 }, { 0x1.7aab97a554544p125, 0x1.dfca26f5bbf88p126 }, { 0x1.733a75d6e91b8p125, 0x1.d8c8aace11e63p126 }, { 0x1.6be542ccffc2fp125, 0x1.d1d2cfff91594p126 }, { 0x1.64abcf7c175b4p125, 0x1.cae8d93f1d7b7p126 }, { 0x1.5d8debd20aacep125, 0x1.c40b0729ed548p126 }, { 0x1.568b66be6f268p125, 0x1.bd3998457afdbp126 }, { 0x1.4fa40e3af3674p125, 0x1.b674c8ffc6283p126 }, { 0x1.48d7af53bc19fp125, 0x1.afbcd3afe8ab6p126 }, { 0x1.4226162fbddd5p125, 0x1.a911f096fbc26p126 }, { 0x1.3b8f0e1912f7p125, 0x1.a27455e14c93cp126 }, { 0x1.351261854b991p125, 0x1.9be437a7de946p126 }, { 0x1.2eafda1db784ap125, 0x1.9561c7f23a47bp126 }, { 0x1.286740c7a7dabp125, 0x1.8eed36b886d93p126 }, { 0x1.22385daca7f47p125, 0x1.8886b1e5ecfd1p126 }, { 0x1.1c22f842ac1f2p125, 0x1.822e655b417e7p126 }, { 0x1.1626d7543522p125, 0x1.7be47af1f5d89p126 }, { 0x1.1043c1086777dp125, 0x1.75a91a7f4d2edp126 }, { 0x1.0a797aeb152f2p125, 0x1.6f7c69d7d3ef8p126 }, { 0x1.04c7c9f4b969p125, 0x1.695e8cd31867ep126 }, { 0x1.fe5ce524c8ee5p124, 0x1.634fa54fa285fp126 }, { 0x1.f35a715b2f3e1p124, 
0x1.5d4fd33729015p126 }, { 0x1.e887bf681f218p124, 0x1.575f3483021c3p126 }, { 0x1.dde4553ef94dep124, 0x1.517de540ce2a3p126 }, { 0x1.d36fb7fa50177p124, 0x1.4babff975a04cp126 }, { 0x1.c9296beb09cf1p124, 0x1.45e99bcbb7915p126 }, { 0x1.bf10f4a759889p124, 0x1.4036d0468a7a2p126 }, { 0x1.b525d5198cb1cp124, 0x1.3a93b1998736cp126 }, { 0x1.ab678f8eabedbp124, 0x1.35005285227f1p126 }, { 0x1.a1d5a5c4edb96p124, 0x1.2f7cc3fe6f423p126 }, { 0x1.986f98f9f96c8p124, 0x1.2a09153529381p126 }, { 0x1.8f34e9f8f93a6p124, 0x1.24a55399ea239p126 }, { 0x1.8625192879e39p124, 0x1.1f518ae487dc8p126 }, { 0x1.7d3fa69816db5p124, 0x1.1a0dc51a9934dp126 }, { 0x1.7484120df1b01p124, 0x1.14da0a961fd14p126 }, { 0x1.6bf1db13f3983p124, 0x1.0fb6620c550afp126 }, { 0x1.63888104d811ap124, 0x1.0aa2d09497f2bp126 }, { 0x1.5b478318ff939p124, 0x1.059f59af7a906p126 }, { 0x1.532e6073095f2p124, 0x1.00abff4dec7a3p126 }, { 0x1.4b3c982c338c7p124, 0x1.f79183b101c5bp125 }, { 0x1.4371a960807f8p124, 0x1.edeb406d9c825p125 }, { 0x1.3bcd133aa0ffcp124, 0x1.e4652fadcb6b2p125 }, { 0x1.344e54ffa23b9p124, 0x1.daff4969c0b04p125 }, { 0x1.2cf4ee1a5f0fcp124, 0x1.d1b982c50137p125 }, { 0x1.25c05e26b3f99p124, 0x1.c893ce1dcbef7p125 }, { 0x1.1eb024fc75285p124, 0x1.bf8e1b1ca2279p125 }, { 0x1.17c3c2ba26319p124, 0x1.b6a856c3ed54fp125 }, { 0x1.10fab7cf72f94p124, 0x1.ade26b7fbed95p125 }, { 0x1.0a548507696cp124, 0x1.a53c4135a6526p125 }, { 0x1.03d0ab9273b94p124, 0x1.9cb5bd549b111p125 }, { 0x1.fadd5a20258d3p123, 0x1.944ec2e4f563p125 }, { 0x1.ee5c1730b147cp123, 0x1.8c07329874652p125 }, { 0x1.e21c938a45a83p123, 0x1.83deeada4d25ap125 }, { 0x1.d61dd57628999p123, 0x1.7bd5c7df3fe9cp125 }, { 0x1.ca5ee4649e31fp123, 0x1.73eba3b5b07b7p125 }, { 0x1.bedec8fddb34p123, 0x1.6c205655be72p125 }, { 0x1.b39c8d3276d8ap123, 0x1.6473b5b15a7a1p125 }, { 0x1.a8973c4b5c03ep123, 0x1.5ce595c455b0ap125 }, { 0x1.9dcde2f93a207p123, 0x1.5575c8a468362p125 }, { 0x1.933f8f6375f2cp123, 0x1.4e241e912c305p125 }, { 0x1.88eb51369acb9p123, 0x1.46f066040a832p125 }, { 0x1.7ed039b24c96bp123, 0x1.3fda6bc016994p125 }, { 0x1.74ed5bb6bb581p123, 0x1.38e1fae1d6a9dp125 }, { 0x1.6b41cbd198bc8p123, 0x1.3206dceef5f87p125 }, { 0x1.61cca04a90795p123, 0x1.2b48d9e5dea1cp125 }, { 0x1.588cf12f4446bp123, 0x1.24a7b84d38971p125 }, { 0x1.4f81d85ecc55bp123, 0x1.1e233d434b813p125 }, { 0x1.46aa7194bd324p123, 0x1.17bb2c8d41535p125 }, { 0x1.3e05da73b4159p123, 0x1.116f48a6476ccp125 }, { 0x1.3593328f6abbep123, 0x1.0b3f52ce8c383p125 }, { 0x1.2d519b7653e1ep123, 0x1.052b0b1a174eap125 }, { 0x1.254038bac19d6p123, 0x1.fe6460fef468p124 }, { 0x1.1d5e2ffb96d4p123, 0x1.f2a901ccafb37p124 }, { 0x1.15aaa8ec85205p123, 0x1.e723726b824a9p124 }, { 0x1.0e24cd5dd8846p123, 0x1.dbd32ac4c99bp124 }, { 0x1.06cbc943d255ap123, 0x1.d0b7a0f921e7cp124 }, { 0x1.ff3d957b29b39p122, 0x1.c5d0497c09e74p124 }, { 0x1.f13a043742333p122, 0x1.bb1c972f23e5p124 }, { 0x1.e38b43cbd0f0fp122, 0x1.b09bfb7d11a84p124 }, { 0x1.d62fbdc2e756bp122, 0x1.a64de673e8837p124 }, { 0x1.c925e02b41668p122, 0x1.9c31c6df3b1b8p124 }, { 0x1.bc6c1da1f3121p122, 0x1.92470a61b6965p124 }, { 0x1.b000ed5b4a626p122, 0x1.888d1d8e510a3p124 }, { 0x1.a3e2cb2ae9edbp122, 0x1.7f036c0107294p124 }, { 0x1.9810378b1f299p122, 0x1.75a96077274bap124 }, { 0x1.8c87b7a37834fp122, 0x1.6c7e64e7281cbp124 }, { 0x1.8147d54e9cc33p122, 0x1.6381e2980956bp124 }, { 0x1.764f1f1f6ddeap122, 0x1.5ab342383d178p124 }, { 0x1.6b9c28657041ap122, 0x1.5211ebf41880bp124 }, { 0x1.612d893085125p122, 0x1.499d478bca735p124 }, { 0x1.5701de53f4d2ep122, 0x1.4154bc68d75c3p124 }, { 0x1.4d17c968d062bp122, 0x1.3937b1b31925ap124 }, { 0x1.436df0cfabf1dp122, 
0x1.31458e6542847p124 }, { 0x1.3a02ffb1b7ceep122, 0x1.297db960e4f63p124 }, { 0x1.30d5a6013afc5p122, 0x1.21df9981f8e53p124 }, { 0x1.27e49879737d3p122, 0x1.1a6a95b1e786fp124 }, { 0x1.1f2e909de04d2p122, 0x1.131e14fa1625dp124 }, { 0x1.16b24cb8f8f92p122, 0x1.0bf97e95f2a64p124 }, { 0x1.0e6e8fda56cf7p122, 0x1.04fc3a0481321p124 }, { 0x1.066221d4539d8p122, 0x1.fc4b5e32d6259p123 }, { 0x1.fd179e7243e3cp121, 0x1.eeea8c1b1db94p123 }, { 0x1.edd4d2aec5adbp121, 0x1.e1d4cf1e2450ap123 }, { 0x1.def98c6c79efap121, 0x1.d508f9a1ea64fp123 }, { 0x1.d0838121f2418p121, 0x1.c885df3451a07p123 }, { 0x1.c2706fa45005ep121, 0x1.bc4a54a84e834p123 }, { 0x1.b4be201caa4b4p121, 0x1.b055303221015p123 }, { 0x1.a76a63fc95c79p121, 0x1.a4a549829587ep123 }, { 0x1.9a7315f1d6a55p121, 0x1.993979e14fffep123 }, { 0x1.8dd619d943ca1p121, 0x1.8e109c4622913p123 }, { 0x1.81915cb0e3323p121, 0x1.83298d717210ep123 }, { 0x1.75a2d48946eb1p121, 0x1.78832c03aa2b1p123 }, { 0x1.6a08807632262p121, 0x1.6e1c5893c380bp123 }, { 0x1.5ec0687e8dcb2p121, 0x1.63f3f5c4de13bp123 }, { 0x1.53c89d8bb3ddbp121, 0x1.5a08e85af27ep123 }, { 0x1.491f395818f54p121, 0x1.505a174e9c929p123 }, { 0x1.3ec25e5d5af12p121, 0x1.46e66be00224p123 }, { 0x1.34b037c1bbfc5p121, 0x1.3dacd1a8d8ccep123 }, { 0x1.2ae6f94510dd8p121, 0x1.34ac36ad8dafep123 }, { 0x1.2164df2d29765p121, 0x1.2be38b6d92415p123 }, { 0x1.18282e31ba3e8p121, 0x1.2351c2f2d1449p123 }, { 0x1.0f2f3367cd6aap121, 0x1.1af5d2e04f3f6p123 }, { 0x1.0678442cc256fp121, 0x1.12ceb37ff9bc3p123 }, { 0x1.fc037c21c3622p120, 0x1.0adb5fcfa8c75p123 }, { 0x1.eb940d8319831p120, 0x1.031ad58d56279p123 }, { 0x1.db9f17e61c31p120, 0x1.f7182a851bca2p122 }, { 0x1.cc218694238a2p120, 0x1.e85c449e377f3p122 }, { 0x1.bd18548996419p120, 0x1.da0005e5f28dfp122 }, { 0x1.ae808c479c371p120, 0x1.cc0180af00a8bp122 }, { 0x1.a05747a543aa7p120, 0x1.be5ecd2fcb5f9p122 }, { 0x1.9299afa0246a6p120, 0x1.b1160991ff737p122 }, { 0x1.8544fc2c8c1dap120, 0x1.a4255a00b9f03p122 }, { 0x1.785674053e8b9p120, 0x1.978ae8b55ce1bp122 }, { 0x1.6bcb6c7ad4854p120, 0x1.8b44e6031383ep122 }, { 0x1.5fa14942c3d54p120, 0x1.7f5188610ddc8p122 }, { 0x1.53d57c461a5a7p120, 0x1.73af0c737bb45p122 }, { 0x1.4865856ff632ap120, 0x1.685bb5134ef13p122 }, { 0x1.3d4ef27bc49a6p120, 0x1.5d55cb54cd53ap122 }, { 0x1.328f5ec350e67p120, 0x1.529b9e8cf9a1ep122 }, { 0x1.2824730cacbb4p120, 0x1.482b8455dc491p122 }, { 0x1.1e0be557fa673p120, 0x1.3e03d891b37dep122 }, { 0x1.144378ad22027p120, 0x1.3422fd6d12e2bp122 }, { 0x1.0ac8fce979b96p120, 0x1.2a875b5ffab56p122 }, { 0x1.019a4e8d69649p120, 0x1.212f612dee7fbp122 }, { 0x1.f16aad1422a55p119, 0x1.181983e5133ddp122 }, { 0x1.e030141df7d25p119, 0x1.0f443edc5ce49p122 }, { 0x1.cf80d4afc3019p119, 0x1.06ae13b0d3255p122 }, { 0x1.bf5908f50b4ap119, 0x1.fcab1483ea7fcp121 }, { 0x1.afb4e269693dfp119, 0x1.ec72615a894c4p121 }, { 0x1.a090a974cfebep119, 0x1.dcaf3691fc448p121 }, { 0x1.91e8bd0830a74p119, 0x1.cd5ec93c12432p121 }, { 0x1.83b9923a85f7bp119, 0x1.be7e5ac24963bp121 }, { 0x1.75ffb3e6519ap119, 0x1.b00b38d6b3575p121 }, { 0x1.68b7c2479902dp119, 0x1.a202bd6372dcep121 }, { 0x1.5bde729a6b60fp119, 0x1.94624e78e0fafp121 }, { 0x1.4f708eb9fba63p119, 0x1.87275e3a6869ep121 }, { 0x1.436af4c058acbp119, 0x1.7a4f6aca256cbp121 }, { 0x1.37ca96a6cd1d4p119, 0x1.6dd7fe335823p121 }, { 0x1.2c8c79e6f04a3p119, 0x1.61beae53b72b7p121 }, { 0x1.21adb71c70c75p119, 0x1.56011cc3b036dp121 }, { 0x1.172b79a7a1181p119, 0x1.4a9cf6bda3f4cp121 }, { 0x1.0d02ff50ce651p119, 0x1.3f8ff5042a88ep121 }, { 0x1.033197ec68c0ep119, 0x1.34d7dbc76d7e5p121 }, { 0x1.f3694a0008381p118, 0x1.2a727a89a3f14p121 }, { 0x1.e11332d0714c5p118, 
0x1.205dac02bd6b9p121 }, { 0x1.cf5bf1fed1e7p118, 0x1.1697560347b26p121 }, { 0x1.be3eb08ae7c2p118, 0x1.0d1d69569b82dp121 }, { 0x1.adb6b810af9e2p118, 0x1.03ede1a45bfeep121 }, { 0x1.9dbf721b98dfap118, 0x1.f60d8aa2a88f2p120 }, { 0x1.8e54677bb0151p118, 0x1.e4cc4abf7d065p120 }, { 0x1.7f713f9cc9784p118, 0x1.d4143a9dfe965p120 }, { 0x1.7111bfdfb3cep118, 0x1.c3e1a5f5c077cp120 }, { 0x1.6331caf57b5dbp118, 0x1.b430ecf4a83a8p120 }, { 0x1.55cd603cc415p118, 0x1.a4fe83fb9db25p120 }, { 0x1.48e09b21414bfp118, 0x1.9646f35a76624p120 }, { 0x1.3c67b27d50fe7p118, 0x1.8806d70b2fc36p120 }, { 0x1.305ef7fdbfb95p118, 0x1.7a3ade6c8b3e5p120 }, { 0x1.24c2d787b9e37p118, 0x1.6cdfcbfc1e263p120 }, { 0x1.198fd6a0ee7bdp118, 0x1.5ff2750fe782p120 }, { 0x1.0ec293d9e6d85p118, 0x1.536fc18f7ce5cp120 }, { 0x1.0457c63a9669p118, 0x1.4754abacdf1dcp120 }, { 0x1.f49879624a021p117, 0x1.3b9e3f9d06e3fp120 }, { 0x1.e139bb05eb49ep117, 0x1.30499b503957fp120 }, { 0x1.ce8d4b7fd6c7p117, 0x1.2553ee2a336bfp120 }, { 0x1.bc8d516fda8bap117, 0x1.1aba78ba3af89p120 }, { 0x1.ab341ee553e25p117, 0x1.107a8c7323a6ep120 }, { 0x1.9a7c305336484p117, 0x1.06918b6355624p120 }, { 0x1.8a602b88919cp117, 0x1.f9f9cfd9c3035p119 }, { 0x1.7adadead962edp117, 0x1.e77448fb66bb9p119 }, { 0x1.6be73f45149fbp117, 0x1.d58da68fd117p119 }, { 0x1.5d80693276a6dp117, 0x1.c4412bf4b8f0bp119 }, { 0x1.4fa19dc42d409p117, 0x1.b38a3af2e55b4p119 }, { 0x1.424642c28ff75p117, 0x1.a3645330550ffp119 }, { 0x1.3569e18328604p117, 0x1.93cb11a30d765p119 }, { 0x1.29082600643fdp117, 0x1.84ba3004a50dp119 }, { 0x1.1d1cddf5a82dep117, 0x1.762d84469c18fp119 }, { 0x1.11a3f7ffbbfeap117, 0x1.6821000795a03p119 }, { 0x1.069982c189a9ep117, 0x1.5a90b00981d93p119 }, { 0x1.f7f3581a4dc2cp116, 0x1.4d78bba8ca5fdp119 }, { 0x1.e381802242163p116, 0x1.40d564548fad7p119 }, { 0x1.cfd6511405b2dp116, 0x1.34a305080681fp119 }, { 0x1.bcead7f01492fp116, 0x1.28de11c5031ebp119 }, { 0x1.aab859b20ac9ep116, 0x1.1d83170fbf6fbp119 }, { 0x1.993851cc9779ap116, 0x1.128eb96be8798p119 }, { 0x1.886470ad946a7p116, 0x1.07fdb4dafea5fp119 }, { 0x1.78369a4a2cbd6p116, 0x1.fb99b8b8279e1p118 }, { 0x1.68a8e4b2fc8c2p116, 0x1.e7f232d9e263p118 }, { 0x1.59b596b012aaap116, 0x1.d4fed7195d7e8p118 }, { 0x1.4b572664bd2dcp116, 0x1.c2b9cf7f893bfp118 }, { 0x1.3d8837fb08d1dp116, 0x1.b11d702b3deb2p118 }, { 0x1.30439c56dadf6p116, 0x1.a024365f771bdp118 }, { 0x1.23844fd08cb93p116, 0x1.8fc8c794b03b5p118 }, { 0x1.174578f6efd5dp116, 0x1.8005f08d6f1efp118 }, { 0x1.0b826758a086bp116, 0x1.70d6a46e07ddap118 }, { 0x1.003692548d98bp116, 0x1.6235fbd7a4345p118 }, { 0x1.eabb2fe335196p115, 0x1.541f340697987p118 }, { 0x1.d5e6777a83c2ap115, 0x1.468dadf4080abp118 }, { 0x1.c1e6cb6239574p115, 0x1.397ced7af2b15p118 }, { 0x1.aeb4423e690e7p115, 0x1.2ce898809244ep118 }, { 0x1.9c47374a0974ep115, 0x1.20cc76202c5fbp118 }, { 0x1.8a98484a1e8d3p115, 0x1.15246dda49d47p118 }, { 0x1.79a0538dd4fc7p115, 0x1.09ec86c75d497p118 }, { 0x1.695875fb574ap115, 0x1.fe41cd9bb4eeep117 }, { 0x1.59ba0929261c5p115, 0x1.e97ba3b77f306p117 }, { 0x1.4abea183bc47p115, 0x1.d57f524723822p117 }, { 0x1.3c600c7f477c5p115, 0x1.c245d4b99847ap117 }, { 0x1.2e984ed53e777p115, 0x1.afc85e0f82e12p117 }, { 0x1.2161a2cd9d894p115, 0x1.9e005769dbc1dp117 }, { 0x1.14b67693928cfp115, 0x1.8ce75e9f6f8ap117 }, { 0x1.08916a956172p115, 0x1.7c7744d9378f7p117 }, { 0x1.f9da9fde95755p114, 0x1.6caa0d3582fe9p117 }, { 0x1.e38a4dc27b11bp114, 0x1.5d79eb71e893bp117 }, { 0x1.ce283a9e3e33p114, 0x1.4ee1429bf7ccp117 }, { 0x1.b9ab1a96e3b3ep114, 0x1.40daa3c89f5b6p117 }, { 0x1.a609f7584d32bp114, 0x1.3360ccd23db3ap117 }, { 0x1.933c2d52c56c9p114, 0x1.266ea71d4f71ap117 
}, { 0x1.8139690c0d187p114, 0x1.19ff4663ae9dfp117 }, { 0x1.6ff9a4837fa43p114, 0x1.0e0de78654d1ep117 }, { 0x1.5f7524a8e81a2p114, 0x1.0295ef6591848p117 }, { 0x1.4fa476e59f668p114, 0x1.ef25d37f49fe1p116 }, { 0x1.40806eb78e353p114, 0x1.da01102b5f851p116 }, { 0x1.3202235dada5p114, 0x1.c5b5412dcafadp116 }, { 0x1.2422ed95a3235p114, 0x1.b23a5a23e421p116 }, { 0x1.16dc656a14df6p114, 0x1.9f8893d8fd1c1p116 }, { 0x1.0a2860115569cp114, 0x1.8d986a4187285p116 }, { 0x1.fc01dbb80c841p113, 0x1.7c629a822bc9ep116 }, { 0x1.e4c0b066a497p113, 0x1.6be02102b352p116 }, { 0x1.ce823f4cc4badp113, 0x1.5c0a378c90bcap116 }, { 0x1.b93bf40d5eccbp113, 0x1.4cda5374ea275p116 }, { 0x1.a4e3a125adc76p113, 0x1.3e4a23d1f4703p116 }, { 0x1.916f7c5f2f764p113, 0x1.30538fbb77ecdp116 }, { 0x1.7ed61b5d3db0ap113, 0x1.22f0b496539bep116 }, { 0x1.6d0e7045988cbp113, 0x1.161be46ad3b5p116 }, { 0x1.5c0fc68335b0cp113, 0x1.09cfa445b00ffp116 }, { 0x1.4bd1bfa2aba3dp113, 0x1.fc0d55470cf51p115 }, { 0x1.3c4c504792bf8p113, 0x1.e577bbcd49935p115 }, { 0x1.2d77bd3a382bcp113, 0x1.cfd4a5adec5cp115 }, { 0x1.1f4c988d02149p113, 0x1.bb1a9657ce465p115 }, { 0x1.11c3bed8e716ap113, 0x1.a740684026555p115 }, { 0x1.04d654905dadp113, 0x1.943d4a1d1ed39p115 }, { 0x1.f0fb86d056745p112, 0x1.8208bc334a6a5p115 }, { 0x1.d9676faafa27fp112, 0x1.709a8db59f25cp115 }, { 0x1.c2e43d417197bp112, 0x1.5feada379d8b7p115 }, { 0x1.ad664518e771bp112, 0x1.4ff207314a102p115 }, { 0x1.98e25420092dap112, 0x1.40a8c1949f75ep115 }, { 0x1.854daa4a49b0fp112, 0x1.3207fb7420eb9p115 }, { 0x1.729df6503422ap112, 0x1.2408e9ba3327fp115 }, { 0x1.60c95193c542dp112, 0x1.16a501f0e42cap115 }, { 0x1.4fc63c27c71aep112, 0x1.09d5f819c9e29p115 }, { 0x1.3f8b98f93052ap112, 0x1.fb2b792b40a22p114 }, { 0x1.3010aa198de78p112, 0x1.e3bcf436a1a95p114 }, { 0x1.214d0d298365p112, 0x1.cd55277c18d05p114 }, { 0x1.1338b7e273194p112, 0x1.b7e94604479dcp114 }, { 0x1.05cbf4be650abp112, 0x1.a36eec00926ddp114 }, { 0x1.f1febf7a916aap111, 0x1.8fdc1b2dcf7b9p114 }, { 0x1.d997c68d65936p111, 0x1.7d2737527c3f9p114 }, { 0x1.c2556a4e7a90fp111, 0x1.6b4702d7d5849p114 }, { 0x1.ac2aa7516ade4p111, 0x1.5a329b7d30748p114 }, { 0x1.970b05888fda2p111, 0x1.49e17724f4d41p114 }, { 0x1.82ea92dbc1a27p111, 0x1.3a4b60ba9aa4ep114 }, { 0x1.6fbdddeff308fp111, 0x1.2b6875310f785p114 }, { 0x1.5d79f11e27f6bp111, 0x1.1d312098e9dbap114 }, { 0x1.4c144d984e1b8p111, 0x1.0f9e1b4dd36dfp114 }, { 0x1.3b82e6ba892a4p111, 0x1.02a8673a94692p114 }, { 0x1.2bbc1d878d272p111, 0x1.ec929a665b449p113 }, { 0x1.1cb6bc4eaa678p111, 0x1.d4f4b4c8e09edp113 }, { 0x1.0e69f27a37df3p111, 0x1.be6abbb10a5aap113 }, { 0x1.00cd508511266p111, 0x1.a8e8cc1fadef6p113 }, { 0x1.e7b1882bccac5p110, 0x1.94637d5bacfdbp113 }, { 0x1.cf09287e48bb9p110, 0x1.80cfdc72220cfp113 }, { 0x1.b792bbc489b04p110, 0x1.6e2367dc27f95p113 }, { 0x1.a140206ab945p110, 0x1.5c540b4936fd2p113 }, { 0x1.8c03d2d39119bp110, 0x1.4b581b8d170fcp113 }, { 0x1.77d0e6e5bed21p110, 0x1.3b2652b06c2b2p113 }, { 0x1.649b01d73110ap110, 0x1.2bb5cc22e5db6p113 }, { 0x1.525654343aad2p110, 0x1.1cfe010e2052dp113 }, { 0x1.40f79420887c7p110, 0x1.0ef6c4c84a0fep113 }, { 0x1.3073f7cff4a85p110, 0x1.01984165a5f36p113 }, { 0x1.20c1303550f0ep110, 0x1.e9b5e8d00ce77p112 }, { 0x1.11d563e54f40ep110, 0x1.d16f5716c6c1ap112 }, { 0x1.03a72a2bbdc06p110, 0x1.ba4f035d60e03p112 }, { 0x1.ec5b0ca2b20f5p109, 0x1.a447b7b03f045p112 }, { 0x1.d2bfc6210880ap109, 0x1.8f4ccca7fc90dp112 }, { 0x1.ba6c1c6e87c4p109, 0x1.7b5223dac7336p112 }, { 0x1.a35068e9c89cfp109, 0x1.684c227fcacefp112 }, { 0x1.8d5dbaa383b98p109, 0x1.562fac4329b48p112 }, { 0x1.7885ce9f67cdbp109, 0x1.44f21e49054f2p112 }, { 
0x1.64bb0863504ddp109, 0x1.34894a5e24657p112 }, { 0x1.51f06ad20e4c3p109, 0x1.24eb7254ccf83p112 }, { 0x1.4019914f0b53ap109, 0x1.160f438c70913p112 }, { 0x1.2f2aa92823e8p109, 0x1.07ebd2a2d2844p112 }, { 0x1.1f186b432c98bp109, 0x1.f4f12e9ab070ap111 }, { 0x1.0fd8160ca94ap109, 0x1.db5ad0b27805cp111 }, { 0x1.015f67a552924p109, 0x1.c304efa2c6f4ep111 }, { 0x1.e749309831666p108, 0x1.abe09e9144b5ep111 }, { 0x1.cd3caa04cdd1bp108, 0x1.95df988e76644p111 }, { 0x1.b48774d0f8e45p108, 0x1.80f439b4ee04bp111 }, { 0x1.9d189f9f85cbfp108, 0x1.6d11788a69c64p111 }, { 0x1.86e0050236315p108, 0x1.5a2adfa0b4bc4p111 }, { 0x1.71ce426a561d3p108, 0x1.4834877429b8fp111 }, { 0x1.5dd4af79906a9p108, 0x1.37231085c7d9ap111 }, { 0x1.4ae555af52cdfp108, 0x1.26eb9daed6f7ep111 }, { 0x1.38f2e86f38216p108, 0x1.1783ceac2891p111 }, { 0x1.27f0bd5d0e6b1p108, 0x1.08e1badf0fcedp111 }, { 0x1.17d2c50b2bfafp108, 0x1.f5f7d88472604p110 }, { 0x1.088d83f7e4069p108, 0x1.db92b5212fb8dp110 }, { 0x1.f42c17ae0ebf6p107, 0x1.c282cd3957edap110 }, { 0x1.d8c3ea48f2889p107, 0x1.aab7abace48dcp110 }, { 0x1.beceb1f9f5b3dp107, 0x1.94219bfcb4928p110 }, { 0x1.a6399674d366bp107, 0x1.7eb1a2075864ep110 }, { 0x1.8ef2a9a18d857p107, 0x1.6a597219a93dap110 }, { 0x1.78e8dcd2e6bfdp107, 0x1.570b69502f313p110 }, { 0x1.640bf6745325ep107, 0x1.44ba864670882p110 }, { 0x1.504c882a97424p107, 0x1.335a62115bce2p110 }, { 0x1.3d9be56279ee9p107, 0x1.22df298214423p110 }, { 0x1.2bec1a4917edbp107, 0x1.133d96ae7e0ddp110 }, { 0x1.1b2fe32991d5cp107, 0x1.046aeabcfcdecp110 }, { 0x1.0b5aa42bf5054p107, 0x1.ecb9cfe1d8642p109 }, { 0x1.f8c0c2e2ce8dep106, 0x1.d21397ead99cbp109 }, { 0x1.dc6b6f1384e18p106, 0x1.b8d094c86d374p109 }, { 0x1.c19fa87de37fbp106, 0x1.a0df0f0c626dcp109 }, { 0x1.a848df650bea7p106, 0x1.8a2e269750a39p109 }, { 0x1.90538b942ea7cp106, 0x1.74adc8f4064d3p109 }, { 0x1.79ad1fce5b3d8p106, 0x1.604ea819f007cp109 }, { 0x1.6443fdcf0c327p106, 0x1.4d0231928c6f9p109 }, { 0x1.50076ad55cc39p106, 0x1.3aba85fe22e2p109 }, { 0x1.3ce784b411931p106, 0x1.296a70f414053p109 }, { 0x1.2ad53760d7287p106, 0x1.1905613b3abf2p109 }, { 0x1.19c232fd50b88p106, 0x1.097f6156f32c5p109 }, { 0x1.09a0e254c75ep106, 0x1.f59a20caf6695p108 }, { 0x1.f4c8c392fb944p105, 0x1.d9c73698fb1dcp108 }, { 0x1.d800ed59bd026p105, 0x1.bf716c6168baep108 }, { 0x1.bcd30dfbd611bp105, 0x1.a6852c6b58392p108 }, { 0x1.a32923130213fp105, 0x1.8eefd70594a89p108 }, { 0x1.8aee4cd06ec1bp105, 0x1.789fb715aae95p108 }, { 0x1.740ebfab80eb4p105, 0x1.6383f726a8e04p108 }, { 0x1.5e77b6bbd2127p105, 0x1.4f8c96f26a26ap108 }, { 0x1.4a1766b6e5e8ap105, 0x1.3caa61607f92p108 }, { 0x1.36dcf18a6465cp105, 0x1.2acee2f5ecdb8p108 }, { 0x1.24b85a8bf0124p105, 0x1.19ec60b1242edp108 }, { 0x1.139a7b37f8475p105, 0x1.09f5cf4dd2877p108 }, { 0x1.0374f8792ca97p105, 0x1.f5bd95d8730d8p107 }, { 0x1.e87470e4f4246p104, 0x1.d9371e2ff7c35p107 }, { 0x1.cbbab18b73217p104, 0x1.be41de54d155ap107 }, { 0x1.b0a44aa2f067ep104, 0x1.a4c89e08ef4f3p107 }, { 0x1.971a1ec0f40c7p104, 0x1.8cb738399b12cp107 }, { 0x1.7f064a8ba8323p104, 0x1.75fa8dbc84becp107 }, { 0x1.685414c16188ep104, 0x1.608078a70dcbcp107 }, { 0x1.52efdf060cd2p104, 0x1.4c37c0394d094p107 }, { 0x1.3ec7176d784b5p104, 0x1.39100d5687bfep107 }, { 0x1.2bc82ab9d2302p104, 0x1.26f9df8519bd7p107 }, { 0x1.19e277461404p104, 0x1.15e6827001f18p107 }, { 0x1.090640946d2d5p104, 0x1.05c803e4831c1p107 }, { 0x1.f24946f22d5aep103, 0x1.ed22548cffd35p106 }, { 0x1.d45f15b49b35ep103, 0x1.d06ad6ecdf971p106 }, { 0x1.b83349fd05191p103, 0x1.b551c847fbc96p106 }, { 0x1.9dacb2c432ef4p103, 0x1.9bc09f112b494p106 }, { 0x1.84b37e1cbf8ebp103, 0x1.83a1ff0aa239dp106 }, { 
0x1.6d3126d74b6ccp103, 0x1.6ce1aa3fd7bddp106 }, { 0x1.5710631158bffp103, 0x1.576c72b514859p106 }, { 0x1.423d13a3b73e1p103, 0x1.43302cc4a0da8p106 }, { 0x1.2ea43465e3995p103, 0x1.301ba221dc9bbp106 }, { 0x1.1c33cd3c37addp103, 0x1.1e1e857adc568p106 }, { 0x1.0adae3e73c2b5p103, 0x1.0d2966b1746f7p106 }, { 0x1.f512dd15b73b7p102, 0x1.fa5b4f49cc6b2p105 }, { 0x1.d6608dc942687p102, 0x1.dc3ae30b55c16p105 }, { 0x1.b9823c51276e1p102, 0x1.bfd7555a3bd68p105 }, { 0x1.9e5ce2f93dd76p102, 0x1.a517d9e61628ap105 }, { 0x1.84d6fe15b6b93p102, 0x1.8be4f8f6c951fp105 }, { 0x1.6cd87746bc76bp102, 0x1.74287ded49339p105 }, { 0x1.564a91cd221fp102, 0x1.5dcd669f2cd34p105 }, { 0x1.4117d7e2c667dp102, 0x1.48bfd38302871p105 }, { 0x1.2d2c0909ebeb9p102, 0x1.34ecf8a3c124ap105 }, { 0x1.1a7409475f2f9p102, 0x1.22430f521cbcfp105 }, { 0x1.08ddd13bd35e7p102, 0x1.10b1488aeb235p105 }, { 0x1.f0b0be22d18e8p101, 0x1.0027c00a263a6p105 }, { 0x1.d1a75065a8c74p101, 0x1.e12ee004efc37p104 }, { 0x1.b48117843c1c7p101, 0x1.c3e44ae32b16bp104 }, { 0x1.99218b8ac7f8ep101, 0x1.a854ea14102a8p104 }, { 0x1.7f6dc6010b4adp101, 0x1.8e6761569f45dp104 }, { 0x1.674c6ae60d852p101, 0x1.7603bac345f65p104 }, { 0x1.50a592e3c968ep101, 0x1.5f1353cdad001p104 }, { 0x1.3b62b6aafb0c8p101, 0x1.4980cb3c80949p104 }, { 0x1.276e9b681072fp101, 0x1.3537f00b6ad4dp104 }, { 0x1.14b54042f445bp101, 0x1.2225b12bffc68p104 }, { 0x1.0323ccdc1a3dcp101, 0x1.10380e1adb7e9p104 }, { 0x1.e5510173b9a5p100, 0x1.febc107d5efaap103 }, { 0x1.c6654733b86adp100, 0x1.df0f2a0ee6947p103 }, { 0x1.a964ed354f984p100, 0x1.c14b2188bcee4p103 }, { 0x1.8e324c651b064p100, 0x1.a553644f7f07dp103 }, { 0x1.74b179d1eba81p100, 0x1.8b0cfce0579ep103 }, { 0x1.5cc82d9070d95p100, 0x1.725e7c5dd20f7p103 }, { 0x1.465daafca8b1dp100, 0x1.5b2fe547a134p103 }, { 0x1.315aaa46df48ep100, 0x1.456a974e92e93p103 }, { 0x1.1da9433aebbcfp100, 0x1.30f93c3699078p103 }, { 0x1.0b34d93135fcp100, 0x1.1dc7b5b978cf8p103 }, { 0x1.f3d41033c44ccp99, 0x1.0bc30c5d52f15p103 }, { 0x1.d36d25268cd2bp99, 0x1.f5b2be65a0c7fp102 }, { 0x1.b512a1fb1d8fcp99, 0x1.d5f3a8dea7357p102 }, { 0x1.98a442fc4fc15p99, 0x1.b82915b03515bp102 }, { 0x1.7e03b1cc6d738p99, 0x1.9c3517e789488p102 }, { 0x1.651468e010b8ap99, 0x1.81fb7df06136ep102 }, { 0x1.4dbb989001d84p99, 0x1.6961b8d641d06p102 }, { 0x1.37e00dac4e8b5p99, 0x1.524ec4d916caep102 }, { 0x1.236a197bf0b9ap99, 0x1.3cab1343d18d1p102 }, { 0x1.10437b1569d7ep99, 0x1.2860757487a01p102 }, { 0x1.fcae93fb7323cp98, 0x1.155a09065d4f7p102 }, { 0x1.db23c3f816f92p98, 0x1.0384250e4c9fcp102 }, { 0x1.bbc1a022c14d4p98, 0x1.e59890b926c78p101 }, { 0x1.9e658108af2ep98, 0x1.c642116a8a9e3p101 }, { 0x1.82eedbe410407p98, 0x1.a8e405e651ab6p101 }, { 0x1.693f22ab61ce9p98, 0x1.8d5f98114f872p101 }, { 0x1.5139a5f3661fbp98, 0x1.7397c5a66e307p101 }, { 0x1.3ac3788a1b429p98, 0x1.5b71456c5a4c4p101 }, { 0x1.25c354b26cb4ep98, 0x1.44d26de513197p101 }, { 0x1.122182e9a270fp98, 0x1.2fa31d6371537p101 }, { 0x1.ff8f84418d51p97, 0x1.1bcca373b7b43p101 }, { 0x1.dd4262aac53e8p97, 0x1.0939ab853339fp101 }, { 0x1.bd3474ec16ca5p97, 0x1.efac5187b2863p100 }, { 0x1.9f40fd0082b72p97, 0x1.cf1e86235d0e7p100 }, { 0x1.8345858c4438dp97, 0x1.b0a68a2128babp100 }, { 0x1.6921be96b86b1p97, 0x1.9423165bc4444p100 }, { 0x1.50b75c536f927p97, 0x1.7974e743dea3dp100 }, { 0x1.39e9f7dcbe479p97, 0x1.607e9eacd105p100 }, { 0x1.249ef1c3be817p97, 0x1.4924a74dec729p100 }, { 0x1.10bd565b35393p97, 0x1.334d19e0c216p100 }, { 0x1.fc5b8748842b2p96, 0x1.1edfa3c5f5ccap100 }, { 0x1.d9b4a18a38642p96, 0x1.0bc56f1b54701p100 }, { 0x1.b95cede6d524bp96, 0x1.f3d2185e047d9p99 }, { 0x1.9b2df77a02225p96, 0x1.d26cb87945e87p99 }, { 
0x1.7f03b935e8e3ap96, 0x1.b334fac4b9f99p99 }, { 0x1.64bc777824f0ep96, 0x1.96076f7918d1cp99 }, { 0x1.4c389be9acb83p96, 0x1.7ac2d72fc2c63p99 }, { 0x1.355a9387de78cp96, 0x1.614801550319ep99 }, { 0x1.2006aeb6bc768p96, 0x1.4979ac8b28927p99 }, { 0x1.0c23033e2a376p96, 0x1.333c68e2d0548p99 }, { 0x1.f32ea02b55d23p95, 0x1.1e767bce37dd7p99 }, { 0x1.d099c5c770f5ap95, 0x1.0b0fc5b6d05ap99 }, { 0x1.b05cfe2e99435p95, 0x1.f1e3523b41d7dp98 }, { 0x1.92508d0743fc9p95, 0x1.d00de6608effep98 }, { 0x1.764f46cf19f9cp95, 0x1.b0778b7b3301bp98 }, { 0x1.5c36679625a01p95, 0x1.92fb04ec0f6cfp98 }, { 0x1.43e56c3e340a7p95, 0x1.77756ec9f78fap98 }, { 0x1.2d3dee1869201p95, 0x1.5dc61922d5a06p98 }, { 0x1.182380bd2f494p95, 0x1.45ce65699ff6dp98 }, { 0x1.047b91fcb6491p95, 0x1.2f71a5f15997p98 }, { 0x1.e45a9790460c1p94, 0x1.1a94ff571654fp98 }, { 0x1.c242efeaca76p94, 0x1.071f4bbea09ecp98 }, { 0x1.a284cb82c31cep94, 0x1.e9f1ff8ddd774p97 }, { 0x1.84f7a1eb7f7f3p94, 0x1.c818223a202c7p97 }, { 0x1.697595326d7dcp94, 0x1.a887bd2b4404dp97 }, { 0x1.4fdb462549af1p94, 0x1.8b1a336c5eb6bp97 }, { 0x1.3807ab51436a8p94, 0x1.6fab63324088ap97 }, { 0x1.21dbea9108398p94, 0x1.56197e30205bap97 }, { 0x1.0d3b35021d695p94, 0x1.3e44e45301b92p97 }, { 0x1.f4154a787cc1bp93, 0x1.281000bfe4c3fp97 }, { 0x1.d0623f4f4a28fp93, 0x1.135f28f2d50b4p97 }, { 0x1.af2e69a26261p93, 0x1.00187dded5975p97 }, { 0x1.904e0b3aa82a3p93, 0x1.dc479de0ef001p96 }, { 0x1.73985278fa30ep93, 0x1.bad4fdad3caa1p96 }, { 0x1.58e7298af87d9p93, 0x1.9baed3ed27ab8p96 }, { 0x1.401708b7e64c6p93, 0x1.7ead9ce4285bbp96 }, { 0x1.2906cb94eb40dp93, 0x1.63ac6b4edc88ep96 }, { 0x1.139788f2dd663p93, 0x1.4a88be2a6390cp96 }, { 0x1.ff58dab4f2a79p92, 0x1.332259185f1ap96 }, { 0x1.da552fdd03043p92, 0x1.1d5b1f3793044p96 }, { 0x1.b7f1f31b571b6p92, 0x1.0916f04b6e18bp96 }, { 0x1.98006c2117e39p92, 0x1.ec77101de6926p95 }, { 0x1.7a550f03b145bp92, 0x1.c960bf23153ep95 }, { 0x1.5ec74662c5961p92, 0x1.a8bd20fc65ef7p95 }, { 0x1.453141082302ap92, 0x1.8a61745ec7d1dp95 }, { 0x1.2d6fc2c9e8bcp92, 0x1.6e25d0e756261p95 }, { 0x1.1761f87a6dc3dp92, 0x1.53e4f7d1666cbp95 }, { 0x1.02e94eb4ac8a5p92, 0x1.3b7c27a7ddb0ep95 }, { 0x1.dfd296adef82ap91, 0x1.24caf2c32af14p95 }, { 0x1.bc8ed301215ebp91, 0x1.0fb3186804d0fp95 }, { 0x1.9bd5efd2c0f15p91, 0x1.f830c0bb41fd7p94 }, { 0x1.7d79f2db2d4a5p91, 0x1.d3c0f1a91c846p94 }, { 0x1.61500f5293f06p91, 0x1.b1e5acf351d87p94 }, { 0x1.47306f04df3d6p91, 0x1.92712d259ce66p94 }, { 0x1.2ef5ff0323b28p91, 0x1.7538c60a04476p94 }, { 0x1.187e3fb74914dp91, 0x1.5a14b04b47879p94 }, { 0x1.03a918225a966p91, 0x1.40dfd87456f4cp94 }, { 0x1.e0b15822be4ep90, 0x1.2977b1172b9d5p94 }, { 0x1.bce26a2fb7176p90, 0x1.13bc07e891491p94 }, { 0x1.9bb1bc445c3c6p90, 0x1.ff1dbb4300811p93 }, { 0x1.7cef42e9a617dp90, 0x1.d9a880f306bd8p93 }, { 0x1.606e51e0a4963p90, 0x1.b6e45220b55ep93 }, { 0x1.460560e841d79p90, 0x1.96a0b33f2c4dap93 }, { 0x1.2d8dd47a40ad8p90, 0x1.78b07e9e924acp93 }, { 0x1.16e3ca3d4393fp90, 0x1.5ce9ab1670dd2p93 }, { 0x1.01e5e8edda47bp90, 0x1.4325167006bbp93 }, { 0x1.dcea670907819p89, 0x1.2b3e53538ff3fp93 }, { 0x1.b8e9bec48816dp89, 0x1.15137a7f44864p93 }, { 0x1.97945aa1c9c35p89, 0x1.0084ff125639dp93 }, { 0x1.78b88a4e7107bp89, 0x1.daeb0b7311ec7p92 }, { 0x1.5c2827c986b62p89, 0x1.b7937d1c40c53p92 }, { 0x1.41b858361b0fep89, 0x1.96d082f59ab06p92 }, { 0x1.294150fb19119p89, 0x1.7872d9fa10aadp92 }, { 0x1.129e20e732adcp89, 0x1.5c4e8e37bc7dp92 }, { 0x1.fb58fa290d436p88, 0x1.423ac0df49a4p92 }, { 0x1.d499229819bc6p88, 0x1.2a117230ad284p92 }, { 0x1.b0c1a759f7739p88, 0x1.13af4f04f9998p92 }, { 0x1.8f9bb6c075486p88, 0x1.fde703724e56p91 }, { 
0x1.70f4744735c2bp88, 0x1.d77f0c82e7641p91 }, { 0x1.549cb0f7ef8e2p88, 0x1.b3ee02611d7ddp91 }, { 0x1.3a68a8c1234e1p88, 0x1.92ff33023d5bdp91 }, { 0x1.222fc469e8b8cp88, 0x1.7481a9e69f53fp91 }, { 0x1.0bcc5fd30f1ddp88, 0x1.5847eda620959p91 }, { 0x1.ee3728761897bp87, 0x1.3e27c1fcc74bdp91 }, { 0x1.c7fa0c7e3bac7p87, 0x1.25f9ee0b923dcp91 }, { 0x1.a4a56eb132a54p87, 0x1.0f9a0686532p91 }, { 0x1.8401b5336a8ap87, 0x1.f5cc7718082bp90 }, { 0x1.65db58e2358c1p87, 0x1.cf7e53d6a2ca5p90 }, { 0x1.4a029a7ea7cd1p87, 0x1.ac0f5f3229372p90 }, { 0x1.304b3d1961171p87, 0x1.8b498644847eap90 }, { 0x1.188c45630dc53p87, 0x1.6cfa9bcca59dcp90 }, { 0x1.029fbd8b92835p87, 0x1.50f411d4fd2cdp90 }, { 0x1.dcc4fabf32f1cp86, 0x1.370ab8327af5ep90 }, { 0x1.b767ecb334a7ep86, 0x1.1f167f88c6b6ep90 }, { 0x1.94ec06c0ff29fp86, 0x1.08f24085d4597p90 }, { 0x1.751977e5803d3p86, 0x1.e8f70e181d61ap89 }, { 0x1.57bc950253825p86, 0x1.c324c20e337dcp89 }, { 0x1.3ca58b816a87fp86, 0x1.a03261574b54ep89 }, { 0x1.23a8197d2607ep86, 0x1.7fe903cdf5855p89 }, { 0x1.0c9b4b0a6a16fp86, 0x1.6215c58da345p89 }, { 0x1.eeb27891d2bb3p85, 0x1.46897d4b69fc6p89 }, { 0x1.c77dbfc848866p85, 0x1.2d1877d731b7bp89 }, { 0x1.a357936adf17bp85, 0x1.159a386b11517p89 }, { 0x1.8203fa7992554p85, 0x1.ffd27ae9393cep88 }, { 0x1.634b7f56b0a5cp85, 0x1.d7c593130dd0bp88 }, { 0x1.46fada7e6a5fep85, 0x1.b2cd607c79bcfp88 }, { 0x1.2ce2a3690576bp85, 0x1.90ae4d3405651p88 }, { 0x1.14d707280e6cfp85, 0x1.71312dd1759e2p88 }, { 0x1.fd5f08ad2b29ap84, 0x1.5422ef5d8949dp88 }, { 0x1.d48d57f7718b7p84, 0x1.39544b0ecc957p88 }, { 0x1.aef3ce0add578p84, 0x1.20997f73e73ddp88 }, { 0x1.8c52800f939c8p84, 0x1.09ca0eaacd277p88 }, { 0x1.6c6e61e57bf9bp84, 0x1.e9810295890ecp87 }, { 0x1.4f10e8ebc44a9p84, 0x1.c2b45b5aa4a1dp87 }, { 0x1.3407b59d72a5bp84, 0x1.9eee068fa7596p87 }, { 0x1.1b2443858c0a1p84, 0x1.7df2b399c10a8p87 }, { 0x1.043b9f1621ff3p84, 0x1.5f8b87a31bd85p87 }, { 0x1.de4c41eb96b45p83, 0x1.4385c96e9a2d9p87 }, { 0x1.b77e5cbd5d147p83, 0x1.29b2933ef4cbcp87 }, { 0x1.93c9fc62bfb11p83, 0x1.11e68a6378f8ap87 }, { 0x1.72f0c4c8e9bffp83, 0x1.f7f338086a86bp86 }, { 0x1.54b92affb11afp83, 0x1.cf8d7d9ce040ap86 }, { 0x1.38ee17b150182p83, 0x1.aa577251ae485p86 }, { 0x1.1f5e908f70e0cp83, 0x1.8811d739efb5fp86 }, { 0x1.07dd6833bb38p83, 0x1.68823e52970bep86 }, { 0x1.e481e7f6ac4bcp82, 0x1.4b72ae68e8b4cp86 }, { 0x1.bcc58edad5559p82, 0x1.30b14dbe876bcp86 }, { 0x1.983ee9896d582p82, 0x1.181012ef8661p86 }, { 0x1.76aca47764427p82, 0x1.01647ba798745p86 }, { 0x1.57d287836bd3dp82, 0x1.d90e917701675p85 }, { 0x1.3b79118c097a1p82, 0x1.b2a87e86d0c8ap85 }, { 0x1.216d1b97279a9p82, 0x1.8f53dcb377293p85 }, { 0x1.097f82fc04025p82, 0x1.6ed2f2515e933p85 }, { 0x1.e709b415656dp81, 0x1.50ecc9ed47f19p85 }, { 0x1.beaa3d6c15504p81, 0x1.356cd5ce7799ep85 }, { 0x1.9996ed9b83967p81, 0x1.1c229a587ab78p85 }, { 0x1.778be2bd9795bp81, 0x1.04e15ecc7f3f6p85 }, { 0x1.584a99af8a842p81, 0x1.deffc7e6a6017p84 }, { 0x1.3b99832cbefddp81, 0x1.b7b040832f31p84 }, { 0x1.2143a112d0466p81, 0x1.938e021f36d76p84 }, { 0x1.09182b326b229p81, 0x1.7258610b3b233p84 }, { 0x1.e5d47637f5db5p80, 0x1.53d3bfc82a909p84 }, { 0x1.bd20fcc3b76d7p80, 0x1.37c92babdc2fdp84 }, { 0x1.97c9dda748fc7p80, 0x1.1e06010120f6ap84 }, { 0x1.7589207e91ad1p80, 0x1.065b9616170d4p84 }, { 0x1.561e669aa7fdbp80, 0x1.e13dd96b3753bp83 }, { 0x1.394e7a2ac9fc7p80, 0x1.b950d32467392p83 }, { 0x1.1ee2e61eccc99p80, 0x1.94a72263259a5p83 }, { 0x1.06a996198f06fp80, 0x1.72fd93e036cdcp83 }, { 0x1.e0e8fbad2703ep79, 0x1.54164576929abp83 }, { 0x1.b8328ee330ae9p79, 0x1.37b83c521fe96p83 }, { 0x1.92e21013a767p79, 0x1.1daf033182e96p83 }, { 
0x1.70aff489136ebp79, 0x1.05ca50205d26ap83 }, { 0x1.515a7c77fab48p79, 0x1.dfbb6235639fap82 }, { 0x1.34a53ce0bbb6fp79, 0x1.b7807e294781fp82 }, { 0x1.1a58b2b09fdcbp79, 0x1.9298add70a734p82 }, { 0x1.0241de6c31e5bp79, 0x1.70beaf9c7ffb6p82 }, { 0x1.d863cf753825cp78, 0x1.51b2cd6709222p82 }, { 0x1.affb906d0ae09p78, 0x1.353a6cf7f7fffp82 }, { 0x1.8afbf9e9520c2p78, 0x1.1b1fa8cbe84a7p82 }, { 0x1.691c7c768becep78, 0x1.0330f0fd69921p82 }, { 0x1.4a1a79df39cdep78, 0x1.da81670f96f9bp81 }, { 0x1.2db8ca9009091p78, 0x1.b24a16b4d09aap81 }, { 0x1.13bf4cb384e4ap78, 0x1.8d6eeb6efdbd6p81 }, { 0x1.f7f4f88751db4p77, 0x1.6ba91ac734786p81 }, { 0x1.cc7626bced452p77, 0x1.4cb7966770ab5p81 }, { 0x1.a4ab6470c1c5cp77, 0x1.305e9721d0981p81 }, { 0x1.80451c2811052p77, 0x1.1667311fff70ap81 }, { 0x1.5efa4d64f59f6p77, 0x1.fd3de10d62855p80 }, { 0x1.40880373ed74p77, 0x1.d1aefbcd48d0cp80 }, { 0x1.24b0d7368076ep77, 0x1.a9cc93c25aca9p80 }, { 0x1.0b3c7b0d960fp77, 0x1.85487ee3ea735p80 }, { 0x1.e7eea02e4ed88p76, 0x1.63daf8b4b1e0cp80 }, { 0x1.bd6408059b696p76, 0x1.45421e69a6ca1p80 }, { 0x1.96826d9e90341p76, 0x1.294175802d99ap80 }, { 0x1.72fa4fa12d516p76, 0x1.0fa17bf41068fp80 }, { 0x1.5282d2d5803fep76, 0x1.f05e82aae2bb9p79 }, { 0x1.34d935f1be064p76, 0x1.c578101b29058p79 }, { 0x1.19c050c56d0d7p76, 0x1.9e39dc5dd2f7cp79 }, { 0x1.01001dd9c7ccep76, 0x1.7a553a728bbf2p79 }, { 0x1.d4ca9b634ecbap75, 0x1.5982008db1304p79 }, { 0x1.ab81c5c80cf39p75, 0x1.3b7e00422e51bp79 }, { 0x1.85cfacb7477f2p75, 0x1.200c898d9ee3ep79 }, { 0x1.6365862923eb9p75, 0x1.06f5f7eb65a56p79 }, { 0x1.43fb317b5dc37p75, 0x1.e00e9148a1d25p78 }, { 0x1.274ea96044bd7p75, 0x1.b623734024e92p78 }, { 0x1.0d23817479c67p75, 0x1.8fd4e01891bf8p78 }, { 0x1.ea84dd159259p74, 0x1.6cd44c7470d89p78 }, { 0x1.bef1b1a12823ep74, 0x1.4cd9c04158cd7p78 }, { 0x1.9730edfda64acp74, 0x1.2fa34bf5c8344p78 }, { 0x1.72ede3b7eaa25p74, 0x1.14f4890ff2461p78 }, { 0x1.51db1ec3a3087p74, 0x1.f92c49dfa4df5p77 }, { 0x1.33b1c9d1576ecp74, 0x1.ccaaea71ab0dfp77 }, { 0x1.18311f8a03acap74, 0x1.a40829f001197p77 }, { 0x1.fe3bcf4629feap73, 0x1.7eef13b59e96cp77 }, { 0x1.d083fda665164p73, 0x1.5d11e1a252bf5p77 }, { 0x1.a6d7d18831888p73, 0x1.3e296303b2297p77 }, { 0x1.80dcd6603df1bp73, 0x1.21f47009f43cep77 }, { 0x1.5e4062d5b6a4ep73, 0x1.083768c5e4542p77 }, { 0x1.3eb6ef47c2758p73, 0x1.e1777d831265fp76 }, { 0x1.21fb7a81c5444p73, 0x1.b69f10b0191b5p76 }, { 0x1.07cefb734d68bp73, 0x1.8f8a3a05b5b53p76 }, { 0x1.dfefbdb19ac7ep72, 0x1.6be573c40c8e7p76 }, { 0x1.b4831fb12344p72, 0x1.4b645ba991fdbp76 }, { 0x1.8cf81557d20b6p72, 0x1.2dc119095729fp76 }, { 0x1.68f6f0feb4755p72, 0x1.12bbcfa4d62dep76 }, { 0x1.482fa78c40635p72, 0x1.f4343c7d504b9p75 }, { 0x1.2a59289a484fbp72, 0x1.c74d4fe1e0e8bp75 }, { 0x1.0f30c4d0be5cp72, 0x1.9e614ecbf4af6p75 }, { 0x1.ecf3428c48d4fp71, 0x1.791716475420cp75 }, { 0x1.bff86d9ec8499p71, 0x1.571d34563050ap75 }, { 0x1.970bb87f4ae14p71, 0x1.3829407a207d8p75 }, { 0x1.71d0b55b79b86p71, 0x1.1bf74244aed5ap75 }, { 0x1.4ff315d036fbdp71, 0x1.024924c7520d1p75 }, { 0x1.3125f6a3d257p71, 0x1.d5cc6ba567f29p74 }, { 0x1.15233ae8815f2p71, 0x1.ab3560167ccaap74 }, { 0x1.f755ea760487dp70, 0x1.846e9dda7a163p74 }, { 0x1.c905bbd9ab5a6p70, 0x1.6121d7db32bddp74 }, { 0x1.9eebaa0589b4ep70, 0x1.410047ead6894p74 }, { 0x1.78a6de0f41b89p70, 0x1.23c2090cdde78p74 }, { 0x1.55df1790f2f61p70, 0x1.09257fca001cp74 }, { 0x1.3643ec463a3cfp70, 0x1.e1dd9ec677783p73 }, { 0x1.198c18435598dp70, 0x1.b5ceb5a13221bp73 }, { 0x1.fee9bab9f4e14p69, 0x1.8dbaa11de2037p73 }, { 0x1.cf82e0eb6196bp69, 0x1.694680a9a3ee6p73 }, { 0x1.a474e7029a919p69, 0x1.481f73b3778e8p73 }, { 
0x1.7d5af6513e2bep69, 0x1.29f9e7d8fd094p73 }, { 0x1.59d93e1d8f57dp69, 0x1.0e90f64b5b103p73 }, { 0x1.399c279e4699ap69, 0x1.eb4b9e47b58c9p72 }, { 0x1.1c579bbca6885p69, 0x1.bdfe62f60dd7p72 }, { 0x1.01c659160612dp69, 0x1.94d1de5c4576fp72 }, { 0x1.d352b1ae2694p68, 0x1.6f66f6ab90c3cp72 }, { 0x1.a78e8252c204dp68, 0x1.4d67050b31c2ap72 }, { 0x1.7fd7c80f3410ep68, 0x1.2e8318008cf89p72 }, { 0x1.5bcf92cc55d86p68, 0x1.1273463a1589bp72 }, { 0x1.3b1f876b10da7p68, 0x1.f1ec20afad0e2p71 }, { 0x1.1d791bb1324a1p68, 0x1.c39fa0d4a5a2bp71 }, { 0x1.0294e37abcee8p68, 0x1.99946bf7e02a1p71 }, { 0x1.d463db5fa3c13p67, 0x1.73679b24aeb9bp71 }, { 0x1.a82a5f4047a5bp67, 0x1.50bf2558ab78fp71 }, { 0x1.8011fb05fe09p67, 0x1.314916abfa1eap71 }, { 0x1.5bb91decf8a58p67, 0x1.14bad9006f53bp71 }, { 0x1.3ac71ce35c1d3p67, 0x1.f5a1196b5bb2ep70 }, { 0x1.1ceb656955c59p67, 0x1.c698e001f6d3p70 }, { 0x1.01dcc2acf7755p67, 0x1.9beca74b0f147p70 }, { 0x1.d2b166911c178p66, 0x1.753637caac6d9p70 }, { 0x1.a6459c5b11342p66, 0x1.5218993857afcp70 }, { 0x1.7e086accc805dp66, 0x1.323f3f19cff3ep70 }, { 0x1.59962aef547b3p66, 0x1.155d47fdb9c94p70 }, { 0x1.3894608650edep66, 0x1.f6599b70323cap69 }, { 0x1.1ab0e4d284f44p66, 0x1.c6dc8a4bb3ba6p69 }, { 0x1.ff4248ebb8299p65, 0x1.9bcfd83a431e9p69 }, { 0x1.ce42dd8e4fa23p65, 0x1.74ca889bbacd5p69 }, { 0x1.a1e8aa1400997p65, 0x1.516d33e26c04p69 }, { 0x1.79c430435a7fcp65, 0x1.31612a7ef535fp69 }, { 0x1.557046eb39249p65, 0x1.1457ab75c2489p69 }, { 0x1.349127b59b217p65, 0x1.f41259c9550cp68 }, { 0x1.16d392dff5104p65, 0x1.c46969ca99a2ep68 }, { 0x1.f7d80dc993f2fp64, 0x1.993e82b76e726p68 }, { 0x1.c72c149cb214bp64, 0x1.72267ac1b25ap68 }, { 0x1.9b270c24cc8fap64, 0x1.4ec0062aeeb78p68 }, { 0x1.73585df7b6643p64, 0x1.2eb2d18a2081bp68 }, { 0x1.4f59f9910367ep64, 0x1.11aeb0b11d1a1p68 }, { 0x1.2ecf5b7f6abe3p64, 0x1.eed5c0bbf1061p67 }, { 0x1.1164ab45aa235p64, 0x1.bf4ab21b4f3fp67 }, { 0x1.ed9bdbc6f1b0ap63, 0x1.944462d4d5991p67 }, { 0x1.bd8c96533b39bp63, 0x1.6d561de54f6a1p67 }, { 0x1.921ec84d5860ep63, 0x1.4a1d472804fc8p67 }, { 0x1.6ae172414cebap63, 0x1.2a406e25fcb44p67 }, { 0x1.476e3b661be8cp63, 0x1.0d6e7662dda9dp67 }, { 0x1.276873924f0b4p63, 0x1.e6bba6770e22dp66 }, { 0x1.0a7c2c9322f59p63, 0x1.b797ab2ba22d2p66 }, { 0x1.e0bad18c4e37dp62, 0x1.8cf813910fdcdp66 }, { 0x1.b18eba0be4d24p62, 0x1.666f488db6e0ap66 }, { 0x1.86f7884e1caadp62, 0x1.4399f7770045fp66 }, { 0x1.608484d592328p62, 0x1.241e1ebbbf4ecp66 }, { 0x1.3dcfaee52a8f5p62, 0x1.07aa30ce6a5ap66 }, { 0x1.1e7cbac093f27p62, 0x1.dbe8969a24c6fp65 }, { 0x1.023827dc88ed9p62, 0x1.ad7301258d788p65 }, { 0x1.d16cd999791c3p61, 0x1.837a640fa9d3dp65 }, { 0x1.a3666de0788bp61, 0x1.5d90f358d61f6p65 }, { 0x1.79e17816df1e8p61, 0x1.3b5342f7be9cp65 }, { 0x1.546e385224d1p61, 0x1.1c674ecd152d3p65 }, { 0x1.32a7a483e977bp61, 0x1.007b997a0b531p65 }, { 0x1.1432649c86c4dp61, 0x1.ce8cc007a6432p64 }, { 0x1.f177ce0bd5836p60, 0x1.a109c0bccbc39p64 }, { 0x1.bff3166bc36eep60, 0x1.77f5624913c3ap64 }, { 0x1.934fc0975fb3p60, 0x1.52e251d5d3b1fp64 }, { 0x1.6b13ebb9a5ad4p60, 0x1.316da780bc4d9p64 }, { 0x1.46d17a80cc174p60, 0x1.133deb1d3526p64 }, { 0x1.2624f3a0a887p60, 0x1.f00460b24acf8p63 }, { 0x1.08b47d7733cb6p60, 0x1.bee2903d584f9p63 }, { 0x1.dc5de496b181p59, 0x1.92920a7c80e26p63 }, { 0x1.ac9615b3c9fd7p59, 0x1.6a9b25345c773p63 }, { 0x1.818d3a356669ep59, 0x1.4691b26b9c82fp63 }, { 0x1.5acbdab2ed713p59, 0x1.2613e9610f6d1p63 }, { 0x1.37e61fd4c0fep59, 0x1.08c969adf0beap63 }, { 0x1.187ab3d71db11p59, 0x1.dcc4ac4f59be5p62 }, { 0x1.f8637ea4e52acp58, 0x1.ad2d0a9a18288p62 }, { 0x1.c577fd709b099p58, 0x1.82498a7cc94b9p62 }, { 
0x1.97a3dc62119c8p58, 0x1.5ba462dee8a02p62 }, { 0x1.6e66137bb7ccap58, 0x1.38d330d8806ap62 }, { 0x1.494a3f6a9a70ep58, 0x1.1975e0627306cp62 }, { 0x1.27e767bb79ea2p58, 0x1.fa6b5ee8f3088p61 }, { 0x1.09dee32687729p58, 0x1.c78892308bd9p61 }, { 0x1.ddb6ae2f39381p57, 0x1.99b5ec6741cb3p61 }, { 0x1.ad1f9fba4b2abp57, 0x1.7073c400e10dcp61 }, { 0x1.816dde4c11ca3p57, 0x1.4b4ee0b3a84d6p61 }, { 0x1.5a245d5e5289cp57, 0x1.29df4862ac231p61 }, { 0x1.36d26a686daafp57, 0x1.0bc7294e0cbafp61 }, { 0x1.171277cbbce9cp57, 0x1.e163bd8df864p60 }, { 0x1.f5120b45c00e6p56, 0x1.b0a61bce91993p60 }, { 0x1.c1c74b30d0bbp56, 0x1.84cbb00f925fp60 }, { 0x1.93b02e5cf0324p56, 0x1.5d5841ce6cb73p60 }, { 0x1.6a46f43f3118cp56, 0x1.39dbcd485dd07p60 }, { 0x1.45132973bb79bp56, 0x1.19f153b38a108p60 }, { 0x1.23a85891dc72bp56, 0x1.fa7b9159fc471p59 }, { 0x1.05a4dba466c4ep56, 0x1.c6de3429e31fap59 }, { 0x1.d561964307dc4p55, 0x1.98769faac8a1bp59 }, { 0x1.a4fa0f13737e8p55, 0x1.6ebf82977acfp59 }, { 0x1.7984b636ad1bep55, 0x1.4940bc89fa5aap59 }, { 0x1.5281628cb373ap55, 0x1.278e135bcf0a4p59 }, { 0x1.2f7cc38bc628dp55, 0x1.0946088b6f8edp59 }, { 0x1.100f1aef8eaf5p55, 0x1.dc21972b9e9f4p58 }, { 0x1.e7b62ce66acdep54, 0x1.ab3e8cfada51ap58 }, { 0x1.b5198cf325114p54, 0x1.7f5483f729c27p58 }, { 0x1.87b15da6677afp54, 0x1.57e33e2b1c6dap58 }, { 0x1.5ef5de2e68985p54, 0x1.3477480d89e25p58 }, { 0x1.3a6d00852a688p54, 0x1.14a8b54629fb2p58 }, { 0x1.19a90b14f53afp54, 0x1.f033fa073d52p57 }, { 0x1.f88eba04114cbp53, 0x1.bcede5acc0d4p57 }, { 0x1.c3dea36b87937p53, 0x1.8ee7b29d0b081p57 }, { 0x1.94a28136fa731p53, 0x1.659917bbb6632p57 }, { 0x1.6a4b2c9663fa1p53, 0x1.40877b79cd868p57 }, { 0x1.44580945b8452p53, 0x1.1f44979177348p57 }, { 0x1.22558f1aa9f03p53, 0x1.016d3f035816p57 }, { 0x1.03dbf8db89298p53, 0x1.cd508600d0ba8p56 }, { 0x1.d11c2965639f6p52, 0x1.9d4ae77a21604p56 }, { 0x1.a03065db54a4bp52, 0x1.723974e9529d8p56 }, { 0x1.745e6013d8cf3p52, 0x1.4b9a944f57915p56 }, { 0x1.4d1f2eb8531p52, 0x1.28f9c9b769ee3p56 }, { 0x1.29f9b7c4f56dfp52, 0x1.09ee66b6e99e9p56 }, { 0x1.0a814a1dfc5edp52, 0x1.dc34b6999ff72p55 }, { 0x1.dca8b63e38fa9p51, 0x1.aa5249b4cca57p55 }, { 0x1.aa36c9242f8bcp51, 0x1.7d9db080918bap55 }, { 0x1.7d0fbfa6c3c19p51, 0x1.558e88e8945efp55 }, { 0x1.54a6b679dd96fp51, 0x1.31aa564e92066p55 }, { 0x1.307d4e71272d7p51, 0x1.11831a9c3763dp55 }, { 0x1.1022313b11381p51, 0x1.e96c265c21fbfp54 }, { 0x1.e65f78e13edcdp50, 0x1.b5d52c19374fep54 }, { 0x1.b2959e487c93fp50, 0x1.87a2188252d5fp54 }, { 0x1.84436cf62b6f8p50, 0x1.5e440cc8caaf9p54 }, { 0x1.5ad66c67f3f63p50, 0x1.393ad199301dep54 }, { 0x1.35cb549c616ebp50, 0x1.18135a0647102p54 }, { 0x1.14ac7e9322a1ap50, 0x1.f4ccd98eab06bp53 }, { 0x1.ee20fae75a2c5p49, 0x1.bfaedff2748c1p53 }, { 0x1.b931b883c77f2p49, 0x1.9026a7e3c9538p53 }, { 0x1.89e1f8e1d4be6p49, 0x1.659f3419269eep53 }, { 0x1.5f9a24050e89fp49, 0x1.3f92e9472ca4cp53 }, { 0x1.39d2746cbe57fp49, 0x1.1d89fb6602df9p53 }, { 0x1.18115431b6c4ap49, 0x1.fe32077e095c4p52 }, { 0x1.f3d3ca19edf64p48, 0x1.c7bf775863df5p52 }, { 0x1.bdf55dd9bdcep48, 0x1.970fb0b5580dcp52 }, { 0x1.8dd8e25d2255dp48, 0x1.6b88087e4af9fp52 }, { 0x1.62e225ebca19p48, 0x1.449de67f2c6b2p52 }, { 0x1.3c855ef212badp48, 0x1.21d51dc348d4dp52 }, { 0x1.1a4576cd5cddcp48, 0x1.02be7023a443ep52 }, { 0x1.f765035c713d8p47, 0x1.cdec7155697e1p51 }, { 0x1.c0d0bdeb46ae2p47, 0x1.9c4671c1a6e3cp51 }, { 0x1.901afbd3819bep47, 0x1.6feb0af26f865p51 }, { 0x1.64a386137b955p47, 0x1.484b1e63b3be4p51 }, { 0x1.3ddb15521ce49p47, 0x1.24e68a1458bd7p51 }, { 0x1.1b418ba2217c6p47, 0x1.054a9a7c2f05ap51 }, { 0x1.f8c8bad8e2a2p46, 0x1.d2214ad33ca5ep50 }, { 
0x1.c1ba4950b8f4fp46, 0x1.9fb9933adac68p50 }, { 0x1.90a0b40dd690cp46, 0x1.72b99eccc462ep50 }, { 0x1.64d860502b279p46, 0x1.4a8e4dbe3539cp50 }, { 0x1.3dcf1aadc099dp46, 0x1.26b4018ef81f7p50 }, { 0x1.1b02414a73357p46, 0x1.06b4fe82cc6aep50 }, { 0x1.f7fa3e4bec2aep45, 0x1.d44feffb34893p49 }, { 0x1.c0aee6d6b1406p45, 0x1.a15d86bb23572p49 }, { 0x1.8f684065398bfp45, 0x1.73ea5ac0d71a9p49 }, { 0x1.637ff9397e989p45, 0x1.4b5fdd0f567fap49 }, { 0x1.3c618d3c706ebp45, 0x1.2737769828878p49 }, { 0x1.1988625955723p45, 0x1.06f8da87263cep49 }, { 0x1.f4fc2f6d50e41p44, 0x1.d4710a9e149edp48 }, { 0x1.bdb204ff1cda3p44, 0x1.a12cc7b1bf616p48 }, { 0x1.8c75a6fa17116p44, 0x1.73793d6253bd7p48 }, { 0x1.609ec277b8703p44, 0x1.4abd0af44c7f8p48 }, { 0x1.399725d96eb63p44, 0x1.266f2e981ccfbp48 }, { 0x1.16d8d1241b86bp44, 0x1.06154a07d21a2p48 }, { 0x1.efd875a51d28dp43, 0x1.d2842b40e25fp47 }, { 0x1.b8cd873c4de72p43, 0x1.9f27fa465d061p47 }, { 0x1.87d2a89e5ac65p43, 0x1.7167c3937ded9p47 }, { 0x1.5c3e42539c769p43, 0x1.48a7fb96552cap47 }, { 0x1.35791e04cd29fp43, 0x1.245dcbaa25b1bp47 }, { 0x1.12fc6cdafd10dp43, 0x1.040d4ab2de626p47 }, { 0x1.e8a0077a1ed47p42, 0x1.ce8fcb8dadc2cp46 }, { 0x1.b2118f75a4eb7p42, 0x1.9b55e7c11d9e6p46 }, { 0x1.818e8b1c2616fp42, 0x1.6dbce02ec5c77p46 }, { 0x1.566cdf4525ebp42, 0x1.4527acab6dfebp46 }, { 0x1.3014fd204bc71p42, 0x1.210a3ddcb4706p46 }, { 0x1.0dffe0bfc0c74p42, 0x1.00e7aba6527c9p46 }, { 0x1.df6a8d5e14f11p41, 0x1.c8a12a152d814p45 }, { 0x1.a9942579915cdp41, 0x1.95c35893651c9p45 }, { 0x1.79bdc576e403ap41, 0x1.6884d52cc9914p45 }, { 0x1.4f3d9114d799bp41, 0x1.4047ce663f641p45 }, { 0x1.297c4e6eb62fcp41, 0x1.1c7f9c74f3e7cp45 }, { 0x1.07f35ef1a4fcp41, 0x1.f95dcee779f74p44 }, { 0x1.d455e0a3b0d94p40, 0x1.c0cc007cc808ep44 }, { 0x1.9f70bf04a77cep40, 0x1.8e82cd2a6133cp44 }, { 0x1.707990a8defefp40, 0x1.61d0ef76712e4p44 }, { 0x1.46c779ebb14aep40, 0x1.3a1882865d26ep44 }, { 0x1.21c4420bc9879p40, 0x1.16cce86450b2p44 }, { 0x1.00ea48df1e7fbp40, 0x1.eee1d41e1e516p43 }, { 0x1.c7856a7693627p39, 0x1.b72a1658393d4p43 }, { 0x1.93c7abef59a2cp39, 0x1.85ac17b553c4fp43 }, { 0x1.65df602b1e0ffp39, 0x1.59b72775450f3p43 }, { 0x1.3d256a5ee461dp39, 0x1.32ae03812fcp43 }, { 0x1.19053bac5f645p39, 0x1.1004b9cd4bae6p43 }, { 0x1.f1f58fe66e142p38, 0x1.e27d88d5289bfp42 }, { 0x1.b9216793da422p38, 0x1.abdab3fb224cep42 }, { 0x1.86bd6adace04ep38, 0x1.7b5bd9f52a89ep42 }, { 0x1.5a104640aeb74p38, 0x1.5051a941eb13p42 }, { 0x1.32755417b50ddp38, 0x1.2a20366f6a0dep42 }, { 0x1.0f5a5274f5c45p38, 0x1.083cdb1163405p42 }, { 0x1.e07ab300dc4b9p37, 0x1.d458a013d18b4p41 }, { 0x1.a956163a49613p37, 0x1.9f01f97b2e043p41 }, { 0x1.7879eb52380edp37, 0x1.6fb2eaf7d8102p41 }, { 0x1.4d30488394e18p37, 0x1.45be480207b14p41 }, { 0x1.26d7af2869fc5p37, 0x1.208a2b041836ep41 }, { 0x1.04e0c593552f5p37, 0x1.ff1ba8cbc9c8dp40 }, { 0x1.cd98a274acae3p36, 0x1.c49f8a8ec4aebp40 }, { 0x1.9852d44d7528bp36, 0x1.90c81ede57558p40 }, { 0x1.6927c2c3e497p36, 0x1.62d5a948b6358p40 }, { 0x1.3f65a98c177c9p36, 0x1.3a1de0952fd2bp40 }, { 0x1.1a6ed66936eeap36, 0x1.16098d4b94692p40 }, { 0x1.f36ed3084aa81p35, 0x1.ec24d6a8bc072p39 }, { 0x1.b986ab7ebdd54p35, 0x1.b3828ebcc128bp39 }, { 0x1.864933f3c0573p35, 0x1.8158a3038115ep39 }, { 0x1.58f359f0c4e8fp35, 0x1.54eb3e9a3e72bp39 }, { 0x1.30d82cb8a968cp35, 0x1.2d93b0174f61ap39 }, { 0x1.0d5e5f59de7c1p35, 0x1.0abe0d45fd5c2p39 }, { 0x1.dbfc240ab5f81p34, 0x1.d7ce33a39bd89p38 }, { 0x1.a47db588b15cfp34, 0x1.a134d30d655e4p38 }, { 0x1.736c0d0a31187p34, 0x1.70e16f315ef4p38 }, { 0x1.480a1879e8f57p34, 0x1.461cda38e2783p38 }, { 0x1.21b0591ce1cfdp34, 0x1.2044a2faebb7bp38 }, { 
0x1.ff94e3fca1752p33, 0x1.fd91813f8cc8cp37 }, { 0x1.c3a9f9558ffap33, 0x1.c2530177987fep37 }, { 0x1.8eb738c76b2f2p33, 0x1.8deb61106f334p37 }, { 0x1.5fee91a43fef1p33, 0x1.5f91f55e86346p37 }, { 0x1.3699940a6a811p33, 0x1.3694e7b13691bp37 }, { 0x1.1216c07263dep33, 0x1.1256a18de488bp37 }, { 0x1.e3ae49fef5535p32, 0x1.e49705a5ebd5fp36 }, { 0x1.aab87fb8e4441p32, 0x1.abefb3186e784p36 }, { 0x1.786c3dca158c4p32, 0x1.79dc285401b7dp36 }, { 0x1.4c036b7451223p32, 0x1.4d9a4f359ba1ep36 }, { 0x1.24cec8453db03p32, 0x1.267e46fd85893p36 }, { 0x1.02334e92993b9p32, 0x1.03efdea0a0506p36 }, { 0x1.c74fc41217dfbp31, 0x1.cad0afbb569b1p35 }, { 0x1.9166837399532p31, 0x1.94e0d5e7a8744p35 }, { 0x1.61d46c11dd916p31, 0x1.653d077d9eefp35 }, { 0x1.37dbe7711fcd4p31, 0x1.3b2a639494566p35 }, { 0x1.12d55c1e73c65p31, 0x1.16038b4af0a0ep35 }, { 0x1.e4594b115943bp30, 0x1.ea6c598920c48p34 }, { 0x1.aabdabdb93484p30, 0x1.b081aaf25ade1p34 }, { 0x1.77f073eb945dfp30, 0x1.7d62079a4e4a6p34 }, { 0x1.4b252d0bc8bebp30, 0x1.5042e1a8664edp34 }, { 0x1.23a7345c57ccap30, 0x1.287117d29a9e6p34 }, { 0x1.00d6f8a57f06ep30, 0x1.054e44f8ee735p34 }, { 0x1.c44f136cf3bd8p29, 0x1.cc9cbc5fe04a8p33 }, { 0x1.8e38df2790b7ap29, 0x1.95eb2cb828067p33 }, { 0x1.5e8f828661e21p29, 0x1.65acfefcd0029p33 }, { 0x1.3490e7e2bc31cp29, 0x1.3b20c56ad84f5p33 }, { 0x1.0f91b7ff9bb2ap29, 0x1.159b917beb87ap33 }, { 0x1.ddf56913a541ep28, 0x1.e90cb5cac7057p32 }, { 0x1.a48cc1b8a7bc7p28, 0x1.aeb7659e5f7efp32 }, { 0x1.71fde01e2ca8cp28, 0x1.7b4b752e86e5fp32 }, { 0x1.4578e0b906b32p28, 0x1.4df8ace15322ep32 }, { 0x1.1e4659a2a2156p28, 0x1.26072a17961ap32 }, { 0x1.f788fc218597bp27, 0x1.02d48c75e7d9bp32 }, { 0x1.bac92daac0b9dp27, 0x1.c7a2ecd5f05ap31 }, { 0x1.85518c3484796p27, 0x1.90feaede7f2aep31 }, { 0x1.56441b55bfff1p27, 0x1.60dcef1cedc3ap31 }, { 0x1.2cdd203ab43a1p27, 0x1.36787980e7387p31 }, { 0x1.08700c199ad4fp27, 0x1.112346e13dd7ep31 }, { 0x1.d0c9857c390f3p26, 0x1.e087915129a98p30 }, { 0x1.986a650394095p26, 0x1.a6a5096da5b7dp30 }, { 0x1.66d6688315ad6p26, 0x1.73aff07c7874ep30 }, { 0x1.3b3d55ebd8547p26, 0x1.46d572e10e216p30 }, { 0x1.14e7b714e7093p26, 0x1.1f5ba17e5a90bp30 }, { 0x1.e667d9a8bcd9ep25, 0x1.f93d0d186fbcdp29 }, { 0x1.ab2733e383ad8p25, 0x1.bc1b22cec72bp29 }, { 0x1.7712b76c8c7f6p25, 0x1.86529e9df069cp29 }, { 0x1.494d8e1d4fc61p25, 0x1.5702d052bf73ap29 }, { 0x1.2115447c6627dp25, 0x1.2d65aee08874cp29 }, { 0x1.fb7d503fc65c8p24, 0x1.08ccb49580d43p29 }, { 0x1.bd660913b938cp24, 0x1.d13c32a98512bp28 }, { 0x1.86db66e158524p24, 0x1.98a4bfd5a5fadp28 }, { 0x1.56f3ed5aa4222p24, 0x1.66e459a7794f4p28 }, { 0x1.2ce2265a96befp24, 0x1.3b28bbce3c1c6p28 }, { 0x1.07f14a8d0c116p24, 0x1.14b8b6b67144ep28 }, { 0x1.cf049ebedf60dp23, 0x1.e5e26dbef0e28p27 }, { 0x1.96129ca292f7ep23, 0x1.aa854b5c4f131p27 }, { 0x1.6416763f6b3bcp23, 0x1.765d329106241p27 }, { 0x1.3837bf030f4a8p23, 0x1.488b9479ee1c4p27 }, { 0x1.11b82880134f9p23, 0x1.204c8d940530bp27 }, { 0x1.dfe0c1b8af1f3p22, 0x1.f9e77238e0031p26 }, { 0x1.a49aa1651cfcap22, 0x1.bbd2c8fd7e193p26 }, { 0x1.709b5a3a79128p22, 0x1.85502f16a0f8dp26 }, { 0x1.42ffa7e9ace3fp22, 0x1.5574ceffe3945p26 }, { 0x1.1affd2eccd616p22, 0x1.2b72182c97af5p26 }, { 0x1.efd8be43ac9a9p21, 0x1.06925da53a0fcp26 }, { 0x1.b2564005de7e5p21, 0x1.cc6bb6d71090dp25 }, { 0x1.7c694cd2b4ffdp21, 0x1.93a02d0c97221p25 }, { 0x1.4d23fa69bd814p21, 0x1.61cb1a027e057p25 }, { 0x1.23b556e6e918ep21, 0x1.361358dd1f243p25 }, { 0x1.fecbcf04dca9p20, 0x1.0fba0d2660d89p25 }, { 0x1.bf29264dcdc82p20, 0x1.dc2ef387bd0ep24 }, { 0x1.8767d7fc43eb6p20, 0x1.a130711aadcdap24 }, { 0x1.568f9937abc79p20, 0x1.6d758e1ac9659p24 }, { 
0x1.2bc67d8c20136p20, 0x1.401abca024479p24 }, { 0x1.064d4616b0094p20, 0x1.185819a7f8c6ap24 }, { 0x1.caf8458ad2a12p19, 0x1.eafc2b00a99b1p23 }, { 0x1.917faff93e54p19, 0x1.ade505ba61e89p23 }, { 0x1.5f2e79283b1cap19, 0x1.785c00b5cb27ep23 }, { 0x1.33220b1da4f59p19, 0x1.4973634932c1ap23 }, { 0x1.0c93ac678b0ccp19, 0x1.205a7d78be568p23 }, { 0x1.d5aa313452daep18, 0x1.f8b4440d68221p22 }, { 0x1.9a9b05368c88bp18, 0x1.b9a31a7b9868cp22 }, { 0x1.66ede7f0c2d55p18, 0x1.826756e1a42e2p22 }, { 0x1.39b7fc18e5891p18, 0x1.5209676e4b424p22 }, { 0x1.122b662569616p18, 0x1.27b019965e362p22 }, { 0x1.df2779ceabfc8p17, 0x1.029ce648133fdp22 }, { 0x1.a2a5d2945d2b7p17, 0x1.c45161cd95fe8p21 }, { 0x1.6dbccf848794ap17, 0x1.8b81d680cdfc5p21 }, { 0x1.3f79bf21caa96p17, 0x1.59ca24a7521ddp21 }, { 0x1.17080ae674896p17, 0x1.2e48f266999cfp21 }, { 0x1.e75b024885f54p16, 0x1.0838b13324d03p21 }, { 0x1.a98e26924c6c8p16, 0x1.cdd86b83e679dp20 }, { 0x1.738bf4bc8d296p16, 0x1.93977456406ddp20 }, { 0x1.445a6a9a273c6p16, 0x1.60a47aca18e96p20 }, { 0x1.1b1eabeffc3a5p16, 0x1.341669953fe1cp20 }, { 0x1.ee324e1fde417p15, 0x1.0d210b765b3d6p20 }, { 0x1.af4465e9c5668p15, 0x1.d622fa53c02cep19 }, { 0x1.784e3008fb46bp15, 0x1.9a961d6383ef7p19 }, { 0x1.484eecd2f1383p15, 0x1.66890cd0bf55fp19 }, { 0x1.1e65fd1ef2701p15, 0x1.390b73f2a4fbp19 }, { 0x1.f39dc6baaccd7p14, 0x1.114ae59581395p19 }, { 0x1.b3bb863d26278p14, 0x1.dd1e5296953a3p18 }, { 0x1.7bf89f052b591p14, 0x1.a06dfa21b6c59p18 }, { 0x1.4b4e35dbe0cddp14, 0x1.6b6a7a27c9005p18 }, { 0x1.20d6781986167p14, 0x1.3d1cca3d4f6d8p18 }, { 0x1.f790f6877f51ep13, 0x1.14acc164c64fep18 }, { 0x1.b6e93fa7299b3p13, 0x1.e2ba80b9c3a1bp17 }, { 0x1.7e82cde922833p13, 0x1.a511aa3827999p17 }, { 0x1.4d515a14a6132p13, 0x1.6f3d9139319edp17 }, { 0x1.226a790f97768p13, 0x1.404113d7d18e6p17 }, { 0x1.fa02b8ac73416p12, 0x1.173ed60fcd6fap17 }, { 0x1.b8c634233722p12, 0x1.e6ea95e92c624p16 }, { 0x1.7fe6d7fbcef2cp12, 0x1.a8767775dd309p16 }, { 0x1.4e53acc7531b1p12, 0x1.71f97a2983044p16 }, { 0x1.231e547065724p12, 0x1.42710a88aab19p16 }, { 0x1.faed5c4559717p11, 0x1.18fb2ded8ebb1p16 }, { 0x1.b94e0bfb59934p11, 0x1.e9a4d9b21386ep15 }, { 0x1.80217e57d8a3fp11, 0x1.aa947efe69879p15 }, { 0x1.4e52d23cf50bp11, 0x1.7397d8e2bd385p15 }, { 0x1.22f0652094ae6p11, 0x1.43a79684f6ef6p15 }, { 0x1.fa4eba730bf6p10, 0x1.19ddbd8138a9p15 }, { 0x1.b87f86a26fad7p10, 0x1.eae2ef93df996p14 }, { 0x1.7f323487ff94ap10, 0x1.ab66cfccafb75p14 }, { 0x1.4d4ec8ea8ee67p10, 0x1.7414e5b5ca43cp14 }, { 0x1.21e112e39bf18p10, 0x1.43e1e22ebfdb4p14 }, { 0x1.f8283ec45f117p9, 0x1.19e4732be2ffp14 }, { 0x1.b65c7f9f1fbedp9, 0x1.eaa1efb3b003ep13 }, { 0x1.7d1b22b6810f6p9, 0x1.aaeb7de6855e2p13 }, { 0x1.4b49e984886ep9, 0x1.736f7c0d13f06p13 }, { 0x1.1ff2d0d5a2649p9, 0x1.431f651be2ff4p13 }, { 0x1.f47ee1cab73ddp8, 0x1.190f3f39e9af4p13 }, { 0x1.b2e9e76c8d9f9p8, 0x1.e8e2722ca46cfp12 }, { 0x1.79e11d635b9a7p8, 0x1.a923a9d8d5019p12 }, { 0x1.4848ddf7dfffep8, 0x1.71a91ee04e82cp12 }, { 0x1.1d2a13fdd2709p8, 0x1.4161e6298ed3ap12 }, { 0x1.ef5b15f73200ap7, 0x1.176014201ab17p12 }, { 0x1.ae2fb07705cc3p7, 0x1.e5a88cbf394e4p11 }, { 0x1.758b92cdfdc64p7, 0x1.a6137c537bf6dp11 }, { 0x1.44528f79b1b51p7, 0x1.6ec5f2d1367f4p11 }, { 0x1.198d422be3f8cp7, 0x1.3ead7491061afp11 }, { 0x1.e8c8a7276c93p6, 0x1.14dadee76975ap11 }, { 0x1.a838b09afcf62p6, 0x1.e0fbc2ec572b9p10 }, { 0x1.70246e766d2f3p6, 0x1.a1c215fcd0beap10 }, { 0x1.3f700c0d99876p6, 0x1.6accae115453ep10 }, { 0x1.1524997d01ap6, 0x1.3b08582357e32p10 }, { 0x1.e0d68d9047f7ap5, 0x1.118577f06b2f2p10 }, { 0x1.a11277ca2bd3fp5, 0x1.dae6e8d292a1ep9 }, { 0x1.69b7f34ec048ep5, 
0x1.9c3973d4c9b08p9 }, { 0x1.39ac6410ceb63p5, 0x1.65c67e684d1e6p9 }, { 0x1.0ffa110b113fp5, 0x1.367af901b137p9 }, { 0x1.d796b4f7aaf7fp4, 0x1.0d678c614f535p9 }, { 0x1.98cd1cb38dccp4, 0x1.d377f96b9fd62p8 }, { 0x1.62548d6675835p4, 0x1.958648bd6035p8 }, { 0x1.331480815e7cdp4, 0x1.5fbee5e7590f4p8 }, { 0x1.0a19336cc73a1p4, 0x1.310fbf558eca2p8 }, { 0x1.cd1db96a6c6efp3, 0x1.088a80b837328p8 }, { 0x1.8f7b007e1de49p3, 0x1.cabfe10b3371ap7 }, { 0x1.5a0a9c047e3c7p3, 0x1.8db7ccf7600f4p7 }, { 0x1.2bb6f2dd8e254p3, 0x1.58c38f07b7c3bp7 }, { 0x1.038ef3cbdc1c7p3, 0x1.2ad2ebb6268bdp7 }, { 0x1.c1829acfb62b3p2, 0x1.02f94d1fb1ba4p7 }, { 0x1.85308ad209551p2, 0x1.c0d23d3daadadp6 }, { 0x1.50ec3549a202dp2, 0x1.84df8496cc3aep6 }, { 0x1.23a3bf963c1ebp2, 0x1.50e4191e1b76cp6 }, { 0x1.f8d2fce0ebb41p1, 0x1.23d2690dc7344p6 }, { 0x1.b4de68e608347p1, 0x1.f980a88588961p5 }, { 0x1.7a03df8f9f479p1, 0x1.b5c5135a44acbp5 }, { 0x1.470ce4924af72p1, 0x1.7b10fe1f0aeaap5 }, { 0x1.1aec242758b4fp1, 0x1.4831de32e25bdp5 }, { 0x1.e9700b697ec96p0, 0x1.1c1d98f1b1f71p5 }, { 0x1.a74be9568f922p0, 0x1.ebda6af103d07p4 }, { 0x1.6e0c8fadbb05p0, 0x1.a9b07f491a273p4 }, { 0x1.3c8164e42f29cp0, 0x1.70618a9c019dap4 }, { 0x1.11a259faba91ep0, 0x1.3ebfb36da371bp4 }, { 0x1.d91518c2acaf6p-1, 0x1.13c51b7852ecp4 }, { 0x1.98e739a118b5ep-1, 0x1.dd1d36683753bp3 }, { 0x1.616346ca3be0ep-1, 0x1.9cae5c1f5de61p3 }, { 0x1.315f58c13df9cp-1, 0x1.64e7f0a95542fp3 }, { 0x1.07d957435b8c4p-1, 0x1.34a1a5595e9cbp3 }, { 0x1.c7e35cf4db634p-2, 0x1.0ada93ac2688ep3 }, { 0x1.89cd6ead31b71p-2, 0x1.cd680d6a376d2p2 }, { 0x1.542176fe1c2b2p-2, 0x1.8ed9e84be9bacp2 }, { 0x1.25bd00bd97eddp-2, 0x1.58bc1beb8e117p2 }, { 0x1.fb491e02b7c15p-3, 0x1.29ecb15514182p2 }, { 0x1.b5fcd30c7e1f6p-3, 0x1.017069c4b54cfp2 }, { 0x1.7a1c33cc1922bp-3, 0x1.bcdb33f7b88f9p1 }, { 0x1.46610483f2395p-3, 0x1.804f671a7a35cp1 }, { 0x1.19b0f23241b88p-3, 0x1.4bf6ca87a4707p1 }, { 0x1.e62f62b4555dcp-4, 0x1.1eb67d8a75351p1 }, { 0x1.a383ca9f98a0fp-4, 0x1.ef3318a5788dep0 }, { 0x1.69f16aeb3677p-4, 0x1.ab97c2106c4d2p0 }, { 0x1.383bf2b37a037p-4, 0x1.712bc1550fb6ap0 }, { 0x1.0d51cf5a16254p-4, 0x1.3eb13a24821e2p0 }, { 0x1.d08cdac87dce6p-5, 0x1.131510c1da6adp0 }, { 0x1.909a7c3ac6f99p-5, 0x1.dad26311e9efp-1 }, { 0x1.596acfa0bcc8fp-5, 0x1.99bf36c7ef068p-1 }, { 0x1.29cc13bfd53ap-5, 0x1.618c26c1169a6p-1 }, { 0x1.00b60212cf113p-5, 0x1.3104d5f799552p-1 }, { 0x1.ba886ae6e40ep-6, 0x1.071e8b6003b16p-1 }, { 0x1.7d62a282a4851p-6, 0x1.c5e5338097f6bp-2 }, { 0x1.48a59e9cb1eb1p-6, 0x1.87730de08c821p-2 }, { 0x1.1b2abc895a771p-6, 0x1.518db221cf8bap-2 }, { 0x1.e7e6f4c33ededp-7, 0x1.230ae74a714aap-2 }, { 0x1.a4480db60fe17p-7, 0x1.f5d1c58fdc6acp-3 }, { 0x1.69fd19aacb90ap-7, 0x1.b091a88a72f08p-3 }, { 0x1.37be42e1159e7p-7, 0x1.74d459ba38afep-3 }, { 0x1.0c707db025298p-7, 0x1.414d114bdcde1p-3 }, { 0x1.ce3ee3757dbe5p-8, 0x1.14dc49cbc0c3p-3 }, { 0x1.8df06bfb34f6dp-8, 0x1.dd13408401cdcp-4 }, { 0x1.568986affafc5p-8, 0x1.9afd0eca1593dp-4 }, { 0x1.26d009f5af049p-8, 0x1.6203633a6814ap-4 }, { 0x1.fb69c5d6b524ep-9, 0x1.30e632b0008c9p-4 }, { 0x1.b49c67cd1611fp-9, 0x1.069124dc6eaefp-4 }, { 0x1.77a47ec4e9fa1p-9, 0x1.c42b48d5cfe42p-5 }, { 0x1.43260788f0a1fp-9, 0x1.854b792c33d4ap-5 }, { 0x1.15f4e018a09eep-9, 0x1.4f1f511f7b2d7p-5 }, { 0x1.de1c72f739a49p-10, 0x1.2073f996519cp-5 }, { 0x1.9b25dc6d6642ep-10, 0x1.f08155c194aadp-6 }, { 0x1.61853cc8eddacp-10, 0x1.ab41e011814e5p-6 }, { 0x1.2feeed430b87bp-10, 0x1.6f9f62ec4193ap-6 }, { 0x1.05451535e8102p-10, 0x1.3c45d7f9e2fbp-6 }, { 0x1.c122bcbda7f8ep-11, 0x1.100ffa10ff0f3p-6 }, { 0x1.81ff0b26f3b6ap-11, 0x1.d401bee3a7787p-7 }, { 
0x1.4bb153d2d0728p-11, 0x1.927ce5fbbe352p-7 }, { 0x1.1cfe80beb05a4p-11, 0x1.5a195c6e2a08ep-7 }, { 0x1.e9ae566e02486p-12, 0x1.2992f3c7d2ce7p-7 }, { 0x1.a4a3297375461p-12, 0x1.ffa47aef63bd2p-8 }, { 0x1.6948e77b6c537p-12, 0x1.b7ccca35ce88ep-8 }, { 0x1.3644eed5b1126p-12, 0x1.79ffc3cd6bc92p-8 }, { 0x1.0a6cd27d913d7p-12, 0x1.44d7c3dca9cc8p-8 }, { 0x1.c97f5c053e775p-13, 0x1.1720abf01aa9bp-8 }, { 0x1.88c0c973b68fcp-13, 0x1.dfa22008cf2c8p-9 }, { 0x1.512157ee1d8bep-13, 0x1.9c08a63df00dcp-9 }, { 0x1.215988e86b086p-13, 0x1.61eb258af5a93p-9 }, { 0x1.f09f2b684fb31p-14, 0x1.2ff68a28f7dc4p-9 }, { 0x1.aa222a98ba953p-14, 0x1.0506e21782262p-9 }, { 0x1.6d9b06046eb66p-14, 0x1.c041afe3a1ad2p-10 }, { 0x1.39a30e3030664p-14, 0x1.80d8271e40929p-10 }, { 0x1.0d05cd2b64652p-14, 0x1.4a5cc1e67b046p-10 }, { 0x1.cd740d2318d4dp-15, 0x1.1b8f04bdfa1bfp-10 }, { 0x1.8bb7603d9828p-15, 0x1.e6b65816f0ff1p-11 }, { 0x1.534d810db5377p-15, 0x1.a1a7ec86c94fbp-11 }, { 0x1.22e56de90dc1ap-15, 0x1.665a9398034f1p-11 }, { 0x1.f2bb06a7069e2p-16, 0x1.336f30c8d3345p-11 }, { 0x1.ab79b6edb04e1p-16, 0x1.07b7cbf13abf4p-11 }, { 0x1.6e5b33b150249p-16, 0x1.c461717dacbd8p-12 }, { 0x1.39f005226a7dbp-16, 0x1.83f56253c12f1p-12 }, { 0x1.0cfc8192e69bdp-16, 0x1.4cab82baddd6cp-12 }, { 0x1.cce310b024fd4p-17, 0x1.1d39d04e50424p-12 }, { 0x1.8acc81455f971p-17, 0x1.e9094beff3587p-13 }, { 0x1.522570529739fp-17, 0x1.a3308036822dbp-13 }, { 0x1.219685023e1bep-17, 0x1.67464f8a36affp-13 }, { 0x1.eff1f945e7f7bp-18, 0x1.33e2c9c277148p-13 }, { 0x1.a89fa515a2b44p-18, 0x1.07d0b7bb52fc7p-13 }, { 0x1.6b83bb4ee4348p-18, 0x1.c40cfbd11fd1p-14 }, { 0x1.372982e2fde1dp-18, 0x1.833ffa698fa8bp-14 }, { 0x1.0a51297b20ab7p-18, 0x1.4bb29dadf3acp-14 }, { 0x1.c7d093fb7e463p-19, 0x1.1c147957723bdp-14 }, { 0x1.8607006600009p-19, 0x1.e6896f5762306p-15 }, { 0x1.4db1c7b733812p-19, 0x1.a096cc3260668p-15 }, { 0x1.1d76959a6b622p-19, 0x1.64a7647d3f88ap-15 }, { 0x1.e858d8b3acc8p-20, 0x1.314deba7bab37p-15 }, { 0x1.a1a94b14e3d7fp-20, 0x1.0550e92636252p-15 }, { 0x1.6529df3d1cf1cp-20, 0x1.bf46cd0f972c3p-16 }, { 0x1.316449a955429p-20, 0x1.7ebd49fbb30eep-16 }, { 0x1.0517b9e1f89dep-20, 0x1.47796af08285bp-16 }, { 0x1.be627dddb55d7p-21, 0x1.1827a73755ec7p-16 }, { 0x1.7d8a7f2a8a2dp-21, 0x1.df49a10ccc568p-17 }, { 0x1.4613bf000c71dp-21, 0x1.99ee7037b652bp-17 }, { 0x1.16a45fcb7b882p-21, 0x1.5e9197017791dp-17 }, { 0x1.dc283bcbe780fp-22, 0x1.2bc40c543e36bp-17 }, { 0x1.96ca751cac37fp-22, 0x1.004b34180a4a9p-17 }, { 0x1.5b7cd13179ddep-22, 0x1.b632d58444fadp-18 }, { 0x1.28cb2cb8b4015p-22, 0x1.768f3e13d3bdcp-18 }, { 0x1.faedd62dabd96p-23, 0x1.401fa7657909ep-18 }, { 0x1.b0de982dbf111p-23, 0x1.1190d162109abp-18 }, { 0x1.7195b2becea19p-23, 0x1.d3803e22a78e4p-19 }, { 0x1.3b8387eea3f9dp-23, 0x1.8f694ad8ac632p-19 }, { 0x1.0d521f8291cd6p-23, 0x1.55326d6aac6fap-19 }, { 0x1.cbb9be9cbac1ep-24, 0x1.236e8d3a9e0e7p-19 }, { 0x1.8852e54d26542p-24, 0x1.f1ca221c0b98bp-20 }, { 0x1.4ec36b8fdf428p-24, 0x1.a914b62872bc3p-20 }, { 0x1.1d9d0055d11dp-24, 0x1.6af2ae42db58p-20 }, { 0x1.e74cb7ebdea0ap-25, 0x1.35dbe86ed95c7p-20 }, { 0x1.9fa735b03463ap-25, 0x1.0880cfe68041ep-20 }, { 0x1.627f6220ca6a9p-25, 0x1.c3847cbf78a3bp-21 }, { 0x1.2e4d9d8b5b22fp-25, 0x1.81550cf271bfdp-21 }, { 0x1.01c325e8bb3cp-25, 0x1.48cefa0aac509p-21 }, { 0x1.b783bc148fcefp-26, 0x1.188ab9ce5fdddp-21 }, { 0x1.76aa8791eba33p-26, 0x1.dea9996bf1c0fp-22 }, { 0x1.3f58d390caeecp-26, 0x1.984c7bb9c53ffp-22 }, { 0x1.10299f255a2cap-26, 0x1.5c3c6ce5f2f75p-22 }, { 0x1.cfd7e08a13b2p-27, 0x1.28f8faa7c3202p-22 }, { 0x1.8b368e0429dacp-27, 0x1.fa7304087353p-23 }, { 0x1.50b2501707be6p-27, 
0x1.afca3c464e1d5p-23 }, { 0x1.1ecf2c897b782p-27, 0x1.701780b38d71ap-23 }, { 0x1.e891642306feep-28, 0x1.39c08dab159ep-23 }, { 0x1.a013c6709bdd5p-28, 0x1.0b66dac93672bp-23 }, { 0x1.624c9a2f2f8fcp-28, 0x1.c7bde43ebd873p-24 }, { 0x1.2da83d59392f5p-28, 0x1.84520ec5eb55ap-24 }, { 0x1.00ce3767b77a8p-28, 0x1.4ad54236cf6b4p-24 }, { 0x1.b5312d520a3f4p-29, 0x1.19d258cf47194p-24 }, { 0x1.74191dcab90bcp-29, 0x1.e015665e4efbdp-25 }, { 0x1.3ca855a30dad5p-29, 0x1.98dc92b26aeap-25 }, { 0x1.0d71d1069e44fp-29, 0x1.5c29c3e79c162p-25 }, { 0x1.ca7c7b61a5357p-30, 0x1.28708aaed4d7p-25 }, { 0x1.86083aaabaf73p-30, 0x1.f8bd2046619b5p-26 }, { 0x1.4bc21b880f9dep-30, 0x1.ada636f165959p-26 }, { 0x1.1a28183b0e32p-30, 0x1.6dafa60f704a1p-26 }, { 0x1.dfe23a6ad4f8bp-31, 0x1.37351629c53cp-26 }, { 0x1.980956bea8ccp-31, 0x1.08cff68f5874cp-26 }, { 0x1.5ae767663002ep-31, 0x1.c29ce58c1fc1p-27 }, { 0x1.26e4fd1165b76p-31, 0x1.7f5772973d16cp-27 }, { 0x1.f54dde2ba8f56p-32, 0x1.4612c5674eed9p-27 }, { 0x1.aa0af3e698b26p-32, 0x1.15539e864d70fp-27 }, { 0x1.6a0956d7d1b63p-32, 0x1.d7ad5cdc3741ep-28 }, { 0x1.339bd6e517d44p-32, 0x1.9110bc4b50f8cp-28 }, { 0x1.0554f0943ba8cp-32, 0x1.54fb970dbe54ep-28 }, { 0x1.bbfac9007ec07p-33, 0x1.21dd98bc7de87p-28 }, { 0x1.791862715d02fp-33, 0x1.ecc34851c9763p-29 }, { 0x1.403f77382e654p-33, 0x1.a2ca34863bfcbp-29 }, { 0x1.0feff2a4fc49p-33, 0x1.63e0d12d4d288p-29 }, { 0x1.cdc5de1ae8c09p-34, 0x1.2e615f0543e41p-29 }, { 0x1.8804761a993c4p-34, 0x1.00e4ae934cb56p-29 }, { 0x1.4cc23eb3b5ffap-34, 0x1.b471c42165f4ap-30 }, { 0x1.1a6c6c06ea18bp-34, 0x1.72b316e47cc93p-30 }, { 0x1.df58ab9ae4fcbp-35, 0x1.3ad1e7143aa75p-30 }, { 0x1.96bd0bd6c9a31p-35, 0x1.0b54bd6a9e23fp-30 }, { 0x1.59163428fb3a6p-35, 0x1.c5f4a785a88d1p-31 }, { 0x1.24be8d0138113p-35, 0x1.8162809b8dff6p-31 }, { 0x1.f09f3c1618809p-36, 0x1.4721b76389525p-31 }, { 0x1.a53148c3fc482p-36, 0x1.15a6678e0082cp-31 }, { 0x1.652d1d62b45e1p-36, 0x1.d73f8da963966p-32 }, { 0x1.2eda549c16ee8p-36, 0x1.8fdeb6a9e8ebcp-32 }, { 0x1.00c2a84aed164p-36, 0x1.5342fe16e83a5p-32 }, { 0x1.b3501c0fdbbcfp-37, 0x1.1fcdfea216d16p-32 }, { 0x1.70f8998ccf075p-37, 0x1.e83eb9bce31c4p-33 }, { 0x1.38b3a7222dd33p-37, 0x1.9e170e2dbff8cp-33 }, { 0x1.08fb437656229p-37, 0x1.5f27a9aa5f66p-33 }, { 0x1.c1085f96d9feep-38, 0x1.29bfa42bc7b76p-33 }, { 0x1.7c6a3cf1c9dcfp-38, 0x1.f8de2739c95a9p-34 }, { 0x1.423e65b2a3a8cp-38, 0x1.abfaa7d4233fap-34 }, { 0x1.10ef40de709bcp-38, 0x1.6ac1833360c58p-34 }, { 0x1.ce48f9d9e5928p-39, 0x1.336f5ff042b88p-34 }, { 0x1.8773adc5703cep-39, 0x1.0484d7ff5f6bdp-34 }, { 0x1.4b6e86a5aa9d8p-39, 0x1.b978904649f57p-35 }, { 0x1.189488e2e9743p-39, 0x1.760249f31a968p-35 }, { 0x1.db0100ef385d3p-40, 0x1.3cd13761f1731p-35 }, { 0x1.9206c1ae9fb29p-40, 0x1.0c569a0b1627cp-35 }, { 0x1.54382e8081943p-40, 0x1.c67fe1e83e91p-36 }, { 0x1.1fe13002859cap-40, 0x1.80dbcff1d72cfp-36 }, { 0x1.e71fde0c5e218p-41, 0x1.45d945dc4844dp-36 }, { 0x1.9c159bbc9900ap-41, 0x1.13da615eb6c5fp-36 }, { 0x1.5c8fc931c6d94p-41, 0x1.d2ffe78d87996p-37 }, { 0x1.26cb8c1920344p-41, 0x1.8b4017551e03bp-37 }, { 0x1.f295714275bc3p-42, 0x1.4e7bd56b77338p-37 }, { 0x1.a592ca70605e5p-42, 0x1.1b06621cfb60ep-37 }, { 0x1.646a234bddd88p-42, 0x1.dee83fc205fc8p-38 }, { 0x1.2d4a498c21371p-42, 0x1.9521701d324dap-38 }, { 0x1.fd5235020e009p-43, 0x1.56ad77d8efe38p-38 }, { 0x1.ae71657ff542ep-43, 0x1.21d11201bfbcfp-38 }, { 0x1.6bbc82f12468ap-43, 0x1.ea290040397f4p-39 }, { 0x1.3354802504d9ep-43, 0x1.9e7295f29cf91p-39 }, { 0x1.03a3b07cf84bp-43, 0x1.5e631fb2a96dbp-39 }, { 0x1.b6a52af7c7202p-44, 0x1.28313d62cbf4fp-39 }, { 0x1.727cc024d462ap-44, 
0x1.f4b2d92a8da6ap-40 }, { 0x1.38e1c7590edafp-44, 0x1.a726cda9c5fc4p-40 }, { 0x1.083385f1e344cp-44, 0x1.6592390114765p-40 }, { 0x1.be229b5ed10ebp-45, 0x1.2e1e1bdc1cff3p-40 }, { 0x1.78a15c33bf0d1p-45, 0x1.fe77379b5869ap-41 }, { 0x1.3dea49bdca04dp-45, 0x1.af3202215009fp-41 }, { 0x1.0c5225e967ce3p-45, 0x1.6c30c15ee186bp-41 }, { 0x1.c4df14833b32ep-46, 0x1.338f646703f05p-41 }, { 0x1.7e2197e99732ep-46, 0x1.03b4338f71d3bp-41 }, { 0x1.4266d76b7e9efp-46, 0x1.b688e02001605p-42 }, { 0x1.0ff9aa4df55cbp-46, 0x1.72355f261c90fp-42 }, { 0x1.cad0ea9847218p-47, 0x1.387d609c076c8p-42 }, { 0x1.82f5884a3c4ffp-47, 0x1.07bcd8d61f54dp-42 }, { 0x1.4650f71159187p-47, 0x1.bd20f0d88c869p-43 }, { 0x1.1324c9f973607p-47, 0x1.77977767b819cp-43 }, { 0x1.cfef7f529f1bfp-48, 0x1.3ce0fee10ae91p-43 }, { 0x1.8716298a66d68p-48, 0x1.0b4fbeda58aa9p-43 }, { 0x1.49a2f582864b8p-48, 0x1.c2f0b2bc85943p-44 }, { 0x1.15cee56fb8f8p-48, 0x1.7c4f426570458p-44 }, { 0x1.d43356b5d1bc3p-49, 0x1.40b3e347db73ap-44 }, { 0x1.8a7d700826ce3p-49, 0x1.0e67b4f33d066p-44 }, { 0x1.4c57f38808af9p-49, 0x1.c7efb04c36011p-45 }, { 0x1.17f41219f6e6ep-49, 0x1.8055de49eb405p-45 }, { 0x1.d796294cc09e7p-50, 0x1.43f076e4dac86p-45 }, { 0x1.8d265709c8b81p-50, 0x1.11003322f9f2ap-45 }, { 0x1.4e6bf1c869176p-50, 0x1.cc169496c493bp-46 }, { 0x1.199123dce7f7cp-50, 0x1.83a55fe01c77fp-46 }, { 0x1.da12f38ef6065p-51, 0x1.4691f56a0b9d1p-46 }, { 0x1.8f0ced10d0db4p-51, 0x1.131565242338p-46 }, { 0x1.4fdbda9c9106cp-51, 0x1.cf5f3d25346p-47 }, { 0x1.1aa3b4e8f3caap-51, 0x1.8638e1112031dp-47 }, { 0x1.dba6023e1257ap-52, 0x1.489478d82c425p-47 }, { 0x1.902e5d96b5dc7p-52, 0x1.14a433d21a4e2p-47 }, { 0x1.50a589affacc9p-52, 0x1.d1c4c912f9acbp-48 }, { 0x1.1b2a2ba958505p-52, 0x1.880c8cf6ecf16p-48 }, { 0x1.dc4cfb90a7ce5p-53, 0x1.49f5031dc194p-48 }, { 0x1.9088f811b7254p-53, 0x1.15aa4ccc2f79bp-48 }, { 0x1.50c7d151d73dp-53, 0x1.d343a5202c7c4p-49 }, { 0x1.1b23bebdcda6dp-53, 0x1.891da95a3a6f5p-49 }, { 0x1.dc06e50abd949p-54, 0x1.4ab18582d9df2p-49 }, { 0x1.901c34297491p-54, 0x1.1626283914e64p-49 }, { 0x1.50427d64b1c7dp-54, 0x1.d3d994938f3adp-50 }, { 0x1.1a9076f0d2e24p-54, 0x1.896a9d7ab89b1p-50 }, { 0x1.dad425efa38efp-55, 0x1.4ac8e5c7c8723p-50 }, { 0x1.8ee8b30ca2586p-55, 0x1.16170c969f828p-50 }, { 0x1.4f1653e256f41p-55, 0x1.d385b6cd88b32p-51 }, { 0x1.19712f23cae3dp-55, 0x1.88f2f609fe4d3p-51 }, { 0x1.d8b686448b5afp-56, 0x1.4a3b00e506616p-51 }, { 0x1.8cf03de32b406p-56, 0x1.157d10888e2f3p-51 }, { 0x1.4d4512f22a65dp-56, 0x1.d2488978a2f74p-52 }, { 0x1.17c7923127a39p-56, 0x1.87b7664b4e00cp-52 }, { 0x1.d5b12a674c804p-57, 0x1.4908ab62a09acp-52 }, { 0x1.8a35c1621f2ccp-57, 0x1.14591aa0080cap-52 }, { 0x1.4ad16c988b007p-57, 0x1.d023e74fea7e1p-53 }, { 0x1.159616cbf8a0cp-57, 0x1.85b9c65443c51p-53 }, { 0x1.d1c88b489c5c3p-58, 0x1.4733af4601fe1p-53 }, { 0x1.86bd4690c0845p-58, 0x1.12acdf1c9738cp-53 }, { 0x1.47bf000e37ae9p-58, 0x1.cd1b037f7490bp-54 }, { 0x1.12dff96b26d81p-58, 0x1.82fd0e7486194p-54 }, { 0x1.cd026b64a0ca8p-59, 0x1.44bec79d5416cp-54 }, { 0x1.828be8d7b2e74p-59, 0x1.107adbae7661dp-54 }, { 0x1.441250d6b8cc7p-59, 0x1.c93261af2cd0dp-55 }, { 0x1.0fa934555eb5ap-59, 0x1.7f854fd47e7d3p-55 }, { 0x1.c765c89feb632p-60, 0x1.41ad99b7fc9ebp-55 }, { 0x1.7da7c97c8ea4bp-60, 0x1.0dc65148f57fcp-55 }, { 0x1.3fd0bbb47d67cp-60, 0x1.c46fcad39a071p-56 }, { 0x1.0bf675e9015a3p-60, 0x1.7b57aa64c1e42p-56 }, { 0x1.c0facb396944ap-61, 0x1.3e04ac23c3f11p-56 }, { 0x1.781800b4c5862p-61, 0x1.0a933c1a65e31p-56 }, { 0x1.3b0069a07f02dp-61, 0x1.beda3eeb5f0a2p-57 }, { 0x1.07cd15415698ap-61, 0x1.767a404101f5ap-57 }, { 0x1.b9cab20b7b4acp-62, 
0x1.39c95b8dcd835p-57 }, { 0x1.71e48c82b190ap-62, 0x1.06e649c54a11dp-57 }, { 0x1.35a840f1bb9bfp-62, 0x1.b879e3daa485dp-58 }, { 0x1.0333055f872d1p-62, 0x1.70f426b1f5c67p-58 }, { 0x1.b1dfbc5f13465p-63, 0x1.3501cdad9df5bp-58 }, { 0x1.6b163d96b3dd9p-63, 0x1.02c4cdfc5722cp-58 }, { 0x1.2fcfd4e6913cap-63, 0x1.b157f19f267eap-59 }, { 0x1.fc5d8e0519af3p-64, 0x1.6acd55017e4e2p-59 }, { 0x1.a945119b38a65p-64, 0x1.2fb4e266d3e9fp-59 }, { 0x1.63b6a2745bde1p-64, 0x1.fc696b5025168p-60 }, { 0x1.297f53c6e927fp-64, 0x1.a97e9c202c067p-60 }, { 0x1.f18eb2ba6357fp-65, 0x1.640e915b3f3eap-60 }, { 0x1.a006a7219c6a4p-65, 0x1.29ea2353deb28p-60 }, { 0x1.5bcff1208eb99p-65, 0x1.f278f182d5ccep-61 }, { 0x1.22bf73da1838dp-65, 0x1.a0f8fae51588p-61 }, { 0x1.e60853b8b4b65p-66, 0x1.5cc15bf9dbbbbp-61 }, { 0x1.963124add21cp-66, 0x1.23a9b1f0c9515p-61 }, { 0x1.536cefa1810b4p-66, 0x1.e7c6162103b4ep-62 }, { 0x1.1b995f6e584afp-66, 0x1.97d2ef035140ap-62 }, { 0x1.d9da06644bc9dp-67, 0x1.54efd8e5e8a15p-62 }, { 0x1.8bd1c79049ec2p-67, 0x1.1cfc34a10ee47p-62 }, { 0x1.4a98db9bff0e8p-67, 0x1.dc5f9803d5324p-63 }, { 0x1.1416a031bacf2p-67, 0x1.8e1907994f8d3p-63 }, { 0x1.cd13f7b7c3414p-68, 0x1.4ca4b88f6234cp-63 }, { 0x1.80f645203dff7p-68, 0x1.15eac2ce52257p-63 }, { 0x1.415f515af2672p-68, 0x1.d054eb8db2ad5p-64 }, { 0x1.0c410a1d6b3cap-68, 0x1.83d8652f7235cp-64 }, { 0x1.bfc6c8b2d1c95p-69, 0x1.43eb1f8cfdcf1p-64 }, { 0x1.75acacc068ebep-69, 0x1.0e7ed05fb3af3p-64 }, { 0x1.37cc328e513e5p-69, 0x1.c3b617ec3cfd6p-65 }, { 0x1.0422a6340a512p-69, 0x1.791e9c59e2b42p-65 }, { 0x1.b2036a988beadp-70, 0x1.3ace8dce03fbdp-65 }, { 0x1.6a0349d192d1ap-70, 0x1.06c218ca5f25ap-65 }, { 0x1.2deb8d0dae905p-70, 0x1.b69393c895b87p-66 }, { 0x1.f78b3aa5bebbep-71, 0x1.6df997f6bab1bp-66 }, { 0x1.a3dafb67a96cfp-71, 0x1.315ac58b7d6b7p-66 }, { 0x1.5e0885ebd9cc3p-71, 0x1.fd7d13f78002dp-67 }, { 0x1.23c981e88b022p-71, 0x1.a8fe21d205ebp-67 }, { 0x1.e66846a73c925p-72, 0x1.62777b62fde0cp-67 }, { 0x1.955ea2f392221p-72, 0x1.279bb2446baf4p-67 }, { 0x1.51cacbb42476ep-72, 0x1.ecfc5eb955129p-68 }, { 0x1.19722d0b598a4p-72, 0x1.9b06ad8cbcafbp-68 }, { 0x1.d4f0c5733dbc9p-73, 0x1.56a684fe99fcap-68 }, { 0x1.869f70ffc1fcbp-73, 0x1.1d9d500e92622p-68 }, { 0x1.45586a9e82938p-73, 0x1.dc163a555fefbp-69 }, { 0x1.0ef18dbc017ffp-73, 0x1.8cbe28ca7c426p-69 }, { 0x1.c338d2435fb4bp-74, 0x1.4a94f1540c9eap-69 }, { 0x1.77ae3cb88b469p-74, 0x1.136b93820fc76p-69 }, { 0x1.38bf7be87e681p-74, 0x1.cadeb8c3bba05p-70 }, { 0x1.0453702b9a5bbp-74, 0x1.7e356a2db5e15p-70 }, { 0x1.b154294e891dap-75, 0x1.3e50df3387f95p-70 }, { 0x1.689b85dc875b1p-75, 0x1.09125281c373ap-70 }, { 0x1.2c0dc90fab5bap-75, 0x1.b969aedac7779p-71 }, { 0x1.f346b0aa94647p-76, 0x1.6f7d0d10edd84p-71 }, { 0x1.9f5604d9610bp-76, 0x1.31e8350b95daep-71 }, { 0x1.597757e14e4e8p-76, 0x1.fd3a5c3ac18bbp-72 }, { 0x1.1f50b401397f7p-76, 0x1.a7ca8fa24018p-72 }, { 0x1.ddd8dcb76e388p-77, 0x1.60a5532471804p-72 }, { 0x1.8d50fcdd2a012p-77, 0x1.256887c26e498p-72 }, { 0x1.4a512f5483d32p-77, 0x1.e82efb884fa7p-73 }, { 0x1.129521372a709p-77, 0x1.961449f1f5f93p-73 }, { 0x1.c872d91eff745p-78, 0x1.51be080b9d49dp-73 }, { 0x1.7b56e9895b756p-78, 0x1.18df034ba2c47p-73 }, { 0x1.3b37e1b01d1bdp-78, 0x1.d31877f1753bap-74 }, { 0x1.05e763ef1c6e1p-78, 0x1.845928aac023dp-74 }, { 0x1.b3291e83a6ddap-79, 0x1.42d6673958cf7p-74 }, { 0x1.6978c8d7d61b8p-79, 0x1.0c58552d896bdp-74 }, { 0x1.2c3987ce2b431p-79, 0x1.be0be95f0126ep-75 }, { 0x1.f2a6593b4ee39p-80, 0x1.72aab5cc51918p-75 }, { 0x1.9e0f0cfd57ab4p-80, 0x1.33fd04413c4e8p-75 }, { 0x1.57c6a75ebbd36p-80, 0x1.ffc132424c87ap-76 }, { 0x1.1d636b1da2b46p-80, 
0x1.a91d6af35687bp-76 }, { 0x1.d9c6f3705063cp-81, 0x1.6119a09e14fe5p-76 }, { 0x1.8936d384f421ap-81, 0x1.253fb5c838ba6p-76 }, { 0x1.464f8c7e074fcp-81, 0x1.e7068fdcaeb4ep-77 }, { 0x1.0ec1f5aebc21fp-81, 0x1.945fff2eb1b17p-77 }, { 0x1.c14515cb6f8fp-82, 0x1.4fb5a7146299ap-77 }, { 0x1.74b15b6eeceb1p-82, 0x1.16ab8334ccb0ap-77 }, { 0x1.352169fa33216p-82, 0x1.ce965139dad89p-78 }, { 0x1.0060a522d6818p-82, 0x1.7fe578074e0c8p-78 }, { 0x1.a933ad3e37ea3p-83, 0x1.3e8d828e807b4p-78 }, { 0x1.608e37fe916b7p-83, 0x1.084c9533fea9dp-78 }, { 0x1.24490f08ca22dp-83, 0x1.b68488148e38cp-79 }, { 0x1.e4940102c0a26p-84, 0x1.6bbe630bdc58cp-79 }, { 0x1.91a40479b1837p-84, 0x1.2daed7fd23569p-79 }, { 0x1.4cdb9a0d20ef7p-84, 0x1.f45c523b5ec4ep-80 }, { 0x1.13d21ec7ce7a5p-84, 0x1.9ee3b5d440d2p-80 }, { 0x1.c90f21d2d475fp-85, 0x1.57f9f997e1f52p-80 }, { 0x1.7aa5b8d4b4359p-85, 0x1.1d262b74c69e4p-80 }, { 0x1.39a647b21bed6p-85, 0x1.d8b50e711660ap-81 }, { 0x1.03c70a0dadb1dp-85, 0x1.87c4bc616ed3dp-81 }, { 0x1.ae43ba1c85bb1p-86, 0x1.44a615135e868p-81 }, { 0x1.6446b3db12c58p-86, 0x1.0cfed72363bb7p-81 }, { 0x1.26f997cdc041dp-86, 0x1.bdb5f7a82d0f4p-82 }, { 0x1.e86218ea3e6acp-87, 0x1.7136d3b897e11p-82 }, { 0x1.9440cec9f5e3ap-87, 0x1.31cf2729ac24dp-82 }, { 0x1.4e93295651e9bp-87, 0x1.fa860b2bf75f8p-83 }, { 0x1.14df714b2cc27p-87, 0x1.a36fa64c5b19fp-83 }, { 0x1.ca3058fde005fp-88, 0x1.5b478418ed951p-83 }, { 0x1.7b135dc219792p-88, 0x1.1f8035d726d41p-83 }, { 0x1.3995999427ba7p-88, 0x1.dbf75e60682c2p-84 }, { 0x1.03604de581436p-88, 0x1.89f0afa1deecap-84 }, { 0x1.ad067d36fa2c8p-89, 0x1.4602a49df0a52p-84 }, { 0x1.62c6642f5d4b9p-89, 0x1.0dc2db21eaf21p-84 }, { 0x1.2556d7a42568ap-89, 0x1.be61355e30a98p-85 }, { 0x1.e5068065139bep-90, 0x1.7145a7dd1cf8cp-85 }, { 0x1.90efd5cd13c3p-90, 0x1.31725e0702649p-85 }, { 0x1.4b62e9374c452p-90, 0x1.f93e90900fd6bp-86 }, { 0x1.11de133cc6916p-90, 0x1.a1d0c10ff74dfp-86 }, { 0x1.c49bf95c5f745p-91, 0x1.597928f3e0c7p-86 }, { 0x1.75f56ab48bd89p-91, 0x1.1d9f316556fccp-86 }, { 0x1.34f00cbd8ea42p-91, 0x1.d8389849eaf01p-87 }, { 0x1.fe61cbe17950dp-92, 0x1.8650e1db268ebp-87 }, { 0x1.a589caf82618cp-92, 0x1.4293ddcb013c1p-87 }, { 0x1.5c1e107375834p-92, 0x1.0a90025fd130cp-87 }, { 0x1.1f7319c565581p-92, 0x1.b87eb911fc5efp-88 }, { 0x1.daa6c6af5c17fp-93, 0x1.6bea387f6b0ap-88 }, { 0x1.87d63120a742cp-93, 0x1.2c9c915a28ddap-88 }, { 0x1.436e80df031fp-93, 0x1.f094496a5e827p-89 }, { 0x1.0aef9bffa708dp-93, 0x1.9a19446f657ccp-89 }, { 0x1.b890579385cdcp-94, 0x1.52a33b4b8094cp-89 }, { 0x1.6b84ffdb5d885p-94, 0x1.179841589cdp-89 }, { 0x1.2be9773700384p-94, 0x1.cda2d93f291abp-90 }, { 0x1.eecef0206652cp-95, 0x1.7d0e0e7cac5bp-90 }, { 0x1.9821029662ccfp-95, 0x1.3a804f20fd2f4p-90 }, { 0x1.5097c74b3d08ep-95, 0x1.038a34010e13fp-90 }, { 0x1.158fcf12f6c8ep-95, 0x1.ac508371be502p-91 }, { 0x1.c9b60c296975dp-96, 0x1.61608ea10db83p-91 }, { 0x1.7958bc88e6006p-96, 0x1.2383e3bce375p-91 }, { 0x1.370dfa8e149d1p-96, 0x1.e0e820ef7463p-92 }, { 0x1.0060a594f59c7p-96, 0x1.8c9f67fa9c048p-92 }, { 0x1.a6925bee98d74p-97, 0x1.471203b047e85p-92 }, { 0x1.5c351b499632p-97, 0x1.0dae92b93887p-92 }, { 0x1.1ee518d278c58p-97, 0x1.bcabf2ba981bfp-93 }, { 0x1.d8b2f8b0b2924p-98, 0x1.6e8f25135d13fp-93 }, { 0x1.855f0a34582a6p-98, 0x1.2e219acb023aep-93 }, { 0x1.40b1881e58e3p-98, 0x1.f1fe817902cebp-94 }, { 0x1.0818d80634105p-98, 0x1.9a5d5233d8e13p-94 }, { 0x1.b2ecbb2e8d76cp-99, 0x1.521d0766f8b85p-94 }, { 0x1.6614d9da549fbp-99, 0x1.168c985c93c95p-94 }, { 0x1.26c7736a63e7fp-99, 0x1.cae6809d7d445p-95 }, { 0x1.e546a107b57d5p-100, 0x1.79f71edd3cb51p-95 }, { 0x1.8f64020effd9cp-100, 
0x1.37443c37e4835p-95 }, { 0x1.48aa64075b15p-100, 0x1.004e8297ce819p-95 }, { 0x1.0e6e891142764p-100, 0x1.a60ceba01346ap-96 }, { 0x1.bcfa525d16889p-101, 0x1.5b71dfbe662f9p-96 }, { 0x1.6e0be1ed4e4ccp-101, 0x1.1dfe04c5b884ap-96 }, { 0x1.2d14568fa3103p-101, 0x1.d6c299b6b03dep-97 }, { 0x1.ef39c9c67da7p-102, 0x1.8366f8264d161p-97 }, { 0x1.973b86e9a718fp-102, 0x1.3ec401194be5fp-97 }, { 0x1.4ed55e6d4d5dfp-102, 0x1.0641ea45be131p-97 }, { 0x1.1345b1de4a541p-102, 0x1.af7b06dd7c2fap-98 }, { 0x1.c48e8cf8e20edp-103, 0x1.62e7924beab28p-98 }, { 0x1.73f6cd7db5a56p-103, 0x1.23e2123cac1dcp-98 }, { 0x1.31afb2e91937bp-103, 0x1.e00be39adba8fp-99 }, { 0x1.f6600b76754fcp-104, 0x1.8ab4ee2717624p-99 }, { 0x1.9cc2881babafp-104, 0x1.447fa5b4e25fep-99 }, { 0x1.5316d5b010b17p-104, 0x1.0abf02c055867p-99 }, { 0x1.1688993cfebe3p-104, 0x1.b67d9f35f4de8p-100 }, { 0x1.c98758b0a4ebap-105, 0x1.685ccfe1e2ab5p-100 }, { 0x1.77baf72da4868p-105, 0x1.281e65593d67p-100 }, { 0x1.3484c1e2418cbp-105, 0x1.e698bd1000fd2p-101 }, { 0x1.fa991c211034p-106, 0x1.8fc0326c87b11p-101 }, { 0x1.9fe006460b912p-106, 0x1.485d5ed97243ep-101 }, { 0x1.555b844a27ecdp-106, 0x1.0db191585c5a2p-101 }, { 0x1.182875c9f3984p-106, 0x1.baf50ff65044dp-102 }, { 0x1.cbce2423a80acp-107, 0x1.6bb8ebe73c54ap-102 }, { 0x1.794741d4d28c6p-107, 0x1.2a9fd1221e357p-102 }, { 0x1.3586a18110b0ep-107, 0x1.ea4b746dbeae3p-103 }, { 0x1.fbd1c1dcb3991p-108, 0x1.9271dfe5687e7p-103 }, { 0x1.a085cf5d6c87ep-108, 0x1.4a4b9ae2c857dp-103 }, { 0x1.559911f8b7812p-108, 0x1.0f0c2d578f06ap-103 }, { 0x1.181ddd71c27fbp-108, 0x1.bccd0201398bap-104 }, { 0x1.cb5889458c00ep-109, 0x1.6cec95dfef21ap-104 }, { 0x1.789499da6bff1p-109, 0x1.2b5ae7721763fp-104 }, { 0x1.34b0b5ddf82c6p-109, 0x1.eb1327842cc63p-105 }, { 0x1.fa04646636ebep-110, 0x1.92bda7bca05b7p-105 }, { 0x1.9eb0ea42d451ep-110, 0x1.4a4186866270ap-105 }, { 0x1.53ce6234f7db7p-110, 0x1.0ec8a57831ec5p-105 }, { 0x1.1668fdbb007d5p-110, 0x1.bbfd05e1b64f3p-106 }, { 0x1.c8289c5fd0187p-111, 0x1.6bf24d893426cp-106 }, { 0x1.75a62b0407aefp-111, 0x1.2a4c4fb42b862p-106 }, { 0x1.3206cc37b0e4ap-111, 0x1.e8ec43d273fbap-107 }, { 0x1.f53937c26236ep-112, 0x1.90a22ee0d506ep-107 }, { 0x1.9a69ad7793258p-112, 0x1.483f4fee6553cp-107 }, { 0x1.50039cbf56e41p-112, 0x1.0ce82f0139653p-107 }, { 0x1.13119a81ee824p-112, 0x1.b888d3fea2a71p-108 }, { 0x1.c24cdc6a6909bp-113, 0x1.68ce8cbb7eaebp-108 }, { 0x1.7089487e1182ep-113, 0x1.2778e05f0f826p-108 }, { 0x1.2d94fe2dcd5a4p-113, 0x1.e3e0a1bcb7b9p-109 }, { 0x1.ed85fe218f015p-114, 0x1.8c29185861611p-109 }, { 0x1.93c37ffa2be3p-114, 0x1.444e2559eb861p-109 }, { 0x1.4a49efe08b764p-114, 0x1.09735c9244f77p-109 }, { 0x1.0e26d33274acdp-114, 0x1.b28030446d467p-110 }, { 0x1.b9dfc560135fp-115, 0x1.638fa554a9791p-110 }, { 0x1.6955081ac80b2p-115, 0x1.22ed7a20d2031p-110 }, { 0x1.276f565251c73p-115, 0x1.dc07399fb9ebdp-111 }, { 0x1.e30d639687648p-116, 0x1.8566bbf3afdccp-111 }, { 0x1.8adc46e842374p-116, 0x1.3e7fef514c8f7p-111 }, { 0x1.42bb0eedd3fb2p-116, 0x1.0479dd0162987p-111 }, { 0x1.07beb0edff1b8p-116, 0x1.a9fe7272a642bp-112 }, { 0x1.af070915be74ep-117, 0x1.5c4d5495043b3p-112 }, { 0x1.602994f04daa5p-117, 0x1.1cbea64272b5fp-112 }, { 0x1.1fb139d7ad13p-117, 0x1.d18375dee0b86p-113 }, { 0x1.d5fdfa65dd70dp-118, 0x1.7c798c690caf6p-113 }, { 0x1.7fdb85ec65bd4p-118, 0x1.36eec953c25e3p-113 }, { 0x1.39787263ebbcap-118, 0x1.fc2409fc1812ep-114 }, { 0x1.ffeb0495cc103p-119, 0x1.9f29b80329143p-114 }, { 0x1.a1f276c1aeb71p-119, 0x1.5328106ecc8f8p-114 }, { 0x1.552f40714fe54p-119, 0x1.1507fc4d2f4bap-114 }, { 0x1.167c9d827337cp-119, 0x1.c484291d11ffp-115 }, { 
0x1.c690e28b6a9bfp-120, 0x1.7189333483e3bp-115 }, { 0x1.72f13b97db104p-120, 0x1.2dbc3e931f24dp-115 }, { 0x1.2eaa616a9b21cp-120, 0x1.ecb050b3055ap-116 }, { 0x1.edda16b7edc87p-121, 0x1.9231c8255bcdbp-116 }, { 0x1.92da9c960076ap-121, 0x1.4848161f4e509p-116 }, { 0x1.48955baf138afp-121, 0x1.0beb55467080ap-116 }, { 0x1.0bf90e157d9dap-121, 0x1.b542338309321p-117 }, { 0x1.b5082a5d8de09p-122, 0x1.64c56b8fb3cecp-117 }, { 0x1.6454856772fedp-122, 0x1.231052b5f7dd6p-117 }, { 0x1.227ecea87251dp-122, 0x1.dadb937ed07ebp-118 }, { 0x1.d99724acabf71p-123, 0x1.834eb55a1d18ep-118 }, { 0x1.81ff31715569ap-123, 0x1.3bdc43dd8955fp-118 }, { 0x1.3a90e48619574p-123, 0x1.018fd4cd15479p-118 }, { 0x1.005296113b586p-123, 0x1.a3fee5158c03fp-119 }, { 0x1.a1acf8c750894p-124, 0x1.5664a8518a142p-119 }, { 0x1.54421936100c1p-124, 0x1.171860917e7c8p-119 }, { 0x1.152813e135602p-124, 0x1.c6f152728fb8fp-120 }, { 0x1.c375a4cba7b23p-125, 0x1.72bf4ab4db677p-120 }, { 0x1.6fa5568fa20f3p-125, 0x1.2e18c95c4bfb1p-120 }, { 0x1.2b5b13ef0805cp-125, 0x1.ec41a3d4cf576p-121 }, { 0x1.e77117811a7d2p-126, 0x1.91022d83bf8f5p-121 }, { 0x1.8ccd934db2cbp-126, 0x1.46a292659269ep-121 }, { 0x1.42faa33070d2ap-126, 0x1.0a05da41d6048p-121 }, { 0x1.06db98d7f6125p-126, 0x1.b14375f322de2p-122 }, { 0x1.abcdbdfcc9f7cp-127, 0x1.60c75486158bp-122 }, { 0x1.5c15c23fbb403p-127, 0x1.1f35bc35fb59fp-122 }, { 0x1.1b2fdb7cab6dfp-127, 0x1.d39954e0a9d3dp-123 }, { 0x1.ccb8a64624f6cp-128, 0x1.7c98ab66270f5p-123 }, { 0x1.76bb52e82b59ap-128, 0x1.35be6eb898758p-123 }, { 0x1.30c117f001ac3p-128, 0x1.f819edd38db9cp-124 }, { 0x1.efa0e49e3feccp-129, 0x1.9a2821242ebdp-124 }, { 0x1.92fa046d58d4ep-129, 0x1.4dadd528d6ea9p-124 }, { 0x1.479ae4e865feep-129, 0x1.0f6d9e092345cp-124 }, { 0x1.0a4c603089f16p-129, 0x1.b987187720ae4p-125 }, { 0x1.b0e03e96a5485p-130, 0x1.6711ad9310ce1p-125 }, { 0x1.5fc89a9e03199p-130, 0x1.23f97aea9f29fp-125 }, { 0x1.1dd90a3522c75p-130, 0x1.dac6b554960ffp-126 }, { 0x1.d07c0b8b30398p-131, 0x1.81f77dc55f2bdp-126 }, { 0x1.795540ea5dda7p-131, 0x1.39bb36d1a51dap-126 }, { 0x1.327f191dd6247p-131, 0x1.fdf7c425dfb89p-127 }, { 0x1.f1db008e061d6p-132, 0x1.9e6c7f42ee3ap-127 }, { 0x1.944b7c8850269p-132, 0x1.50bd38f4b0e14p-127 }, { 0x1.4846e1e475567p-132, 0x1.11954fcd9d596p-127 }, { 0x1.0a8512d6deebp-132, 0x1.bc7d8a23288e1p-128 }, { 0x1.b0b57b848dfd5p-133, 0x1.69099571fea27p-128 }, { 0x1.5f385601a1095p-133, 0x1.25378a982372p-128 }, { 0x1.1d0aee3f21eaep-133, 0x1.dc36feecfa2bap-129 }, { 0x1.ce9ce0f1b56b8p-134, 0x1.82a9fb7ad076bp-129 }, { 0x1.775af322a6fb6p-134, 0x1.39ea243c7bf71p-129 }, { 0x1.3084e2fb958e5p-134, 0x1.fda4af81b306ap-130 }, { 0x1.ee0aaff5c7275p-135, 0x1.9da7a2c5ab52cp-130 }, { 0x1.90b5b261712acp-135, 0x1.4fb44aa933f5cp-130 }, { 0x1.44f853ca3d2a1p-135, 0x1.1068e39733d5fp-130 }, { 0x1.07839b24e2329p-135, 0x1.ba0b385a9673fp-131 }, { 0x1.ab4ef712ea53cp-136, 0x1.669cb88b98bb4p-131 }, { 0x1.5a6a27edc2aafp-136, 0x1.22e458ff074e2p-131 }, { 0x1.18ccfb2383c0dp-136, 0x1.d7dccacf16bdfp-132 }, { 0x1.c72c7d427b5c7p-137, 0x1.7ea9a57d9c3fdp-132 }, { 0x1.70debd3477d7cp-137, 0x1.364981b4fcaccp-132 }, { 0x1.2ae4c8505c4dcp-137, 0x1.f723b60a4c45ap-133 }, { 0x1.e45347f37826dp-138, 0x1.97e0b5db827a8p-133 }, { 0x1.8859d9d834871p-138, 0x1.4a9cae44d02aap-133 }, { 0x1.3dcdd6f53a761p-138, 0x1.0bf347561e06fp-133 }, { 0x1.0163c7a1b8ce3p-138, 0x1.b246ea577dcd5p-134 }, { 0x1.a0de9e4d0326ap-139, 0x1.5fe1a8f2ffd47p-134 }, { 0x1.518a7407eb90ep-139, 0x1.1d15869af1a46p-134 }, { 0x1.1146574533e59p-139, 0x1.cde08f63664fdp-135 }, { 0x1.ba6f77161f191p-140, 0x1.761ba88bf6eedp-135 }, { 0x1.661c59f17faep-140, 
0x1.2efafc89163c3p-135 }, { 0x1.21d2894bdd4c7p-140, 0x1.eab12c8aa7e5p-136 }, { 0x1.d50e0eba3e44dp-141, 0x1.8d4d432dee077p-136 }, { 0x1.7b84a5753cf1fp-141, 0x1.41a589d11cb19p-136 }, { 0x1.33091416396dbp-141, 0x1.045db9ec2ba81p-136 }, { 0x1.f0bb3ff173143p-142, 0x1.a57861242277fp-137 }, { 0x1.91c3cacc75aaap-142, 0x1.551681b8d361p-137 }, { 0x1.44ea256a84bbp-142, 0x1.140098b38820cp-137 }, { 0x1.06bb841410434p-142, 0x1.be9e2feb561ep-138 }, { 0x1.a8d98b0d5771p-143, 0x1.694e9fdcb7be5p-138 }, { 0x1.57755a2313bdfp-143, 0x1.24419d9ce37ffp-138 }, { 0x1.15a03d39bca43p-143, 0x1.d8bf1578b3aacp-139 }, { 0x1.c0c4e9f387792p-144, 0x1.7e4dfe2cee6a2p-139 }, { 0x1.6aa9b63079411p-144, 0x1.3520b0bf08a51p-139 }, { 0x1.250ad98a67e4fp-144, 0x1.f3daa3dd37f3ap-140 }, { 0x1.d9842421f4af1p-145, 0x1.94140b3abb78ep-140 }, { 0x1.7e859d0226582p-145, 0x1.469d2facc66f7p-140 }, { 0x1.34f9e5d4c96d3p-145, 0x1.07f7c6b04c092p-140 }, { 0x1.f314a5f5af6d7p-146, 0x1.aa9f80ec12e52p-141 }, { 0x1.9306ca687d568p-146, 0x1.58b5e63278412p-141 }, { 0x1.456b681315dafp-146, 0x1.167dcc97a0fd3p-141 }, { 0x1.06b98180e66fp-146, 0x1.c1ee5bab4ede7p-142 }, { 0x1.a82a4c036e3f3p-147, 0x1.6b69077bfc3c7p-142 }, { 0x1.565cda5d05a6ap-147, 0x1.257dcc5bc2717p-142 }, { 0x1.144d77262f022p-147, 0x1.d9fdd2296338fp-143 }, { 0x1.bdec7b50a66cp-148, 0x1.7eb427b4ddd71p-143 }, { 0x1.67cb265d8483ap-148, 0x1.34f5aee91217p-143 }, { 0x1.224399b226996p-148, 0x1.f2ca4dc8ff69fp-144 }, { 0x1.d448f86c23d12p-149, 0x1.92943634830d2p-144 }, { 0x1.79b2a15ae0faap-149, 0x1.44e2d8e947442p-144 }, { 0x1.3098d833c2dap-149, 0x1.0627b1e47c261p-144 }, { 0x1.eb3aa595948f3p-150, 0x1.a705784809825p-145 }, { 0x1.8c0f08dff4e68p-150, 0x1.554226cd542efp-145 }, { 0x1.3f49a8880f6adp-150, 0x1.1343e7a202e9p-145 }, { 0x1.015dd1c62a082p-150, 0x1.bc0384ab3550dp-146 }, { 0x1.9edb80143a705p-151, 0x1.660fe966c4e28p-146 }, { 0x1.4e52056f2dec4p-151, 0x1.20b6b60dae611p-146 }, { 0x1.0d62a769875ep-151, 0x1.d1893fc15ba16p-147 }, { 0x1.b2128dd015485p-152, 0x1.7747e31ddd25cp-147 }, { 0x1.5dad6d3a16694p-152, 0x1.2e7c997078049p-147 }, { 0x1.19a81ef58dfc6p-152, 0x1.e790d89e8e564p-148 }, { 0x1.c5ae1b79c4ee8p-153, 0x1.88e545d12ba57p-148 }, { 0x1.6d56e11abc8a7p-153, 0x1.3c919aea9787p-148 }, { 0x1.262a204b39df1p-153, 0x1.fe13c6f07b6aep-149 }, { 0x1.d9a774b67b183p-154, 0x1.9ae2b16a9550ap-149 }, { 0x1.7d48e51f6d6edp-154, 0x1.4af14f857334ep-149 }, { 0x1.32e43016e50e4p-154, 0x1.0a8564eab8ff5p-149 }, { 0x1.edf747f9f14f1p-155, 0x1.ad3a33350402p-150 }, { 0x1.8d7d80e14b91p-155, 0x1.5996d7e13f467p-150 }, { 0x1.3fd1708b687cbp-155, 0x1.1636f3d76858ap-150 }, { 0x1.014ad3fec9ec4p-155, 0x1.bfe545fce7a55p-151 }, { 0x1.9dee40ecc2982p-156, 0x1.687ce08618977p-151 }, { 0x1.4ceca2b27454p-156, 0x1.221a377d62eb4p-151 }, { 0x1.0bbd071377b87p-156, 0x1.d2dcd30499eb7p-152 }, { 0x1.ae9438e9a5c0bp-157, 0x1.779da2df7a30cp-152 }, { 0x1.5a30285652adp-157, 0x1.2e2a7c1fe1c5fp-152 }, { 0x1.164daef1c2b15p-157, 0x1.e61933d473856p-153 }, { 0x1.bf6806876a635p-158, 0x1.86f2e6e7e582ap-153 }, { 0x1.67960688424efp-158, 0x1.3a62b4892ce6ep-153 }, { 0x1.20f7f47f404a7p-158, 0x1.f99234ed0089ep-154 }, { 0x1.d061d530972c5p-159, 0x1.9676058974913p-154 }, { 0x1.7517e8c57f622p-159, 0x1.46bd7c1e28efp-154 }, { 0x1.2bb6ba79809edp-159, 0x1.069f8cb02119fp-154 }, { 0x1.e17962871247p-160, 0x1.a61febb6d574dp-155 }, { 0x1.82af24bbe81ddp-160, 0x1.53351984f5d61p-155 }, { 0x1.3684a09debb18p-160, 0x1.108b4faaa8971p-155 }, { 0x1.f2a603a977e7cp-161, 0x1.b5e91e3ee196dp-156 }, { 0x1.9054beadf5a51p-161, 0x1.5fc381e001854p-156 }, { 0x1.415c074fc9065p-161, 0x1.1a8782bc000bep-156 }, { 
0x1.01ef55a0092e3p-161, 0x1.c5c9be5ba37d4p-157 }, { 0x1.9e016e74801cbp-162, 0x1.6c625c9dd5c05p-157 }, { 0x1.4c3713bae315dp-162, 0x1.248f08aa2a9f5p-157 }, { 0x1.0a8cf82738469p-162, 0x1.d5b98efc2e8d5p-158 }, { 0x1.abada51b7b47ep-163, 0x1.790b07dcc17ddp-158 }, { 0x1.570fb47030aa8p-163, 0x1.2e9c8b4dec3dep-158 }, { 0x1.13270ae279a57p-163, 0x1.e5affac730013p-159 }, { 0x1.b951931589ad6p-164, 0x1.85b69d604d483p-159 }, { 0x1.61dfa678e3296p-164, 0x1.38aa7fa8655e3p-159 }, { 0x1.1bb88966006c4p-164, 0x1.f5a41ad29abd6p-160 }, { 0x1.c6e52f00f28e6p-165, 0x1.925df815332e1p-160 }, { 0x1.6ca07adb2cabep-165, 0x1.42b32a68b6433p-160 }, { 0x1.243c4de072741p-165, 0x1.02c65f05a223cp-160 }, { 0x1.d4603cf73627ep-166, 0x1.9ef9ba1f58105p-161 }, { 0x1.774b9c8b0652p-166, 0x1.4cb0a4ddc2264p-161 }, { 0x1.2cad15ed5f00dp-166, 0x1.0ab038a2ddd17p-161 }, { 0x1.e1ba565f2f2dap-167, 0x1.ab82536c08c11p-162 }, { 0x1.81da56c03901cp-167, 0x1.569ce24f30cadp-162 }, { 0x1.350587b61e2e7p-167, 0x1.128ac3f80b9acp-162 }, { 0x1.eeeaf2386ba73p-168, 0x1.b7f008c184953p-163 }, { 0x1.8c45dba9ebaffp-168, 0x1.6071b5b7d5f0bp-163 }, { 0x1.3d40375ab2fc9p-168, 0x1.1a5112ad78884p-163 }, { 0x1.fbe96dd52dd2ap-169, 0x1.c43afb43abf3ap-164 }, { 0x1.96874b77050b3p-169, 0x1.6a28d7dab475p-164 }, { 0x1.4557ac9b8a4ffp-169, 0x1.21fe234726979p-164 }, { 0x1.04568afbad70bp-169, 0x1.d05b30647f5b6p-165 }, { 0x1.a097bba9c5bbap-170, 0x1.73bbedaae952fp-165 }, { 0x1.4d4668bc3c638p-170, 0x1.298ce64edbc52p-165 }, { 0x1.0a969821c25d4p-170, 0x1.dc489a35fd89p-166 }, { 0x1.aa703eac27071p-171, 0x1.7d248efdebaf1p-166 }, { 0x1.5506ec96ce1d8p-171, 0x1.30f843b6c62b7p-166 }, { 0x1.10b0827e1c59fp-171, 0x1.e7fb2011e1175p-167 }, { 0x1.b409eb99c2287p-172, 0x1.865c4d7ebd336p-167 }, { 0x1.5c93bed6568e9p-172, 0x1.383b206d0bb99p-167 }, { 0x1.169ff47b694c6p-172, 0x1.f36aa78ac249dp-168 }, { 0x1.bd5de633517f7p-173, 0x1.8f5cbbd7e3bd9p-168 }, { 0x1.63e7724f64774p-173, 0x1.3f5064180659dp-168 }, { 0x1.1c60a3dd2224ep-173, 0x1.fe8f1d993bb19p-169 }, { 0x1.c66566ef40333p-174, 0x1.981f750955121p-169 }, { 0x1.6afcac6c09d1ap-174, 0x1.4632fef2669ecp-169 }, { 0x1.21ee56dbc8c6ap-174, 0x1.04b03ffb7174ap-169 }, { 0x1.cf19c31a391acp-175, 0x1.a09e23dee12dbp-170 }, { 0x1.71ce2ba111a68p-175, 0x1.4cddefbe00daep-170 }, { 0x1.2744e94597dfp-175, 0x1.09eb734c1a314p-170 }, { 0x1.d77474fa3c96fp-176, 0x1.a8d28a7b21f9ep-171 }, { 0x1.7856cde19858bp-176, 0x1.534c49c3a48ap-171 }, { 0x1.2c60519b06073p-176, 0x1.0ef5469afe541p-171 }, { 0x1.df6f23e67822ep-177, 0x1.b0b689ea896fp-172 }, { 0x1.7e9197060941ap-177, 0x1.59793ad60d8abp-172 }, { 0x1.313ca61e59763p-177, 0x1.13c9ee6b2a529p-172 }, { 0x1.e703ac45eb1a5p-178, 0x1.b84429b1d33d8p-173 }, { 0x1.8479b71b66ff2p-178, 0x1.5f60114dc317ap-173 }, { 0x1.35d621cd7892fp-178, 0x1.1865baa279b03p-173 }, { 0x1.ee2c2766d39aep-179, 0x1.bf759f4ae6481p-174 }, { 0x1.8a0a908fbee34p-179, 0x1.64fc41f392bcdp-174 }, { 0x1.3a29293d26666p-179, 0x1.1cc51b3533d1bp-174 }, { 0x1.f4e2f320ed2f5p-180, 0x1.c645558315ad7p-175 }, { 0x1.8f3fbe30bc1d8p-180, 0x1.6a496dcf4682p-175 }, { 0x1.3e324f4cf0981p-180, 0x1.20e4a4b8e031ep-175 }, { 0x1.fb22b934b993p-181, 0x1.ccadf3adb1afp-176 }, { 0x1.941518f17ca26p-181, 0x1.6f4367d03dbd8p-176 }, { 0x1.41ee59ab3f625p-181, 0x1.24c114d62226p-176 }, { 0x1.00733b2d2d2a7p-181, 0x1.d2aa649df6e65p-177 }, { 0x1.9886bd6d1085bp-182, 0x1.73e63a45afd4dp-177 }, { 0x1.455a452136a6p-182, 0x1.285756918be22p-177 }, { 0x1.0314c07978175p-182, 0x1.d835dd5ba6335p-178 }, { 0x1.9c91111b6c15fp-183, 0x1.782e2c1c97a81p-178 }, { 0x1.4873499e69a71p-183, 0x1.2ba486638ab1ep-178 }, { 0x1.0573c7a800f18p-183, 
0x1.dd4be385e972p-179 }, { 0x1.a030c72f0cf33p-184, 0x1.7c17c5d99552cp-179 }, { 0x1.4b36ddfcc8743p-184, 0x1.2ea5f617d321fp-179 }, { 0x1.078e5ec28bafdp-184, 0x1.e1e853589fe15p-180 }, { 0x1.a362e51221b9fp-185, 0x1.7f9fd64579e1ap-180 }, { 0x1.4da2bb75a5c65p-185, 0x1.3159306d0abdp-180 }, { 0x1.0962c95c3eb5p-185, 0x1.e6076548c0765p-181 }, { 0x1.a624c67aa97dfp-186, 0x1.82c376c3acddfp-181 }, { 0x1.4fb4e0c13d49p-186, 0x1.33bbfc6dd55a6p-181 }, { 0x1.0aef82f484486p-186, 0x1.e9a5b32d2ef52p-182 }, { 0x1.a874210dbadcfp-187, 0x1.85800f4a2d262p-182 }, { 0x1.516b94dabb86dp-187, 0x1.35cc607ce4fd8p-182 }, { 0x1.0c33410fd4c56p-187, 0x1.ecc03cea2935dp-183 }, { 0x1.aa4f078af0321p-188, 0x1.87d359f39448ep-183 }, { 0x1.52c5696370c9dp-188, 0x1.3788a50e33e44p-183 }, { 0x1.0d2cf5025ba2dp-188, 0x1.ef546c9652b0ap-184 }, { 0x1.abb3ec79d594dp-189, 0x1.89bb66243bfd5p-184 }, { 0x1.53c13ca08d951p-189, 0x1.38ef570827673p-184 }, { 0x1.0ddbcd68fc943p-189, 0x1.f1601a115b514p-185 }, { 0x1.aca1a45423b35p-190, 0x1.8b369b3c6ec4fp-185 }, { 0x1.545e3b0f8838ap-190, 0x1.39ff49c7fe5e8p-185 }, { 0x1.0e3f374dd9d68p-190, 0x1.f2e18e05495b4p-186 }, { 0x1.ad1767288e013p-191, 0x1.8c43bad265564p-186 }, { 0x1.549be08e15927p-191, 0x1.3ab798c59d4c2p-186 }, { 0x1.0e56def61fbc4p-191, 0x1.f3d7844c8a592p-187 }, { 0x1.ad14d1b2f0b5fp-192, 0x1.8ce1e26fb8214p-187 }, { 0x1.5479f9137160bp-192, 0x1.3b17a8d383f04p-187 }, { 0x1.0e22b05782284p-192, 0x1.f4412db819edfp-188 }, { 0x1.ac99e5e7b9269p-193, 0x1.8d108ccedcd75p-188 }, { 0x1.53f8a0f98a8b8p-193, 0x1.3b1f28f8795cap-188 }, { 0x1.0da2d734853ffp-193, 0x1.f41e3132440dap-189 }, { 0x1.aba70af1767bp-194, 0x1.8ccf9296410aep-189 }, { 0x1.531844d58365ep-194, 0x1.3ace12e143377p-189 }, { 0x1.0cd7bedf59779p-194, 0x1.f36eac3bc78c2p-190 }, { 0x1.aa3d0ca096eedp-195, 0x1.8c1f2a8f92477p-190 }, { 0x1.51d9a0dfd2e93p-195, 0x1.3a24aae988ae7p-190 }, { 0x1.0bc211a3c2859p-195, 0x1.f23332c263066p-191 }, { 0x1.a85d1a4e6bedcp-196, 0x1.8affe95ac6f2ap-191 }, { 0x1.503dbfed30324p-196, 0x1.39237fbbcfa18p-191 }, { 0x1.0a62b7d92f095p-196, 0x1.f06cce511da3ep-192 }, { 0x1.a608c535a2ba1p-197, 0x1.8972c09d7f45cp-192 }, { 0x1.4e45f9fa4adffp-197, 0x1.37cb698950bdap-192 }, { 0x1.08bad69ed20a4p-197, 0x1.ee1cfc9be3df9p-193 }, { 0x1.a341fe436d2d7p-198, 0x1.8778fdb058321p-193 }, { 0x1.4bf3f24d273a5p-198, 0x1.361d88db2b95bp-193 }, { 0x1.06cbce44363ecp-198, 0x1.eb45ad695330ap-194 }, { 0x1.a00b13659be7cp-199, 0x1.851447ccc879bp-194 }, { 0x1.4949952fc2371p-199, 0x1.341b44ff4c3c6p-194 }, { 0x1.0497386163a39p-199, 0x1.e7e93fdecaep-195 }, { 0x1.9c66ac5ae65b3p-200, 0x1.82469dbf1833ep-195 }, { 0x1.464915486577bp-200, 0x1.31c64a141680ep-195 }, { 0x1.021ee5a248c7fp-200, 0x1.e40a7f340982ap-196 }, { 0x1.9857c70b8b2bcp-201, 0x1.7f125320f1e94p-196 }, { 0x1.42f4e894cc71ap-201, 0x1.2f2086b6a5cf4p-196 }, { 0x1.fec9b69351b7p-202, 0x1.dfac9ed4c27cep-197 }, { 0x1.93e1b371520a1p-202, 0x1.7b7a0d21f0262p-197 }, { 0x1.3f4fc50de840ap-202, 0x1.2c2c295822108p-197 }, { 0x1.f8d6a0e0a9508p-203, 0x1.dad335f7aacdbp-198 }, { 0x1.8f080f16c57cp-203, 0x1.7780bee4609a1p-198 }, { 0x1.3b5c9cfaada16p-203, 0x1.28eb9d3f5000ap-198 }, { 0x1.f269560bdbf92p-204, 0x1.d5823ab37d92ep-199 }, { 0x1.89cec0363502dp-204, 0x1.7329a5753ca24p-199 }, { 0x1.371e9af8e6ccfp-204, 0x1.2561873c1cc7ap-199 }, { 0x1.eb86f931c309dp-205, 0x1.cfbdfc9b64d6ep-200 }, { 0x1.8439f081b525ap-205, 0x1.6e7843670c8d2p-200 }, { 0x1.32991dc38028ep-205, 0x1.2190c2136fc76p-200 }, { 0x1.e434fdd743954p-206, 0x1.c98b1eed08258p-201 }, { 0x1.7e4e079de1a2ep-206, 0x1.69705c180d6c1p-201 }, { 0x1.2dcfb3be31ebdp-206, 0x1.1d7c5aaa0949p-201 }, { 
0x1.dc7920bafc5dcp-207, 0x1.c2ee925b3e3f6p-202 }, { 0x1.780fa5599d558p-207, 0x1.6415eeac7f744p-202 }, { 0x1.28c6164ec1235p-207, 0x1.19278bf59ff34p-202 }, { 0x1.d459605b63623p-208, 0x1.bbed8e8100752p-203 }, { 0x1.71839bad6a45bp-208, 0x1.5e6d30c67b96bp-203 }, { 0x1.2380250c57526p-208, 0x1.1495babbc8d8ep-203 }, { 0x1.cbdbf53eed588p-209, 0x1.b48d8b08c37b5p-204 }, { 0x1.6aaee88d3a5e6p-209, 0x1.587a8905112ebp-204 }, { 0x1.1e01e0cda0c0ep-209, 0x1.0fca71267dd26p-204 }, { 0x1.c3074a0c1c67dp-210, 0x1.acd43894c1f06p-205 }, { 0x1.6396af97c5f7fp-210, 0x1.52428954b7c2fp-205 }, { 0x1.184f669e7e645p-210, 0x1.0ac95a364b406p-205 }, { 0x1.b9e1f37f768c9p-211, 0x1.a4c779750fb77p-206 }, { 0x1.5c4033ae88d94p-211, 0x1.4bc9e91b546a8p-206 }, { 0x1.126ceaa621095p-211, 0x1.05963d1a5105bp-206 }, { 0x1.b072a84d6770bp-212, 0x1.9c6d5a387a6d7p-207 }, { 0x1.54b0d08180ac6p-212, 0x1.45157f4a2e598p-207 }, { 0x1.0c5eb30658611p-212, 0x1.0034f87652744p-207 }, { 0x1.a6c038fdf5aedp-213, 0x1.93cc0a254a9f5p-208 }, { 0x1.4cedf419a9b38p-213, 0x1.3e2a3c60327aap-208 }, { 0x1.062912bcc23f9p-213, 0x1.f552fb3e1c70bp-209 }, { 0x1.9cd187cff951cp-214, 0x1.8ae9d3a6eb66fp-209 }, { 0x1.44fd186d008c2p-214, 0x1.370d2466d3327p-209 }, { 0x1.ffa0c91caab55p-215, 0x1.e9ef97aa04b46p-210 }, { 0x1.92ad80b12a09bp-215, 0x1.81cd14bd535bbp-210 }, { 0x1.3ce3bd0683046p-215, 0x1.2fc348f3a8121p-210 }, { 0x1.f2b20c0b002abp-216, 0x1.de47d70b3398cp-211 }, { 0x1.885b1157e885cp-216, 0x1.787c377ac34cdp-211 }, { 0x1.34a760cc47acap-216, 0x1.2851c338b22e4p-211 }, { 0x1.e58ea51580badp-217, 0x1.d263d33512bb6p-212 }, { 0x1.7de1218b19542p-217, 0x1.6efdaa9c0e45ep-212 }, { 0x1.2c4d7bed4d522p-217, 0x1.20bdae2cd61c6p-212 }, { 0x1.d83f3d3e6d15p-218, 0x1.c64ba5bdb46dep-213 }, { 0x1.73468ba3c29b8p-218, 0x1.6557da47246f7p-213 }, { 0x1.23db7a001a935p-218, 0x1.190c20d5b5808p-213 }, { 0x1.cacc668087b83p-219, 0x1.ba075f0192b6p-214 }, { 0x1.689215536317fp-219, 0x1.5b9128fb09361p-214 }, { 0x1.1b56b45aac06fp-219, 0x1.114228bb99133p-214 }, { 0x1.bd3e92f58e3aep-220, 0x1.ad9efd6e7e35p-215 }, { 0x1.5dca68b92a62fp-220, 0x1.51afe8bbb6b6cp-215 }, { 0x1.12c46cab86e91p-220, 0x1.0964c48f92b05p-215 }, { 0x1.af9e0c680145ap-221, 0x1.a11a652260dp-216 }, { 0x1.52f60dcf5b39p-221, 0x1.47ba5483b6e8fp-216 }, { 0x1.0a29c7db10f7p-221, 0x1.0178df0b67157p-216 }, { 0x1.a1f2ec5b27de2p-222, 0x1.948157e97fbd7p-217 }, { 0x1.481b643932becp-222, 0x1.3db68a0470a4fp-217 }, { 0x1.018bc93b8e2e5p-222, 0x1.f306942454ae6p-218 }, { 0x1.9445149305037p-223, 0x1.87db6da6dd3cap-218 }, { 0x1.3d409d78b6819p-223, 0x1.33aa83bd4deabp-218 }, { 0x1.f1de9c1ab95aap-224, 0x1.e311742f9561bp-219 }, { 0x1.869c2824b4b6bp-224, 0x1.7b300d303ed2cp-219 }, { 0x1.326bb792c8c5bp-224, 0x1.299c1370fc2d1p-219 }, { 0x1.e0b212b870715p-225, 0x1.d31b83aa1a53bp-220 }, { 0x1.78ff85165ac91p-225, 0x1.6e8665a634affp-220 }, { 0x1.27a27826da7a5p-225, 0x1.1f90dcff1976ep-220 }, { 0x1.cf9b0072f8176p-226, 0x1.c32d9c998168ap-221 }, { 0x1.6b763e947db08p-226, 0x1.61e5684f4d137p-221 }, { 0x1.1cea67fe8699cp-226, 0x1.158e51a7ac97ep-221 }, { 0x1.bea20cad09b1fp-227, 0x1.b350464c51c99p-222 }, { 0x1.5e0717c155a1cp-227, 0x1.5553c2fc66728p-222 }, { 0x1.1248cf18568a2p-227, 0x1.0b99abbccdbb1p-222 }, { 0x1.adcf760300963p-228, 0x1.a38baebfb68e4p-223 }, { 0x1.50b87f214792dp-228, 0x1.48d7dafad7ffep-223 }, { 0x1.07c2b12fe4dbap-228, 0x1.01b7eac5ea688p-223 }, { 0x1.9d2b0d0c4a0b1p-229, 0x1.93e7a4bb0743p-224 }, { 0x1.43908aa677d25p-229, 0x1.3c77c897ed254p-224 }, { 0x1.fab995891c153p-230, 0x1.efdba02e2ceffp-225 }, { 0x1.8cbc2fe600108p-230, 0x1.846b92a47c343p-225 }, { 0x1.3694f45c1b92fp-230, 
0x1.30395337f89bbp-225 }, { 0x1.e6371d3dc0233p-231, 0x1.dc7fb7bbca8adp-226 }, { 0x1.7c89c6867890ep-231, 0x1.751e7a10e8264p-226 }, { 0x1.29cb17b0f706bp-231, 0x1.2421ee0211f87p-226 }, { 0x1.d20647a807a0cp-232, 0x1.c9649548abac7p-227 }, { 0x1.6c9a3fd812077p-232, 0x1.6606f00ed6d5dp-227 }, { 0x1.1d37ef5f490cdp-232, 0x1.1836b52067807p-227 }, { 0x1.be2ec88ae1479p-233, 0x1.b6922692e74d4p-228 }, { 0x1.5cf38f9818abfp-233, 0x1.572b1a2c0293ap-228 }, { 0x1.10e013ef486f7p-233, 0x1.0c7c6b93f06a1p-228 }, { 0x1.aab7b734b99f6p-234, 0x1.a40fcadcdd133p-229 }, { 0x1.4d9b2cf546b09p-234, 0x1.4890ac32b69b5p-229 }, { 0x1.04c7bad04b57cp-234, 0x1.00f779993bbc1p-229 }, { 0x1.97a78d5f1c6dbp-235, 0x1.91e450ac30542p-230 }, { 0x1.3e9611e8218p-235, 0x1.3a3ce69b6a143p-230 }, { 0x1.f1e56c0773bb7p-236, 0x1.eb57d7362f984p-231 }, { 0x1.850426f2df55dp-236, 0x1.8015f467ddd4p-231 }, { 0x1.2fe8bb3e4f4d8p-236, 0x1.2c3495adab7d8p-231 }, { 0x1.dac8e8a813f1fp-237, 0x1.d53ae35dbfa26p-232 }, { 0x1.72d2c2a7422abp-237, 0x1.6eaa5fce4af3ap-232 }, { 0x1.21972950f570dp-237, 0x1.1e7c114a57a33p-232 }, { 0x1.c44004226dc17p-238, 0x1.bf9ebf2ac34cfp-233 }, { 0x1.6118037139874p-238, 0x1.5da6aa3adb7a3p-233 }, { 0x1.13a4e15d42467p-238, 0x1.11173d5813f4dp-233 }, { 0x1.ae501496e23f2p-239, 0x1.aa895a750e0f6p-234 }, { 0x1.4fd7f2b705e64p-239, 0x1.4d0f59b16ac32p-234 }, { 0x1.0614ef7575b09p-239, 0x1.04098aca1b898p-234 }, { 0x1.98fdb1084fd1cp-240, 0x1.95ffef5a788b3p-235 }, { 0x1.3f16033b4da17p-240, 0x1.3ce864a4f75bbp-235 }, { 0x1.f1d3d20014dd3p-241, 0x1.eeabf27142ccbp-236 }, { 0x1.844cb59a101a9p-241, 0x1.82070510e6e91p-236 }, { 0x1.2ed514b22b68bp-241, 0x1.2d35346de60f3p-236 }, { 0x1.d84bdf7421499p-242, 0x1.d5fe3202b4d44p-237 }, { 0x1.7040489842ad7p-242, 0x1.6ea2738b3dbebp-237 }, { 0x1.1f1777f205012p-242, 0x1.1df8a8637ba9cp-237 }, { 0x1.bf956a62adf73p-243, 0x1.be0e1bcc5bf2bp-238 }, { 0x1.5cdae0381ff94p-243, 0x1.5bd567e120a1cp-238 }, { 0x1.0fdef3b187063p-243, 0x1.0f35198b8b7f7p-238 }, { 0x1.a7b2fd5556b6ap-244, 0x1.a6df243f2c6f4p-239 }, { 0x1.4a1e48fd99b8ep-244, 0x1.49a26968a8fd1p-239 }, { 0x1.012cc9c3d142ap-244, 0x1.00ec5ed2dbe3ep-239 }, { 0x1.90a652d08b6ecp-245, 0x1.9073f3afbdfebp-240 }, { 0x1.380bacb3471d9p-245, 0x1.380b5f70c487dp-240 }, { 0x1.e603798765b0ap-246, 0x1.e63fa380d130bp-241 }, { 0x1.7a705e88ab4c8p-246, 0x1.7ace6e086aab7p-241 }, { 0x1.26a399e180e7cp-246, 0x1.2711978a97cf7p-241 }, { 0x1.cabc2c3d98d7cp-247, 0x1.cba0a72ae9c08p-242 }, { 0x1.651157275ac6fp-247, 0x1.65efbb20adf2dp-242 }, { 0x1.15e60bb1a2bacp-247, 0x1.16b5cc5019368p-242 }, { 0x1.b08358e30e1b1p-248, 0x1.b1fca598944c3p-243 }, { 0x1.5088c08941b89p-248, 0x1.51d84fa353951p-243 }, { 0x1.05d2722aa0abep-248, 0x1.06f82c9619b9p-243 }, { 0x1.9757d44a0d5d1p-249, 0x1.9953a1cf16aadp-244 }, { 0x1.3cd5765cc7b51p-249, 0x1.3e87f66d27bbp-244 }, { 0x1.eccf7568ff3afp-250, 0x1.efb0c5f0312cdp-245 }, { 0x1.7f37a88128933p-250, 0x1.81a4d1085cfd1p-245 }, { 0x1.29f5b70afae6ep-250, 0x1.2bfdda4e2b20cp-245 }, { 0x1.cf48b1a182cb9p-251, 0x1.d2ab3b59164a6p-246 }, { 0x1.682022c0d8296p-251, 0x1.6aeea740e7e26p-246 }, { 0x1.17e72ed48d1c2p-251, 0x1.1a389017ca93cp-246 }, { 0x1.b30c9decefa86p-252, 0x1.b6dd2d215fccfp-247 }, { 0x1.520de188c8ff4p-252, 0x1.552ee415230cdp-247 }, { 0x1.06a7030db71fbp-252, 0x1.093620e33d9f9p-247 }, { 0x1.98166f02e00aap-253, 0x1.9c4336b720df7p-248 }, { 0x1.3cfce2d301755p-253, 0x1.40629fd47fda6p-248 }, { 0x1.ec63bac9af50ap-254, 0x1.f1e828f7f1e6ep-249 }, { 0x1.7e609b497d4bfp-254, 0x1.82d92bd0fbc5bp-249 }, { 0x1.28e89244647b5p-254, 0x1.2c8658b1c7fabp-249 }, { 0x1.cd07ee41894f6p-255, 0x1.d2def7b6139fbp-250 
}, { 0x1.65e4eca3c47cep-255, 0x1.6a9a29142865ap-250 }, { 0x1.15cbd7439af48p-255, 0x1.1995fff959855p-250 }, { 0x1.af324889fe32ep-256, 0x1.b549f742691f7p-251 }, { 0x1.4e9c920d5db05p-256, 0x1.5380a4af4c2e9p-251 }, { 0x1.03a122e1077b7p-256, 0x1.078d07375b0bp-251 }, { 0x1.92d9bd168c63p-257, 0x1.9921acfd99f39p-252 }, { 0x1.388030ea8589cp-257, 0x1.3d867ecfb60a5p-252 }, { 0x1.e4c4faf832008p-258, 0x1.ecccda72dba49p-253 }, { 0x1.77f4a046c515ep-258, 0x1.7e5deef2de87bp-253 }, { 0x1.2387f5f4b712ep-258, 0x1.28a511d87ce7dp-253 }, { 0x1.c413282821079p-259, 0x1.cc3995b1e2c4p-254 }, { 0x1.5e78bc56d0fbbp-259, 0x1.64f5f80200f46p-254 }, { 0x1.0faba5af01355p-259, 0x1.14d5424501d7ep-254 }, { 0x1.a51f8a6830159p-260, 0x1.ad54bef9112dp-255 }, { 0x1.465b65a83bdbbp-260, 0x1.4ce07b8d50856p-255 }, { 0x1.f9c5589e7201fp-261, 0x1.020f8e226943ep-255 }, { 0x1.87dc5ad8af9ecp-261, 0x1.90123a8271991p-256 }, { 0x1.2f918e4d3f95cp-261, 0x1.3613b89391a8fp-256 }, { 0x1.d6485a170413ap-262, 0x1.e098381b76cd3p-257 }, { 0x1.6c3b66970be3dp-262, 0x1.7465697a54c64p-257 }, { 0x1.1a0fd8c3a4e6fp-262, 0x1.20858c20a1795p-257 }, { 0x1.b4ce217bd5e55p-263, 0x1.bf05934cfa1ccp-258 }, { 0x1.522e259c7017ap-263, 0x1.5a41409f84e49p-258 }, { 0x1.05caa9cf257c4p-263, 0x1.0c2b83023243dp-258 }, { 0x1.954427a430b11p-264, 0x1.9f5672cf62a4fp-259 }, { 0x1.39a5d07601e71p-264, 0x1.41985de8f7a14p-259 }, { 0x1.e56c72cc01fccp-265, 0x1.f1f5d5615d783p-260 }, { 0x1.7797a6e64ddc9p-265, 0x1.8179bfb69c631p-260 }, { 0x1.229374c83806p-265, 0x1.2a5d1d1f1ae5cp-260 }, { 0x1.c18d454a503aep-266, 0x1.cdd1c2bddbb9ep-261 }, { 0x1.5bb5b3e414ad3p-266, 0x1.655e203c78adp-261 }, { 0x1.0ce808921de57p-266, 0x1.1481ab5a1469ap-261 }, { 0x1.9fdfe587f056ap-267, 0x1.abd4ca4bd8884p-262 }, { 0x1.418b54bd6a895p-267, 0x1.4af20f59f283dp-262 }, { 0x1.f128f851039d9p-268, 0x1.fff032b2dbde7p-263 }, { 0x1.804c6e03f60cbp-268, 0x1.8be8c488684b4p-263 }, { 0x1.290596a08a94fp-268, 0x1.3223f2e5be0fp-263 }, { 0x1.cb1395c8187f6p-269, 0x1.d964d959533d1p-264 }, { 0x1.62bb1316ec5fcp-269, 0x1.6df780d5ecc43p-264 }, { 0x1.1211a1b47d3aep-269, 0x1.1ae2302fd4bcdp-264 }, { 0x1.a772150026811p-270, 0x1.b5455f4e2ce45p-265 }, { 0x1.47143aa78b5fep-270, 0x1.51eade2a24279p-265 }, { 0x1.f93996ba5e93dp-271, 0x1.051b3f15282e5p-265 }, { 0x1.8626f2553e204p-271, 0x1.93760037df87ap-266 }, { 0x1.2d4091cd12adcp-271, 0x1.37ace1ccc1a8dp-266 }, { 0x1.d1294db79df79p-272, 0x1.e17b7713cf17fp-267 }, { 0x1.6715149108678p-272, 0x1.73db39c4b278bp-267 }, { 0x1.1529206516167p-272, 0x1.1f27cc2724f9p-267 }, { 0x1.abce28a1f17f2p-273, 0x1.bb70eb3792a1cp-268 }, { 0x1.4a1fe3e55f964p-273, 0x1.5659e4463ddd1p-268 }, { 0x1.fd6eb54be7326p-274, 0x1.08462ba9624dbp-268 }, { 0x1.89049c51b8388p-274, 0x1.97f4ffe1284a1p-269 }, { 0x1.2f2b5e6789756p-274, 0x1.3ad748e88c53fp-269 }, { 0x1.d3aa617478594p-275, 0x1.e5e5db98318a5p-270 }, { 0x1.68a9e9f7b2f9ap-275, 0x1.76e6798f53e9ap-270 }, { 0x1.161c2a1de488ep-275, 0x1.21393590da64bp-270 }, { 0x1.acda38e82463bp-276, 0x1.be32dc731f12cp-271 }, { 0x1.4a9c33e05809ap-276, 0x1.5824d30f3fce1p-271 }, { 0x1.fdaf4969fc45p-277, 0x1.09660e736b8bdp-271 }, { 0x1.88d45a53c41c5p-277, 0x1.994b0856743cbp-272 }, { 0x1.2eba8f55fe897p-277, 0x1.3b9051c5e7679p-272 }, { 0x1.d287e1e77c85ap-278, 0x1.e689bae600601p-273 }, { 0x1.6770239fc87e6p-278, 0x1.77071c1633b26p-273 }, { 0x1.14e513c1b20dcp-278, 0x1.210a174166fcdp-273 }, { 0x1.aa90041143186p-279, 0x1.bd7abebe480e6p-274 }, { 0x1.488642c71cfa6p-279, 0x1.5740f6d4ed277p-274 }, { 0x1.f9f9ce5a157bbp-280, 0x1.0874302ee34fdp-274 }, { 0x1.85974997b931fp-280, 0x1.97701e51a6bfep-275 }, { 
0x1.2bf0c37efc00bp-280, 0x1.39d3aac239fe2p-275 }, { 0x1.cdc89092e43c3p-281, 0x1.e36341a88ea0cp-276 }, { 0x1.636f0e2785c54p-281, 0x1.743c5e4db43f9p-276 }, { 0x1.118b19def65f8p-281, 0x1.1e9b8ad36fd99p-276 }, { 0x1.a4fd2c459c71p-282, 0x1.b94cde5e4fc3p-277 }, { 0x1.43ea7a73d5cfp-282, 0x1.53b3a109a94aep-277 }, { 0x1.f26454740b953p-283, 0x1.057635a1ed1dfp-277 }, { 0x1.7f60ab495565cp-283, 0x1.926f55b776f91p-278 }, { 0x1.26de8be09d876p-283, 0x1.35abb1f1cadefp-278 }, { 0x1.c5889cb51dbb9p-284, 0x1.dc853b381e5ap-279 }, { 0x1.5cbe6a335189cp-284, 0x1.6e96e5d005f5dp-279 }, { 0x1.0c22190c33c65p-284, 0x1.19fc0dba0e848p-279 }, { 0x1.9c42b0a7816acp-285, 0x1.b1c21d6e11086p-280 }, { 0x1.3ce41b9a97542p-285, 0x1.4d91f3701143cp-280 }, { 0x1.e71ba6efe048bp-286, 0x1.007de792cfd6ep-280 }, { 0x1.76552635a3b27p-286, 0x1.8a6663a0ececbp-281 }, { 0x1.1fa1c7f04e719p-286, 0x1.2f310e41037d6p-281 }, { 0x1.b9f88d1e59fb3p-287, 0x1.d2185735c5ad9p-282 }, { 0x1.538582347c59ep-287, 0x1.66381bdd98a02p-282 }, { 0x1.04c9ca3c242adp-287, 0x1.1346f1ba5a69ap-282 }, { 0x1.9093a8968bba5p-288, 0x1.a706fd9470fb8p-283 }, { 0x1.339c31e0d51b7p-288, 0x1.45000f1eec014p-283 }, { 0x1.d8619415342d3p-289, 0x1.f3510620184eap-284 }, { 0x1.6aa95f63dd017p-289, 0x1.7f84791f6fdbbp-284 }, { 0x1.16648113f6ec6p-289, 0x1.2689bc620188bp-284 }, { 0x1.ab5b65b277be7p-290, 0x1.c45998d7521aep-285 }, { 0x1.47f9aad3382fep-290, 0x1.5b50e4b7d6356p-285 }, { 0x1.f7591b1b1c875p-291, 0x1.0aa3508d5dbp-285 }, { 0x1.82335294ba26p-291, 0x1.9959eb6f64db6p-286 }, { 0x1.2848053b7dfb1p-291, 0x1.3a2fb2a16d1ccp-286 }, { 0x1.c68a6f5a8ef62p-292, 0x1.e23b370697cbbp-287 }, { 0x1.5c9ffcce7e5fdp-292, 0x1.720876851d9fbp-287 }, { 0x1.0b5b54d487d35p-292, 0x1.1be79c992aff6p-287 }, { 0x1.9a0421e5c5d71p-293, 0x1.b3980569c43a5p-288 }, { 0x1.3a5c4268d4e27p-293, 0x1.4e1fc4f822568p-288 }, { 0x1.e1fba80d34a41p-294, 0x1.0042910b94342p-288 }, { 0x1.7172912ec21f8p-294, 0x1.8908e30f7a1b3p-289 }, { 0x1.1b271db151968p-294, 0x1.2d5e5a1b8288ep-289 }, { 0x1.b1f9ef2d6b135p-295, 0x1.ce1b3b9ea6267p-290 }, { 0x1.4c872d1af92bcp-295, 0x1.623e8fb994f23p-290 }, { 0x1.fd87064e02a6fp-296, 0x1.0f8695160ca38p-290 }, { 0x1.8652a61cdcd3bp-296, 0x1.a031b186be289p-291 }, { 0x1.2af84a660968dp-296, 0x1.3eee8e04dc3ap-291 }, { 0x1.c9f07af149226p-297, 0x1.e8bd23cc416fp-292 }, { 0x1.5eacf76fffc0cp-297, 0x1.766e8d5583265p-292 }, { 0x1.0c80f3efbbf3fp-297, 0x1.1ed2fab014c43p-292 }, { 0x1.9b1f8ffd8f3c8p-298, 0x1.b76010ebb6c6ap-293 }, { 0x1.3ab5d5023fe4ap-298, 0x1.507d813502ab7p-293 }, { 0x1.e1c174ea2aaa6p-299, 0x1.01aa61c90eaccp-293 }, { 0x1.70b05029068dap-299, 0x1.8a90544ab274dp-294 }, { 0x1.1a1fba21de5fp-299, 0x1.2e0fb0911dd84p-294 }, { 0x1.afb70654af059p-300, 0x1.ce6f24739f7c7p-295 }, { 0x1.4a458b53b2a84p-300, 0x1.61eefc532711fp-295 }, { 0x1.f944d95c81983p-301, 0x1.0edb77098a96p-295 }, { 0x1.8272ab43f7156p-301, 0x1.9e82e04d9025fp-296 }, { 0x1.278886c5a4d73p-301, 0x1.3d237a2e0f859p-296 }, { 0x1.c3f57b512a1f2p-302, 0x1.e5385c7d0efep-297 }, { 0x1.598c52c5d1746p-302, 0x1.73258d0b919ebp-297 }, { 0x1.0828ad1da0983p-302, 0x1.1bdb57d01ceccp-297 }, { 0x1.93d4935512f54p-303, 0x1.b223e5e67d24ap-298 }, { 0x1.34a3670d3cd59p-303, 0x1.4bf43098a2ef1p-298 }, { 0x1.d7b67cefff216p-304, 0x1.fb93db1e39a21p-299 }, { 0x1.686e7356020d2p-304, 0x1.8402d3eada60ap-299 }, { 0x1.135e695d6d4f8p-304, 0x1.2892e3159736p-299 }, { 0x1.a4b6028e1ae52p-305, 0x1.c5502f868f04bp-300 }, { 0x1.415808da66669p-305, 0x1.5a670a5d83e0ep-300 }, { 0x1.ead51e60a821dp-306, 0x1.08ac71830fd4ep-300 }, { 0x1.76cfe88ffbfa7p-306, 0x1.9467d9d3bce7dp-301 }, { 0x1.1e2e61d740a91p-306, 
0x1.34ea92731d6fp-301 }, { 0x1.b4f6c22875415p-307, 0x1.d7e402cf49a21p-302 }, { 0x1.4d8e03e448998p-307, 0x1.6860e96265ba8p-302 }, { 0x1.fd2c6816f010bp-308, 0x1.132f279000564p-302 }, { 0x1.8494b75728df1p-308, 0x1.a4356bd52863ep-303 }, { 0x1.28836b62851b4p-308, 0x1.40cac092d16a6p-303 }, { 0x1.c476ceb4ce0a6p-309, 0x1.e9bb8c8c45eaap-304 }, { 0x1.592d26553a529p-309, 0x1.75c6ad9777c96p-304 }, { 0x1.074be65f60432p-309, 0x1.1d3d889242361p-304 }, { 0x1.91a14719373e5p-310, 0x1.b34c7bf3e0108p-305 }, { 0x1.3248b33f78dd9p-310, 0x1.4c1bf325b5886p-305 }, { 0x1.d316bfa6ecf07p-311, 0x1.fab351a6d7271p-306 }, { 0x1.641dc398561efp-311, 0x1.827d8b273a859p-306 }, { 0x1.0f79d08c027e2p-311, 0x1.26c35a8453a6ep-306 }, { 0x1.9ddabce45ff88p-312, 0x1.c18e854f7a653p-307 }, { 0x1.3b6a0443345f1p-312, 0x1.56c727238c10ep-307 }, { 0x1.e0b830517633fp-313, 0x1.05545196af9e3p-307 }, { 0x1.6e4903f595976p-313, 0x1.8e6b62ae03487p-308 }, { 0x1.170eca4e7a4cap-313, 0x1.2facf384d3a3bp-308 }, { 0x1.a92756c27d93ap-314, 0x1.ceddf1e753b81p-309 }, { 0x1.43d40bf74392dp-314, 0x1.60b61e0028436p-309 }, { 0x1.ed3e286c4c0dep-315, 0x1.0cbd09b1e5e1p-309 }, { 0x1.77993389df313p-315, 0x1.997719e8b73a8p-310 }, { 0x1.1dfa945eaae99p-315, 0x1.37e77cf85ca37p-310 }, { 0x1.b36ec5aa0588p-316, 0x1.db1e802a6c81fp-311 }, { 0x1.4b749e64b35f5p-316, 0x1.69d3aa6fccfd9p-311 }, { 0x1.f88d823260c9ep-317, 0x1.1383f4dd09079p-311 }, { 0x1.7ffa0f1fabb65p-317, 0x1.a388f33976b7bp-312 }, { 0x1.242e12375b352p-317, 0x1.3f613589599c6p-312 }, { 0x1.bc9a844ffd2b5p-318, 0x1.e635a66e3ebe7p-313 }, { 0x1.523af73f84783p-318, 0x1.720bfb4a981d7p-313 }, { 0x1.0146a610e0588p-318, 0x1.199a49bcc51p-313 }, { 0x1.87590d6d36008p-319, 0x1.ac8ae259e160cp-314 }, { 0x1.299b80ea6bb7fp-319, 0x1.4609b0c4183cap-314 }, { 0x1.c496292aa266bp-320, 0x1.f00af26520f9dp-315 }, { 0x1.5817f72c95e4cp-320, 0x1.794ce31e24c7bp-315 }, { 0x1.059392396d038p-320, 0x1.1ef2877dbfcadp-315 }, { 0x1.8da5a346cbb3fp-321, 0x1.b468dc95cb829p-316 }, { 0x1.2e36a9eb80d32p-321, 0x1.4bd213115ac94p-316 }, { 0x1.cb4fb203e18ap-322, 0x1.f88862b544527p-317 }, { 0x1.5cfe5be9615c7p-322, 0x1.7f861b04cbe3ap-317 }, { 0x1.0923c6394f695p-322, 0x1.2380a7a548a2fp-317 }, { 0x1.92d18166ccd51p-323, 0x1.bb1122f6e5762p-318 }, { 0x1.31f510cb3f507p-323, 0x1.50ad48dd9b3a6p-318 }, { 0x1.d0b7c794af438p-324, 0x1.ff9ab8e5d6631p-319 }, { 0x1.60e2f23228dedp-324, 0x1.84a97f6b3e853p-319 }, { 0x1.0bef1906dac58p-324, 0x1.273a4b16ba84fp-319 }, { 0x1.96d0ca88e4fcp-325, 0x1.c07484e1da469p-320 }, { 0x1.34ce1af3c1b6p-325, 0x1.549037ceef1fep-320 }, { 0x1.d4c1f7c67dd18p-326, 0x1.0298e0fc06037p-320 }, { 0x1.63bcc0600e3b1p-326, 0x1.88ab45875f419p-321 }, { 0x1.0def17046c37ep-326, 0x1.2a16e161fa35fp-321 }, { 0x1.999a40ba75f42p-327, 0x1.c48699c75f345p-322 }, { 0x1.36bb3093bcf7fp-327, 0x1.5771e906a9978p-322 }, { 0x1.d764e5657aa2p-328, 0x1.04a04a1699caap-322 }, { 0x1.658528dc53bd5p-328, 0x1.8b822865b44e6p-323 }, { 0x1.0f1f1acd583cp-328, 0x1.2c0fc98ac934cp-323 }, { 0x1.9b2768ee2e28p-329, 0x1.c73df0b6d4334p-324 }, { 0x1.37b7d60833afbp-329, 0x1.594bab8ddacb1p-324 }, { 0x1.d89a6c43f4c1p-330, 0x1.05dee05833b3cp-324 }, { 0x1.663803afd90e2p-330, 0x1.8d278c9cbfc58p-325 }, { 0x1.0f7c5f2e4265p-330, 0x1.2d206b997c2ccp-325 }, { 0x1.9b74a41343d69p-331, 0x1.c89434d36542fp-326 }, { 0x1.37c1bd3bb9cfep-331, 0x1.5a192e33cf627p-326 }, { 0x1.d85fb90bdf218p-332, 0x1.0651bc0c61b2p-326 }, { 0x1.65d3aea4b609ep-332, 0x1.8d9799e5f2521p-327 }, { 0x1.0f0609e7aa674p-332, 0x1.2d464a6b30dc2p-327 }, { 0x1.9a813d2878f74p-333, 0x1.c88645e6c88eep-328 }, { 0x1.36d8ce9d2217bp-333, 0x1.59d89052b0525p-328 }, { 
0x1.d6b5543d3c94p-334, 0x1.05f7d07f3fb02p-328 }, { 0x1.645913a262a36p-334, 0x1.8cd14a1185c8dp-329 }, { 0x1.0dbd2f003b6a5p-334, 0x1.2c810d60e767ep-329 }, { 0x1.984f6bfe6778p-335, 0x1.c714448c370a6p-330 }, { 0x1.34ff297cd534dp-335, 0x1.588a691f2cd1fp-330 }, { 0x1.d39f201da2255p-336, 0x1.04d1f01416963p-330 }, { 0x1.61cba521cabb4p-336, 0x1.8ad66d03eba59p-331 }, { 0x1.0ba4cc94c45b3p-336, 0x1.2ad281b8cc2ap-331 }, { 0x1.94e44c9a075e7p-337, 0x1.c44191b160ec2p-332 }, { 0x1.32391bcecdc03p-337, 0x1.5631c55b5d22cp-332 }, { 0x1.cf2449a3fda4bp-338, 0x1.02e2c911c7929p-332 }, { 0x1.5e3150cc8eda4p-338, 0x1.87aba1a7120bfp-333 }, { 0x1.08c1bf3c985fap-338, 0x1.283e938a586f7p-333 }, { 0x1.9047cb663bb8cp-339, 0x1.c014c17012593p-334 }, { 0x1.2e8d117dfdd44p-339, 0x1.52d41b7968429p-334 }, { 0x1.c94f2cb2815a8p-340, 0x1.002edb3674f27p-334 }, { 0x1.599268900e7bcp-340, 0x1.835843f5f0b0cp-335 }, { 0x1.051aaf415041dp-340, 0x1.24cb3e8b7d756p-335 }, { 0x1.8a84869fc8267p-341, 0x1.ba9781881c8a9p-336 }, { 0x1.2a037bab743e1p-341, 0x1.4e79366e7a47p-336 }, { 0x1.c22d2c350e306p-342, 0x1.f978cc962d426p-337 }, { 0x1.53f982a03a248p-342, 0x1.7de65083f0e21p-337 }, { 0x1.00b7f70f68972p-342, 0x1.208076f18ea3p-337 }, { 0x1.83a7a5a0b9d4dp-343, 0x1.b3d6740403453p-338 }, { 0x1.24a6b05eb3edap-343, 0x1.492b17a8d9ad4p-338 }, { 0x1.b9ce7efad864cp-344, 0x1.f126a42ab2a64p-339 }, { 0x1.4d7351162fad8p-344, 0x1.77623e1a3ca2fp-339 }, { 0x1.f74706d1f613cp-345, 0x1.1b680aeae0c3cp-339 }, { 0x1.7bc0a6e57fbc5p-345, 0x1.abe0fed214bcap-340 }, { 0x1.1e82c35430e3dp-345, 0x1.42f5d0cb0afebp-340 }, { 0x1.b045f25c98b4bp-346, 0x1.e77a20528f8f5p-341 }, { 0x1.460e7202036c7p-346, 0x1.6fdace394b03cp-341 }, { 0x1.ebd15c07c2acdp-347, 0x1.158d7d54f1681p-341 }, { 0x1.72e125d540295p-347, 0x1.a2c9115542385p-342 }, { 0x1.17a558b9c184fp-347, 0x1.3be755f8b210cp-342 }, { 0x1.a5a8a3f3de092p-348, 0x1.dc88f077bd369p-343 }, { 0x1.3ddb38ecb5b52p-348, 0x1.6760d57bb9982p-343 }, { 0x1.df2826b036578p-349, 0x1.0efdda755dbb3p-343 }, { 0x1.691c997f37f0ep-349, 0x1.98a2e123c782ep-344 }, { 0x1.101d72c627ff7p-349, 0x1.340f49a72211p-344 }, { 0x1.9a0db3d2b8dacp-350, 0x1.d06b3f65f6fdp-345 }, { 0x1.34eb72e63e592p-350, 0x1.5e06fcff790f4p-345 }, { 0x1.d166c8f34fca4p-351, 0x1.07c787991a68p-345 }, { 0x1.5e880d9f1fe43p-351, 0x1.8d849f54265f7p-346 }, { 0x1.07fb3b2ff1602p-351, 0x1.2b7ec30262d2bp-346 }, { 0x1.8d8df0cbffd52p-352, 0x1.c33b5a8ad639fp-347 }, { 0x1.2b52265317648p-352, 0x1.53e17e1a8afadp-347 }, { 0x1.c2aa6bd34f17bp-353, 0x1.fff41d2913dabp-348 }, { 0x1.5339d751ff2a1p-353, 0x1.818627da2e9e4p-348 }, { 0x1.fe9f93308c405p-354, 0x1.2248100f21115p-348 }, { 0x1.80438073219dep-354, 0x1.b515531d535ebp-349 }, { 0x1.21234fbc4a127p-354, 0x1.4905d9b84e0cbp-349 }, { 0x1.b31198aa5f8abp-355, 0x1.ef4bcc5f71a72p-350 }, { 0x1.474946f304456p-355, 0x1.74c0ac8d03b2bp-350 }, { 0x1.ec59d00f3fe38p-356, 0x1.187e74c209a91p-350 }, { 0x1.7249848679fa9p-356, 0x1.a6169b09c4411p-351 }, { 0x1.16739cec78bd4p-356, 0x1.3d8a8ccb26cd9p-351 }, { 0x1.a2bbd0795adeep-357, 0x1.ddb87127c2076p-352 }, { 0x1.3ace589cd3352p-357, 0x1.674e5d7be735cp-352 }, { 0x1.d949ad392f075p-358, 0x1.0e35e84d33d3fp-352 }, { 0x1.63bbbf78651ccp-358, 0x1.965d9f895d99cp-353 }, { 0x1.0b5827a3ba382p-358, 0x1.3186c3440696p-353 }, { 0x1.91c922f9ee4cp-359, 0x1.cb5d51a48d7d4p-354 }, { 0x1.2de164c74e725p-359, 0x1.594a1039f0199p-354 }, { 0x1.c5941f108d9d1p-360, 0x1.0382d1e479246p-354 }, { 0x1.54b639c219649p-360, 0x1.8609634a384ccp-355 }, { 0x1.ffcc62473097ap-361, 0x1.25120afe02122p-355 }, { 0x1.8059c757355aep-361, 0x1.b85e31314f4b4p-356 }, { 0x1.209ad26ca18d9p-361, 
0x1.4acee7c0fcbafp-356 }, { 0x1.b15e18d0d2d12p-362, 0x1.f0f38c6449ad9p-357 }, { 0x1.4554e9983b016p-362, 0x1.753919ff4b182p-357 }, { 0x1.e865bf893f8f4p-363, 0x1.1844080030d76p-357 }, { 0x1.6e8db855aac9ap-363, 0x1.a4dede3a3eb93p-358 }, { 0x1.1312cc0ae5d04p-363, 0x1.3bf7fe7aa33ap-358 }, { 0x1.9ccc1bfbf7ecbp-364, 0x1.da5e8d4d639edp-359 }, { 0x1.35b35e7d0088ep-364, 0x1.640bc7176cda7p-359 }, { 0x1.d0a5ff60b92cfp-365, 0x1.0b342b640cc13p-359 }, { 0x1.5c84558f35d95p-365, 0x1.9102c47629cb9p-360 }, { 0x1.0560f8bafb2c7p-365, 0x1.2ce013e375d0fp-360 }, { 0x1.8801ce509ea26p-366, 0x1.c36f07720a932p-361 }, { 0x1.25ec7207b3c64p-366, 0x1.529fe13854ed9p-361 }, { 0x1.b8b58f7c67c36p-367, 0x1.fbf2dc269c35dp-362 }, { 0x1.4a5c0b3b7424dp-367, 0x1.7cec854a40ddcp-362 }, { 0x1.ef3874e46141bp-368, 0x1.1da13f1aaaee6p-362 }, { 0x1.732197e24d857p-368, 0x1.ac4c46230c45cp-363 }, { 0x1.1619ff0ea7ec6p-368, 0x1.4112fbeff8a1fp-363 }, { 0x1.a0bb46a0a2c53p-369, 0x1.e15420dda8758p-364 }, { 0x1.383201c8ba71ap-369, 0x1.68bd97eb5b05dp-364 }, { 0x1.d3b4e4b894768p-370, 0x1.0e54a78756b6bp-364 }, { 0x1.5e4c4aaef013p-370, 0x1.951c14f527745p-365 }, { 0x1.0654a030d3e7p-370, 0x1.2f8178dd14a04p-365 }, { 0x1.88dc03d1ca801p-371, 0x1.c6b6bf9361ee4p-366 }, { 0x1.2621d65152a67p-371, 0x1.5495f2949c65ep-366 }, { 0x1.b860981f4834ap-372, 0x1.fe24891c8ca0cp-367 }, { 0x1.49a0d4c97c281p-372, 0x1.7e02609a87253p-367 }, { 0x1.ed66ed1143993p-373, 0x1.1e064158c947bp-367 }, { 0x1.713a5a10cc9bp-373, 0x1.ac4304f253262p-368 }, { 0x1.14455cbbff469p-373, 0x1.4093bdea6e36fp-368 }, { 0x1.9d62205df47a6p-374, 0x1.dfe14a435c3c2p-369 }, { 0x1.353bfdeb15aa4p-374, 0x1.6720e3d624fdcp-369 }, { 0x1.ce97f23783a55p-375, 0x1.0cba8970a9d66p-369 }, { 0x1.59f649793ea9ap-375, 0x1.921e961b81171p-370 }, { 0x1.02b46c188f22dp-375, 0x1.2cd3135c626d1p-370 }, { 0x1.82dcfdba2d59cp-376, 0x1.c2097f7f7c953p-371 }, { 0x1.213830f44d648p-376, 0x1.5096e15b063dbp-371 }, { 0x1.b0639acae41c7p-377, 0x1.f76b39886a20dp-372 }, { 0x1.432d063e4cc5ap-377, 0x1.786c2636e4e2ap-372 }, { 0x1.e3096b161ade1p-378, 0x1.196dc712e8651p-372 }, { 0x1.68f1646f450ccp-378, 0x1.a4c39680abb0bp-373 }, { 0x1.0dad51a121c5fp-378, 0x1.3a80eb1934625p-373 }, { 0x1.92ed52465cf13p-379, 0x1.d6196b3830612p-374 }, { 0x1.2cf8cdb32b26dp-379, 0x1.5f4b3b930a91ap-374 }, { 0x1.c1934bb7035c1p-380, 0x1.067b3db09279ep-374 }, { 0x1.4fbc11c19c0b7p-380, 0x1.8832413bcb6f5p-375 }, { 0x1.f5613cdc1ad52p-381, 0x1.24f8b72bbd6eep-375 }, { 0x1.76547ab0f816ap-381, 0x1.b5a5bcacf14ddp-376 }, { 0x1.1770c93ef3136p-381, 0x1.46d8046ba690cp-376 }, { 0x1.a128a30d837ebp-382, 0x1.e8209bd7c6d4dp-377 }, { 0x1.375630e92b79p-382, 0x1.6c744b66f6406p-377 }, { 0x1.d0a93cd8add1ep-383, 0x1.1015024fefc8dp-377 }, { 0x1.5ab4549d6cf15p-383, 0x1.9631ba1694964p-378 }, { 0x1.02a8fed4a1944p-383, 0x1.2f2b3b1ae197dp-378 }, { 0x1.81e6d5efc2ecep-384, 0x1.c47e5b8f9de0cp-379 }, { 0x1.1fd54f3e20bfcp-384, 0x1.51a481761d265p-379 }, { 0x1.ad523512d80aep-385, 0x1.f7d2ff106229cp-380 }, { 0x1.4023f854f9c86p-385, 0x1.77da522f79ec5p-380 }, { 0x1.dd649c8fad0d5p-386, 0x1.185a192bd02b4p-380 }, { 0x1.63e684c4d4572p-386, 0x1.a22ed5ef67f83p-381 }, { 0x1.094b5ecc6e29p-386, 0x1.37d9a85948033p-381 }, { 0x1.8b7643330549ep-387, 0x1.d10da89b8212ap-382 }, { 0x1.26b65f14cd4dap-387, 0x1.5ab7d4224f7e2p-382 }, { 0x1.b734f53e57228p-388, 0x1.0276587fa1c2p-382 }, { 0x1.473b9d1931175p-388, 0x1.814bdb918424dp-383 }, { 0x1.e78d8c6e84fddp-389, 0x1.1f2684f2af658p-383 }, { 0x1.6b2a2c93cd65ap-389, 0x1.abf540fb4e1a1p-384 }, { 0x1.0e7a7b055d281p-389, 0x1.3eddfeeed0dd2p-384 }, { 0x1.92d87cacce695p-390, 0x1.db1c82f79707dp-385 }, 
{ 0x1.2bf57b6e0d98dp-390, 0x1.61ea0b7eb4c3cp-385 }, { 0x1.bea4f9488e121p-391, 0x1.0799f1fb897d8p-385 }, { 0x1.4c7d8bf7bdc41p-391, 0x1.889f21fdb1d69p-386 }, { 0x1.eef6b8bfa9225p-392, 0x1.245c20ba28a39p-386 }, { 0x1.705ed2bbfd521p-392, 0x1.b3598a0d5984p-387 }, { 0x1.121f1b69882ebp-392, 0x1.4418fde75923ep-387 }, { 0x1.97ec608197c79p-393, 0x1.e27e05b6c31f9p-388 }, { 0x1.2f7b0edc74f1cp-393, 0x1.671af7f5d8858p-388 }, { 0x1.c380c41f7503p-394, 0x1.0b3d4442eda68p-388 }, { 0x1.4fd20f15083b3p-394, 0x1.8db341e4d4306p-389 }, { 0x1.f37ea8d01e9c5p-395, 0x1.27e37e3bc73c9p-389 }, { 0x1.736cebb19a201p-395, 0x1.b83a639f29a8p-390 }, { 0x1.1428c012e2c57p-395, 0x1.47730acf38edcp-390 }, { 0x1.9a9ae80c06018p-396, 0x1.e710d5155d028p-391 }, { 0x1.31371c2b63b8p-396, 0x1.6a331ab64b688p-391 }, { 0x1.c5b240b14f4d6p-397, 0x1.0d4fd25f7f52ep-391 }, { 0x1.5129ffd17a136p-397, 0x1.90712f4e38e37p-392 }, { 0x1.f510ba62354a5p-398, 0x1.29ac951c1e60bp-392 }, { 0x1.74468acd1611cp-398, 0x1.ba819d5f14678p-393 }, { 0x1.148e1d96c299ep-398, 0x1.48dce2dc3ecd5p-393 }, { 0x1.9ad7d58aaba44p-399, 0x1.e8c0193d16d55p-394 }, { 0x1.3121b71d77179p-399, 0x1.6b2456938b866p-394 }, { 0x1.c52f68dd90e64p-400, 0x1.0dc826696c76cp-394 }, { 0x1.507f397188496p-400, 0x1.90cc63cdbf2a2p-395 }, { 0x1.f3a5bdf92c388p-401, 0x1.29af3c144f8cp-395 }, { 0x1.72e7cbdbb95dbp-401, 0x1.ba24cc0f4c8e2p-396 }, { 0x1.134d638b07143p-401, 0x1.48500e815d897p-396 }, { 0x1.98a2111174d79p-402, 0x1.e7841c45926dp-397 }, { 0x1.2f3b409e1b7b6p-402, 0x1.69ea5b1b71301p-397 }, { 0x1.c1fa91a869695p-403, 0x1.0ca4195cda6d3p-397 }, { 0x1.4dd4c7d7ec9fap-403, 0x1.8ec33daf13649p-398 }, { 0x1.ef442d8796795p-404, 0x1.27eb66fea5e85p-398 }, { 0x1.6f56f0c0f22b9p-404, 0x1.b72598c77c448p-399 }, { 0x1.106c4a594a047p-404, 0x1.45cf12a60cb9ap-399 }, { 0x1.9403b0e4bd1b9p-405, 0x1.e36284e81b5ffp-400 }, { 0x1.2b8c63e7468c1p-405, 0x1.668ac570f2fc8p-400 }, { 0x1.bc22598793379p-406, 0x1.09e8e37ef2488p-400 }, { 0x1.4936d06178106p-406, 0x1.8a5f0c63b5c24p-401 }, { 0x1.e7fffb3b16a7dp-407, 0x1.2469273320bdap-401 }, { 0x1.69a431ed205ap-407, 0x1.b191b44e70edfp-402 }, { 0x1.0bf7e7cce4d07p-407, 0x1.41655d7606103p-402 }, { 0x1.8d11ace4d8996p-408, 0x1.dc6e2b76185d5p-403 }, { 0x1.2625d4b960a47p-408, 0x1.6114f58eab906p-403 }, { 0x1.b3c139841a735p-409, 0x1.05a2f4a403a4dp-403 }, { 0x1.42ba35d81be5cp-409, 0x1.83b3c9af7ee45p-404 }, { 0x1.ddf9fa6fc513ap-410, 0x1.1f386e3013e68p-404 }, { 0x1.61e943a26f542p-410, 0x1.a9826f127d04dp-405 }, { 0x1.06044c28d2704p-410, 0x1.3b26ef9596f74p-405 }, { 0x1.83eb403668f94p-411, 0x1.d2c68adc24dd3p-406 }, { 0x1.1f1fd15ed30fep-411, 0x1.59a199b7c8167p-406 }, { 0x1.a8fcbdc7eab51p-412, 0x1.ffcb2bfa5b8dap-407 }, { 0x1.3a7bfb4be9962p-412, 0x1.7adf828472cfdp-407 }, { 0x1.d15ee90987618p-413, 0x1.1870951a86a79p-407 }, { 0x1.584895194492p-413, 0x1.9f1bfa110cbbap-408 }, { 0x1.fd57d7b45b3cap-414, 0x1.332fc55367264p-408 }, { 0x1.78b8ffae32bfp-414, 0x1.c696d39db75f3p-409 }, { 0x1.16996dab0cd1ep-414, 0x1.5051f4ea04fdfp-409 }, { 0x1.9c046dcaa75a4p-415, 0x1.f194b2a4cb97p-410 }, { 0x1.30a06c462f23ep-415, 0x1.700975cbb46aap-410 }, { 0x1.c2662350ce7fap-416, 0x1.102fae0ec7794p-410 }, { 0x1.4cec5169fb931p-416, 0x1.928c588cfb6d9p-411 }, { 0x1.ec1db7d8e44b5p-417, 0x1.29a3060c44f3ap-411 }, { 0x1.6babae8929706p-417, 0x1.b814aa869e0e4p-412 }, { 0x1.0cb7ae5506e7ep-417, 0x1.454ee7edd0063p-412 }, { 0x1.8d106f7f4047ep-418, 0x1.e0e0b72e6ef2ep-413 }, { 0x1.255213192c405p-418, 0x1.6360f251c2f1fp-413 }, { 0x1.b1500fc71b69ap-419, 0x1.0699a6631f93fp-413 }, { 0x1.40052c8ba04b4p-419, 0x1.840a0d97bb129p-414 }, { 0x1.d8a3d24511c07p-420, 
0x1.1eaa023d58a69p-414 }, { 0x1.5cfadd7b9716p-420, 0x1.a77ea01d8b821p-415 }, { 0x1.01a47ddad3ea8p-420, 0x1.38c7c7057a652p-415 }, { 0x1.7c5ff3799c35bp-421, 0x1.cdf6c504a93e5p-416 }, { 0x1.18c087e86a1f3p-421, 0x1.551bff88c1175p-416 }, { 0x1.9e64530b957f4p-422, 0x1.f7ae8590bb8p-417 }, { 0x1.31c908986e1a8p-422, 0x1.73d293026bc2ap-417 }, { 0x1.c33b25da2082ep-423, 0x1.12730a9790f69p-417 }, { 0x1.4ce362055227ep-423, 0x1.951a7082f394ap-418 }, { 0x1.eb1b0ae0a386ap-424, 0x1.2af1081b22794p-418 }, { 0x1.6a3779e1ff3bp-424, 0x1.b925bc48353ep-419 }, { 0x1.0b1f245435eeap-424, 0x1.4575deb5305a2p-419 }, { 0x1.89efddb97fd18p-425, 0x1.e029ff0fc8645p-420 }, { 0x1.227180cb0a8cap-425, 0x1.6228a92a17423p-420 }, { 0x1.ac39e8a7de062p-426, 0x1.05302bb5e3a1ap-420 }, { 0x1.3ba5b5279aa24p-426, 0x1.81331d3a2cc81p-421 }, { 0x1.d145ea8ff6403p-427, 0x1.1c02d69097c72p-421 }, { 0x1.56df011e743b9p-427, 0x1.a2c1b0ae83a64p-422 }, { 0x1.f94750d0f9308p-428, 0x1.34ad734ae6135p-422 }, { 0x1.7442e7172840ap-428, 0x1.c703bfdc748cdp-423 }, { 0x1.123a683e9b9d5p-428, 0x1.4f5290291de6ep-423 }, { 0x1.93f94a8e393e5p-429, 0x1.ee2bb5a2a447p-424 }, { 0x1.298449094a08p-429, 0x1.6c16f34d9525ep-424 }, { 0x1.b62c8f87855a8p-430, 0x1.0c379a70923bcp-424 }, { 0x1.42a02f59d51efp-430, 0x1.8b21b8919710fp-425 }, { 0x1.db09bb0ffb21fp-431, 0x1.2303a1b68b2dep-425 }, { 0x1.5daee76f997a8p-431, 0x1.ac9c706a79cfcp-426 }, { 0x1.01604a662bf4cp-431, 0x1.3b983b3f72fb5p-426 }, { 0x1.7ad33d50dacdp-432, 0x1.d0b33fd9b6e85p-427 }, { 0x1.16c1e4c8c451ap-432, 0x1.5615904c6373ap-427 }, { 0x1.9a32159dea0d8p-433, 0x1.f7950165d693dp-428 }, { 0x1.2dc48781056c9p-433, 0x1.729dc070c926ap-428 }, { 0x1.bbf2871addffbp-434, 0x1.10b9b38c6e833p-428 }, { 0x1.4684a4152d4ep-434, 0x1.9154f9f73ee5fp-429 }, { 0x1.e03df4eb2c204p-435, 0x1.27418ebfd96bep-429 }, { 0x1.6120558a89b12p-435, 0x1.b26192fa2f36ep-430 }, { 0x1.03a014bcb5352p-435, 0x1.3f7df7d25b3e6p-430 }, { 0x1.7db773a6f6623p-436, 0x1.d5ec232ba3385p-431 }, { 0x1.1893b9023690dp-436, 0x1.598c75ff21ea4p-431 }, { 0x1.9c6ba6a49465ap-437, 0x1.fc1f9e46a53e2p-432 }, { 0x1.2f125d64e7642p-437, 0x1.758c452444076p-432 }, { 0x1.bd607b51aff83p-438, 0x1.1294b791c6529p-432 }, { 0x1.4735d5e25dd32p-438, 0x1.939e692035be7p-433 }, { 0x1.e0bb7795ebab2p-439, 0x1.289cc9b3b4107p-433 }, { 0x1.611962fb4b008p-439, 0x1.b3e5c199dc217p-434 }, { 0x1.035217aa6e0adp-439, 0x1.40415be2c6028p-434 }, { 0x1.7cd9c096da3b3p-440, 0x1.d6871e2c76342p-435 }, { 0x1.17a22cd2a508fp-440, 0x1.599d2a64857abp-435 }, { 0x1.9a95351e8c9f1p-441, 0x1.fba952efabe51p-436 }, { 0x1.2d63f329a8bcbp-441, 0x1.74cc660d4897ap-436 }, { 0x1.ba6ba0cb47e2bp-442, 0x1.11baa6a990cd8p-436 }, { 0x1.44ae89d144108p-442, 0x1.91ecc31adec4ep-437 }, { 0x1.dc7e8d1b8f556p-443, 0x1.270b14a1f9816p-437 }, { 0x1.5d9a42222275cp-443, 0x1.b11d883fd3ec1p-438 }, { 0x1.00789e350bd1ap-443, 0x1.3ddca348b8e79p-438 }, { 0x1.7840aaba80c98p-444, 0x1.d27f9dd765764p-439 }, { 0x1.13f45ccd8c935p-444, 0x1.56472f42babf3p-439 }, { 0x1.94bc9a9955f26p-445, 0x1.f6359d3980ea5p-440 }, { 0x1.28c5f3eaf8eddp-445, 0x1.7063ccd1b83c6p-440 }, { 0x1.b32a3c3e46a35p-446, 0x1.0e31f012ad2b3p-440 }, { 0x1.3f01c91fe7f47p-446, 0x1.8c4cd2c02ec2dp-441 }, { 0x1.d3a718c61d154p-447, 0x1.2298481c2ca0dp-441 }, { 0x1.56bd3dd5a05c1p-447, 0x1.aa1de55237abcp-442 }, { 0x1.f65222fadfcp-448, 0x1.3861db33230bp-442 }, { 0x1.700eb717cfb77p-448, 0x1.c9f401331dbf6p-443 }, { 0x1.0da5e12700c8dp-448, 0x1.4fa3a533642f6p-443 }, { 0x1.8b0da54d3c71fp-449, 0x1.ebed8656f1a7bp-444 }, { 0x1.215aeed941b43p-449, 0x1.6873a105b43c2p-444 }, { 0x1.a7d28bd609e5p-450, 0x1.081521636047p-444 }, { 
0x1.3659f3261d19p-450, 0x1.82e8d038330cap-445 }, { 0x1.c6770887b13f6p-451, 0x1.1b65bea6b7e6ap-445 }, { 0x1.4cb570f463d9dp-451, 0x1.9f1b427ce89a2p-446 }, { 0x1.e715dafe5cd6p-452, 0x1.2ff9fffd4f5f9p-446 }, { 0x1.6480ba9b1723cp-452, 0x1.bd241d06b6757p-447 }, { 0x1.04e575dd6f2ebp-452, 0x1.45e411382662bp-447 }, { 0x1.7dcff6d521467p-453, 0x1.dd1da1bc7ec85p-448 }, { 0x1.1759a98201ff3p-453, 0x1.5d36e9f7af39cp-448 }, { 0x1.98b82586ccf2dp-454, 0x1.ff233639de02ap-449 }, { 0x1.2af6afc0ce651p-454, 0x1.7606528b3cf28p-449 }, { 0x1.b54f244df93dfp-455, 0x1.11a8b54a30c34p-449 }, { 0x1.3fcc4e4385b18p-455, 0x1.9066e8a3084adp-450 }, { 0x1.d3abb2d5b9282p-456, 0x1.24e2ffedd9f78p-450 }, { 0x1.55eaec016b2b5p-456, 0x1.ac6e23cde6ac9p-451 }, { 0x1.f3e576e5bfb2cp-457, 0x1.394ff72563c26p-451 }, { 0x1.6d6394041cb01p-457, 0x1.ca3259bb8013ep-452 }, { 0x1.0b0a8012d71fbp-457, 0x1.4effb58fcce2p-452 }, { 0x1.8647f7f3a91dep-458, 0x1.e9cac23b8427ep-453 }, { 0x1.1d29e5c60946bp-458, 0x1.6602f707600f3p-453 }, { 0x1.a0aa72640fd47p-459, 0x1.05a7bd790a4bcp-453 }, { 0x1.305e23384e58ap-459, 0x1.7e6b1b23c38f4p-454 }, { 0x1.bc9e08de1532fp-460, 0x1.176cc55ca9b8p-454 }, { 0x1.44b4e89c6a35fp-460, 0x1.984a277e8539ap-455 }, { 0x1.da366d9d2b975p-461, 0x1.2a417253e014bp-455 }, { 0x1.5a3c60cb2c6b1p-461, 0x1.b3b2c9b4277c6p-456 }, { 0x1.f98800fc076dbp-462, 0x1.3e333559670c8p-456 }, { 0x1.71033226bf0afp-462, 0x1.d0b8591b88278p-457 }, { 0x1.0d53e944a7e18p-462, 0x1.534ff7f271b4dp-457 }, { 0x1.89187f3d75a14p-463, 0x1.ef6ed82d51675p-458 }, { 0x1.1ed5d0deddfb7p-463, 0x1.69a61d0edc9d2p-458 }, { 0x1.a28be72757b85p-464, 0x1.07f57aca805f1p-458 }, { 0x1.3154ef266983dp-464, 0x1.814481a9f253cp-459 }, { 0x1.bd6d859990532p-465, 0x1.1921067277b5dp-459 }, { 0x1.44dcd404b4fcdp-465, 0x1.9a3a7d2712f82p-460 }, { 0x1.d9cdf2aadd6a6p-466, 0x1.2b45137355f77p-460 }, { 0x1.5979672b76b96p-466, 0x1.b497e1657b91bp-461 }, { 0x1.f7be424410479p-467, 0x1.3e6cfcc06ed27p-461 }, { 0x1.6f36e7903ba4fp-467, 0x1.d06cfa865bc4ep-462 }, { 0x1.0ba8019bd4e86p-467, 0x1.52a47395ed2aep-462 }, { 0x1.8621eaa755f34p-468, 0x1.edca8e605e67ap-463 }, { 0x1.1c4a9efdce654p-468, 0x1.67f77ef705254p-463 }, { 0x1.9e475b5aaea97p-469, 0x1.0660edcde1e02p-463 }, { 0x1.2dd03980220acp-469, 0x1.7e727aec99554p-464 }, { 0x1.b7b478b8fda1cp-470, 0x1.16b24c391593bp-464 }, { 0x1.40424c4fd21f7p-470, 0x1.96221780dfe95p-465 }, { 0x1.d276d459f43c7p-471, 0x1.27e2788696d86p-465 }, { 0x1.53aa8c500f5dp-471, 0x1.af1357749947cp-466 }, { 0x1.ee9c5073f397ep-472, 0x1.39fac2bf7a531p-466 }, { 0x1.6812e6a2e8fcp-472, 0x1.c9538eaa71fbp-467 }, { 0x1.06198ecffc0ep-472, 0x1.4d04b3a802aeep-467 }, { 0x1.7d857ef6fe55ap-473, 0x1.e4f0604536408p-468 }, { 0x1.15a4dc243cc5fp-473, 0x1.610a0b4ec8401p-468 }, { 0x1.940cad97ee071p-474, 0x1.00fbde3ac71c6p-468 }, { 0x1.25f772e00c70ap-474, 0x1.7614bf61d6bfap-469 }, { 0x1.abb2fd3f529efp-475, 0x1.103beefa0765p-469 }, { 0x1.3718d87e8a0afp-475, 0x1.8c2ef94786008p-470 }, { 0x1.c48328a4346ebp-476, 0x1.203fa39242793p-470 }, { 0x1.4910b37b4de72p-476, 0x1.a36313f8e64ecp-471 }, { 0x1.de8817c6f33b9p-477, 0x1.310e5f6fbfd44p-471 }, { 0x1.5be6c950a7e6fp-477, 0x1.bbbb999bb060ap-472 }, { 0x1.f9ccdcf7c94fep-478, 0x1.42afa66f9fdc1p-472 }, { 0x1.6fa2fc442a9d3p-478, 0x1.d54340d9c375dp-473 }, { 0x1.0b2e58cb15f5cp-478, 0x1.552b1ae6aeaa2p-473 }, { 0x1.844d490056942p-479, 0x1.f004e9f45a94bp-474 }, { 0x1.1a217943b9ac7p-479, 0x1.68887b7750462p-474 }, { 0x1.99edc3fa555f4p-480, 0x1.0605cdc8a1e5ep-474 }, { 0x1.29c58e31af831p-480, 0x1.7ccfa0b55e3f7p-475 }, { 0x1.b08c96a2d341cp-481, 0x1.14b13fa04509fp-475 }, { 0x1.3a2063aa9bfc9p-481, 
0x1.92087a96ea8f4p-476 }, { 0x1.c831fc61280f7p-482, 0x1.240a6edc95f53p-476 }, { 0x1.4b37d15842e1dp-482, 0x1.a83b0db0fa5b6p-477 }, { 0x1.e0e63f582488bp-483, 0x1.34170d65d2fe5p-477 }, { 0x1.5d11b81c3fea7p-483, 0x1.bf6f703f6c8b1p-478 }, { 0x1.fab1b4f400c2ep-484, 0x1.44dcd884a52dcp-478 }, { 0x1.6fb3ff8ccf41cp-484, 0x1.d7adc6f76430fp-479 }, { 0x1.0ace5d20891a2p-484, 0x1.5661968fc8c68p-479 }, { 0x1.8324934a763f4p-485, 0x1.f0fe41a3b588bp-480 }, { 0x1.18d7d8058e531p-485, 0x1.68ab147365bffp-480 }, { 0x1.9769602e7d2c4p-486, 0x1.05b48bc57ed71p-480 }, { 0x1.27797b62a04a4p-486, 0x1.7bbf2311e9661p-481 }, { 0x1.ac8851524d431p-487, 0x1.137b41cf9c9a4p-481 }, { 0x1.36b7751d5da7fp-487, 0x1.8fa3947e525d9p-482 }, { 0x1.c2874cefea298p-488, 0x1.21d7603b6e2ccp-482 }, { 0x1.4695ee8470b66p-488, 0x1.a45e3910021acp-483 }, { 0x1.d96c311be3eb3p-489, 0x1.30cd0207d04edp-483 }, { 0x1.571909f179506p-489, 0x1.b9f4dc504a668p-484 }, { 0x1.f13cd05945d89p-490, 0x1.40603dadb780ap-484 }, { 0x1.6844e0504f766p-490, 0x1.d06d41c212c13p-485 }, { 0x1.04ff770417c7ep-490, 0x1.509522cc01f2fp-485 }, { 0x1.7a1d7e8c27e5p-491, 0x1.e7cd2184183ebp-486 }, { 0x1.11dc1d57f7df8p-491, 0x1.616fb7b910c11p-486 }, { 0x1.8ca6e2e342651p-492, 0x1.000d1267395e3p-486 }, { 0x1.1f372812d1e14p-492, 0x1.72f3f6faafe57p-487 }, { 0x1.9fe4fa21e8c98p-493, 0x1.0cacf12619fe1p-487 }, { 0x1.2d1356c845fd1p-493, 0x1.8525cca4f244dp-488 }, { 0x1.b3db9cc5a58f3p-494, 0x1.19c8ed29100e2p-488 }, { 0x1.3b7359a6b9391p-494, 0x1.980913a0c5f1ep-489 }, { 0x1.c88e8c09b9bb2p-495, 0x1.2763b979d57b5p-489 }, { 0x1.4a59cf5958098p-495, 0x1.aba192db244fdp-490 }, { 0x1.de016eddfacadp-496, 0x1.357ff9fbc97f4p-490 }, { 0x1.59c942db45eaep-496, 0x1.bff2fa5de1e9dp-491 }, { 0x1.f437cec9632b8p-497, 0x1.44204156d00fcp-491 }, { 0x1.69c4293cefa3fp-497, 0x1.d500e0534289dp-492 }, { 0x1.059a8a5ce0ce7p-497, 0x1.53470ed39dd97p-492 }, { 0x1.7a4cdf5c8de47p-498, 0x1.eacebdf5973c2p-493 }, { 0x1.117e42e10afc5p-498, 0x1.62f6cc2a62dbdp-493 }, { 0x1.8b65a792fe14p-499, 0x1.00aff63626acfp-493 }, { 0x1.1dc89fe4a5f8ap-499, 0x1.7331cb44dd6ecp-494 }, { 0x1.9d10a7562f377p-500, 0x1.0c5bd0cbfba3p-494 }, { 0x1.2a7b1b1593291p-500, 0x1.83fa43f4f73d5p-495 }, { 0x1.af4fe4d278bf9p-501, 0x1.186c76677c8f7p-495 }, { 0x1.37971726a776ep-501, 0x1.955251a12574cp-496 }, { 0x1.c225447c48b85p-502, 0x1.24e359c6528bbp-496 }, { 0x1.451dde15504ecp-502, 0x1.a73bf0e7dcf7bp-497 }, { 0x1.d592869bae136p-503, 0x1.31c1d70a5a26cp-497 }, { 0x1.53109f6b70a02p-503, 0x1.b9b8fd3b82acep-498 }, { 0x1.e99944d35a898p-504, 0x1.3f09320694d4p-498 }, { 0x1.61706e7ea0b42p-504, 0x1.cccb2e7856e93p-499 }, { 0x1.fe3aefa4cdaa2p-505, 0x1.4cba948866255p-499 }, { 0x1.703e40ae0b133p-505, 0x1.e0741675f15a5p-500 }, { 0x1.09bc65f9b8064p-505, 0x1.5ad70c9e433d4p-500 }, { 0x1.7f7aeba02f7efp-506, 0x1.f4b51e95f89d5p-501 }, { 0x1.14a9f8443d058p-506, 0x1.695f8add0a062p-501 }, { 0x1.8f272381e3222p-507, 0x1.04c7c2a8ead79p-501 }, { 0x1.1fe6a1ccca721p-507, 0x1.7854e0a5444cfp-502 }, { 0x1.9f437947f2743p-508, 0x1.0f822de49bc54p-502 }, { 0x1.2b72bc2a1bb29p-508, 0x1.87b7be69a8c26p-503 }, { 0x1.afd058f4d5cb9p-509, 0x1.1a8a41a9a734p-503 }, { 0x1.374e8637e822fp-509, 0x1.9788b1f83908ep-504 }, { 0x1.c0ce07e3f5247p-510, 0x1.25e0558a5c077p-504 }, { 0x1.437a22e46ffc9p-510, 0x1.a7c824c7683f1p-505 }, { 0x1.d23ca31c0220cp-511, 0x1.3184a6ce13b46p-505 }, { 0x1.4ff5980398e02p-511, 0x1.b8765a48c0cf1p-506 }, { 0x1.e41c1da9f8a5fp-512, 0x1.3d775743f06aep-506 }, { 0x1.5cc0cd28b81e5p-512, 0x1.c9936e428a9d9p-507 }, { 0x1.f66c3f065ea05p-513, 0x1.49b86c1b194cep-507 }, { 0x1.69db8a882e29p-513, 0x1.db1f5331fbe71p-508 
}, { 0x1.049650c331274p-513, 0x1.5647ccc18e717p-508 }, { 0x1.774577e1faf4fp-514, 0x1.ed19d0b78718cp-509 }, { 0x1.0e2e586d3df5cp-514, 0x1.632541cab3acp-509 }, { 0x1.84fe1b767669bp-515, 0x1.ff82820edeaabp-510 }, { 0x1.17fdd44e1dc6cp-515, 0x1.705073deb552ap-510 }, { 0x1.9304d9065a4b9p-516, 0x1.092c6a4a26abfp-510 }, { 0x1.220449767742ap-516, 0x1.7dc8eab3ed87ap-511 }, { 0x1.a158f0df4c356p-517, 0x1.12ce032c827cep-511 }, { 0x1.2c4123936432bp-517, 0x1.8b8e0c1372c25p-512 }, { 0x1.aff97ef6163edp-518, 0x1.1ca5926404568p-512 }, { 0x1.36b3b4511d82bp-518, 0x1.999f1ae9f978bp-513 }, { 0x1.bee57a0fbbbdcp-519, 0x1.26b285aeabdbep-513 }, { 0x1.415b32c89327cp-519, 0x1.a7fb366632c72p-514 }, { 0x1.ce1bb2fa9523ep-520, 0x1.30f431387ee69p-514 }, { 0x1.4c36baf8c2285p-520, 0x1.b6a15925d0c25p-515 }, { 0x1.dd9ad3d89a4a5p-521, 0x1.3b69cf0bd5608p-515 }, { 0x1.57454d4c97f21p-521, 0x1.c590587256b75p-516 }, { 0x1.ed615f7bfd7d2p-522, 0x1.46127e8d37ba7p-516 }, { 0x1.6285ce2e2e29bp-522, 0x1.d4c6e38ed7f06p-517 }, { 0x1.fd6db0d73348ep-523, 0x1.50ed44039bd53p-517 }, { 0x1.6df705a8252f7p-523, 0x1.e4438317c2a1ep-518 }, { 0x1.06defd40bdb09p-523, 0x1.5bf9082dc8412p-518 }, { 0x1.79979f15ddb0dp-524, 0x1.f4049875ce63p-519 }, { 0x1.0f2823287afb6p-524, 0x1.673497e5a0d03p-519 }, { 0x1.856628e34ac2cp-525, 0x1.02042eb28efefp-519 }, { 0x1.17913a85a33a7p-525, 0x1.729ea3d219a53p-520 }, { 0x1.9161145d0e326p-526, 0x1.0a2671c8cdbeep-520 }, { 0x1.20191f16dc709p-526, 0x1.7e35c0288722ep-521 }, { 0x1.9d86b59187f4ep-527, 0x1.12680a24c58f5p-521 }, { 0x1.28be97e6e9065p-527, 0x1.89f8647df9662p-522 }, { 0x1.a9d5434377e7bp-528, 0x1.1ac7d823a316cp-522 }, { 0x1.31805749922c3p-528, 0x1.95e4eba9494cap-523 }, { 0x1.b64ad6eec66d3p-529, 0x1.2344a7c981006p-523 }, { 0x1.3a5cfae5998ecp-529, 0x1.a1f993b67371dp-524 }, { 0x1.c2e56cdffce02p-530, 0x1.2bdd30bebc795p-524 }, { 0x1.43530bcc0ee3ap-530, 0x1.ae347debd307p-525 }, { 0x1.cfa2e45eea63dp-531, 0x1.3490165a1de5p-525 }, { 0x1.4c60fe9d5cbc1p-531, 0x1.ba93aee1c301fp-526 }, { 0x1.dc80ffece4451p-532, 0x1.3d5be7b8309a9p-526 }, { 0x1.558533bc564e3p-532, 0x1.c7150ead1fd0ep-527 }, { 0x1.e97d659702f92p-533, 0x1.463f1fe01b7dap-527 }, { 0x1.5ebdf78f85a03p-533, 0x1.d3b6691d169e3p-528 }, { 0x1.f6959f5cadd73p-534, 0x1.4f3825f642bp-528 }, { 0x1.680982d0eea8ap-534, 0x1.e0756e0ca137bp-529 }, { 0x1.01e38dd55bfc7p-534, 0x1.58454d7cf072p-529 }, { 0x1.7165faec70a1p-535, 0x1.ed4fb1c7fef16p-530 }, { 0x1.088796f5a026p-535, 0x1.6164d6a338985p-530 }, { 0x1.7ad1726ce2f3cp-536, 0x1.fa42ad866b6p-531 }, { 0x1.0f3587953aeb5p-536, 0x1.6a94eea23ecd2p-531 }, { 0x1.8449e977fef01p-537, 0x1.03a5dffc21d0dp-531 }, { 0x1.15ebef6827c9dp-537, 0x1.73d3b028fc2cfp-532 }, { 0x1.8dcd4e591ac76p-538, 0x1.0a3416f4dd0f1p-532 }, { 0x1.1ca951b79a938p-538, 0x1.7d1f23d694b62p-533 }, { 0x1.97597e1aad586p-539, 0x1.10ca917d13a59p-533 }, { 0x1.236c25d3c18a2p-539, 0x1.867540c340902p-534 }, { 0x1.a0ec452e85047p-540, 0x1.1767d933fa0f7p-534 }, { 0x1.2a32d78fe110fp-540, 0x1.8fd3ed17c059fp-535 }, { 0x1.aa8360248e3edp-541, 0x1.1e0a6bf884441p-535 }, { 0x1.30fbc7c8ab284p-541, 0x1.9938feb3469d1p-536 }, { 0x1.b41c7c6ff8cc6p-542, 0x1.24b0bc63cac6bp-536 }, { 0x1.37c54cf4ab1fcp-542, 0x1.a2a23bdfb3241p-537 }, { 0x1.bdb5393a7ccd2p-543, 0x1.2b59324d7fd9bp-537 }, { 0x1.3e8db3be9418cp-543, 0x1.ac0d5c13ef72ap-538 }, { 0x1.c74b284572b4cp-544, 0x1.32022b5a4d882p-538 }, { 0x1.45533fa93710cp-544, 0x1.b57808c42df0bp-539 }, { 0x1.d0dbced86364cp-545, 0x1.38a9fb93eb86p-539 }, { 0x1.4c142bbcdb51bp-545, 0x1.bedfde3fbf9f1p-540 }, { 0x1.da64a6bca7adp-546, 0x1.3f4eee0ab230dp-540 }, { 0x1.52ceab3daa53bp-546, 
0x1.c8426c9c266d4p-541 }, { 0x1.e3e31f45a0a96p-547, 0x1.45ef458066425p-541 }, { 0x1.5980ea6ad6692p-547, 0x1.d19d38acfc932p-542 }, { 0x1.ed549e6504cf2p-548, 0x1.4c893d1bef1fep-542 }, { 0x1.60290f4619f98p-548, 0x1.daedbd083bb8ep-543 }, { 0x1.f6b681cab013bp-549, 0x1.531b0925a021ep-543 }, { 0x1.66c53a6323b06p-549, 0x1.e4316b16614afp-544 }, { 0x1.00031007ac3e3p-549, 0x1.59a2d7cbb3c39p-544 }, { 0x1.6d5387be7adf6p-550, 0x1.ed65ac2de0264p-545 }, { 0x1.04a064f4bdd38p-550, 0x1.601ed1ee8e719p-545 }, { 0x1.73d20f9b5e73bp-551, 0x1.f687e2b942e41p-546 }, { 0x1.0931e5b5e6c43p-551, 0x1.668d1bf455ad8p-546 }, { 0x1.7a3ee7681856fp-552, 0x1.ff956b675583bp-547 }, { 0x1.0db636a632668p-552, 0x1.6cebd6a35f863p-547 }, { 0x1.809822a836e1fp-553, 0x1.0445cf3250898p-547 }, { 0x1.122bfb19eafe7p-553, 0x1.73392002f5fc2p-548 }, { 0x1.86dbd3e416493p-554, 0x1.08b3e84ebc2b9p-548 }, { 0x1.1691d609b1ec9p-554, 0x1.79731441e1e21p-549 }, { 0x1.8d080d9d1c96dp-555, 0x1.0d13aa83e4b01p-549 }, { 0x1.1ae66ac0b0b6ap-555, 0x1.7f97cea22928bp-550 }, { 0x1.931ae34603f62p-556, 0x1.1163bef9eebc1p-550 }, { 0x1.1f285d8d6c817p-556, 0x1.85a56a6965552p-551 }, { 0x1.99126a3e88ca5p-557, 0x1.15a2cf3193875p-551 }, { 0x1.23565474c154ep-557, 0x1.8b9a03d510324p-552 }, { 0x1.9eecbad1cb519p-558, 0x1.19cf85b21a11fp-552 }, { 0x1.276ef7e686addp-558, 0x1.9173b9121e9f7p-553 }, { 0x1.a4a7f136af77ep-559, 0x1.1de88eb969b39p-553 }, { 0x1.2b70f3735b79fp-559, 0x1.9730ab373bc61p-554 }, { 0x1.aa422e918100dp-560, 0x1.21ec98edb9593p-554 }, { 0x1.2f5af68314ac2p-560, 0x1.9cceff40f1fb1p-555 }, { 0x1.afb999f61e5d4p-561, 0x1.25da56105b758p-555 }, { 0x1.332bb50b471fbp-561, 0x1.a24cdf0f0a2e7p-556 }, { 0x1.b50c6169e961bp-562, 0x1.29b07bb123c75p-556 }, { 0x1.36e1e845638bbp-562, 0x1.a7a87a6267113p-557 }, { 0x1.ba38bae4baa67p-563, 0x1.2d6dc3e1e1b47p-557 }, { 0x1.3a7c4f63d9d53p-563, 0x1.ace007da9e0c8p-558 }, { 0x1.bf3ce55012ad1p-564, 0x1.3110ede9680cep-558 }, { 0x1.3df9b045b81fcp-564, 0x1.b1f1c5f28dcc9p-559 }, { 0x1.c4172983c2f7ep-565, 0x1.3498bef599a58p-559 }, { 0x1.4158d828399aep-565, 0x1.b6dbfbfb30836p-560 }, { 0x1.c8c5db3f49157p-566, 0x1.380402cbf1542p-560 }, { 0x1.44989c55b9312p-566, 0x1.bb9cfb13e7262p-561 }, { 0x1.cd475a1f163eep-567, 0x1.3b518c77fb7d2p-561 }, { 0x1.47b7dad17cf31p-567, 0x1.c0331f1f7ac71p-562 }, { 0x1.d19a128cff8a4p-568, 0x1.3e8036f737914p-562 }, { 0x1.4ab57affd05a9p-568, 0x1.c49ccfb511d2cp-563 }, { 0x1.d5bc7eab14dfbp-569, 0x1.418ee5e1d890ep-563 }, { 0x1.4d906e49e5535p-569, 0x1.c8d8810c585d4p-564 }, { 0x1.d9ad27381fd3dp-570, 0x1.447c860fdcf2cp-564 }, { 0x1.5047b0bcf6527p-570, 0x1.cce4b4e41cdcap-565 }, { 0x1.dd6aa46d0f45cp-571, 0x1.47480e39f8181p-565 }, { 0x1.52da49a426b16p-571, 0x1.d0bffb62a59f5p-566 }, { 0x1.e0f39ed2991f9p-572, 0x1.49f07f95c9d66p-566 }, { 0x1.55474c1ca1f2bp-572, 0x1.d468f3ef07049p-567 }, { 0x1.e446d00e60d84p-573, 0x1.4c74e66ce3841p-567 }, { 0x1.578dd7a37e92bp-573, 0x1.d7de4e02c6f6fp-568 }, { 0x1.e76303a6f7572p-574, 0x1.4ed45aae1d60cp-568 }, { 0x1.59ad189ced845p-574, 0x1.db1ec9f31f5e1p-569 }, { 0x1.ea4717be0f8c8p-575, 0x1.510e0078c325ep-569 }, { 0x1.5ba448d444792p-575, 0x1.de2939b1372f7p-570 }, { 0x1.ecf1fdc04a7dbp-576, 0x1.532108a122ff3p-570 }, { 0x1.5d72aff4768dap-576, 0x1.e0fc8180b06b8p-571 }, { 0x1.ef62bb0a0594ap-577, 0x1.550cb12e0f1dbp-571 }, { 0x1.5f17a3f894e1dp-577, 0x1.e39798a3f0a89p-572 }, { 0x1.f19869809eb8ap-578, 0x1.56d045cee7811p-572 }, { 0x1.60928993f7077p-578, 0x1.e5f989fd91cadp-573 }, { 0x1.f392381fab056p-579, 0x1.586b2049c7737p-573 }, { 0x1.61e2d491b1f68p-579, 0x1.e82174a67122fp-574 }, { 0x1.f54f6b79a6d5fp-580, 
0x1.59dca8e17880fp-574 }, { 0x1.6308082b0b65cp-580, 0x1.ea0e8c77dc629p-575 }, { 0x1.f6cf5e2bb03dcp-581, 0x1.5b2456b2d3672p-575 }, { 0x1.6401b7549eebbp-581, 0x1.ebc01a8965943p-576 }, { 0x1.f8118143e7ebp-582, 0x1.5c41b0093e8e9p-576 }, { 0x1.64cf8501f223bp-582, 0x1.ed357da1f18bap-577 }, { 0x1.f9155c9a1fbd1p-583, 0x1.5d344aaa010f1p-577 }, { 0x1.6571245f3d39ap-583, 0x1.ee6e2a9b9efdp-578 }, { 0x1.f9da8f1a8a0ccp-584, 0x1.5dfbcc1628fd2p-578 }, { 0x1.65e6590135ap-584, 0x1.ef69acba2f951p-579 }, { 0x1.fa60cf0228aadp-585, 0x1.5e97e9c2cbc7fp-579 }, { 0x1.662ef70ab154bp-585, 0x1.f027a5f3a7f56p-580 }, { 0x1.faa7ea0cc6ecbp-586, 0x1.5f0869476fb64p-580 }, { 0x1.664ae34801e0ep-586, 0x1.f0a7cf2ae7563p-581 }, { 0x1.faafc59456a8cp-587, 0x1.5f4d2082760f5p-581 }, { 0x1.663a133fef35p-587, 0x1.f0e9f85c03b41p-582 }, { 0x1.fa785ea194bf2p-588, 0x1.5f65f5b366281p-582 }, { 0x1.65fc8d3a43882p-588, 0x1.f0ee08ba43cd5p-583 }, { 0x1.fa01c9ede6a16p-589, 0x1.5f52df8b025d3p-583 }, { 0x1.6592683be2829p-589, 0x1.f0b3febf9cbcdp-584 }, { 0x1.f94c33d66f35bp-590, 0x1.5f13e53118eaap-584 }, { 0x1.64fbcbf86f1abp-590, 0x1.f03bf02da5a7ap-585 }, { 0x1.f857e040665ap-591, 0x1.5ea91e400b8afp-585 }, { 0x1.6438f0b98cabp-591, 0x1.ef860a0000a7ap-586 }, { 0x1.f7252a6ecb2bbp-592, 0x1.5e12b2b611c72p-586 }, { 0x1.634a1f3bd0d7ep-592, 0x1.ee92905044d53p-587 }, { 0x1.f5b484c995f72p-593, 0x1.5d50dadc42d9dp-587 }, { 0x1.622fb08184d56p-593, 0x1.ed61de2b81fc4p-588 }, { 0x1.f40678969b4f4p-594, 0x1.5c63df237cf4dp-588 }, { 0x1.60ea0d9b5d711p-594, 0x1.ebf4655983167p-589 }, { 0x1.f21ba5a45e2afp-595, 0x1.5b4c17f7488b1p-589 }, { 0x1.5f79af6759efdp-595, 0x1.ea4aae160108ap-590 }, { 0x1.eff4c1e71b057p-596, 0x1.5a09ed86def16p-590 }, { 0x1.5ddf1e460242cp-596, 0x1.e86556bc034fep-591 }, { 0x1.ed92990861c73p-597, 0x1.589dd784842fp-591 }, { 0x1.5c1af1c6454bep-597, 0x1.e6451363b8311p-592 }, { 0x1.eaf60be99fa59p-598, 0x1.57085cdb6c23ep-592 }, { 0x1.5a2dd0483fd76p-598, 0x1.e3eaad7319948p-593 }, { 0x1.e820101a05296p-599, 0x1.554a135c6b3d2p-593 }, { 0x1.58186e973c8cbp-599, 0x1.e1570321beee3p-594 }, { 0x1.e511af403f0e1p-600, 0x1.53639f61bab8bp-594 }, { 0x1.55db8f7b445c6p-600, 0x1.de8b06f0475d8p-595 }, { 0x1.e1cc067882b19p-601, 0x1.5155b36a1ff17p-595 }, { 0x1.537803429dd3dp-601, 0x1.db87bf13d1856p-596 }, { 0x1.de5045a77840fp-602, 0x1.4f210fabcd4fep-596 }, { 0x1.50eea743a03bp-602, 0x1.d84e44d6006fdp-597 }, { 0x1.da9faec295ac1p-603, 0x1.4cc6819f5a3a9p-597 }, { 0x1.4e406557456e3p-603, 0x1.d4dfc3ea1615fp-598 }, { 0x1.d6bb950e85a76p-604, 0x1.4a46e38335bf7p-598 }, { 0x1.4b6e334ceafc3p-604, 0x1.d13d79b7b4d75p-599 }, { 0x1.d2a55c543d97bp-605, 0x1.47a31bd7fd98ap-599 }, { 0x1.48791257b832ep-605, 0x1.cd68b49be13bdp-600 }, { 0x1.ce5e780d6c294p-606, 0x1.44dc1cd628aecp-600 }, { 0x1.45620e7623619p-606, 0x1.c962d320e4c77p-601 }, { 0x1.c9e86a88f07ffp-607, 0x1.41f2e3dd79383p-601 }, { 0x1.422a3dd414b5ep-607, 0x1.c52d432db963cp-602 }, { 0x1.c544c4080f626p-608, 0x1.3ee878deaf1c1p-602 }, { 0x1.3ed2c02828af5p-608, 0x1.c0c9812daaed1p-603 }, { 0x1.c07521d52071ep-609, 0x1.3bbdedbff743p-603 }, { 0x1.3b5cbe0c97302p-609, 0x1.bc391730e1bf4p-604 }, { 0x1.bb7b2d547171ap-610, 0x1.38745dbc97fd1p-604 }, { 0x1.37c9685446b6bp-610, 0x1.b77d9c068db21p-605 }, { 0x1.b6589b1020c3ep-611, 0x1.350cecc05d9cfp-605 }, { 0x1.3419f75c953bcp-611, 0x1.b298b2516cc35p-606 }, { 0x1.b10f29bfb2a68p-612, 0x1.3188c6bf4cd49p-606 }, { 0x1.304faa5c619afp-612, 0x1.ad8c07976bbcp-607 }, { 0x1.aba0a14c264ccp-613, 0x1.2de91f0a22435p-607 }, { 0x1.2c6bc6b0e1424p-613, 0x1.a859534d21642p-608 }, { 0x1.a60ed1d150c44p-614, 0x1.2a2f2fa027fc3p-608 }, { 
0x1.286f9728ce321p-614, 0x1.a30255dde65bep-609 }, { 0x1.a05b929d439abp-615, 0x1.265c387eea954p-609 }, { 0x1.245c6b4e79163p-615, 0x1.9d88d7b14c6d3p-610 }, { 0x1.9a88c12e847c2p-616, 0x1.22717ef05792fp-610 }, { 0x1.203396b14a77p-616, 0x1.97eea82eb8229p-611 }, { 0x1.94984031d9858p-617, 0x1.1e704cd7ceb7cp-611 }, { 0x1.1bf6702f3caf4p-617, 0x1.92359cbfdea74p-612 }, { 0x1.8e8bf6806bcabp-618, 0x1.1a59effeaeef1p-612 }, { 0x1.17a6513ed67fap-618, 0x1.8c5f8fd2e86f6p-613 }, { 0x1.8865ce1efe9b6p-619, 0x1.162fb960e6361p-613 }, { 0x1.1344953a2bc16p-619, 0x1.866e5fdcf6e5cp-614 }, { 0x1.8227b33ef66f4p-620, 0x1.11f2fc7a0a0a9p-614 }, { 0x1.0ed298ab66e97p-620, 0x1.8063ee5dc8676p-615 }, { 0x1.7bd39341e60d2p-621, 0x1.0da50e937b941p-615 }, { 0x1.0a51b89b5ac38p-621, 0x1.7a421ee53231bp-616 }, { 0x1.756b5bc0538cfp-622, 0x1.0947461417eb2p-616 }, { 0x1.05c351e298147p-622, 0x1.740ad61b23997p-617 }, { 0x1.6ef0f9946142ep-623, 0x1.04daf9d1f19dp-617 }, { 0x1.0128c07d7eac9p-623, 0x1.6dbff8cae0f32p-618 }, { 0x1.686657e900799p-624, 0x1.006180668cd93p-618 }, { 0x1.f906bdc779cfcp-625, 0x1.67636af21f0cbp-619 }, { 0x1.61cd5f4e4d33cp-625, 0x1.f7b85f0c272bbp-620 }, { 0x1.efa90ac757637p-626, 0x1.60f70ed4a200ep-620 }, { 0x1.5b27f4d3aafafp-626, 0x1.ee98b6b3e4f34p-621 }, { 0x1.e63b1303dfbfbp-627, 0x1.5a7cc414fb8aap-621 }, { 0x1.5477f92833195p-627, 0x1.e566abbe94f87p-622 }, { 0x1.dcbf7abb88524p-628, 0x1.53f666d2fde17p-622 }, { 0x1.4dbf47c1fc8ap-628, 0x1.dc24dc933bf6dp-623 }, { 0x1.d338de3492428p-629, 0x1.4d65ced070949p-623 }, { 0x1.46ffb60cbd76p-629, 0x1.d2d5e0d43505p-624 }, { 0x1.c9a9d09a6515fp-630, 0x1.46ccce9c8cdf5p-624 }, { 0x1.403b12a03d499p-630, 0x1.c97c4837b573ep-625 }, { 0x1.c014dae645fc3p-631, 0x1.402d32c6be96dp-625 }, { 0x1.3973247f05596p-631, 0x1.c01a996aebdb3p-626 }, { 0x1.b67c7ad400b86p-632, 0x1.3988c1191e211p-626 }, { 0x1.32a9aa5db4bb3p-632, 0x1.b6b3510058b7ap-627 }, { 0x1.ace321e309c7bp-633, 0x1.32e137db0ef23p-627 }, { 0x1.2be059f3526f7p-633, 0x1.ad48e069f2207p-628 }, { 0x1.a34b346493cc3p-634, 0x1.2c384d1c64d5bp-628 }, { 0x1.2518df52ef492p-634, 0x1.a3ddacff96f65p-629 }, { 0x1.99b70897047dcp-635, 0x1.258fae0968e74p-629 }, { 0x1.1e54dc4edf3a3p-635, 0x1.9a740f1248851p-630 }, { 0x1.9028e5cf277c7p-636, 0x1.1ee8fe480d92cp-630 }, { 0x1.1795e7e5c7ccap-636, 0x1.910e510c93fe1p-631 }, { 0x1.86a303af6f699p-637, 0x1.1845d75e974c6p-631 }, { 0x1.10dd8db9b7b2p-637, 0x1.87aeaea087811p-632 }, { 0x1.7d27896d87b8ep-638, 0x1.11a7c823f5ff5p-632 }, { 0x1.0a2d4d917179ap-638, 0x1.7e57540380a9p-633 }, { 0x1.73b88d266bc5ap-639, 0x1.0b10543a01766p-633 }, { 0x1.03869ae409b27p-639, 0x1.750a5d3814d59p-634 }, { 0x1.6a58134129f18p-640, 0x1.0480f391c14fcp-634 }, { 0x1.f9d5b8ddde221p-641, 0x1.6bc9d56645be6p-635 }, { 0x1.61080de06bfbp-641, 0x1.fbf623f3bedbap-636 }, { 0x1.ecb6d7acd34f7p-642, 0x1.6297b642274f2p-636 }, { 0x1.57ca5c62d05ddp-642, 0x1.ef001d6eb49dfp-637 }, { 0x1.dfb32aa129cc6p-643, 0x1.5975e7810e7p-637 }, { 0x1.4ea0caf213789p-643, 0x1.e222785106b16p-638 }, { 0x1.d2cd2eb59de4cp-644, 0x1.50663e5d53392p-638 }, { 0x1.458d1220fa79dp-644, 0x1.d55fbee497ep-639 }, { 0x1.c60744f31e198p-645, 0x1.476a7d28a437bp-639 }, { 0x1.3c90d697e5b5dp-645, 0x1.c8ba606fb6833p-640 }, { 0x1.b963b20518321p-646, 0x1.3e8452ecdbe84p-640 }, { 0x1.33ada8cfe418fp-646, 0x1.bc34b0b8bbc6p-641 }, { 0x1.ace49de2283aep-647, 0x1.35b55b1b3d652p-641 }, { 0x1.2ae504dc15f24p-647, 0x1.afd0e79df00ebp-642 }, { 0x1.a08c1388db34fp-648, 0x1.2cff1d49f192cp-642 }, { 0x1.223852412258p-648, 0x1.a39120c175c51p-643 }, { 0x1.945c00d028182p-649, 0x1.24630cff92d39p-643 }, { 0x1.19a8e3da77fbep-649, 
0x1.97775b48ec1aap-644 }, { 0x1.8856364b336c5p-650, 0x1.1be2898c8a8a4p-644 }, { 0x1.1137f7cd08642p-650, 0x1.8b8579b06ca2cp-645 }, { 0x1.7c7c673fe436ep-651, 0x1.137eddf1f97aep-645 }, { 0x1.08e6b787233bap-651, 0x1.7fbd41b078795p-646 }, { 0x1.70d029afc4472p-652, 0x1.0b3940d5da6fcp-646 }, { 0x1.00b637cd0ec0bp-652, 0x1.74205c365c73ep-647 }, { 0x1.6552f6729a259p-653, 0x1.0312d48405757p-647 }, { 0x1.f14ef1a3e4ac2p-654, 0x1.68b0556e87723p-648 }, { 0x1.5a06296220023p-654, 0x1.f6194df7630e5p-649 }, { 0x1.e176ccb941b53p-655, 0x1.5d6e9ce0425a7p-649 }, { 0x1.4eeb0196310cdp-655, 0x1.e64f64121563ep-650 }, { 0x1.d1e5afef936dap-656, 0x1.525c859a2ea9ap-650 }, { 0x1.4402a1b0bd9dfp-656, 0x1.d6c9b6d4d6fc5p-651 }, { 0x1.c29d225a230e3p-657, 0x1.477b466ee6cc1p-651 }, { 0x1.394e1038ce88ep-657, 0x1.c789ea0183d02p-652 }, { 0x1.b39e83951bdaap-658, 0x1.3ccbfa4112a58p-652 }, { 0x1.2ece3803d8d68p-658, 0x1.b8917a154498bp-653 }, { 0x1.a4eb0c6436cf4p-659, 0x1.324fa05e3adc4p-653 }, { 0x1.2483e8ac9d061p-659, 0x1.a9e1bcd30af1fp-654 }, { 0x1.9683cf6400112p-660, 0x1.28071ce79e917p-654 }, { 0x1.1a6fd716c7c18p-660, 0x1.9b7be1e1550cbp-655 }, { 0x1.8869b9cc95345p-661, 0x1.1df33948493fap-655 }, { 0x1.10929dfe85b79p-661, 0x1.8d60f37a227b9p-656 }, { 0x1.7a9d9444b613ep-662, 0x1.1414a4b7a1729p-656 }, { 0x1.06ecbe9338febp-662, 0x1.7f91d72bfd333p-657 }, { 0x1.6d2003c3fdf54p-663, 0x1.0a6bf4c7a4f95p-657 }, { 0x1.fafd4238f8063p-664, 0x1.720f4eaaf4bbbp-658 }, { 0x1.5ff18a8317f0ap-664, 0x1.00f9a5fe04069p-658 }, { 0x1.e8912b5139031p-665, 0x1.64d9f8b065b73p-659 }, { 0x1.531288f8c01c7p-665, 0x1.ef7c38ee94e41p-660 }, { 0x1.d695a98770e4bp-666, 0x1.57f251e86550ep-660 }, { 0x1.46833ee262b1p-666, 0x1.dd73492689d2p-661 }, { 0x1.c50b006d4e015p-667, 0x1.4b58b5eba6cc7p-661 }, { 0x1.3a43cc572b3d3p-667, 0x1.cbd8e7539eac7p-662 }, { 0x1.b3f14799b1616p-668, 0x1.3f0d6044b145dp-662 }, { 0x1.2e5432e458097p-668, 0x1.baad518e7426ep-663 }, { 0x1.a3486c40b74f1p-669, 0x1.33106d7f3cac9p-663 }, { 0x1.22b456b1a8db7p-669, 0x1.a9f09adee91e3p-664 }, { 0x1.931032d667261p-670, 0x1.2761dc408f1efp-664 }, { 0x1.1763ffacc46acp-670, 0x1.99a2acce5bd7fp-665 }, { 0x1.834838ba6fe3dp-671, 0x1.1c018e67b6eaep-665 }, { 0x1.0c62daba74e7cp-671, 0x1.89c349043d67ep-666 }, { 0x1.73eff5eb5eca5p-672, 0x1.10ef4a3481a29p-666 }, { 0x1.01b07aeca1f42p-672, 0x1.7a520aeb63faep-667 }, { 0x1.6506bebfc67bdp-673, 0x1.062abb7415c63p-667 }, { 0x1.ee98b577ea7cap-674, 0x1.6b4e695e9099fp-668 }, { 0x1.568bc5a3d72eep-674, 0x1.f766e96435041p-669 }, { 0x1.da6bba883d22ap-675, 0x1.5cb7b85aa6067p-669 }, { 0x1.487e1cd9f3e43p-675, 0x1.e311e0dabf963p-670 }, { 0x1.c6d89f0368fc1p-676, 0x1.4e8d2ab5187d6p-670 }, { 0x1.3adcb83cdccc3p-676, 0x1.cf55249e0172ap-671 }, { 0x1.b3ddd3216f86ep-677, 0x1.40cdd3d52967cp-671 }, { 0x1.2da66f0214306p-677, 0x1.bc2f50c60488ep-672 }, { 0x1.a1799fd5925f4p-678, 0x1.3378a96e8e29ap-672 }, { 0x1.20d9fd7b31257p-678, 0x1.a99ed8a2f2e6bp-673 }, { 0x1.8faa294857a39p-679, 0x1.268c853c2e48dp-673 }, { 0x1.147606d4e1ee3p-679, 0x1.97a2092e9b19dp-674 }, { 0x1.7e6d714d6fce7p-680, 0x1.1a0826b9b2f1ep-674 }, { 0x1.087916d26f37cp-680, 0x1.86370b7b69b46p-675 }, { 0x1.6dc159d3dbce3p-681, 0x1.0dea34dab05c3p-675 }, { 0x1.f9c3470942341p-682, 0x1.755be71f29feap-676 }, { 0x1.5da3a74ec8bc7p-682, 0x1.02313fbe40a01p-676 }, { 0x1.e35c1df5edf07p-683, 0x1.650e8497f58cdp-677 }, { 0x1.4e120315adc06p-683, 0x1.edb784bbee452p-678 }, { 0x1.cdb951dc67cbfp-684, 0x1.554cafa9d0c34p-678 }, { 0x1.3f09fdba5037ep-684, 0x1.d7d0486e476ccp-679 }, { 0x1.b8d760c6a3faap-685, 0x1.461419b3892c2p-679 }, { 0x1.308911536a23dp-685, 
0x1.c2a975dad9bep-680 }, { 0x1.a4b2aa8c000cap-686, 0x1.37625bf981bdbp-680 }, { 0x1.228ca3bac6e07p-686, 0x1.ae3f97cbb25cep-681 }, { 0x1.914773f3bbbacp-687, 0x1.2934f9e530badp-681 }, { 0x1.151208bdc254ep-687, 0x1.9a8f1bb2e0d78p-682 }, { 0x1.7e91e9c37a26bp-688, 0x1.1b8963382a86p-682 }, { 0x1.0816843f2edd8p-688, 0x1.879454bd5bf1ap-683 }, { 0x1.6c8e23b87885fp-689, 0x1.0e5cf631ac83bp-683 }, { 0x1.f72e98937c4f8p-690, 0x1.754b7ed21d736p-684 }, { 0x1.5b38276a48eap-690, 0x1.01ad01a5b2ddp-684 }, { 0x1.df23162441e8bp-691, 0x1.63b0c17c2afp-685 }, { 0x1.4a8beb16012edp-691, 0x1.eaed8e09770edp-686 }, { 0x1.c804c1d0522ebp-692, 0x1.52c032be62aabp-686 }, { 0x1.3a855850eeeeap-692, 0x1.d36ef8a6e08fap-687 }, { 0x1.b1cdcc2ca0214p-693, 0x1.4275d9d00481dp-687 }, { 0x1.2b204ea20186ep-693, 0x1.bcd89c2310d59p-688 }, { 0x1.9c78595e362cep-694, 0x1.32cdb1c10f0eep-688 }, { 0x1.1c58a6013aaeep-694, 0x1.a724c21e93002p-689 }, { 0x1.87fe848fd6bffp-695, 0x1.23c3ac05a8c19p-689 }, { 0x1.0e2a313c94bb5p-695, 0x1.924da8624908p-690 }, { 0x1.745a6341bd9d3p-696, 0x1.1553b2e7eba16p-690 }, { 0x1.0090c041eb55fp-696, 0x1.7e4d844204d5fp-691 }, { 0x1.61860872f36c7p-697, 0x1.0779abdf88654p-691 }, { 0x1.e710449b20327p-698, 0x1.6b1e85d9cfdc3p-692 }, { 0x1.4f7b87a3ccd22p-698, 0x1.f462f39da55f5p-693 }, { 0x1.ce184ffaa0275p-699, 0x1.58badb2559681p-693 }, { 0x1.3e34f7b15484dp-699, 0x1.daedfe49c8a9fp-694 }, { 0x1.b6314a8f93441p-700, 0x1.471cb2f12adecp-694 }, { 0x1.2dac75898461p-700, 0x1.c28c3fc94131bp-695 }, { 0x1.9f52e6b0168fbp-701, 0x1.363e3fa56683p-695 }, { 0x1.1ddc26b854422p-701, 0x1.ab358720f461fp-696 }, { 0x1.8974e49b18481p-702, 0x1.2619b9e9f9276p-696 }, { 0x1.0ebe3bcdc6652p-702, 0x1.94e1adf5ef17ap-697 }, { 0x1.748f15c14a99p-703, 0x1.16a96324493c1p-697 }, { 0x1.004cf29d383afp-703, 0x1.7f889bf8109c7p-698 }, { 0x1.60995fd7916b4p-704, 0x1.07e787ce8decbp-698 }, { 0x1.e50530acb7a2bp-705, 0x1.6b224a16aa4ep-699 }, { 0x1.4d8bbfb38c98p-705, 0x1.f39d03522ee6ep-700 }, { 0x1.cab316f0b29dep-706, 0x1.57a6c57f8fed2p-700 }, { 0x1.3b5e4bf3051bbp-706, 0x1.d8b1738bdcb74p-701 }, { 0x1.b1987b3f62cd2p-707, 0x1.450e32693ba8dp-701 }, { 0x1.2a09376f26716p-707, 0x1.bf0154de94403p-702 }, { 0x1.99aa6a5f22416p-708, 0x1.3350cea8cd61ap-702 }, { 0x1.1984d37c8d151p-708, 0x1.a681c1d2f0b94p-703 }, { 0x1.82de1daeb9c47p-709, 0x1.2266f414ce57bp-703 }, { 0x1.09c991f950457p-709, 0x1.8f27fe21c9591p-704 }, { 0x1.6d28fdea9871ap-710, 0x1.12491ab5c17d9p-704 }, { 0x1.f5a00e548f085p-711, 0x1.78e979aa0c9bep-705 }, { 0x1.5880a5ae03598p-711, 0x1.02efdac5a4ff4p-705 }, { 0x1.d921d6d1c821bp-712, 0x1.63bbd32217718p-706 }, { 0x1.44dae3b23367bp-712, 0x1.e8a7dcff4677cp-707 }, { 0x1.be0a394617721p-713, 0x1.4f94da865b2a3p-707 }, { 0x1.322dbccd73cabp-713, 0x1.ccdc67829105bp-708 }, { 0x1.a44b3f5ce9c8bp-714, 0x1.3c6a934743c05p-708 }, { 0x1.206f6db46b93p-714, 0x1.b26f5afd4ebc9p-709 }, { 0x1.8bd742e227a38p-715, 0x1.2a3336386b4d7p-709 }, { 0x1.0f966c7fd2396p-715, 0x1.99530a15ce61ap-710 }, { 0x1.74a0efc06d36ep-716, 0x1.18e533433f227p-710 }, { 0x1.ff32d3f1c0a49p-717, 0x1.817a166d90dbdp-711 }, { 0x1.5e9b45aff1bep-717, 0x1.087732df4f3abp-711 }, { 0x1.e0dea55db81c4p-718, 0x1.6ad7728d6db01p-712 }, { 0x1.49b9999981d6cp-718, 0x1.f1c02ea5235f3p-713 }, { 0x1.c41e9fb058b1ep-719, 0x1.555e63841a093p-713 }, { 0x1.35ef96b0fe655p-719, 0x1.d42dfb77e321ep-714 }, { 0x1.a8e19002cb47fp-720, 0x1.4102823a6a0a2p-714 }, { 0x1.23313f4adb099p-720, 0x1.b8267dd51660dp-715 }, { 0x1.8f16bf19917acp-721, 0x1.2db7bc80b123ep-715 }, { 0x1.1172ed701cd4p-721, 0x1.9d98e007ff597p-716 }, { 0x1.76adf2095d808p-722, 0x1.1b7255d8af1cep-716 }, { 
0x1.00a953345bce4p-722, 0x1.8474c5f89cf1fp-717 }, { 0x1.5f976a86ba7a3p-723, 0x1.0a26e7ff7c8ap-717 }, { 0x1.e192f5a290a0dp-724, 0x1.6caa4dc34bcc6p-718 }, { 0x1.49c3e6e576cf8p-724, 0x1.f394c675d5da1p-719 }, { 0x1.c3918d16606afp-725, 0x1.562a0ffd36fefp-719 }, { 0x1.3524a1ccb90cep-725, 0x1.d4a41cdb95576p-720 }, { 0x1.a739e0c3f00b3p-726, 0x1.40e51faa74ee4p-720 }, { 0x1.21ab51a49a64p-726, 0x1.b7670ded07be7p-721 }, { 0x1.8c781323e2b8bp-727, 0x1.2ccd09eaa341p-721 }, { 0x1.0f4a27c210b83p-727, 0x1.9bc980b6cd88bp-722 }, { 0x1.7338f3cfd4b18p-728, 0x1.19d3d560c7458p-722 }, { 0x1.fbe79eabbab8bp-729, 0x1.81b807901b2ddp-723 }, { 0x1.5b69fdd784131p-729, 0x1.07ec015b26bbfp-723 }, { 0x1.db36d8463b3e1p-730, 0x1.691fdebe382bep-724 }, { 0x1.44f955c9776f6p-730, 0x1.ee11097f70374p-725 }, { 0x1.bc693203fe92cp-731, 0x1.51eeeac7320bep-725 }, { 0x1.2fd5c7756dd24p-731, 0x1.ce39998362bf9p-726 }, { 0x1.9f66cc65fb2cbp-732, 0x1.3c13b67a17ff2p-726 }, { 0x1.1beec36eb8502p-732, 0x1.b03976c943068p-727 }, { 0x1.8418af0dd65edp-733, 0x1.277d70b2ebc6fp-727 }, { 0x1.09345c546e7cdp-733, 0x1.93f94ba2c6b6ap-728 }, { 0x1.6a68c4bfd764bp-734, 0x1.141be9e049453p-728 }, { 0x1.ef2e87ca7b717p-735, 0x1.7962a50231832p-729 }, { 0x1.5241d71eb6e19p-735, 0x1.01df915097b64p-729 }, { 0x1.ce118fc8beeeap-736, 0x1.605fee84767fp-730 }, { 0x1.3b8f8a28fd848p-736, 0x1.e172e498cd2fcp-731 }, { 0x1.aef59daa19c93p-737, 0x1.48dc6e3757e71p-731 }, { 0x1.263e577f574dp-737, 0x1.c1366206ca036p-732 }, { 0x1.91bfa9231de5cp-738, 0x1.32c440230ef3ap-732 }, { 0x1.123b897af1af4p-738, 0x1.a2ee0ea25a216p-733 }, { 0x1.7655cd85a2773p-739, 0x1.1e04519eb8f87p-733 }, { 0x1.feea6c3554149p-740, 0x1.867f82bdccb8fp-734 }, { 0x1.5c9f427a491a4p-740, 0x1.0a8a5c7678dffp-734 }, { 0x1.dbb4739afff2ep-741, 0x1.6bd1744d1513ep-735 }, { 0x1.4484548d479a3p-741, 0x1.f089c3d3d8b6fp-736 }, { 0x1.bab46440d8e4bp-742, 0x1.52cbafb8bc99fp-736 }, { 0x1.2dee5d96e696ep-742, 0x1.ce464b1286c0dp-737 }, { 0x1.9bcaf0aad775cp-743, 0x1.3b571085ef9dbp-737 }, { 0x1.18c7bd07b007fp-743, 0x1.ae2a4fedee59cp-738 }, { 0x1.7eda37d26ae66p-744, 0x1.255d79dbe3905p-738 }, { 0x1.04fbd01fd3b9ap-744, 0x1.9017432798e26p-739 }, { 0x1.63c5ba199716fp-745, 0x1.10c9ceee61d28p-739 }, { 0x1.e4edd431a7a4p-746, 0x1.73effa34f57abp-740 }, { 0x1.4a724e2f6eadep-746, 0x1.fb0fd6a99ec28p-741 }, { 0x1.c24c9890314cdp-747, 0x1.5998a4600495bp-741 }, { 0x1.32c615eef6a3dp-747, 0x1.d70936a92f04ap-742 }, { 0x1.a1f03c81340fdp-748, 0x1.40f6bfdad1f14p-742 }, { 0x1.1ca87340e1c39p-748, 0x1.b55b284add8c1p-743 }, { 0x1.83b6cbf2ba29fp-749, 0x1.29f10ece9036ep-743 }, { 0x1.0801fd07f7284p-749, 0x1.95e2d86ae92c8p-744 }, { 0x1.677ffffc31b92p-750, 0x1.146f8c6e8dc57p-744 }, { 0x1.e978e83ebd95dp-751, 0x1.787f26e598ebbp-745 }, { 0x1.4d2d2f5dd4096p-751, 0x1.005b6216a17eap-745 }, { 0x1.c58570e2f641dp-752, 0x1.5d10973fbab06p-746 }, { 0x1.34a13f272cdfap-752, 0x1.db3db8f832a58p-747 }, { 0x1.a4017c5ace0dep-753, 0x1.4379416dfac63p-747 }, { 0x1.1dc0938cfb932p-753, 0x1.b84ac1ef46255p-748 }, { 0x1.84c7064147f81p-754, 0x1.2b9cc2c3d6738p-748 }, { 0x1.087100f5e6429p-754, 0x1.97b6c5dc3637ap-749 }, { 0x1.67b20873fc995p-755, 0x1.15602f1227af8p-749 }, { 0x1.e9337a8979dap-756, 0x1.795cb2bb480b6p-750 }, { 0x1.4ca0667456eb8p-756, 0x1.00aa01fc8a73ep-750 }, { 0x1.c446a2ccade1cp-757, 0x1.5d196927cdaccp-751 }, { 0x1.3371d92c55c69p-757, 0x1.dac421184af19p-752 }, { 0x1.a1ef1650d3562p-758, 0x1.42cba823b93cbp-752 }, { 0x1.1c07db1df4cf6p-758, 0x1.b6e2f60b615c1p-753 }, { 0x1.8202debc2593cp-759, 0x1.2a53f94211ba9p-753 }, { 0x1.064595037ce7bp-759, 0x1.95853e0fd75adp-754 }, { 0x1.645a58ac6913cp-760, 
0x1.13949d3b2fbd2p-754 }, { 0x1.e41f95cc492cep-761, 0x1.768213ee2ba9cp-755 }, { 0x1.48d0194e5b153p-761, 0x1.fce2f1e195a7ap-756 }, { 0x1.be99935f38c42p-762, 0x1.59b2d772c1b04p-756 }, { 0x1.2f40d4a5d287p-762, 0x1.d5a005ce1b15dp-757 }, { 0x1.9bc8aa74c3805p-763, 0x1.3ef3138f8ae58p-757 }, { 0x1.178b448b82b16p-763, 0x1.b12e626e3c8a1p-758 }, { 0x1.7b7f2dc7fa066p-764, 0x1.2620652c3102cp-758 }, { 0x1.0190106456396p-764, 0x1.8f5ecffd9c995p-759 }, { 0x1.5d92194746ef2p-765, 0x1.0f1a62a97a48ep-759 }, { 0x1.da636b2add63ap-766, 0x1.7004d0a0dd3fcp-760 }, { 0x1.41d8f14e2d235p-766, 0x1.f38508375a815p-761 }, { 0x1.b4a8e16df3a2ep-767, 0x1.52f67f4a45dbdp-761 }, { 0x1.282da2ee06e9fp-767, 0x1.cbf8187da97p-762 }, { 0x1.91bc4f0e82a1p-768, 0x1.380c6fa6ddd1bp-762 }, { 0x1.106c65473611bp-768, 0x1.a757e44dde4fbp-763 }, { 0x1.716ca73d3a1dcp-769, 0x1.1f218f165083cp-763 }, { 0x1.f4e737e667fe6p-770, 0x1.8571975a9ba0cp-764 }, { 0x1.538bdbc88035p-770, 0x1.081306aee058bp-764 }, { 0x1.cc4774fe05a13p-771, 0x1.661571375ee31p-765 }, { 0x1.37eeb586702afp-771, 0x1.e5803c9b677cp-766 }, { 0x1.a6be51e94d2c3p-772, 0x1.49169d29f057fp-766 }, { 0x1.1e6cae3cc5ce4p-772, 0x1.be144165bfdadp-767 }, { 0x1.841452e30c6ecp-773, 0x1.2e4b0b7596d86p-767 }, { 0x1.06dfcc0330324p-773, 0x1.99a8814f82396p-768 }, { 0x1.64157d8dbcaa1p-774, 0x1.158b4c1d7aa61p-768 }, { 0x1.e248fc3725278p-775, 0x1.7806fe5adc0dep-769 }, { 0x1.4691284199248p-775, 0x1.fd64d63539ac4p-770 }, { 0x1.ba32f675bcca1p-776, 0x1.58fd2560c98e3p-770 }, { 0x1.2b59cb5fcd07p-776, 0x1.d33b9c01b8858p-771 }, { 0x1.953f4278d9771p-777, 0x1.3c5b9e7be019ep-771 }, { 0x1.1244d4a198783p-777, 0x1.ac5a261b57bd2p-772 }, { 0x1.7333ac721d353p-778, 0x1.21f61f6e6a3a5p-772 }, { 0x1.f654f8b2c9938p-779, 0x1.8883e334bf813p-773 }, { 0x1.53d9d5f4e3889p-779, 0x1.09a33ffab8174p-773 }, { 0x1.cbcb3935e8707p-780, 0x1.678037d69a88ap-774 }, { 0x1.36fefd85e37f7p-780, 0x1.e678a0474dd4dp-775 }, { 0x1.a4a7147e53789p-781, 0x1.491a44a8cc267p-775 }, { 0x1.1c73c8c2f3143p-781, 0x1.bd3a60953bab8p-776 }, { 0x1.80a7df6e9e4abp-782, 0x1.2d20af56e98e4p-776 }, { 0x1.040c111171b21p-782, 0x1.9748563f2a02cp-777 }, { 0x1.5f9153468350dp-783, 0x1.13656dff66048p-777 }, { 0x1.db3d65827b6f1p-784, 0x1.7463a2ae57157p-778 }, { 0x1.412b4a3b0b6bbp-784, 0x1.f77b2a384d071p-779 }, { 0x1.b20abd232bd72p-785, 0x1.5451ae34b02aep-779 }, { 0x1.25417f5fe18aap-785, 0x1.cc024fa52d21ep-780 }, { 0x1.8c38db09c3d68p-786, 0x1.36dbe645ba702p-780 }, { 0x1.0ba351c6b2c44p-786, 0x1.a415d531b6e85p-781 }, { 0x1.69856de02317p-787, 0x1.1bcf7eeeba2f5p-781 }, { 0x1.e847157246bfcp-788, 0x1.7f70703ac5558p-782 }, { 0x1.49b2d16422141p-788, 0x1.02fd377359b1p-782 }, { 0x1.bd304de355d85p-789, 0x1.5dd1b0bb84b26p-783 }, { 0x1.2c87c2ff697dcp-789, 0x1.d87243e77ecadp-784 }, { 0x1.95b4456f24a66p-790, 0x1.3efdb3b369292p-784 }, { 0x1.11cf1a60f1d84p-790, 0x1.aeb4dc01a4631p-785 }, { 0x1.718a9184a8678p-791, 0x1.22bcd99dbdb06p-785 }, { 0x1.f2af0be1fde49p-792, 0x1.88766c06b0833p-786 }, { 0x1.507007917e3d9p-792, 0x1.08db80d427d79p-786 }, { 0x1.c5e695f15072bp-793, 0x1.65709eb54bf5ep-787 }, { 0x1.32266540e08c2p-793, 0x1.e253876b38acep-788 }, { 0x1.9cf012acb820bp-794, 0x1.45623a2f6a451p-788 }, { 0x1.1673fda512b46p-794, 0x1.b6f674d703273p-789 }, { 0x1.777d05328bd26p-795, 0x1.280eca736b4b1p-789 }, { 0x1.fa46d62b8e57dp-796, 0x1.8f4d804e3ad6fp-790 }, { 0x1.5544c8bc23e1cp-796, 0x1.0d3e50a2eecdcp-790 }, { 0x1.cc068b1dc8ab2p-797, 0x1.6b0c7763ce52bp-791 }, { 0x1.36042b906571p-797, 0x1.e979edc5b3767p-792 }, { 0x1.a1cbbab815b4cp-798, 0x1.49ecd657d5dd6p-792 }, { 0x1.197d0fe71564cp-798, 0x1.bcb59141dc715p-793 }, { 
0x1.7b41f3bcb1869p-799, 0x1.2bad65a82bb23p-793 }, { 0x1.feec24eca8006p-800, 0x1.93d6de18ac6bfp-794 }, { 0x1.581b387627669p-800, 0x1.1011dd6dfecf6p-794 }, { 0x1.cf746ccaba032p-801, 0x1.6e8be31f2fe24p-795 }, { 0x1.380f8b864e1acp-801, 0x1.edc51c8649aaap-796 }, { 0x1.a4312cc2f816ap-802, 0x1.4c88f43732a1p-796 }, { 0x1.1adc83c96accfp-802, 0x1.bfd81ed74f1cdp-797 }, { 0x1.7cc835281bbf3p-803, 0x1.2d883a292df3bp-797 }, { 0x1.0044e6f2b903fp-803, 0x1.95fde403b5724p-798 }, { 0x1.58e66674c0f82p-804, 0x1.11494966870b7p-798 }, { 0x1.d0209514d613dp-805, 0x1.6fdef1ca550b3p-799 }, { 0x1.383f2f4495aedp-805, 0x1.ef217eb67d36dp-800 }, { 0x1.a41575f0363d6p-806, 0x1.4d2aaa5b8e28ap-800 }, { 0x1.1a8c12a0cae91p-806, 0x1.c04fcbf1fddd8p-801 }, { 0x1.7c08d08f2ccbbp-807, 0x1.2d96cdd2a30b8p-801 }, { 0x1.ff186c5b90604p-808, 0x1.95b8ba50a2687p-802 }, { 0x1.57a2b0b1c4c86p-808, 0x1.10df03cd711e3p-802 }, { 0x1.ce07ef98af2aep-809, 0x1.6eff939f51c8fp-803 }, { 0x1.36923c5eb270bp-809, 0x1.ed88d96607fb4p-804 }, { 0x1.a1791489717bfp-810, 0x1.4bcf1445c1d61p-804 }, { 0x1.188d2c2d680a3p-810, 0x1.be1a747b458c8p-805 }, { 0x1.7907312c7e255p-811, 0x1.2bd8dde16ba8ap-805 }, { 0x1.fa9e995f4c414p-812, 0x1.93089dc23e417p-806 }, { 0x1.5455df149c7b5p-812, 0x1.0ed4f34d6e965p-806 }, { 0x1.c93410e8142f8p-813, 0x1.6bf1c754a3325p-807 }, { 0x1.33105a5b594f7p-813, 0x1.e9027b1c5a4abp-808 }, { 0x1.9c67f441e11b3p-814, 0x1.487c687197597p-808 }, { 0x1.14e8ebae7496ep-814, 0x1.b942323a72767p-809 }, { 0x1.73d10c597b774p-815, 0x1.285660efb3e9ap-809 }, { 0x1.f330b99c7f9e7p-816, 0x1.8df9d62fb9c5ep-810 }, { 0x1.4f0ef77c81a6fp-816, 0x1.0b34677fe9486p-810 }, { 0x1.c1baedb5f2e65p-817, 0x1.66c37bb05de1ep-811 }, { 0x1.2dc9788ad9864p-817, 0x1.e1a30436bcde5p-812 }, { 0x1.94f913add4907p-818, 0x1.4341c90c553e7p-812 }, { 0x1.0fafd2c40ba27p-818, 0x1.b1dd0ffc5d04bp-813 }, { 0x1.6c7df995241d1p-819, 0x1.231f4a6757469p-813 }, { 0x1.e8f062cc963cep-820, 0x1.86a35930ed5e1p-814 }, { 0x1.47e5cbff0d92ep-820, 0x1.060dd236f49a3p-814 }, { 0x1.b7be34be4e18dp-821, 0x1.5f8c25cd122d7p-815 }, { 0x1.26d5559b935e7p-821, 0x1.d78bca82e9f37p-816 }, { 0x1.8b4dd6af9c05dp-822, 0x1.3c36d15093021p-816 }, { 0x1.08f94cfc79158p-822, 0x1.a80c62c44a65bp-817 }, { 0x1.632ec0e0d009cp-823, 0x1.1c4b11ed6627ap-817 }, { 0x1.dc0b5f2e40ea4p-824, 0x1.7d261cc2edf72p-818 }, { 0x1.3efa480ea698bp-824, 0x1.fef096f5252fp-819 }, { 0x1.ab6a5245de9e5p-825, 0x1.566c107178d1fp-819 }, { 0x1.1e52cde409267p-825, 0x1.cae9de8f00c0bp-820 }, { 0x1.7f910d0084829p-826, 0x1.337ae444bd293p-820 }, { 0x1.00e3012bd4171p-826, 0x1.9bfbcfe9dc1e8p-821 }, { 0x1.580c66bfc7cf5p-827, 0x1.13f803c0631d9p-821 }, { 0x1.ccba595fe34b5p-828, 0x1.71ac2109d33c9p-822 }, { 0x1.347383dcf4a9bp-828, 0x1.ef21caa7d80c3p-823 }, { 0x1.9cf52785fcd1fp-829, 0x1.4b8b6bbdb7a4fp-823 }, { 0x1.1466f7a4ba4b3p-829, 0x1.bbf4bcf8ca0c3p-824 }, { 0x1.71f5b701cb667p-830, 0x1.2934441fdae8bp-824 }, { 0x1.ef1fef5338f87p-831, 0x1.8de00a5d4cff3p-825 }, { 0x1.4b46ffc2e70ccp-831, 0x1.0a4a61359d63ap-825 }, { 0x1.bb3f3e667d5e5p-832, 0x1.64673b39bdd54p-826 }, { 0x1.287ea78b8278fp-832, 0x1.dcf3acd0cc1f4p-827 }, { 0x1.8c9c8347a2863p-833, 0x1.3f1926f0c2aa4p-827 }, { 0x1.093c166d47d9p-833, 0x1.aaecb94ca24e1p-828 }, { 0x1.62b5957e6b822p-834, 0x1.1d8efbbc88d6cp-828 }, { 0x1.da4f3c5b8c56fp-835, 0x1.7df554174928cp-829 }, { 0x1.3d1457a1afdaep-835, 0x1.fed6b4a9440a8p-830 }, { 0x1.a7e3665ffae25p-836, 0x1.558fae0fed7aap-830 }, { 0x1.1b4da97b89113p-836, 0x1.c8b307e047613p-831 }, { 0x1.7aa46b2ec675cp-837, 0x1.3149a005e5984p-831 }, { 0x1.fa00e080e536p-838, 0x1.9819329634547p-832 }, { 
0x1.520f92dcad4a2p-838, 0x1.10bba52994e8ep-832 }, { 0x1.c3a9666328faap-839, 0x1.6c7dd2d93c0f9p-833 }, { 0x1.2dae795ce73b6p-839, 0x1.e70fd5d6d806dp-834 }, { 0x1.92f5963d343cfp-840, 0x1.45629dffe1fa7p-834 }, { 0x1.0d15f439254bep-840, 0x1.b2b2e959996bp-835 }, { 0x1.675546ac2c967p-841, 0x1.2255364dfcfd7p-835 }, { 0x1.dfca1ff236f02p-842, 0x1.83c6a3841fccap-836 }, { 0x1.4046155930cfbp-842, 0x1.02ee197efc99dp-836 }, { 0x1.ab8846c89a496p-843, 0x1.59bfc8bdbfffep-837 }, { 0x1.1d5226b496f7ep-843, 0x1.cd9f4c973304p-838 }, { 0x1.7cc7edd2bedd1p-844, 0x1.3420703d360eap-838 }, { 0x1.fc1e021531b11p-845, 0x1.9b4a6e4580455p-839 }, { 0x1.52f9fd29afa7bp-845, 0x1.1276cde31355ep-839 }, { 0x1.c439018f9e7bp-846, 0x1.6e44a0da72dedp-840 }, { 0x1.2d9d4a3bfacfap-846, 0x1.e8b82d35e9882p-841 }, { 0x1.9247c7d6b7109p-847, 0x1.4603c1a2de688p-841 }, { 0x1.0c3d4d5746632p-847, 0x1.b2e6fa531d555p-842 }, { 0x1.65add59367765p-848, 0x1.220b241172407p-842 }, { 0x1.dce1e8301e6efp-849, 0x1.82d28ae825549p-843 }, { 0x1.3dde18cb97a8dp-849, 0x1.01ea51e3f541cp-843 }, { 0x1.a7b31ccb0b2f4p-850, 0x1.57e3d8e31e749p-844 }, { 0x1.1a59798dd7aa2p-850, 0x1.ca77ce984ce61p-845 }, { 0x1.7843a7981f8e3p-851, 0x1.3192c63185ef2p-845 }, { 0x1.f55b0f3ffe463p-852, 0x1.974911a73b1a7p-846 }, { 0x1.4df9fe655b0fbp-852, 0x1.0f64b579273f6p-846 }, { 0x1.bce68ce6bcfedp-853, 0x1.69a3e1bad13dap-847 }, { 0x1.284bfe1cdea24p-853, 0x1.e1d6859c11527p-848 }, { 0x1.8a9c29acbf47dp-854, 0x1.40f425a16dca3p-848 }, { 0x1.06bd70b72892bp-854, 0x1.ab8633790b1e2p-849 }, { 0x1.5dd55c1a48477p-855, 0x1.1cb4a43b9229fp-849 }, { 0x1.d1bd6b173b9f2p-856, 0x1.7b25cc6523c3bp-850 }, { 0x1.35fc8451ff49ep-856, 0x1.f8db2dc70232bp-851 }, { 0x1.9c9712232f548p-857, 0x1.5014bc06e7f91p-851 }, { 0x1.128b47439dcd5p-857, 0x1.bf66ba3b9066cp-852 }, { 0x1.6d53d2be0a0b6p-858, 0x1.29c2c1dc958dbp-852 }, { 0x1.e6122171333dfp-859, 0x1.8c4a9d76af90fp-853 }, { 0x1.435229d0cc681p-859, 0x1.07ae5a7347d0bp-853 }, { 0x1.ae1371b74ea2dp-860, 0x1.5ed9539dfd0c9p-854 }, { 0x1.1e01427183001p-860, 0x1.d2c69c7599edcp-855 }, { 0x1.7c589442700ecp-861, 0x1.3677341a98a13p-855 }, { 0x1.f9be9e1d7b4e4p-862, 0x1.9cf2c5625685ep-856 }, { 0x1.5033c96eb757p-862, 0x1.1298aebe8af0fp-856 }, { 0x1.bef014f36ffa9p-863, 0x1.6d2655c8560ebp-857 }, { 0x1.290979be09b3bp-863, 0x1.e58166789d0bcp-858 }, { 0x1.8ac6ba86dcc3cp-864, 0x1.42b9e90b536b6p-858 }, { 0x1.064e638fb2517p-864, 0x1.acfe7e64002b1p-859 }, { 0x1.5c884857d8adep-865, 0x1.1d179e12ade6ep-859 }, { 0x1.cf0beaeb1b319p-866, 0x1.7ae01eb0f55cbp-860 }, { 0x1.338e29511ffcdp-866, 0x1.f772a9e0423a1p-861 }, { 0x1.9881a23b2ff9bp-867, 0x1.4e72e15f0f016p-861 }, { 0x1.0f43798c4f845p-867, 0x1.bc4e2f5a8c9afp-862 }, { 0x1.6836e63bd7d88p-868, 0x1.27165d875ec78p-862 }, { 0x1.de466f9c32fdap-869, 0x1.87eb54ae1860dp-863 }, { 0x1.3d79f883687bfp-869, 0x1.043b38d103ec9p-863 }, { 0x1.a56d48500b8a3p-870, 0x1.598a7d65e3b67p-864 }, { 0x1.17ac327f9b5e5p-870, 0x1.cac2d1ee89db1p-865 }, { 0x1.73278f241bb95p-871, 0x1.308090afcd9f3p-865 }, { 0x1.ec801820c3f3dp-872, 0x1.942d41e7bf2a3p-866 }, { 0x1.46b841565ab3ep-872, 0x1.0c34dc595f4bfp-866 }, { 0x1.b16ea850bfa34p-873, 0x1.63e9cb83e74b2p-867 }, { 0x1.1f76e44abf0ecp-873, 0x1.d83e5a3ffd7adp-868 }, { 0x1.7d432d7dd0ca1p-874, 0x1.39428e0fd00c5p-868 }, { 0x1.f99abec00b682p-875, 0x1.9f8c2eadfb109p-869 }, { 0x1.4f35579392d4bp-875, 0x1.13957092e7741p-869 }, { 0x1.bc6c19eee10e8p-876, 0x1.6d7ad6ac744f9p-870 }, { 0x1.2692d6adc530fp-876, 0x1.e4a41e3c393c2p-871 }, { 0x1.8673fad41c337p-877, 0x1.4149a31665d1ep-871 }, { 0x1.02bd066e6e446p-877, 0x1.a9efbad7c9909p-872 }, { 
0x1.56dece3f159c3p-878, 0x1.1a4d14ca40e6p-872 }, { 0x1.c64dabfd6babdp-879, 0x1.7628f37011dc7p-873 }, { 0x1.2cf07ed3ac7cap-879, 0x1.efd93aae49244p-874 }, { 0x1.8ea5cdb1b77f8p-880, 0x1.4884565714d83p-874 }, { 0x1.0801f05da3babp-880, 0x1.b341347ab9d2ep-875 }, { 0x1.5da3ba0723cbcp-881, 0x1.204d0f497ca7dp-875 }, { 0x1.cefd7b19fc691p-882, 0x1.7de10a24a9be3p-876 }, { 0x1.3281b7ca3d771p-882, 0x1.f9c4f419d97b9p-877 }, { 0x1.95c663259c5d8p-883, 0x1.4ee2a6bb63f1dp-877 }, { 0x1.0c90568fe453bp-883, 0x1.bb6bea4d790c6p-878 }, { 0x1.6374ef6370a23p-884, 0x1.258802fee3a1bp-878 }, { 0x1.d668024e6e773p-885, 0x1.8491dcb50d65p-879 }, { 0x1.3739f6c74a992p-885, 0x1.012888bcf5e1bp-879 }, { 0x1.9bc5a2748239p-886, 0x1.5456466d99824p-880 }, { 0x1.105de86fb726ep-886, 0x1.c25d7813e5a28p-881 }, { 0x1.68453b252f9afp-887, 0x1.29f220ff323bdp-881 }, { 0x1.dc7c640bf856fp-888, 0x1.8a2c46b36447dp-882 }, { 0x1.3b0e7a2d8004dp-888, 0x1.04b5178932d9ep-882 }, { 0x1.a095d99893beap-889, 0x1.58d2d04dcdef9p-883 }, { 0x1.1361f24d04a1ep-889, 0x1.c8060b8a624d8p-884 }, { 0x1.6c0994513d45bp-890, 0x1.2d8154e3020f5p-884 }, { 0x1.e12caa0268707p-891, 0x1.8ea37661d565fp-885 }, { 0x1.3df6725a60cf5p-891, 0x1.078003d294269p-885 }, { 0x1.a42bf15180a09p-892, 0x1.5c4df6da1a5fp-886 }, { 0x1.15957e82800c6p-892, 0x1.cc58a0676d26ep-887 }, { 0x1.6eb9463d29a0dp-893, 0x1.302d6b1661efp-887 }, { 0x1.e46dfa81a2018p-894, 0x1.91ed1d851d1ddp-888 }, { 0x1.3feb236502138p-894, 0x1.0982d94421652p-888 }, { 0x1.a67f97b02e026p-895, 0x1.5ebfab91b4a2bp-889 }, { 0x1.16f37032d6085p-895, 0x1.cf4b3235443f5p-890 }, { 0x1.704e120e656fdp-896, 0x1.31f0304f01ddbp-890 }, { 0x1.e638c247f445dp-897, 0x1.940198fd0e1c2p-891 }, { 0x1.40e7ff18c854cp-897, 0x1.0ab8eaa8fae67p-891 }, { 0x1.a78b6039c7039p-898, 0x1.60223e0067b2cp-892 }, { 0x1.1778970df4481p-898, 0x1.d0d6e2f89dd66p-893 }, { 0x1.70c446e7535ccp-899, 0x1.32c589802b4bap-893 }, { 0x1.e688d1dc06742p-900, 0x1.94dc0e4e3bd62p-894 }, { 0x1.40eab69ffb357p-900, 0x1.0b1f64079cf15p-894 }, { 0x1.a74cd8f49285bp-901, 0x1.607271cb1c23p-895 }, { 0x1.1723bbb37e71p-901, 0x1.d0f815d3e30e4p-896 }, { 0x1.701ad03f5aba2p-902, 0x1.32ab83cb1b9aap-896 }, { 0x1.e55d6dd34aeb5p-903, 0x1.947a7e7d08e62p-897 }, { 0x1.3ff3437e5e592p-903, 0x1.0ab555a059592p-897 }, { 0x1.a5c493ec4b75bp-904, 0x1.5faf8b45ee11cp-898 }, { 0x1.15f5a46f2a8c5p-904, 0x1.cfae7d166a387p-899 }, { 0x1.6e533a1804da5p-905, 0x1.31a25c153692fp-899 }, { 0x1.e2b951ac76b4bp-906, 0x1.92ddcdd3a585ap-900 }, { 0x1.3e03e7aaf4a23p-906, 0x1.097bb793410b5p-900 }, { 0x1.a2f624fa2da41p-907, 0x1.5ddb524f58124p-901 }, { 0x1.13f112353b2e2p-907, 0x1.ccfd1b6b2b0d1p-902 }, { 0x1.6b71aaf8395acp-908, 0x1.2fac7e1ac1a55p-902 }, { 0x1.dea2a52e6f8d6p-909, 0x1.9009c068a7447p-903 }, { 0x1.3b2124c85eb7dp-909, 0x1.077566199da13p-903 }, { 0x1.9ee813dcc82f4p-910, 0x1.5afa0b60e30adp-904 }, { 0x1.111ab5ef7d9cep-910, 0x1.c8ea38207b48cp-905 }, { 0x1.677cd3ce598a2p-911, 0x1.2cce7b0334e93p-905 }, { 0x1.d922e485849dfp-912, 0x1.8c04eb792831bp-906 }, { 0x1.3751aaab95803p-912, 0x1.04a716678c7d9p-906 }, { 0x1.99a3c2eb312dfp-913, 0x1.571266fb205e7p-907 }, { 0x1.0d791e54efc95p-913, 0x1.c37f46c8a36cep-908 }, { 0x1.627dd610c1f2fp-914, 0x1.290ef7aa6784ep-908 }, { 0x1.d246bba093dddp-915, 0x1.86d89be61c44fp-909 }, { 0x1.329e3d8fc35e5p-915, 0x1.011744722e8f8p-909 }, { 0x1.93354aecb0f91p-916, 0x1.522d67c700dd9p-910 }, { 0x1.09149eae599f4p-916, 0x1.bcc8c2b79e5e6p-911 }, { 0x1.5c8020a89d6a7p-917, 0x1.247692feaf7c7p-911 }, { 0x1.ca1dd59404578p-918, 0x1.8090b25f1fb1cp-912 }, { 0x1.2d1194826d1d9p-918, 0x1.f99c33fa36826p-913 }, { 0x1.8bab4cd7bc185p-919, 
0x1.4c563ff8738edp-913 }, { 0x1.03f72f0fa181cp-919, 0x1.b4d5ff233ee8bp-914 }, { 0x1.559144638d7d2p-920, 0x1.1f0fc4fe41aefp-914 }, { 0x1.c0baa10766979p-921, 0x1.793b75fbd2367p-915 }, { 0x1.26b830bbc4f33p-921, 0x1.efaa9eeaa4992p-916 }, { 0x1.8316ba6f8ef74p-922, 0x1.459a26ac43fcfp-916 }, { 0x1.fc588d5eeb3p-923, 0x1.abb8ece685efep-917 }, { 0x1.4dc0c0d42f863p-923, 0x1.18e6b704952c1p-917 }, { 0x1.b6320aea7077ap-924, 0x1.70e95e366ca95p-918 }, { 0x1.1fa02ebad6485p-924, 0x1.e4700e7fab75ep-919 }, { 0x1.798a96e59845bp-925, 0x1.3e0826243926dp-919 }, { 0x1.ef81624855ca5p-926, 0x1.a185d71d9ae78p-920 }, { 0x1.451fcaaed5e7p-926, 0x1.1209163a43d8ap-920 }, { 0x1.aa9b30dd7b333p-927, 0x1.67acd56555624p-921 }, { 0x1.17d9121b4ff43p-927, 0x1.d805487b20ec2p-922 }, { 0x1.6f1bb0c9eff18p-928, 0x1.35b0e3e76f72ap-922 }, { 0x1.e184bec96bcc5p-929, 0x1.965317fc3f8ebp-923 }, { 0x1.3bc10ccdff1d7p-929, 0x1.0a85e11600392p-923 }, { 0x1.9e0f0cdf83a76p-930, 0x1.5d99f4f4fa7a2p-924 }, { 0x1.0f738d3253e75p-930, 0x1.ca8538b911cc2p-925 }, { 0x1.63e056b37b486p-931, 0x1.2ca663e8f6c6ep-925 }, { 0x1.d2806afda0512p-932, 0x1.8a38c763ae5p-926 }, { 0x1.31b865207923bp-932, 0x1.026d30f31261ep-926 }, { 0x1.90a81bef15367p-933, 0x1.52c63cbe5201dp-927 }, { 0x1.068145905baddp-933, 0x1.bc0c903e2dd51p-928 }, { 0x1.57f0081c7461bp-934, 0x1.22fbc7eb40c8ep-928 }, { 0x1.c293abfeb81c1p-935, 0x1.7d5064d5d2e6ap-929 }, { 0x1.271a9ed146425p-935, 0x1.f3a001a1da12ap-930 }, { 0x1.8282015bfd093p-936, 0x1.474846e880b8p-930 }, { 0x1.fa292d1f4b615p-937, 0x1.acb96019278e3p-931 }, { 0x1.4b6323fa7fafcp-937, 0x1.18c50c637e437p-931 }, { 0x1.b1ded81f6cf48p-938, 0x1.6fb47e7243b1p-932 }, { 0x1.1bfd2aff12d23p-938, 0x1.e17fe4af1cdcdp-933 }, { 0x1.73b9288cf980bp-939, 0x1.3b3779cd081bcp-933 }, { 0x1.e680a6315c8f9p-940, 0x1.9caab20737c4bp-934 }, { 0x1.3e52969a46a03p-940, 0x1.0e16c42489121p-934 }, { 0x1.a082ea93d471fp-941, 0x1.618056ad2fa0dp-935 }, { 0x1.1075d9566cab2p-941, 0x1.ce9e247afa7efp-936 }, { 0x1.646a66f6fb197p-942, 0x1.2eabb9557e4c3p-936 }, { 0x1.d22f0f82317a8p-943, 0x1.8c0020c90fd02p-937 }, { 0x1.30d7883df3e07p-943, 0x1.0305d4157bdecp-937 }, { 0x1.8ea1187daf8b3p-944, 0x1.52cf8a69cbdeep-938 }, { 0x1.049a91d747c02p-944, 0x1.bb1f3a4ce848cp-939 }, { 0x1.54b29ff375e83p-945, 0x1.21bd19407d3a8p-939 }, { 0x1.bd5a7cbaf896dp-946, 0x1.7ad97206eb3e9p-940 }, { 0x1.230b0dec754dap-946, 0x1.ef4e6059f1fe4p-941 }, { 0x1.7c5a693980a4p-947, 0x1.43bdb9112e65bp-941 }, { 0x1.f10221f87a1cap-948, 0x1.a7278c0b2c815p-942 }, { 0x1.44ae6c097e3b8p-948, 0x1.148391a9b5b7p-942 }, { 0x1.a8288818abb4p-949, 0x1.69563388e87eep-943 }, }, };
diff --git a/contrib/arm-optimized-routines/pl/math/erfcf_data.c b/contrib/arm-optimized-routines/math/aarch64/v_erfcf_data.c
similarity index 98%
rename from contrib/arm-optimized-routines/pl/math/erfcf_data.c
rename to contrib/arm-optimized-routines/math/aarch64/v_erfcf_data.c
index a54e11973819..9f992b4887fb 100644
--- a/contrib/arm-optimized-routines/pl/math/erfcf_data.c
+++ b/contrib/arm-optimized-routines/math/aarch64/v_erfcf_data.c
@@ -1,664 +1,664 @@
/*
 * Data used in single-precision erfc(x) function.
 *
- * Copyright (c) 2023, Arm Limited.
+ * Copyright (c) 2023-2024, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
#include "math_config.h"
-/* Lookup table used in erfcf.
+/* Lookup table used in vector erfcf.
   For each possible rounded input r (multiples of 1/64), between r = 0.0 and r = 10.0625 (645 values):
- - the first entry __erfcf_data.tab.erfc contains the values of erfc(r),
- - the second entry __erfcf_data.tab.scale contains the values of
+ - the first entry __v_erfcf_data.tab.erfc contains the values of erfc(r),
+ - the second entry __v_erfcf_data.tab.scale contains the values of
   2/sqrt(pi)*exp(-r^2). Both values may go into subnormal range, therefore they are scaled by a large enough value 2^47 (fits in 8 bits). */
-const struct erfcf_data __erfcf_data = {
+const struct v_erfcf_data __v_erfcf_data = {
.tab = { { 0x1p47, 0x1.20dd76p47 }, { 0x1.f6f944p46, 0x1.20cb68p47 }, { 0x1.edf3aap46, 0x1.209546p47 }, { 0x1.e4f05p46, 0x1.203b26p47 }, { 0x1.dbf056p46, 0x1.1fbd28p47 }, { 0x1.d2f4dcp46, 0x1.1f1b7ap47 }, { 0x1.c9fefep46, 0x1.1e565cp47 }, { 0x1.c10fd4p46, 0x1.1d6e14p47 }, { 0x1.b8287ap46, 0x1.1c62fap47 }, { 0x1.af4ap46, 0x1.1b3572p47 }, { 0x1.a6757ep46, 0x1.19e5eap47 }, { 0x1.9dabfcp46, 0x1.1874dep47 }, { 0x1.94ee88p46, 0x1.16e2d8p47 }, { 0x1.8c3e24p46, 0x1.153068p47 }, { 0x1.839bd6p46, 0x1.135e3p47 }, { 0x1.7b0894p46, 0x1.116cd8p47 }, { 0x1.728558p46, 0x1.0f5d16p47 }, { 0x1.6a1312p46, 0x1.0d2fa6p47 }, { 0x1.61b2acp46, 0x1.0ae55p47 }, { 0x1.596508p46, 0x1.087ee4p47 }, { 0x1.512b06p46, 0x1.05fd3ep47 }, { 0x1.49057ap46, 0x1.03614p47 }, { 0x1.40f536p46, 0x1.00abdp47 }, { 0x1.38fbp46, 0x1.fbbbbep46 }, { 0x1.311796p46, 0x1.f5f0cep46 }, { 0x1.294bb4p46, 0x1.eff8c4p46 }, { 0x1.21980ap46, 0x1.e9d5a8p46 }, { 0x1.19fd3ep46, 0x1.e38988p46 }, { 0x1.127bf2p46, 0x1.dd167cp46 }, { 0x1.0b14bcp46, 0x1.d67ea2p46 }, { 0x1.03c82ap46, 0x1.cfc41ep46 }, { 0x1.f92d8cp45, 0x1.c8e91cp46 }, { 0x1.eb0214p45, 0x1.c1efcap46 }, { 0x1.dd0edap45, 0x1.bada5ap46 }, { 0x1.cf54b4p45, 0x1.b3aafcp46 }, { 0x1.c1d46ap45, 0x1.ac63e8p46 }, { 0x1.b48eaep45, 0x1.a5074ep46 }, { 0x1.a78428p45, 0x1.9d9762p46 }, { 0x1.9ab566p45, 0x1.96165p46 }, { 0x1.8e22eap45, 0x1.8e8646p46 }, { 0x1.81cd24p45, 0x1.86e96ap46 }, { 0x1.75b47p45, 0x1.7f41dcp46 }, { 0x1.69d91ep45, 0x1.7791b8p46 }, { 0x1.5e3b66p45, 0x1.6fdb12p46 }, { 0x1.52db78p45, 0x1.681ff2p46 }, { 0x1.47b96ep45, 0x1.60625cp46 }, { 0x1.3cd554p45, 0x1.58a446p46 }, { 0x1.322f26p45, 0x1.50e79ep46 }, { 0x1.27c6d2p45, 0x1.492e42p46 }, { 0x1.1d9c34p45, 0x1.417a0cp46 }, { 0x1.13af1ep45, 0x1.39ccc2p46 }, { 0x1.09ff5p45, 0x1.32281ep46 }, { 0x1.008c8p45, 0x1.2a8dcep46 }, { 0x1.eeaca8p44, 0x1.22ff72p46 }, { 0x1.dcb8cap44, 0x1.1b7e98p46 }, { 0x1.cb3c86p44, 0x1.140cc4p46 }, { 0x1.ba36dap44, 0x1.0cab62p46 }, { 0x1.a9a6bap44, 0x1.055bd6p46 }, { 0x1.998afap44, 0x1.fc3ee6p45 }, { 0x1.89e25ep44, 0x1.edeeeep45 }, { 0x1.7aab98p44, 0x1.dfca26p45 }, { 0x1.6be542p44, 0x1.d1d2dp45 }, { 0x1.5d8decp44, 0x1.c40b08p45 }, { 0x1.4fa40ep44, 0x1.b674c8p45 }, { 0x1.422616p44, 0x1.a911fp45 }, { 0x1.351262p44, 0x1.9be438p45 }, { 0x1.28674p44, 0x1.8eed36p45 }, { 0x1.1c22f8p44, 0x1.822e66p45 }, { 0x1.1043c2p44, 0x1.75a91ap45 }, { 0x1.04c7cap44, 0x1.695e8cp45 }, { 0x1.f35a72p43, 0x1.5d4fd4p45 }, { 0x1.dde456p43, 0x1.517de6p45 }, { 0x1.c9296cp43, 0x1.45e99cp45 }, { 0x1.b525d6p43, 0x1.3a93b2p45 }, { 0x1.a1d5a6p43, 0x1.2f7cc4p45 }, { 0x1.8f34eap43, 0x1.24a554p45 }, { 0x1.7d3fa6p43, 0x1.1a0dc6p45 }, { 0x1.6bf1dcp43, 0x1.0fb662p45 }, { 0x1.5b4784p43, 0x1.059f5ap45 }, { 0x1.4b3c98p43, 0x1.f79184p44 }, { 0x1.3bcd14p43, 0x1.e4653p44 }, { 0x1.2cf4eep43, 0x1.d1b982p44 }, { 0x1.1eb024p43, 0x1.bf8e1cp44 }, { 0x1.10fab8p43, 0x1.ade26cp44 }, { 0x1.03d0acp43, 0x1.9cb5bep44 }, { 0x1.ee5c18p42, 0x1.8c0732p44 }, { 0x1.d61dd6p42, 0x1.7bd5c8p44
}, { 0x1.bedec8p42, 0x1.6c2056p44 }, { 0x1.a8973cp42, 0x1.5ce596p44 }, { 0x1.933f9p42, 0x1.4e241ep44 }, { 0x1.7ed03ap42, 0x1.3fda6cp44 }, { 0x1.6b41ccp42, 0x1.3206dcp44 }, { 0x1.588cf2p42, 0x1.24a7b8p44 }, { 0x1.46aa72p42, 0x1.17bb2cp44 }, { 0x1.359332p42, 0x1.0b3f52p44 }, { 0x1.254038p42, 0x1.fe646p43 }, { 0x1.15aaa8p42, 0x1.e72372p43 }, { 0x1.06cbcap42, 0x1.d0b7ap43 }, { 0x1.f13a04p41, 0x1.bb1c98p43 }, { 0x1.d62fbep41, 0x1.a64de6p43 }, { 0x1.bc6c1ep41, 0x1.92470ap43 }, { 0x1.a3e2ccp41, 0x1.7f036cp43 }, { 0x1.8c87b8p41, 0x1.6c7e64p43 }, { 0x1.764f2p41, 0x1.5ab342p43 }, { 0x1.612d8ap41, 0x1.499d48p43 }, { 0x1.4d17cap41, 0x1.3937b2p43 }, { 0x1.3a03p41, 0x1.297dbap43 }, { 0x1.27e498p41, 0x1.1a6a96p43 }, { 0x1.16b24cp41, 0x1.0bf97ep43 }, { 0x1.066222p41, 0x1.fc4b5ep42 }, { 0x1.edd4d2p40, 0x1.e1d4dp42 }, { 0x1.d08382p40, 0x1.c885ep42 }, { 0x1.b4be2p40, 0x1.b0553p42 }, { 0x1.9a7316p40, 0x1.99397ap42 }, { 0x1.81915cp40, 0x1.83298ep42 }, { 0x1.6a088p40, 0x1.6e1c58p42 }, { 0x1.53c89ep40, 0x1.5a08e8p42 }, { 0x1.3ec25ep40, 0x1.46e66cp42 }, { 0x1.2ae6fap40, 0x1.34ac36p42 }, { 0x1.18282ep40, 0x1.2351c2p42 }, { 0x1.067844p40, 0x1.12ceb4p42 }, { 0x1.eb940ep39, 0x1.031ad6p42 }, { 0x1.cc2186p39, 0x1.e85c44p41 }, { 0x1.ae808cp39, 0x1.cc018p41 }, { 0x1.9299bp39, 0x1.b1160ap41 }, { 0x1.785674p39, 0x1.978ae8p41 }, { 0x1.5fa14ap39, 0x1.7f5188p41 }, { 0x1.486586p39, 0x1.685bb6p41 }, { 0x1.328f5ep39, 0x1.529b9ep41 }, { 0x1.1e0be6p39, 0x1.3e03d8p41 }, { 0x1.0ac8fcp39, 0x1.2a875cp41 }, { 0x1.f16aaep38, 0x1.181984p41 }, { 0x1.cf80d4p38, 0x1.06ae14p41 }, { 0x1.afb4e2p38, 0x1.ec7262p40 }, { 0x1.91e8bep38, 0x1.cd5ecap40 }, { 0x1.75ffb4p38, 0x1.b00b38p40 }, { 0x1.5bde72p38, 0x1.94624ep40 }, { 0x1.436af4p38, 0x1.7a4f6ap40 }, { 0x1.2c8c7ap38, 0x1.61beaep40 }, { 0x1.172b7ap38, 0x1.4a9cf6p40 }, { 0x1.033198p38, 0x1.34d7dcp40 }, { 0x1.e11332p37, 0x1.205dacp40 }, { 0x1.be3ebp37, 0x1.0d1d6ap40 }, { 0x1.9dbf72p37, 0x1.f60d8ap39 }, { 0x1.7f714p37, 0x1.d4143ap39 }, { 0x1.6331cap37, 0x1.b430ecp39 }, { 0x1.48e09cp37, 0x1.9646f4p39 }, { 0x1.305ef8p37, 0x1.7a3adep39 }, { 0x1.198fd6p37, 0x1.5ff276p39 }, { 0x1.0457c6p37, 0x1.4754acp39 }, { 0x1.e139bcp36, 0x1.30499cp39 }, { 0x1.bc8d52p36, 0x1.1aba78p39 }, { 0x1.9a7c3p36, 0x1.06918cp39 }, { 0x1.7adadep36, 0x1.e77448p38 }, { 0x1.5d806ap36, 0x1.c4412cp38 }, { 0x1.424642p36, 0x1.a36454p38 }, { 0x1.290826p36, 0x1.84ba3p38 }, { 0x1.11a3f8p36, 0x1.6821p38 }, { 0x1.f7f358p35, 0x1.4d78bcp38 }, { 0x1.cfd652p35, 0x1.34a306p38 }, { 0x1.aab85ap35, 0x1.1d8318p38 }, { 0x1.88647p35, 0x1.07fdb4p38 }, { 0x1.68a8e4p35, 0x1.e7f232p37 }, { 0x1.4b5726p35, 0x1.c2b9dp37 }, { 0x1.30439cp35, 0x1.a02436p37 }, { 0x1.174578p35, 0x1.8005fp37 }, { 0x1.003692p35, 0x1.6235fcp37 }, { 0x1.d5e678p34, 0x1.468daep37 }, { 0x1.aeb442p34, 0x1.2ce898p37 }, { 0x1.8a9848p34, 0x1.15246ep37 }, { 0x1.695876p34, 0x1.fe41cep36 }, { 0x1.4abea2p34, 0x1.d57f52p36 }, { 0x1.2e984ep34, 0x1.afc85ep36 }, { 0x1.14b676p34, 0x1.8ce75ep36 }, { 0x1.f9daap33, 0x1.6caa0ep36 }, { 0x1.ce283ap33, 0x1.4ee142p36 }, { 0x1.a609f8p33, 0x1.3360ccp36 }, { 0x1.81396ap33, 0x1.19ff46p36 }, { 0x1.5f7524p33, 0x1.0295fp36 }, { 0x1.40806ep33, 0x1.da011p35 }, { 0x1.2422eep33, 0x1.b23a5ap35 }, { 0x1.0a286p33, 0x1.8d986ap35 }, { 0x1.e4c0bp32, 0x1.6be022p35 }, { 0x1.b93bf4p32, 0x1.4cda54p35 }, { 0x1.916f7cp32, 0x1.30539p35 }, { 0x1.6d0e7p32, 0x1.161be4p35 }, { 0x1.4bd1cp32, 0x1.fc0d56p34 }, { 0x1.2d77bep32, 0x1.cfd4a6p34 }, { 0x1.11c3bep32, 0x1.a74068p34 }, { 0x1.f0fb86p31, 0x1.8208bcp34 }, { 0x1.c2e43ep31, 0x1.5feadap34 }, { 0x1.98e254p31, 0x1.40a8c2p34 }, { 
0x1.729df6p31, 0x1.2408eap34 }, { 0x1.4fc63cp31, 0x1.09d5f8p34 }, { 0x1.3010aap31, 0x1.e3bcf4p33 }, { 0x1.1338b8p31, 0x1.b7e946p33 }, { 0x1.f1fecp30, 0x1.8fdc1cp33 }, { 0x1.c2556ap30, 0x1.6b4702p33 }, { 0x1.970b06p30, 0x1.49e178p33 }, { 0x1.6fbddep30, 0x1.2b6876p33 }, { 0x1.4c144ep30, 0x1.0f9e1cp33 }, { 0x1.2bbc1ep30, 0x1.ec929ap32 }, { 0x1.0e69f2p30, 0x1.be6abcp32 }, { 0x1.e7b188p29, 0x1.94637ep32 }, { 0x1.b792bcp29, 0x1.6e2368p32 }, { 0x1.8c03d2p29, 0x1.4b581cp32 }, { 0x1.649b02p29, 0x1.2bb5ccp32 }, { 0x1.40f794p29, 0x1.0ef6c4p32 }, { 0x1.20c13p29, 0x1.e9b5e8p31 }, { 0x1.03a72ap29, 0x1.ba4f04p31 }, { 0x1.d2bfc6p28, 0x1.8f4cccp31 }, { 0x1.a35068p28, 0x1.684c22p31 }, { 0x1.7885cep28, 0x1.44f21ep31 }, { 0x1.51f06ap28, 0x1.24eb72p31 }, { 0x1.2f2aaap28, 0x1.07ebd2p31 }, { 0x1.0fd816p28, 0x1.db5adp30 }, { 0x1.e7493p27, 0x1.abe09ep30 }, { 0x1.b48774p27, 0x1.80f43ap30 }, { 0x1.86e006p27, 0x1.5a2aep30 }, { 0x1.5dd4bp27, 0x1.37231p30 }, { 0x1.38f2e8p27, 0x1.1783cep30 }, { 0x1.17d2c6p27, 0x1.f5f7d8p29 }, { 0x1.f42c18p26, 0x1.c282cep29 }, { 0x1.beceb2p26, 0x1.94219cp29 }, { 0x1.8ef2aap26, 0x1.6a5972p29 }, { 0x1.640bf6p26, 0x1.44ba86p29 }, { 0x1.3d9be6p26, 0x1.22df2ap29 }, { 0x1.1b2fe4p26, 0x1.046aeap29 }, { 0x1.f8c0c2p25, 0x1.d21398p28 }, { 0x1.c19fa8p25, 0x1.a0df1p28 }, { 0x1.90538cp25, 0x1.74adc8p28 }, { 0x1.6443fep25, 0x1.4d0232p28 }, { 0x1.3ce784p25, 0x1.296a7p28 }, { 0x1.19c232p25, 0x1.097f62p28 }, { 0x1.f4c8c4p24, 0x1.d9c736p27 }, { 0x1.bcd30ep24, 0x1.a6852cp27 }, { 0x1.8aee4cp24, 0x1.789fb8p27 }, { 0x1.5e77b6p24, 0x1.4f8c96p27 }, { 0x1.36dcf2p24, 0x1.2acee2p27 }, { 0x1.139a7cp24, 0x1.09f5dp27 }, { 0x1.e8747p23, 0x1.d9371ep26 }, { 0x1.b0a44ap23, 0x1.a4c89ep26 }, { 0x1.7f064ap23, 0x1.75fa8ep26 }, { 0x1.52efep23, 0x1.4c37cp26 }, { 0x1.2bc82ap23, 0x1.26f9ep26 }, { 0x1.09064p23, 0x1.05c804p26 }, { 0x1.d45f16p22, 0x1.d06ad6p25 }, { 0x1.9dacb2p22, 0x1.9bc0ap25 }, { 0x1.6d3126p22, 0x1.6ce1aap25 }, { 0x1.423d14p22, 0x1.43302cp25 }, { 0x1.1c33cep22, 0x1.1e1e86p25 }, { 0x1.f512dep21, 0x1.fa5b5p24 }, { 0x1.b9823cp21, 0x1.bfd756p24 }, { 0x1.84d6fep21, 0x1.8be4f8p24 }, { 0x1.564a92p21, 0x1.5dcd66p24 }, { 0x1.2d2c0ap21, 0x1.34ecf8p24 }, { 0x1.08ddd2p21, 0x1.10b148p24 }, { 0x1.d1a75p20, 0x1.e12eep23 }, { 0x1.99218cp20, 0x1.a854eap23 }, { 0x1.674c6ap20, 0x1.7603bap23 }, { 0x1.3b62b6p20, 0x1.4980ccp23 }, { 0x1.14b54p20, 0x1.2225b2p23 }, { 0x1.e55102p19, 0x1.febc1p22 }, { 0x1.a964eep19, 0x1.c14b22p22 }, { 0x1.74b17ap19, 0x1.8b0cfcp22 }, { 0x1.465daap19, 0x1.5b2fe6p22 }, { 0x1.1da944p19, 0x1.30f93cp22 }, { 0x1.f3d41p18, 0x1.0bc30cp22 }, { 0x1.b512a2p18, 0x1.d5f3a8p21 }, { 0x1.7e03b2p18, 0x1.9c3518p21 }, { 0x1.4dbb98p18, 0x1.6961b8p21 }, { 0x1.236a1ap18, 0x1.3cab14p21 }, { 0x1.fcae94p17, 0x1.155a0ap21 }, { 0x1.bbc1ap17, 0x1.e5989p20 }, { 0x1.82eedcp17, 0x1.a8e406p20 }, { 0x1.5139a6p17, 0x1.7397c6p20 }, { 0x1.25c354p17, 0x1.44d26ep20 }, { 0x1.ff8f84p16, 0x1.1bcca4p20 }, { 0x1.bd3474p16, 0x1.efac52p19 }, { 0x1.834586p16, 0x1.b0a68ap19 }, { 0x1.50b75cp16, 0x1.7974e8p19 }, { 0x1.249ef2p16, 0x1.4924a8p19 }, { 0x1.fc5b88p15, 0x1.1edfa4p19 }, { 0x1.b95ceep15, 0x1.f3d218p18 }, { 0x1.7f03bap15, 0x1.b334fap18 }, { 0x1.4c389cp15, 0x1.7ac2d8p18 }, { 0x1.2006aep15, 0x1.4979acp18 }, { 0x1.f32eap14, 0x1.1e767cp18 }, { 0x1.b05cfep14, 0x1.f1e352p17 }, { 0x1.764f46p14, 0x1.b0778cp17 }, { 0x1.43e56cp14, 0x1.77756ep17 }, { 0x1.18238p14, 0x1.45ce66p17 }, { 0x1.e45a98p13, 0x1.1a95p17 }, { 0x1.a284ccp13, 0x1.e9f2p16 }, { 0x1.697596p13, 0x1.a887bep16 }, { 0x1.3807acp13, 0x1.6fab64p16 }, { 0x1.0d3b36p13, 0x1.3e44e4p16 }, { 0x1.d0624p12, 
0x1.135f28p16 }, { 0x1.904e0cp12, 0x1.dc479ep15 }, { 0x1.58e72ap12, 0x1.9baed4p15 }, { 0x1.2906ccp12, 0x1.63ac6cp15 }, { 0x1.ff58dap11, 0x1.33225ap15 }, { 0x1.b7f1f4p11, 0x1.0916fp15 }, { 0x1.7a551p11, 0x1.c960cp14 }, { 0x1.453142p11, 0x1.8a6174p14 }, { 0x1.1761f8p11, 0x1.53e4f8p14 }, { 0x1.dfd296p10, 0x1.24caf2p14 }, { 0x1.9bd5fp10, 0x1.f830cp13 }, { 0x1.61501p10, 0x1.b1e5acp13 }, { 0x1.2ef6p10, 0x1.7538c6p13 }, { 0x1.03a918p10, 0x1.40dfd8p13 }, { 0x1.bce26ap9, 0x1.13bc08p13 }, { 0x1.7cef42p9, 0x1.d9a88p12 }, { 0x1.46056p9, 0x1.96a0b4p12 }, { 0x1.16e3cap9, 0x1.5ce9acp12 }, { 0x1.dcea68p8, 0x1.2b3e54p12 }, { 0x1.97945ap8, 0x1.0085p12 }, { 0x1.5c2828p8, 0x1.b7937ep11 }, { 0x1.29415p8, 0x1.7872dap11 }, { 0x1.fb58fap7, 0x1.423acp11 }, { 0x1.b0c1a8p7, 0x1.13af5p11 }, { 0x1.70f474p7, 0x1.d77f0cp10 }, { 0x1.3a68a8p7, 0x1.92ff34p10 }, { 0x1.0bcc6p7, 0x1.5847eep10 }, { 0x1.c7fa0cp6, 0x1.25f9eep10 }, { 0x1.8401b6p6, 0x1.f5cc78p9 }, { 0x1.4a029ap6, 0x1.ac0f6p9 }, { 0x1.188c46p6, 0x1.6cfa9cp9 }, { 0x1.dcc4fap5, 0x1.370ab8p9 }, { 0x1.94ec06p5, 0x1.08f24p9 }, { 0x1.57bc96p5, 0x1.c324c2p8 }, { 0x1.23a81ap5, 0x1.7fe904p8 }, { 0x1.eeb278p4, 0x1.46897ep8 }, { 0x1.a35794p4, 0x1.159a38p8 }, { 0x1.634b8p4, 0x1.d7c594p7 }, { 0x1.2ce2a4p4, 0x1.90ae4ep7 }, { 0x1.fd5f08p3, 0x1.5422fp7 }, { 0x1.aef3cep3, 0x1.20998p7 }, { 0x1.6c6e62p3, 0x1.e98102p6 }, { 0x1.3407b6p3, 0x1.9eee06p6 }, { 0x1.043bap3, 0x1.5f8b88p6 }, { 0x1.b77e5cp2, 0x1.29b294p6 }, { 0x1.72f0c4p2, 0x1.f7f338p5 }, { 0x1.38ee18p2, 0x1.aa5772p5 }, { 0x1.07dd68p2, 0x1.68823ep5 }, { 0x1.bcc58ep1, 0x1.30b14ep5 }, { 0x1.76aca4p1, 0x1.01647cp5 }, { 0x1.3b7912p1, 0x1.b2a87ep4 }, { 0x1.097f82p1, 0x1.6ed2f2p4 }, { 0x1.beaa3ep0, 0x1.356cd6p4 }, { 0x1.778be2p0, 0x1.04e15ep4 }, { 0x1.3b9984p0, 0x1.b7b04p3 }, { 0x1.09182cp0, 0x1.725862p3 }, { 0x1.bd20fcp-1, 0x1.37c92cp3 }, { 0x1.75892p-1, 0x1.065b96p3 }, { 0x1.394e7ap-1, 0x1.b950d4p2 }, { 0x1.06a996p-1, 0x1.72fd94p2 }, { 0x1.b8328ep-2, 0x1.37b83cp2 }, { 0x1.70aff4p-2, 0x1.05ca5p2 }, { 0x1.34a53cp-2, 0x1.b7807ep1 }, { 0x1.0241dep-2, 0x1.70bebp1 }, { 0x1.affb9p-3, 0x1.353a6cp1 }, { 0x1.691c7cp-3, 0x1.0330fp1 }, { 0x1.2db8cap-3, 0x1.b24a16p0 }, { 0x1.f7f4f8p-4, 0x1.6ba91ap0 }, { 0x1.a4ab64p-4, 0x1.305e98p0 }, { 0x1.5efa4ep-4, 0x1.fd3de2p-1 }, { 0x1.24b0d8p-4, 0x1.a9cc94p-1 }, { 0x1.e7eeap-5, 0x1.63daf8p-1 }, { 0x1.96826ep-5, 0x1.294176p-1 }, { 0x1.5282d2p-5, 0x1.f05e82p-2 }, { 0x1.19c05p-5, 0x1.9e39dcp-2 }, { 0x1.d4ca9cp-6, 0x1.5982p-2 }, { 0x1.85cfacp-6, 0x1.200c8ap-2 }, { 0x1.43fb32p-6, 0x1.e00e92p-3 }, { 0x1.0d2382p-6, 0x1.8fd4ep-3 }, { 0x1.bef1b2p-7, 0x1.4cd9cp-3 }, { 0x1.72ede4p-7, 0x1.14f48ap-3 }, { 0x1.33b1cap-7, 0x1.ccaaeap-4 }, { 0x1.fe3bdp-8, 0x1.7eef14p-4 }, { 0x1.a6d7d2p-8, 0x1.3e2964p-4 }, { 0x1.5e4062p-8, 0x1.083768p-4 }, { 0x1.21fb7ap-8, 0x1.b69f1p-5 }, { 0x1.dfefbep-9, 0x1.6be574p-5 }, { 0x1.8cf816p-9, 0x1.2dc11ap-5 }, { 0x1.482fa8p-9, 0x1.f4343cp-6 }, { 0x1.0f30c4p-9, 0x1.9e614ep-6 }, { 0x1.bff86ep-10, 0x1.571d34p-6 }, { 0x1.71d0b6p-10, 0x1.1bf742p-6 }, { 0x1.3125f6p-10, 0x1.d5cc6cp-7 }, { 0x1.f755eap-11, 0x1.846e9ep-7 }, { 0x1.9eebaap-11, 0x1.410048p-7 }, { 0x1.55df18p-11, 0x1.09258p-7 }, { 0x1.198c18p-11, 0x1.b5ceb6p-8 }, { 0x1.cf82ep-12, 0x1.69468p-8 }, { 0x1.7d5af6p-12, 0x1.29f9e8p-8 }, { 0x1.399c28p-12, 0x1.eb4b9ep-9 }, { 0x1.01c65ap-12, 0x1.94d1dep-9 }, { 0x1.a78e82p-13, 0x1.4d6706p-9 }, { 0x1.5bcf92p-13, 0x1.127346p-9 }, { 0x1.1d791cp-13, 0x1.c39fap-10 }, { 0x1.d463dcp-14, 0x1.73679cp-10 }, { 0x1.8011fcp-14, 0x1.314916p-10 }, { 0x1.3ac71cp-14, 0x1.f5a11ap-11 }, { 0x1.01dcc2p-14, 0x1.9beca8p-11 }, 
{ 0x1.a6459cp-15, 0x1.52189ap-11 }, { 0x1.59962ap-15, 0x1.155d48p-11 }, { 0x1.1ab0e4p-15, 0x1.c6dc8ap-12 }, { 0x1.ce42dep-16, 0x1.74ca88p-12 }, { 0x1.79c43p-16, 0x1.31612ap-12 }, { 0x1.349128p-16, 0x1.f4125ap-13 }, { 0x1.f7d80ep-17, 0x1.993e82p-13 }, { 0x1.9b270cp-17, 0x1.4ec006p-13 }, { 0x1.4f59fap-17, 0x1.11aebp-13 }, { 0x1.1164acp-17, 0x1.bf4ab2p-14 }, { 0x1.bd8c96p-18, 0x1.6d561ep-14 }, { 0x1.6ae172p-18, 0x1.2a406ep-14 }, { 0x1.276874p-18, 0x1.e6bba6p-15 }, { 0x1.e0bad2p-19, 0x1.8cf814p-15 }, { 0x1.86f788p-19, 0x1.4399f8p-15 }, { 0x1.3dcfaep-19, 0x1.07aa3p-15 }, { 0x1.023828p-19, 0x1.ad7302p-16 }, { 0x1.a3666ep-20, 0x1.5d90f4p-16 }, { 0x1.546e38p-20, 0x1.1c674ep-16 }, { 0x1.143264p-20, 0x1.ce8ccp-17 }, { 0x1.bff316p-21, 0x1.77f562p-17 }, { 0x1.6b13ecp-21, 0x1.316da8p-17 }, { 0x1.2624f4p-21, 0x1.f0046p-18 }, { 0x1.dc5de4p-22, 0x1.92920ap-18 }, { 0x1.818d3ap-22, 0x1.4691b2p-18 }, { 0x1.37e62p-22, 0x1.08c96ap-18 }, { 0x1.f8637ep-23, 0x1.ad2d0ap-19 }, { 0x1.97a3dcp-23, 0x1.5ba462p-19 }, { 0x1.494a4p-23, 0x1.1975ep-19 }, { 0x1.09dee4p-23, 0x1.c78892p-20 }, { 0x1.ad1fap-24, 0x1.7073c4p-20 }, { 0x1.5a245ep-24, 0x1.29df48p-20 }, { 0x1.171278p-24, 0x1.e163bep-21 }, { 0x1.c1c74cp-25, 0x1.84cbbp-21 }, { 0x1.6a46f4p-25, 0x1.39dbcep-21 }, { 0x1.23a858p-25, 0x1.fa7b92p-22 }, { 0x1.d56196p-26, 0x1.9876ap-22 }, { 0x1.7984b6p-26, 0x1.4940bcp-22 }, { 0x1.2f7cc4p-26, 0x1.094608p-22 }, { 0x1.e7b62cp-27, 0x1.ab3e8cp-23 }, { 0x1.87b15ep-27, 0x1.57e33ep-23 }, { 0x1.3a6dp-27, 0x1.14a8b6p-23 }, { 0x1.f88ebap-28, 0x1.bcede6p-24 }, { 0x1.94a282p-28, 0x1.659918p-24 }, { 0x1.44580ap-28, 0x1.1f4498p-24 }, { 0x1.03dbf8p-28, 0x1.cd5086p-25 }, { 0x1.a03066p-29, 0x1.723974p-25 }, { 0x1.4d1f2ep-29, 0x1.28f9cap-25 }, { 0x1.0a814ap-29, 0x1.dc34b6p-26 }, { 0x1.aa36cap-30, 0x1.7d9dbp-26 }, { 0x1.54a6b6p-30, 0x1.31aa56p-26 }, { 0x1.102232p-30, 0x1.e96c26p-27 }, { 0x1.b2959ep-31, 0x1.87a218p-27 }, { 0x1.5ad66cp-31, 0x1.393ad2p-27 }, { 0x1.14ac7ep-31, 0x1.f4ccdap-28 }, { 0x1.b931b8p-32, 0x1.9026a8p-28 }, { 0x1.5f9a24p-32, 0x1.3f92eap-28 }, { 0x1.181154p-32, 0x1.fe3208p-29 }, { 0x1.bdf55ep-33, 0x1.970fbp-29 }, { 0x1.62e226p-33, 0x1.449de6p-29 }, { 0x1.1a4576p-33, 0x1.02be7p-29 }, { 0x1.c0d0bep-34, 0x1.9c4672p-30 }, { 0x1.64a386p-34, 0x1.484b1ep-30 }, { 0x1.1b418cp-34, 0x1.054a9ap-30 }, { 0x1.c1ba4ap-35, 0x1.9fb994p-31 }, { 0x1.64d86p-35, 0x1.4a8e4ep-31 }, { 0x1.1b0242p-35, 0x1.06b4fep-31 }, { 0x1.c0aee6p-36, 0x1.a15d86p-32 }, { 0x1.637ffap-36, 0x1.4b5fdep-32 }, { 0x1.198862p-36, 0x1.06f8dap-32 }, { 0x1.bdb204p-37, 0x1.a12cc8p-33 }, { 0x1.609ec2p-37, 0x1.4abd0ap-33 }, { 0x1.16d8d2p-37, 0x1.06154ap-33 }, { 0x1.b8cd88p-38, 0x1.9f27fap-34 }, { 0x1.5c3e42p-38, 0x1.48a7fcp-34 }, { 0x1.12fc6cp-38, 0x1.040d4ap-34 }, { 0x1.b2119p-39, 0x1.9b55e8p-35 }, { 0x1.566cep-39, 0x1.4527acp-35 }, { 0x1.0dffep-39, 0x1.00e7acp-35 }, { 0x1.a99426p-40, 0x1.95c358p-36 }, { 0x1.4f3d92p-40, 0x1.4047cep-36 }, { 0x1.07f35ep-40, 0x1.f95dcep-37 }, { 0x1.9f70cp-41, 0x1.8e82cep-37 }, { 0x1.46c77ap-41, 0x1.3a1882p-37 }, { 0x1.00ea48p-41, 0x1.eee1d4p-38 }, { 0x1.93c7acp-42, 0x1.85ac18p-38 }, { 0x1.3d256ap-42, 0x1.32ae04p-38 }, { 0x1.f1f59p-43, 0x1.e27d88p-39 }, { 0x1.86bd6ap-43, 0x1.7b5bdap-39 }, { 0x1.327554p-43, 0x1.2a2036p-39 }, { 0x1.e07ab4p-44, 0x1.d458ap-40 }, { 0x1.7879ecp-44, 0x1.6fb2eap-40 }, { 0x1.26d7bp-44, 0x1.208a2cp-40 }, { 0x1.cd98a2p-45, 0x1.c49f8ap-41 }, { 0x1.6927c2p-45, 0x1.62d5aap-41 }, { 0x1.1a6ed6p-45, 0x1.16098ep-41 }, { 0x1.b986acp-46, 0x1.b3828ep-42 }, { 0x1.58f35ap-46, 0x1.54eb3ep-42 }, { 0x1.0d5e6p-46, 0x1.0abe0ep-42 }, { 
0x1.a47db6p-47, 0x1.a134d4p-43 }, { 0x1.480a18p-47, 0x1.461cdap-43 }, { 0x1.ff94e4p-48, 0x1.fd9182p-44 }, { 0x1.8eb738p-48, 0x1.8deb62p-44 }, { 0x1.369994p-48, 0x1.3694e8p-44 }, { 0x1.e3ae4ap-49, 0x1.e49706p-45 }, { 0x1.786c3ep-49, 0x1.79dc28p-45 }, { 0x1.24cec8p-49, 0x1.267e46p-45 }, { 0x1.c74fc4p-50, 0x1.cad0bp-46 }, { 0x1.61d46cp-50, 0x1.653d08p-46 }, { 0x1.12d55cp-50, 0x1.16038cp-46 }, { 0x1.aabdacp-51, 0x1.b081aap-47 }, { 0x1.4b252ep-51, 0x1.5042e2p-47 }, { 0x1.00d6f8p-51, 0x1.054e44p-47 }, { 0x1.8e38ep-52, 0x1.95eb2cp-48 }, { 0x1.3490e8p-52, 0x1.3b20c6p-48 }, { 0x1.ddf56ap-53, 0x1.e90cb6p-49 }, { 0x1.71fdep-53, 0x1.7b4b76p-49 }, { 0x1.1e465ap-53, 0x1.26072ap-49 }, { 0x1.bac92ep-54, 0x1.c7a2ecp-50 }, { 0x1.56441cp-54, 0x1.60dcfp-50 }, { 0x1.08700cp-54, 0x1.112346p-50 }, { 0x1.986a66p-55, 0x1.a6a50ap-51 }, { 0x1.3b3d56p-55, 0x1.46d572p-51 }, { 0x1.e667dap-56, 0x1.f93d0ep-52 }, { 0x1.7712b8p-56, 0x1.86529ep-52 }, { 0x1.211544p-56, 0x1.2d65aep-52 }, { 0x1.bd660ap-57, 0x1.d13c32p-53 }, { 0x1.56f3eep-57, 0x1.66e45ap-53 }, { 0x1.07f14ap-57, 0x1.14b8b6p-53 }, { 0x1.96129cp-58, 0x1.aa854cp-54 }, { 0x1.3837cp-58, 0x1.488b94p-54 }, { 0x1.dfe0c2p-59, 0x1.f9e772p-55 }, { 0x1.709b5ap-59, 0x1.85503p-55 }, { 0x1.1affd2p-59, 0x1.2b7218p-55 }, { 0x1.b2564p-60, 0x1.cc6bb6p-56 }, { 0x1.4d23fap-60, 0x1.61cb1ap-56 }, { 0x1.fecbdp-61, 0x1.0fba0ep-56 }, { 0x1.8767d8p-61, 0x1.a13072p-57 }, { 0x1.2bc67ep-61, 0x1.401abcp-57 }, { 0x1.caf846p-62, 0x1.eafc2cp-58 }, { 0x1.5f2e7ap-62, 0x1.785cp-58 }, { 0x1.0c93acp-62, 0x1.205a7ep-58 }, { 0x1.9a9b06p-63, 0x1.b9a31ap-59 }, { 0x1.39b7fcp-63, 0x1.520968p-59 }, { 0x1.df277ap-64, 0x1.029ce6p-59 }, { 0x1.6dbcdp-64, 0x1.8b81d6p-60 }, { 0x1.17080ap-64, 0x1.2e48f2p-60 }, { 0x1.a98e26p-65, 0x1.cdd86cp-61 }, { 0x1.445a6ap-65, 0x1.60a47ap-61 }, { 0x1.ee324ep-66, 0x1.0d210cp-61 }, { 0x1.784e3p-66, 0x1.9a961ep-62 }, { 0x1.1e65fep-66, 0x1.390b74p-62 }, { 0x1.b3bb86p-67, 0x1.dd1e52p-63 }, { 0x1.4b4e36p-67, 0x1.6b6a7ap-63 }, { 0x1.f790f6p-68, 0x1.14acc2p-63 }, { 0x1.7e82cep-68, 0x1.a511aap-64 }, { 0x1.226a7ap-68, 0x1.404114p-64 }, { 0x1.b8c634p-69, 0x1.e6ea96p-65 }, { 0x1.4e53acp-69, 0x1.71f97ap-65 }, { 0x1.faed5cp-70, 0x1.18fb2ep-65 }, { 0x1.80217ep-70, 0x1.aa947ep-66 }, { 0x1.22f066p-70, 0x1.43a796p-66 }, { 0x1.b87f86p-71, 0x1.eae2fp-67 }, { 0x1.4d4ec8p-71, 0x1.7414e6p-67 }, { 0x1.f8283ep-72, 0x1.19e474p-67 }, { 0x1.7d1b22p-72, 0x1.aaeb7ep-68 }, { 0x1.1ff2dp-72, 0x1.431f66p-68 }, { 0x1.b2e9e8p-73, 0x1.e8e272p-69 }, { 0x1.4848dep-73, 0x1.71a91ep-69 }, { 0x1.ef5b16p-74, 0x1.176014p-69 }, { 0x1.758b92p-74, 0x1.a6137cp-70 }, { 0x1.198d42p-74, 0x1.3ead74p-70 }, { 0x1.a838bp-75, 0x1.e0fbc2p-71 }, { 0x1.3f700cp-75, 0x1.6accaep-71 }, { 0x1.e0d68ep-76, 0x1.118578p-71 }, { 0x1.69b7f4p-76, 0x1.9c3974p-72 }, { 0x1.0ffa12p-76, 0x1.367afap-72 }, { 0x1.98cd1cp-77, 0x1.d377fap-73 }, { 0x1.33148p-77, 0x1.5fbee6p-73 }, { 0x1.cd1dbap-78, 0x1.088a8p-73 }, { 0x1.5a0a9cp-78, 0x1.8db7ccp-74 }, { 0x1.038ef4p-78, 0x1.2ad2ecp-74 }, { 0x1.85308ap-79, 0x1.c0d23ep-75 }, { 0x1.23a3cp-79, 0x1.50e41ap-75 }, { 0x1.b4de68p-80, 0x1.f980a8p-76 }, { 0x1.470ce4p-80, 0x1.7b10fep-76 }, { 0x1.e9700cp-81, 0x1.1c1d98p-76 }, { 0x1.6e0c9p-81, 0x1.a9b08p-77 }, { 0x1.11a25ap-81, 0x1.3ebfb4p-77 }, { 0x1.98e73ap-82, 0x1.dd1d36p-78 }, { 0x1.315f58p-82, 0x1.64e7fp-78 }, { 0x1.c7e35cp-83, 0x1.0ada94p-78 }, { 0x1.542176p-83, 0x1.8ed9e8p-79 }, { 0x1.fb491ep-84, 0x1.29ecb2p-79 }, { 0x1.7a1c34p-84, 0x1.bcdb34p-80 }, { 0x1.19b0f2p-84, 0x1.4bf6cap-80 }, { 0x1.a383cap-85, 0x1.ef3318p-81 }, { 0x1.383bf2p-85, 0x1.712bc2p-81 }, { 
0x1.d08cdap-86, 0x1.13151p-81 }, { 0x1.596adp-86, 0x1.99bf36p-82 }, { 0x1.00b602p-86, 0x1.3104d6p-82 }, { 0x1.7d62a2p-87, 0x1.c5e534p-83 }, { 0x1.1b2abcp-87, 0x1.518db2p-83 }, { 0x1.a4480ep-88, 0x1.f5d1c6p-84 }, { 0x1.37be42p-88, 0x1.74d45ap-84 }, { 0x1.ce3ee4p-89, 0x1.14dc4ap-84 }, { 0x1.568986p-89, 0x1.9afd0ep-85 }, { 0x1.fb69c6p-90, 0x1.30e632p-85 }, { 0x1.77a47ep-90, 0x1.c42b48p-86 }, { 0x1.15f4ep-90, 0x1.4f1f52p-86 }, { 0x1.9b25dcp-91, 0x1.f08156p-87 }, { 0x1.2feeeep-91, 0x1.6f9f62p-87 }, { 0x1.c122bcp-92, 0x1.100ffap-87 }, { 0x1.4bb154p-92, 0x1.927ce6p-88 }, { 0x1.e9ae56p-93, 0x1.2992f4p-88 }, { 0x1.6948e8p-93, 0x1.b7cccap-89 }, { 0x1.0a6cd2p-93, 0x1.44d7c4p-89 }, { 0x1.88c0cap-94, 0x1.dfa22p-90 }, { 0x1.215988p-94, 0x1.61eb26p-90 }, { 0x1.aa222ap-95, 0x1.0506e2p-90 }, { 0x1.39a30ep-95, 0x1.80d828p-91 }, { 0x1.cd740ep-96, 0x1.1b8f04p-91 }, { 0x1.534d82p-96, 0x1.a1a7ecp-92 }, { 0x1.f2bb06p-97, 0x1.336f3p-92 }, { 0x1.6e5b34p-97, 0x1.c46172p-93 }, { 0x1.0cfc82p-97, 0x1.4cab82p-93 }, { 0x1.8acc82p-98, 0x1.e9094cp-94 }, { 0x1.219686p-98, 0x1.67465p-94 }, { 0x1.a89fa6p-99, 0x1.07d0b8p-94 }, { 0x1.372982p-99, 0x1.833ffap-95 }, { 0x1.c7d094p-100, 0x1.1c147ap-95 }, { 0x1.4db1c8p-100, 0x1.a096ccp-96 }, { 0x1.e858d8p-101, 0x1.314decp-96 }, { 0x1.6529ep-101, 0x1.bf46cep-97 }, { 0x1.0517bap-101, 0x1.47796ap-97 }, { 0x1.7d8a8p-102, 0x1.df49a2p-98 }, { 0x1.16a46p-102, 0x1.5e9198p-98 }, { 0x1.96ca76p-103, 0x1.004b34p-98 }, { 0x1.28cb2cp-103, 0x1.768f3ep-99 }, { 0x1.b0de98p-104, 0x1.1190d2p-99 }, }, }; diff --git a/contrib/arm-optimized-routines/pl/math/erff_data.c b/contrib/arm-optimized-routines/math/aarch64/v_erff_data.c similarity index 98% rename from contrib/arm-optimized-routines/pl/math/erff_data.c rename to contrib/arm-optimized-routines/math/aarch64/v_erff_data.c index 84c0d2e95463..8d11d8b6c10b 100644 --- a/contrib/arm-optimized-routines/pl/math/erff_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/v_erff_data.c @@ -1,532 +1,532 @@ /* * Data for approximation of erff. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -/* Lookup table used in erff. +/* Lookup table used in vector erff. For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = 4.0 (513 values): - - the first entry __erff_data.tab.erf contains the values of erf(r), - - the second entry __erff_data.tab.scale contains the values of + - the first entry __v_erff_data.tab.erf contains the values of erf(r), + - the second entry __v_erff_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the algorithm, since lookup is performed only for x >= 1/64-1/512. 
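As a rough illustration, both tables can be regenerated from their defining formulas. The sketch below prints the erfcf rows; the erff table is analogous with a step of 1/128, 513 values and no 2^47 scaling. It relies on the host libm's erfc() and exp(), so the last bit may differ from the shipped, correctly rounded constants.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  const double two_over_sqrt_pi = 1.12837916709551257390; /* 2/sqrt(pi) */
  /* erfcf rows: r = i/64 for i = 0..644, i.e. r = 0.0 .. 10.0625.  Both
     columns carry the 2^47 scaling described above, so values that would
     be subnormal as floats survive as normal numbers.  */
  for (int i = 0; i <= 644; i++)
    {
      double r = i / 64.0;
      float erfc_r = (float) (erfc (r) * 0x1p47);
      float scale = (float) (two_over_sqrt_pi * exp (-r * r) * 0x1p47);
      printf ("{ %a, %a },\n", erfc_r, scale); /* floats promote to double */
    }
  return 0;
}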
*/ -const struct erff_data __erff_data = { +const struct v_erff_data __v_erff_data = { .tab = { { 0x0.000000p+0, 0x1.20dd76p+0 }, { 0x1.20dbf4p-7, 0x1.20d8f2p+0 }, { 0x1.20d770p-6, 0x1.20cb68p+0 }, { 0x1.b137e0p-6, 0x1.20b4d8p+0 }, { 0x1.20c564p-5, 0x1.209546p+0 }, { 0x1.68e5d4p-5, 0x1.206cb4p+0 }, { 0x1.b0fafep-5, 0x1.203b26p+0 }, { 0x1.f902a8p-5, 0x1.2000a0p+0 }, { 0x1.207d48p-4, 0x1.1fbd28p+0 }, { 0x1.44703ep-4, 0x1.1f70c4p+0 }, { 0x1.68591ap-4, 0x1.1f1b7ap+0 }, { 0x1.8c36bep-4, 0x1.1ebd56p+0 }, { 0x1.b00812p-4, 0x1.1e565cp+0 }, { 0x1.d3cbf8p-4, 0x1.1de698p+0 }, { 0x1.f7815ap-4, 0x1.1d6e14p+0 }, { 0x1.0d9390p-3, 0x1.1cecdcp+0 }, { 0x1.1f5e1ap-3, 0x1.1c62fap+0 }, { 0x1.311fc2p-3, 0x1.1bd07cp+0 }, { 0x1.42d7fcp-3, 0x1.1b3572p+0 }, { 0x1.548642p-3, 0x1.1a91e6p+0 }, { 0x1.662a0cp-3, 0x1.19e5eap+0 }, { 0x1.77c2d2p-3, 0x1.19318cp+0 }, { 0x1.895010p-3, 0x1.1874dep+0 }, { 0x1.9ad142p-3, 0x1.17aff0p+0 }, { 0x1.ac45e4p-3, 0x1.16e2d8p+0 }, { 0x1.bdad72p-3, 0x1.160da4p+0 }, { 0x1.cf076ep-3, 0x1.153068p+0 }, { 0x1.e05354p-3, 0x1.144b3cp+0 }, { 0x1.f190aap-3, 0x1.135e30p+0 }, { 0x1.015f78p-2, 0x1.12695ep+0 }, { 0x1.09eed6p-2, 0x1.116cd8p+0 }, { 0x1.127632p-2, 0x1.1068bap+0 }, { 0x1.1af54ep-2, 0x1.0f5d16p+0 }, { 0x1.236bf0p-2, 0x1.0e4a08p+0 }, { 0x1.2bd9dcp-2, 0x1.0d2fa6p+0 }, { 0x1.343ed6p-2, 0x1.0c0e0ap+0 }, { 0x1.3c9aa8p-2, 0x1.0ae550p+0 }, { 0x1.44ed18p-2, 0x1.09b590p+0 }, { 0x1.4d35f0p-2, 0x1.087ee4p+0 }, { 0x1.5574f4p-2, 0x1.07416cp+0 }, { 0x1.5da9f4p-2, 0x1.05fd3ep+0 }, { 0x1.65d4b8p-2, 0x1.04b27cp+0 }, { 0x1.6df50ap-2, 0x1.036140p+0 }, { 0x1.760abap-2, 0x1.0209a6p+0 }, { 0x1.7e1594p-2, 0x1.00abd0p+0 }, { 0x1.861566p-2, 0x1.fe8fb0p-1 }, { 0x1.8e0a02p-2, 0x1.fbbbbep-1 }, { 0x1.95f336p-2, 0x1.f8dc0ap-1 }, { 0x1.9dd0d2p-2, 0x1.f5f0cep-1 }, { 0x1.a5a2acp-2, 0x1.f2fa4cp-1 }, { 0x1.ad6896p-2, 0x1.eff8c4p-1 }, { 0x1.b52264p-2, 0x1.ecec78p-1 }, { 0x1.bccfecp-2, 0x1.e9d5a8p-1 }, { 0x1.c47104p-2, 0x1.e6b498p-1 }, { 0x1.cc0584p-2, 0x1.e38988p-1 }, { 0x1.d38d44p-2, 0x1.e054bep-1 }, { 0x1.db081cp-2, 0x1.dd167cp-1 }, { 0x1.e275eap-2, 0x1.d9cf06p-1 }, { 0x1.e9d68ap-2, 0x1.d67ea2p-1 }, { 0x1.f129d4p-2, 0x1.d32592p-1 }, { 0x1.f86faap-2, 0x1.cfc41ep-1 }, { 0x1.ffa7eap-2, 0x1.cc5a8ap-1 }, { 0x1.03693ap-1, 0x1.c8e91cp-1 }, { 0x1.06f794p-1, 0x1.c5701ap-1 }, { 0x1.0a7ef6p-1, 0x1.c1efcap-1 }, { 0x1.0dff50p-1, 0x1.be6872p-1 }, { 0x1.117894p-1, 0x1.bada5ap-1 }, { 0x1.14eab4p-1, 0x1.b745c6p-1 }, { 0x1.1855a6p-1, 0x1.b3aafcp-1 }, { 0x1.1bb95cp-1, 0x1.b00a46p-1 }, { 0x1.1f15ccp-1, 0x1.ac63e8p-1 }, { 0x1.226ae8p-1, 0x1.a8b828p-1 }, { 0x1.25b8a8p-1, 0x1.a5074ep-1 }, { 0x1.28ff02p-1, 0x1.a1519ep-1 }, { 0x1.2c3decp-1, 0x1.9d9762p-1 }, { 0x1.2f755cp-1, 0x1.99d8dap-1 }, { 0x1.32a54cp-1, 0x1.961650p-1 }, { 0x1.35cdb4p-1, 0x1.925008p-1 }, { 0x1.38ee8ap-1, 0x1.8e8646p-1 }, { 0x1.3c07cap-1, 0x1.8ab950p-1 }, { 0x1.3f196ep-1, 0x1.86e96ap-1 }, { 0x1.42236ep-1, 0x1.8316d6p-1 }, { 0x1.4525c8p-1, 0x1.7f41dcp-1 }, { 0x1.482074p-1, 0x1.7b6abcp-1 }, { 0x1.4b1372p-1, 0x1.7791b8p-1 }, { 0x1.4dfebap-1, 0x1.73b714p-1 }, { 0x1.50e24cp-1, 0x1.6fdb12p-1 }, { 0x1.53be26p-1, 0x1.6bfdf0p-1 }, { 0x1.569244p-1, 0x1.681ff2p-1 }, { 0x1.595ea6p-1, 0x1.644156p-1 }, { 0x1.5c2348p-1, 0x1.60625cp-1 }, { 0x1.5ee02ep-1, 0x1.5c8342p-1 }, { 0x1.619556p-1, 0x1.58a446p-1 }, { 0x1.6442c0p-1, 0x1.54c5a6p-1 }, { 0x1.66e86ep-1, 0x1.50e79ep-1 }, { 0x1.69865ep-1, 0x1.4d0a68p-1 }, { 0x1.6c1c98p-1, 0x1.492e42p-1 }, { 0x1.6eab18p-1, 0x1.455366p-1 }, { 0x1.7131e6p-1, 0x1.417a0cp-1 }, { 0x1.73b102p-1, 0x1.3da26ep-1 }, { 0x1.762870p-1, 0x1.39ccc2p-1 }, { 0x1.789836p-1, 
0x1.35f940p-1 }, { 0x1.7b0058p-1, 0x1.32281ep-1 }, { 0x1.7d60d8p-1, 0x1.2e5992p-1 }, { 0x1.7fb9c0p-1, 0x1.2a8dcep-1 }, { 0x1.820b12p-1, 0x1.26c508p-1 }, { 0x1.8454d6p-1, 0x1.22ff72p-1 }, { 0x1.869712p-1, 0x1.1f3d3cp-1 }, { 0x1.88d1cep-1, 0x1.1b7e98p-1 }, { 0x1.8b050ep-1, 0x1.17c3b6p-1 }, { 0x1.8d30dep-1, 0x1.140cc4p-1 }, { 0x1.8f5544p-1, 0x1.1059eep-1 }, { 0x1.91724ap-1, 0x1.0cab62p-1 }, { 0x1.9387f6p-1, 0x1.09014cp-1 }, { 0x1.959652p-1, 0x1.055bd6p-1 }, { 0x1.979d68p-1, 0x1.01bb2cp-1 }, { 0x1.999d42p-1, 0x1.fc3ee6p-2 }, { 0x1.9b95e8p-1, 0x1.f511aap-2 }, { 0x1.9d8768p-1, 0x1.edeeeep-2 }, { 0x1.9f71cap-1, 0x1.e6d700p-2 }, { 0x1.a1551ap-1, 0x1.dfca26p-2 }, { 0x1.a33162p-1, 0x1.d8c8aap-2 }, { 0x1.a506b0p-1, 0x1.d1d2d0p-2 }, { 0x1.a6d50cp-1, 0x1.cae8dap-2 }, { 0x1.a89c86p-1, 0x1.c40b08p-2 }, { 0x1.aa5d26p-1, 0x1.bd3998p-2 }, { 0x1.ac16fcp-1, 0x1.b674c8p-2 }, { 0x1.adca14p-1, 0x1.afbcd4p-2 }, { 0x1.af767ap-1, 0x1.a911f0p-2 }, { 0x1.b11c3cp-1, 0x1.a27456p-2 }, { 0x1.b2bb68p-1, 0x1.9be438p-2 }, { 0x1.b4540ap-1, 0x1.9561c8p-2 }, { 0x1.b5e630p-1, 0x1.8eed36p-2 }, { 0x1.b771e8p-1, 0x1.8886b2p-2 }, { 0x1.b8f742p-1, 0x1.822e66p-2 }, { 0x1.ba764ap-1, 0x1.7be47ap-2 }, { 0x1.bbef10p-1, 0x1.75a91ap-2 }, { 0x1.bd61a2p-1, 0x1.6f7c6ap-2 }, { 0x1.bece0ep-1, 0x1.695e8cp-2 }, { 0x1.c03464p-1, 0x1.634fa6p-2 }, { 0x1.c194b2p-1, 0x1.5d4fd4p-2 }, { 0x1.c2ef08p-1, 0x1.575f34p-2 }, { 0x1.c44376p-1, 0x1.517de6p-2 }, { 0x1.c5920ap-1, 0x1.4bac00p-2 }, { 0x1.c6dad2p-1, 0x1.45e99cp-2 }, { 0x1.c81de2p-1, 0x1.4036d0p-2 }, { 0x1.c95b46p-1, 0x1.3a93b2p-2 }, { 0x1.ca930ep-1, 0x1.350052p-2 }, { 0x1.cbc54cp-1, 0x1.2f7cc4p-2 }, { 0x1.ccf20cp-1, 0x1.2a0916p-2 }, { 0x1.ce1962p-1, 0x1.24a554p-2 }, { 0x1.cf3b5cp-1, 0x1.1f518ap-2 }, { 0x1.d0580cp-1, 0x1.1a0dc6p-2 }, { 0x1.d16f7ep-1, 0x1.14da0ap-2 }, { 0x1.d281c4p-1, 0x1.0fb662p-2 }, { 0x1.d38ef0p-1, 0x1.0aa2d0p-2 }, { 0x1.d49710p-1, 0x1.059f5ap-2 }, { 0x1.d59a34p-1, 0x1.00ac00p-2 }, { 0x1.d6986cp-1, 0x1.f79184p-3 }, { 0x1.d791cap-1, 0x1.edeb40p-3 }, { 0x1.d8865ep-1, 0x1.e46530p-3 }, { 0x1.d97636p-1, 0x1.daff4ap-3 }, { 0x1.da6162p-1, 0x1.d1b982p-3 }, { 0x1.db47f4p-1, 0x1.c893cep-3 }, { 0x1.dc29fcp-1, 0x1.bf8e1cp-3 }, { 0x1.dd0788p-1, 0x1.b6a856p-3 }, { 0x1.dde0aap-1, 0x1.ade26cp-3 }, { 0x1.deb570p-1, 0x1.a53c42p-3 }, { 0x1.df85eap-1, 0x1.9cb5bep-3 }, { 0x1.e0522ap-1, 0x1.944ec2p-3 }, { 0x1.e11a3ep-1, 0x1.8c0732p-3 }, { 0x1.e1de36p-1, 0x1.83deeap-3 }, { 0x1.e29e22p-1, 0x1.7bd5c8p-3 }, { 0x1.e35a12p-1, 0x1.73eba4p-3 }, { 0x1.e41214p-1, 0x1.6c2056p-3 }, { 0x1.e4c638p-1, 0x1.6473b6p-3 }, { 0x1.e5768cp-1, 0x1.5ce596p-3 }, { 0x1.e62322p-1, 0x1.5575c8p-3 }, { 0x1.e6cc08p-1, 0x1.4e241ep-3 }, { 0x1.e7714ap-1, 0x1.46f066p-3 }, { 0x1.e812fcp-1, 0x1.3fda6cp-3 }, { 0x1.e8b12ap-1, 0x1.38e1fap-3 }, { 0x1.e94be4p-1, 0x1.3206dcp-3 }, { 0x1.e9e336p-1, 0x1.2b48dap-3 }, { 0x1.ea7730p-1, 0x1.24a7b8p-3 }, { 0x1.eb07e2p-1, 0x1.1e233ep-3 }, { 0x1.eb9558p-1, 0x1.17bb2cp-3 }, { 0x1.ec1fa2p-1, 0x1.116f48p-3 }, { 0x1.eca6ccp-1, 0x1.0b3f52p-3 }, { 0x1.ed2ae6p-1, 0x1.052b0cp-3 }, { 0x1.edabfcp-1, 0x1.fe6460p-4 }, { 0x1.ee2a1ep-1, 0x1.f2a902p-4 }, { 0x1.eea556p-1, 0x1.e72372p-4 }, { 0x1.ef1db4p-1, 0x1.dbd32ap-4 }, { 0x1.ef9344p-1, 0x1.d0b7a0p-4 }, { 0x1.f00614p-1, 0x1.c5d04ap-4 }, { 0x1.f07630p-1, 0x1.bb1c98p-4 }, { 0x1.f0e3a6p-1, 0x1.b09bfcp-4 }, { 0x1.f14e82p-1, 0x1.a64de6p-4 }, { 0x1.f1b6d0p-1, 0x1.9c31c6p-4 }, { 0x1.f21ca0p-1, 0x1.92470ap-4 }, { 0x1.f27ff8p-1, 0x1.888d1ep-4 }, { 0x1.f2e0eap-1, 0x1.7f036cp-4 }, { 0x1.f33f7ep-1, 0x1.75a960p-4 }, { 0x1.f39bc2p-1, 0x1.6c7e64p-4 }, { 0x1.f3f5c2p-1, 0x1.6381e2p-4 }, { 
0x1.f44d88p-1, 0x1.5ab342p-4 }, { 0x1.f4a31ep-1, 0x1.5211ecp-4 }, { 0x1.f4f694p-1, 0x1.499d48p-4 }, { 0x1.f547f2p-1, 0x1.4154bcp-4 }, { 0x1.f59742p-1, 0x1.3937b2p-4 }, { 0x1.f5e490p-1, 0x1.31458ep-4 }, { 0x1.f62fe8p-1, 0x1.297dbap-4 }, { 0x1.f67952p-1, 0x1.21df9ap-4 }, { 0x1.f6c0dcp-1, 0x1.1a6a96p-4 }, { 0x1.f7068cp-1, 0x1.131e14p-4 }, { 0x1.f74a6ep-1, 0x1.0bf97ep-4 }, { 0x1.f78c8cp-1, 0x1.04fc3ap-4 }, { 0x1.f7cceep-1, 0x1.fc4b5ep-5 }, { 0x1.f80ba2p-1, 0x1.eeea8cp-5 }, { 0x1.f848acp-1, 0x1.e1d4d0p-5 }, { 0x1.f8841ap-1, 0x1.d508fap-5 }, { 0x1.f8bdf2p-1, 0x1.c885e0p-5 }, { 0x1.f8f63ep-1, 0x1.bc4a54p-5 }, { 0x1.f92d08p-1, 0x1.b05530p-5 }, { 0x1.f96256p-1, 0x1.a4a54ap-5 }, { 0x1.f99634p-1, 0x1.99397ap-5 }, { 0x1.f9c8a8p-1, 0x1.8e109cp-5 }, { 0x1.f9f9bap-1, 0x1.83298ep-5 }, { 0x1.fa2974p-1, 0x1.78832cp-5 }, { 0x1.fa57dep-1, 0x1.6e1c58p-5 }, { 0x1.fa84fep-1, 0x1.63f3f6p-5 }, { 0x1.fab0dep-1, 0x1.5a08e8p-5 }, { 0x1.fadb84p-1, 0x1.505a18p-5 }, { 0x1.fb04f6p-1, 0x1.46e66cp-5 }, { 0x1.fb2d40p-1, 0x1.3dacd2p-5 }, { 0x1.fb5464p-1, 0x1.34ac36p-5 }, { 0x1.fb7a6cp-1, 0x1.2be38cp-5 }, { 0x1.fb9f60p-1, 0x1.2351c2p-5 }, { 0x1.fbc344p-1, 0x1.1af5d2p-5 }, { 0x1.fbe61ep-1, 0x1.12ceb4p-5 }, { 0x1.fc07fap-1, 0x1.0adb60p-5 }, { 0x1.fc28d8p-1, 0x1.031ad6p-5 }, { 0x1.fc48c2p-1, 0x1.f7182ap-6 }, { 0x1.fc67bcp-1, 0x1.e85c44p-6 }, { 0x1.fc85d0p-1, 0x1.da0006p-6 }, { 0x1.fca2fep-1, 0x1.cc0180p-6 }, { 0x1.fcbf52p-1, 0x1.be5ecep-6 }, { 0x1.fcdaccp-1, 0x1.b1160ap-6 }, { 0x1.fcf576p-1, 0x1.a4255ap-6 }, { 0x1.fd0f54p-1, 0x1.978ae8p-6 }, { 0x1.fd286ap-1, 0x1.8b44e6p-6 }, { 0x1.fd40bep-1, 0x1.7f5188p-6 }, { 0x1.fd5856p-1, 0x1.73af0cp-6 }, { 0x1.fd6f34p-1, 0x1.685bb6p-6 }, { 0x1.fd8562p-1, 0x1.5d55ccp-6 }, { 0x1.fd9ae2p-1, 0x1.529b9ep-6 }, { 0x1.fdafb8p-1, 0x1.482b84p-6 }, { 0x1.fdc3e8p-1, 0x1.3e03d8p-6 }, { 0x1.fdd77ap-1, 0x1.3422fep-6 }, { 0x1.fdea6ep-1, 0x1.2a875cp-6 }, { 0x1.fdfcccp-1, 0x1.212f62p-6 }, { 0x1.fe0e96p-1, 0x1.181984p-6 }, { 0x1.fe1fd0p-1, 0x1.0f443ep-6 }, { 0x1.fe3080p-1, 0x1.06ae14p-6 }, { 0x1.fe40a6p-1, 0x1.fcab14p-7 }, { 0x1.fe504cp-1, 0x1.ec7262p-7 }, { 0x1.fe5f70p-1, 0x1.dcaf36p-7 }, { 0x1.fe6e18p-1, 0x1.cd5ecap-7 }, { 0x1.fe7c46p-1, 0x1.be7e5ap-7 }, { 0x1.fe8a00p-1, 0x1.b00b38p-7 }, { 0x1.fe9748p-1, 0x1.a202bep-7 }, { 0x1.fea422p-1, 0x1.94624ep-7 }, { 0x1.feb090p-1, 0x1.87275ep-7 }, { 0x1.febc96p-1, 0x1.7a4f6ap-7 }, { 0x1.fec836p-1, 0x1.6dd7fep-7 }, { 0x1.fed374p-1, 0x1.61beaep-7 }, { 0x1.fede52p-1, 0x1.56011cp-7 }, { 0x1.fee8d4p-1, 0x1.4a9cf6p-7 }, { 0x1.fef2fep-1, 0x1.3f8ff6p-7 }, { 0x1.fefccep-1, 0x1.34d7dcp-7 }, { 0x1.ff064cp-1, 0x1.2a727ap-7 }, { 0x1.ff0f76p-1, 0x1.205dacp-7 }, { 0x1.ff1852p-1, 0x1.169756p-7 }, { 0x1.ff20e0p-1, 0x1.0d1d6ap-7 }, { 0x1.ff2924p-1, 0x1.03ede2p-7 }, { 0x1.ff3120p-1, 0x1.f60d8ap-8 }, { 0x1.ff38d6p-1, 0x1.e4cc4ap-8 }, { 0x1.ff4048p-1, 0x1.d4143ap-8 }, { 0x1.ff4778p-1, 0x1.c3e1a6p-8 }, { 0x1.ff4e68p-1, 0x1.b430ecp-8 }, { 0x1.ff551ap-1, 0x1.a4fe84p-8 }, { 0x1.ff5b90p-1, 0x1.9646f4p-8 }, { 0x1.ff61ccp-1, 0x1.8806d8p-8 }, { 0x1.ff67d0p-1, 0x1.7a3adep-8 }, { 0x1.ff6d9ep-1, 0x1.6cdfccp-8 }, { 0x1.ff7338p-1, 0x1.5ff276p-8 }, { 0x1.ff789ep-1, 0x1.536fc2p-8 }, { 0x1.ff7dd4p-1, 0x1.4754acp-8 }, { 0x1.ff82dap-1, 0x1.3b9e40p-8 }, { 0x1.ff87b2p-1, 0x1.30499cp-8 }, { 0x1.ff8c5cp-1, 0x1.2553eep-8 }, { 0x1.ff90dcp-1, 0x1.1aba78p-8 }, { 0x1.ff9532p-1, 0x1.107a8cp-8 }, { 0x1.ff9960p-1, 0x1.06918cp-8 }, { 0x1.ff9d68p-1, 0x1.f9f9d0p-9 }, { 0x1.ffa14ap-1, 0x1.e77448p-9 }, { 0x1.ffa506p-1, 0x1.d58da6p-9 }, { 0x1.ffa8a0p-1, 0x1.c4412cp-9 }, { 0x1.ffac18p-1, 0x1.b38a3ap-9 }, { 0x1.ffaf6ep-1, 
0x1.a36454p-9 }, { 0x1.ffb2a6p-1, 0x1.93cb12p-9 }, { 0x1.ffb5bep-1, 0x1.84ba30p-9 }, { 0x1.ffb8b8p-1, 0x1.762d84p-9 }, { 0x1.ffbb98p-1, 0x1.682100p-9 }, { 0x1.ffbe5ap-1, 0x1.5a90b0p-9 }, { 0x1.ffc102p-1, 0x1.4d78bcp-9 }, { 0x1.ffc390p-1, 0x1.40d564p-9 }, { 0x1.ffc606p-1, 0x1.34a306p-9 }, { 0x1.ffc862p-1, 0x1.28de12p-9 }, { 0x1.ffcaa8p-1, 0x1.1d8318p-9 }, { 0x1.ffccd8p-1, 0x1.128ebap-9 }, { 0x1.ffcef4p-1, 0x1.07fdb4p-9 }, { 0x1.ffd0fap-1, 0x1.fb99b8p-10 }, { 0x1.ffd2eap-1, 0x1.e7f232p-10 }, { 0x1.ffd4cap-1, 0x1.d4fed8p-10 }, { 0x1.ffd696p-1, 0x1.c2b9d0p-10 }, { 0x1.ffd84ep-1, 0x1.b11d70p-10 }, { 0x1.ffd9f8p-1, 0x1.a02436p-10 }, { 0x1.ffdb90p-1, 0x1.8fc8c8p-10 }, { 0x1.ffdd18p-1, 0x1.8005f0p-10 }, { 0x1.ffde90p-1, 0x1.70d6a4p-10 }, { 0x1.ffdffap-1, 0x1.6235fcp-10 }, { 0x1.ffe154p-1, 0x1.541f34p-10 }, { 0x1.ffe2a2p-1, 0x1.468daep-10 }, { 0x1.ffe3e2p-1, 0x1.397ceep-10 }, { 0x1.ffe514p-1, 0x1.2ce898p-10 }, { 0x1.ffe63cp-1, 0x1.20cc76p-10 }, { 0x1.ffe756p-1, 0x1.15246ep-10 }, { 0x1.ffe866p-1, 0x1.09ec86p-10 }, { 0x1.ffe96ap-1, 0x1.fe41cep-11 }, { 0x1.ffea64p-1, 0x1.e97ba4p-11 }, { 0x1.ffeb54p-1, 0x1.d57f52p-11 }, { 0x1.ffec3ap-1, 0x1.c245d4p-11 }, { 0x1.ffed16p-1, 0x1.afc85ep-11 }, { 0x1.ffedeap-1, 0x1.9e0058p-11 }, { 0x1.ffeeb4p-1, 0x1.8ce75ep-11 }, { 0x1.ffef76p-1, 0x1.7c7744p-11 }, { 0x1.fff032p-1, 0x1.6caa0ep-11 }, { 0x1.fff0e4p-1, 0x1.5d79ecp-11 }, { 0x1.fff18ep-1, 0x1.4ee142p-11 }, { 0x1.fff232p-1, 0x1.40daa4p-11 }, { 0x1.fff2d0p-1, 0x1.3360ccp-11 }, { 0x1.fff366p-1, 0x1.266ea8p-11 }, { 0x1.fff3f6p-1, 0x1.19ff46p-11 }, { 0x1.fff480p-1, 0x1.0e0de8p-11 }, { 0x1.fff504p-1, 0x1.0295f0p-11 }, { 0x1.fff582p-1, 0x1.ef25d4p-12 }, { 0x1.fff5fcp-1, 0x1.da0110p-12 }, { 0x1.fff670p-1, 0x1.c5b542p-12 }, { 0x1.fff6dep-1, 0x1.b23a5ap-12 }, { 0x1.fff74ap-1, 0x1.9f8894p-12 }, { 0x1.fff7aep-1, 0x1.8d986ap-12 }, { 0x1.fff810p-1, 0x1.7c629ap-12 }, { 0x1.fff86cp-1, 0x1.6be022p-12 }, { 0x1.fff8c6p-1, 0x1.5c0a38p-12 }, { 0x1.fff91cp-1, 0x1.4cda54p-12 }, { 0x1.fff96cp-1, 0x1.3e4a24p-12 }, { 0x1.fff9bap-1, 0x1.305390p-12 }, { 0x1.fffa04p-1, 0x1.22f0b4p-12 }, { 0x1.fffa4cp-1, 0x1.161be4p-12 }, { 0x1.fffa90p-1, 0x1.09cfa4p-12 }, { 0x1.fffad0p-1, 0x1.fc0d56p-13 }, { 0x1.fffb0ep-1, 0x1.e577bcp-13 }, { 0x1.fffb4ap-1, 0x1.cfd4a6p-13 }, { 0x1.fffb82p-1, 0x1.bb1a96p-13 }, { 0x1.fffbb8p-1, 0x1.a74068p-13 }, { 0x1.fffbecp-1, 0x1.943d4ap-13 }, { 0x1.fffc1ep-1, 0x1.8208bcp-13 }, { 0x1.fffc4ep-1, 0x1.709a8ep-13 }, { 0x1.fffc7ap-1, 0x1.5feadap-13 }, { 0x1.fffca6p-1, 0x1.4ff208p-13 }, { 0x1.fffccep-1, 0x1.40a8c2p-13 }, { 0x1.fffcf6p-1, 0x1.3207fcp-13 }, { 0x1.fffd1ap-1, 0x1.2408eap-13 }, { 0x1.fffd3ep-1, 0x1.16a502p-13 }, { 0x1.fffd60p-1, 0x1.09d5f8p-13 }, { 0x1.fffd80p-1, 0x1.fb2b7ap-14 }, { 0x1.fffda0p-1, 0x1.e3bcf4p-14 }, { 0x1.fffdbep-1, 0x1.cd5528p-14 }, { 0x1.fffddap-1, 0x1.b7e946p-14 }, { 0x1.fffdf4p-1, 0x1.a36eecp-14 }, { 0x1.fffe0ep-1, 0x1.8fdc1cp-14 }, { 0x1.fffe26p-1, 0x1.7d2738p-14 }, { 0x1.fffe3ep-1, 0x1.6b4702p-14 }, { 0x1.fffe54p-1, 0x1.5a329cp-14 }, { 0x1.fffe68p-1, 0x1.49e178p-14 }, { 0x1.fffe7ep-1, 0x1.3a4b60p-14 }, { 0x1.fffe90p-1, 0x1.2b6876p-14 }, { 0x1.fffea2p-1, 0x1.1d3120p-14 }, { 0x1.fffeb4p-1, 0x1.0f9e1cp-14 }, { 0x1.fffec4p-1, 0x1.02a868p-14 }, { 0x1.fffed4p-1, 0x1.ec929ap-15 }, { 0x1.fffee4p-1, 0x1.d4f4b4p-15 }, { 0x1.fffef2p-1, 0x1.be6abcp-15 }, { 0x1.ffff00p-1, 0x1.a8e8ccp-15 }, { 0x1.ffff0cp-1, 0x1.94637ep-15 }, { 0x1.ffff18p-1, 0x1.80cfdcp-15 }, { 0x1.ffff24p-1, 0x1.6e2368p-15 }, { 0x1.ffff30p-1, 0x1.5c540cp-15 }, { 0x1.ffff3ap-1, 0x1.4b581cp-15 }, { 0x1.ffff44p-1, 0x1.3b2652p-15 }, { 
0x1.ffff4ep-1, 0x1.2bb5ccp-15 }, { 0x1.ffff56p-1, 0x1.1cfe02p-15 }, { 0x1.ffff60p-1, 0x1.0ef6c4p-15 }, { 0x1.ffff68p-1, 0x1.019842p-15 }, { 0x1.ffff70p-1, 0x1.e9b5e8p-16 }, { 0x1.ffff78p-1, 0x1.d16f58p-16 }, { 0x1.ffff7ep-1, 0x1.ba4f04p-16 }, { 0x1.ffff84p-1, 0x1.a447b8p-16 }, { 0x1.ffff8cp-1, 0x1.8f4cccp-16 }, { 0x1.ffff92p-1, 0x1.7b5224p-16 }, { 0x1.ffff98p-1, 0x1.684c22p-16 }, { 0x1.ffff9cp-1, 0x1.562facp-16 }, { 0x1.ffffa2p-1, 0x1.44f21ep-16 }, { 0x1.ffffa6p-1, 0x1.34894ap-16 }, { 0x1.ffffacp-1, 0x1.24eb72p-16 }, { 0x1.ffffb0p-1, 0x1.160f44p-16 }, { 0x1.ffffb4p-1, 0x1.07ebd2p-16 }, { 0x1.ffffb8p-1, 0x1.f4f12ep-17 }, { 0x1.ffffbcp-1, 0x1.db5ad0p-17 }, { 0x1.ffffc0p-1, 0x1.c304f0p-17 }, { 0x1.ffffc4p-1, 0x1.abe09ep-17 }, { 0x1.ffffc6p-1, 0x1.95df98p-17 }, { 0x1.ffffcap-1, 0x1.80f43ap-17 }, { 0x1.ffffccp-1, 0x1.6d1178p-17 }, { 0x1.ffffd0p-1, 0x1.5a2ae0p-17 }, { 0x1.ffffd2p-1, 0x1.483488p-17 }, { 0x1.ffffd4p-1, 0x1.372310p-17 }, { 0x1.ffffd6p-1, 0x1.26eb9ep-17 }, { 0x1.ffffd8p-1, 0x1.1783cep-17 }, { 0x1.ffffdcp-1, 0x1.08e1bap-17 }, { 0x1.ffffdep-1, 0x1.f5f7d8p-18 }, { 0x1.ffffdep-1, 0x1.db92b6p-18 }, { 0x1.ffffe0p-1, 0x1.c282cep-18 }, { 0x1.ffffe2p-1, 0x1.aab7acp-18 }, { 0x1.ffffe4p-1, 0x1.94219cp-18 }, { 0x1.ffffe6p-1, 0x1.7eb1a2p-18 }, { 0x1.ffffe8p-1, 0x1.6a5972p-18 }, { 0x1.ffffe8p-1, 0x1.570b6ap-18 }, { 0x1.ffffeap-1, 0x1.44ba86p-18 }, { 0x1.ffffeap-1, 0x1.335a62p-18 }, { 0x1.ffffecp-1, 0x1.22df2ap-18 }, { 0x1.ffffeep-1, 0x1.133d96p-18 }, { 0x1.ffffeep-1, 0x1.046aeap-18 }, { 0x1.fffff0p-1, 0x1.ecb9d0p-19 }, { 0x1.fffff0p-1, 0x1.d21398p-19 }, { 0x1.fffff2p-1, 0x1.b8d094p-19 }, { 0x1.fffff2p-1, 0x1.a0df10p-19 }, { 0x1.fffff2p-1, 0x1.8a2e26p-19 }, { 0x1.fffff4p-1, 0x1.74adc8p-19 }, { 0x1.fffff4p-1, 0x1.604ea8p-19 }, { 0x1.fffff4p-1, 0x1.4d0232p-19 }, { 0x1.fffff6p-1, 0x1.3aba86p-19 }, { 0x1.fffff6p-1, 0x1.296a70p-19 }, { 0x1.fffff6p-1, 0x1.190562p-19 }, { 0x1.fffff8p-1, 0x1.097f62p-19 }, { 0x1.fffff8p-1, 0x1.f59a20p-20 }, { 0x1.fffff8p-1, 0x1.d9c736p-20 }, { 0x1.fffff8p-1, 0x1.bf716cp-20 }, { 0x1.fffffap-1, 0x1.a6852cp-20 }, { 0x1.fffffap-1, 0x1.8eefd8p-20 }, { 0x1.fffffap-1, 0x1.789fb8p-20 }, { 0x1.fffffap-1, 0x1.6383f8p-20 }, { 0x1.fffffap-1, 0x1.4f8c96p-20 }, { 0x1.fffffap-1, 0x1.3caa62p-20 }, { 0x1.fffffcp-1, 0x1.2acee2p-20 }, { 0x1.fffffcp-1, 0x1.19ec60p-20 }, { 0x1.fffffcp-1, 0x1.09f5d0p-20 }, { 0x1.fffffcp-1, 0x1.f5bd96p-21 }, { 0x1.fffffcp-1, 0x1.d9371ep-21 }, { 0x1.fffffcp-1, 0x1.be41dep-21 }, { 0x1.fffffcp-1, 0x1.a4c89ep-21 }, { 0x1.fffffcp-1, 0x1.8cb738p-21 }, { 0x1.fffffep-1, 0x1.75fa8ep-21 }, { 0x1.fffffep-1, 0x1.608078p-21 }, { 0x1.fffffep-1, 0x1.4c37c0p-21 }, { 0x1.fffffep-1, 0x1.39100ep-21 }, { 0x1.fffffep-1, 0x1.26f9e0p-21 }, { 0x1.fffffep-1, 0x1.15e682p-21 }, { 0x1.fffffep-1, 0x1.05c804p-21 }, { 0x1.fffffep-1, 0x1.ed2254p-22 }, { 0x1.fffffep-1, 0x1.d06ad6p-22 }, { 0x1.fffffep-1, 0x1.b551c8p-22 }, { 0x1.fffffep-1, 0x1.9bc0a0p-22 }, { 0x1.fffffep-1, 0x1.83a200p-22 }, { 0x1.fffffep-1, 0x1.6ce1aap-22 }, { 0x1.fffffep-1, 0x1.576c72p-22 }, { 0x1.fffffep-1, 0x1.43302cp-22 }, { 0x1.fffffep-1, 0x1.301ba2p-22 }, { 0x1.fffffep-1, 0x1.1e1e86p-22 }, { 0x1.fffffep-1, 0x1.0d2966p-22 }, { 0x1.000000p+0, 0x1.fa5b50p-23 }, { 0x1.000000p+0, 0x1.dc3ae4p-23 }, { 0x1.000000p+0, 0x1.bfd756p-23 }, { 0x1.000000p+0, 0x1.a517dap-23 }, { 0x1.000000p+0, 0x1.8be4f8p-23 }, { 0x1.000000p+0, 0x1.74287ep-23 }, { 0x1.000000p+0, 0x1.5dcd66p-23 }, { 0x1.000000p+0, 0x1.48bfd4p-23 }, { 0x1.000000p+0, 0x1.34ecf8p-23 }, { 0x1.000000p+0, 0x1.224310p-23 }, { 0x1.000000p+0, 0x1.10b148p-23 }, }, }; diff --git 
a/contrib/arm-optimized-routines/math/aarch64/v_exp2f_1u.c b/contrib/arm-optimized-routines/math/aarch64/v_exp2f_1u.c deleted file mode 100644 index ba6b02fbb4bc..000000000000 --- a/contrib/arm-optimized-routines/math/aarch64/v_exp2f_1u.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Single-precision vector 2^x function. - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "mathlib.h" -#include "v_math.h" - -static const float Poly[] = { - /* maxerr: 0.878 ulp. */ - 0x1.416b5ep-13f, 0x1.5f082ep-10f, 0x1.3b2dep-7f, 0x1.c6af7cp-5f, 0x1.ebfbdcp-3f, 0x1.62e43p-1f -}; -#define C0 v_f32 (Poly[0]) -#define C1 v_f32 (Poly[1]) -#define C2 v_f32 (Poly[2]) -#define C3 v_f32 (Poly[3]) -#define C4 v_f32 (Poly[4]) -#define C5 v_f32 (Poly[5]) - -#define Shift v_f32 (0x1.8p23f) -#define InvLn2 v_f32 (0x1.715476p+0f) -#define Ln2hi v_f32 (0x1.62e4p-1f) -#define Ln2lo v_f32 (0x1.7f7d1cp-20f) - -static float32x4_t VPCS_ATTR NOINLINE -specialcase (float32x4_t poly, float32x4_t n, uint32x4_t e, float32x4_t absn) -{ - /* 2^n may overflow, break it up into s1*s2. */ - uint32x4_t b = (n <= v_f32 (0.0f)) & v_u32 (0x83000000); - float32x4_t s1 = vreinterpretq_f32_u32 (v_u32 (0x7f000000) + b); - float32x4_t s2 = vreinterpretq_f32_u32 (e - b); - uint32x4_t cmp = absn > v_f32 (192.0f); - float32x4_t r1 = s1 * s1; - float32x4_t r0 = poly * s1 * s2; - return vreinterpretq_f32_u32 ((cmp & vreinterpretq_u32_f32 (r1)) - | (~cmp & vreinterpretq_u32_f32 (r0))); -} - -float32x4_t VPCS_ATTR -_ZGVnN4v_exp2f_1u (float32x4_t x) -{ - float32x4_t n, r, scale, poly, absn; - uint32x4_t cmp, e; - - /* exp2(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] - x = n + r, with r in [-1/2, 1/2]. */ -#if 0 - float32x4_t z; - z = x + Shift; - n = z - Shift; - r = x - n; - e = vreinterpretq_u32_f32 (z) << 23; -#else - n = vrndaq_f32 (x); - r = x - n; - e = vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)) << 23; -#endif - scale = vreinterpretq_f32_u32 (e + v_u32 (0x3f800000)); - absn = vabsq_f32 (n); - cmp = absn > v_f32 (126.0f); - poly = vfmaq_f32 (C1, C0, r); - poly = vfmaq_f32 (C2, poly, r); - poly = vfmaq_f32 (C3, poly, r); - poly = vfmaq_f32 (C4, poly, r); - poly = vfmaq_f32 (C5, poly, r); - poly = vfmaq_f32 (v_f32 (1.0f), poly, r); - if (unlikely (v_any_u32 (cmp))) - return specialcase (poly, n, e, absn); - return scale * poly; -} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_exp_data.c b/contrib/arm-optimized-routines/math/aarch64/v_exp_data.c index 45f0848cac5b..59db77ac58cc 100644 --- a/contrib/arm-optimized-routines/math/aarch64/v_exp_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/v_exp_data.c @@ -1,146 +1,55 @@ /* - * Lookup table for double-precision e^x vector function. + * Scale values for vector exp and exp2 * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "v_math.h" +#include "math_config.h" -# define N (1 << V_EXP_TABLE_BITS) - -/* 2^(j/N), j=0..N. */ +/* 2^(j/N), j=0..N, N=2^7=128. 
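The entries below are not the raw bit patterns of 2^(j/N): the index j is folded out of the stored word so the kernel can splice the exponent of 2^n back in with a single integer add. A minimal sketch of the construction, assuming the host exp2() rounds these 128 values correctly (which C does not guarantee); __v_exp_tail_data later in this patch uses the same scheme with N = 2^8 = 256:

#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define V_EXP_TABLE_BITS 7
#define N (1 << V_EXP_TABLE_BITS)

int
main (void)
{
  for (uint64_t j = 0; j < N; j++)
    {
      double s = exp2 ((double) j / N);
      uint64_t bits;
      memcpy (&bits, &s, sizeof bits);
      /* Subtract j at the exponent/significand boundary; the kernel adds
	 (n << (52 - V_EXP_TABLE_BITS)) back when building the scale.  */
      printf ("0x%016" PRIx64 ",\n", bits - (j << (52 - V_EXP_TABLE_BITS)));
    }
  return 0;
}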
*/ const uint64_t __v_exp_data[] = { -# if N == 128 0x3ff0000000000000, 0x3feff63da9fb3335, 0x3fefec9a3e778061, 0x3fefe315e86e7f85, 0x3fefd9b0d3158574, 0x3fefd06b29ddf6de, 0x3fefc74518759bc8, 0x3fefbe3ecac6f383, 0x3fefb5586cf9890f, 0x3fefac922b7247f7, 0x3fefa3ec32d3d1a2, 0x3fef9b66affed31b, 0x3fef9301d0125b51, 0x3fef8abdc06c31cc, 0x3fef829aaea92de0, 0x3fef7a98c8a58e51, 0x3fef72b83c7d517b, 0x3fef6af9388c8dea, 0x3fef635beb6fcb75, 0x3fef5be084045cd4, 0x3fef54873168b9aa, 0x3fef4d5022fcd91d, 0x3fef463b88628cd6, 0x3fef3f49917ddc96, 0x3fef387a6e756238, 0x3fef31ce4fb2a63f, 0x3fef2b4565e27cdd, 0x3fef24dfe1f56381, 0x3fef1e9df51fdee1, 0x3fef187fd0dad990, 0x3fef1285a6e4030b, 0x3fef0cafa93e2f56, 0x3fef06fe0a31b715, 0x3fef0170fc4cd831, 0x3feefc08b26416ff, 0x3feef6c55f929ff1, 0x3feef1a7373aa9cb, 0x3feeecae6d05d866, 0x3feee7db34e59ff7, 0x3feee32dc313a8e5, 0x3feedea64c123422, 0x3feeda4504ac801c, 0x3feed60a21f72e2a, 0x3feed1f5d950a897, 0x3feece086061892d, 0x3feeca41ed1d0057, 0x3feec6a2b5c13cd0, 0x3feec32af0d7d3de, 0x3feebfdad5362a27, 0x3feebcb299fddd0d, 0x3feeb9b2769d2ca7, 0x3feeb6daa2cf6642, 0x3feeb42b569d4f82, 0x3feeb1a4ca5d920f, 0x3feeaf4736b527da, 0x3feead12d497c7fd, 0x3feeab07dd485429, 0x3feea9268a5946b7, 0x3feea76f15ad2148, 0x3feea5e1b976dc09, 0x3feea47eb03a5585, 0x3feea34634ccc320, 0x3feea23882552225, 0x3feea155d44ca973, 0x3feea09e667f3bcd, 0x3feea012750bdabf, 0x3fee9fb23c651a2f, 0x3fee9f7df9519484, 0x3fee9f75e8ec5f74, 0x3fee9f9a48a58174, 0x3fee9feb564267c9, 0x3feea0694fde5d3f, 0x3feea11473eb0187, 0x3feea1ed0130c132, 0x3feea2f336cf4e62, 0x3feea427543e1a12, 0x3feea589994cce13, 0x3feea71a4623c7ad, 0x3feea8d99b4492ed, 0x3feeaac7d98a6699, 0x3feeace5422aa0db, 0x3feeaf3216b5448c, 0x3feeb1ae99157736, 0x3feeb45b0b91ffc6, 0x3feeb737b0cdc5e5, 0x3feeba44cbc8520f, 0x3feebd829fde4e50, 0x3feec0f170ca07ba, 0x3feec49182a3f090, 0x3feec86319e32323, 0x3feecc667b5de565, 0x3feed09bec4a2d33, 0x3feed503b23e255d, 0x3feed99e1330b358, 0x3feede6b5579fdbf, 0x3feee36bbfd3f37a, 0x3feee89f995ad3ad, 0x3feeee07298db666, 0x3feef3a2b84f15fb, 0x3feef9728de5593a, 0x3feeff76f2fb5e47, 0x3fef05b030a1064a, 0x3fef0c1e904bc1d2, 0x3fef12c25bd71e09, 0x3fef199bdd85529c, 0x3fef20ab5fffd07a, 0x3fef27f12e57d14b, 0x3fef2f6d9406e7b5, 0x3fef3720dcef9069, 0x3fef3f0b555dc3fa, 0x3fef472d4a07897c, 0x3fef4f87080d89f2, 0x3fef5818dcfba487, 0x3fef60e316c98398, 0x3fef69e603db3285, 0x3fef7321f301b460, 0x3fef7c97337b9b5f, 0x3fef864614f5a129, 0x3fef902ee78b3ff6, 0x3fef9a51fbc74c83, 0x3fefa4afa2a490da, 0x3fefaf482d8e67f1, 0x3fefba1bee615a27, 0x3fefc52b376bba97, 0x3fefd0765b6e4540, 0x3fefdbfdad9cbe14, 0x3fefe7c1819e90d8, 0x3feff3c22b8f71f1, -# elif N == 256 - 0x3ff0000000000000, 0x3feffb1afa5abcbf, 0x3feff63da9fb3335, - 0x3feff168143b0281, 0x3fefec9a3e778061, 0x3fefe7d42e11bbcc, - 0x3fefe315e86e7f85, 0x3fefde5f72f654b1, 0x3fefd9b0d3158574, - 0x3fefd50a0e3c1f89, 0x3fefd06b29ddf6de, 0x3fefcbd42b72a836, - 0x3fefc74518759bc8, 0x3fefc2bdf66607e0, 0x3fefbe3ecac6f383, - 0x3fefb9c79b1f3919, 0x3fefb5586cf9890f, 0x3fefb0f145e46c85, - 0x3fefac922b7247f7, 0x3fefa83b23395dec, 0x3fefa3ec32d3d1a2, - 0x3fef9fa55fdfa9c5, 0x3fef9b66affed31b, 0x3fef973028d7233e, - 0x3fef9301d0125b51, 0x3fef8edbab5e2ab6, 0x3fef8abdc06c31cc, - 0x3fef86a814f204ab, 0x3fef829aaea92de0, 0x3fef7e95934f312e, - 0x3fef7a98c8a58e51, 0x3fef76a45471c3c2, 0x3fef72b83c7d517b, - 0x3fef6ed48695bbc0, 0x3fef6af9388c8dea, 0x3fef672658375d2f, - 0x3fef635beb6fcb75, 0x3fef5f99f8138a1c, 0x3fef5be084045cd4, - 0x3fef582f95281c6b, 0x3fef54873168b9aa, 0x3fef50e75eb44027, - 0x3fef4d5022fcd91d, 0x3fef49c18438ce4d, 
0x3fef463b88628cd6, - 0x3fef42be3578a819, 0x3fef3f49917ddc96, 0x3fef3bdda27912d1, - 0x3fef387a6e756238, 0x3fef351ffb82140a, 0x3fef31ce4fb2a63f, - 0x3fef2e85711ece75, 0x3fef2b4565e27cdd, 0x3fef280e341ddf29, - 0x3fef24dfe1f56381, 0x3fef21ba7591bb70, 0x3fef1e9df51fdee1, - 0x3fef1b8a66d10f13, 0x3fef187fd0dad990, 0x3fef157e39771b2f, - 0x3fef1285a6e4030b, 0x3fef0f961f641589, 0x3fef0cafa93e2f56, - 0x3fef09d24abd886b, 0x3fef06fe0a31b715, 0x3fef0432edeeb2fd, - 0x3fef0170fc4cd831, 0x3feefeb83ba8ea32, 0x3feefc08b26416ff, - 0x3feef96266e3fa2d, 0x3feef6c55f929ff1, 0x3feef431a2de883b, - 0x3feef1a7373aa9cb, 0x3feeef26231e754a, 0x3feeecae6d05d866, - 0x3feeea401b7140ef, 0x3feee7db34e59ff7, 0x3feee57fbfec6cf4, - 0x3feee32dc313a8e5, 0x3feee0e544ede173, 0x3feedea64c123422, - 0x3feedc70df1c5175, 0x3feeda4504ac801c, 0x3feed822c367a024, - 0x3feed60a21f72e2a, 0x3feed3fb2709468a, 0x3feed1f5d950a897, - 0x3feecffa3f84b9d4, 0x3feece086061892d, 0x3feecc2042a7d232, - 0x3feeca41ed1d0057, 0x3feec86d668b3237, 0x3feec6a2b5c13cd0, - 0x3feec4e1e192aed2, 0x3feec32af0d7d3de, 0x3feec17dea6db7d7, - 0x3feebfdad5362a27, 0x3feebe41b817c114, 0x3feebcb299fddd0d, - 0x3feebb2d81d8abff, 0x3feeb9b2769d2ca7, 0x3feeb8417f4531ee, - 0x3feeb6daa2cf6642, 0x3feeb57de83f4eef, 0x3feeb42b569d4f82, - 0x3feeb2e2f4f6ad27, 0x3feeb1a4ca5d920f, 0x3feeb070dde910d2, - 0x3feeaf4736b527da, 0x3feeae27dbe2c4cf, 0x3feead12d497c7fd, - 0x3feeac0827ff07cc, 0x3feeab07dd485429, 0x3feeaa11fba87a03, - 0x3feea9268a5946b7, 0x3feea84590998b93, 0x3feea76f15ad2148, - 0x3feea6a320dceb71, 0x3feea5e1b976dc09, 0x3feea52ae6cdf6f4, - 0x3feea47eb03a5585, 0x3feea3dd1d1929fd, 0x3feea34634ccc320, - 0x3feea2b9febc8fb7, 0x3feea23882552225, 0x3feea1c1c70833f6, - 0x3feea155d44ca973, 0x3feea0f4b19e9538, 0x3feea09e667f3bcd, - 0x3feea052fa75173e, 0x3feea012750bdabf, 0x3fee9fdcddd47645, - 0x3fee9fb23c651a2f, 0x3fee9f9298593ae5, 0x3fee9f7df9519484, - 0x3fee9f7466f42e87, 0x3fee9f75e8ec5f74, 0x3fee9f8286ead08a, - 0x3fee9f9a48a58174, 0x3fee9fbd35d7cbfd, 0x3fee9feb564267c9, - 0x3feea024b1ab6e09, 0x3feea0694fde5d3f, 0x3feea0b938ac1cf6, - 0x3feea11473eb0187, 0x3feea17b0976cfdb, 0x3feea1ed0130c132, - 0x3feea26a62ff86f0, 0x3feea2f336cf4e62, 0x3feea3878491c491, - 0x3feea427543e1a12, 0x3feea4d2add106d9, 0x3feea589994cce13, - 0x3feea64c1eb941f7, 0x3feea71a4623c7ad, 0x3feea7f4179f5b21, - 0x3feea8d99b4492ed, 0x3feea9cad931a436, 0x3feeaac7d98a6699, - 0x3feeabd0a478580f, 0x3feeace5422aa0db, 0x3feeae05bad61778, - 0x3feeaf3216b5448c, 0x3feeb06a5e0866d9, 0x3feeb1ae99157736, - 0x3feeb2fed0282c8a, 0x3feeb45b0b91ffc6, 0x3feeb5c353aa2fe2, - 0x3feeb737b0cdc5e5, 0x3feeb8b82b5f98e5, 0x3feeba44cbc8520f, - 0x3feebbdd9a7670b3, 0x3feebd829fde4e50, 0x3feebf33e47a22a2, - 0x3feec0f170ca07ba, 0x3feec2bb4d53fe0d, 0x3feec49182a3f090, - 0x3feec674194bb8d5, 0x3feec86319e32323, 0x3feeca5e8d07f29e, - 0x3feecc667b5de565, 0x3feece7aed8eb8bb, 0x3feed09bec4a2d33, - 0x3feed2c980460ad8, 0x3feed503b23e255d, 0x3feed74a8af46052, - 0x3feed99e1330b358, 0x3feedbfe53c12e59, 0x3feede6b5579fdbf, - 0x3feee0e521356eba, 0x3feee36bbfd3f37a, 0x3feee5ff3a3c2774, - 0x3feee89f995ad3ad, 0x3feeeb4ce622f2ff, 0x3feeee07298db666, - 0x3feef0ce6c9a8952, 0x3feef3a2b84f15fb, 0x3feef68415b749b1, - 0x3feef9728de5593a, 0x3feefc6e29f1c52a, 0x3feeff76f2fb5e47, - 0x3fef028cf22749e4, 0x3fef05b030a1064a, 0x3fef08e0b79a6f1f, - 0x3fef0c1e904bc1d2, 0x3fef0f69c3f3a207, 0x3fef12c25bd71e09, - 0x3fef16286141b33d, 0x3fef199bdd85529c, 0x3fef1d1cd9fa652c, - 0x3fef20ab5fffd07a, 0x3fef244778fafb22, 0x3fef27f12e57d14b, - 0x3fef2ba88988c933, 0x3fef2f6d9406e7b5, 0x3fef33405751c4db, 
- 0x3fef3720dcef9069, 0x3fef3b0f2e6d1675, 0x3fef3f0b555dc3fa, - 0x3fef43155b5bab74, 0x3fef472d4a07897c, 0x3fef4b532b08c968, - 0x3fef4f87080d89f2, 0x3fef53c8eacaa1d6, 0x3fef5818dcfba487, - 0x3fef5c76e862e6d3, 0x3fef60e316c98398, 0x3fef655d71ff6075, - 0x3fef69e603db3285, 0x3fef6e7cd63a8315, 0x3fef7321f301b460, - 0x3fef77d5641c0658, 0x3fef7c97337b9b5f, 0x3fef81676b197d17, - 0x3fef864614f5a129, 0x3fef8b333b16ee12, 0x3fef902ee78b3ff6, - 0x3fef953924676d76, 0x3fef9a51fbc74c83, 0x3fef9f7977cdb740, - 0x3fefa4afa2a490da, 0x3fefa9f4867cca6e, 0x3fefaf482d8e67f1, - 0x3fefb4aaa2188510, 0x3fefba1bee615a27, 0x3fefbf9c1cb6412a, - 0x3fefc52b376bba97, 0x3fefcac948dd7274, 0x3fefd0765b6e4540, - 0x3fefd632798844f8, 0x3fefdbfdad9cbe14, 0x3fefe1d802243c89, - 0x3fefe7c1819e90d8, 0x3fefedba3692d514, 0x3feff3c22b8f71f1, - 0x3feff9d96b2a23d9, -# endif }; diff --git a/contrib/arm-optimized-routines/pl/math/v_exp_tail_data.c b/contrib/arm-optimized-routines/math/aarch64/v_exp_tail_data.c similarity index 98% rename from contrib/arm-optimized-routines/pl/math/v_exp_tail_data.c rename to contrib/arm-optimized-routines/math/aarch64/v_exp_tail_data.c index 989dd41d949a..5cc58a40b6b7 100644 --- a/contrib/arm-optimized-routines/pl/math/v_exp_tail_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/v_exp_tail_data.c @@ -1,98 +1,98 @@ /* * Lookup table for double-precision e^x vector function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -/* 2^(j/N), j=0..N, N=2^8=256. Copied from math/v_exp_data.c. */ +/* 2^(j/N), j=0..N, N=2^8=256. */ const uint64_t __v_exp_tail_data[] = { 0x3ff0000000000000, 0x3feffb1afa5abcbf, 0x3feff63da9fb3335, 0x3feff168143b0281, 0x3fefec9a3e778061, 0x3fefe7d42e11bbcc, 0x3fefe315e86e7f85, 0x3fefde5f72f654b1, 0x3fefd9b0d3158574, 0x3fefd50a0e3c1f89, 0x3fefd06b29ddf6de, 0x3fefcbd42b72a836, 0x3fefc74518759bc8, 0x3fefc2bdf66607e0, 0x3fefbe3ecac6f383, 0x3fefb9c79b1f3919, 0x3fefb5586cf9890f, 0x3fefb0f145e46c85, 0x3fefac922b7247f7, 0x3fefa83b23395dec, 0x3fefa3ec32d3d1a2, 0x3fef9fa55fdfa9c5, 0x3fef9b66affed31b, 0x3fef973028d7233e, 0x3fef9301d0125b51, 0x3fef8edbab5e2ab6, 0x3fef8abdc06c31cc, 0x3fef86a814f204ab, 0x3fef829aaea92de0, 0x3fef7e95934f312e, 0x3fef7a98c8a58e51, 0x3fef76a45471c3c2, 0x3fef72b83c7d517b, 0x3fef6ed48695bbc0, 0x3fef6af9388c8dea, 0x3fef672658375d2f, 0x3fef635beb6fcb75, 0x3fef5f99f8138a1c, 0x3fef5be084045cd4, 0x3fef582f95281c6b, 0x3fef54873168b9aa, 0x3fef50e75eb44027, 0x3fef4d5022fcd91d, 0x3fef49c18438ce4d, 0x3fef463b88628cd6, 0x3fef42be3578a819, 0x3fef3f49917ddc96, 0x3fef3bdda27912d1, 0x3fef387a6e756238, 0x3fef351ffb82140a, 0x3fef31ce4fb2a63f, 0x3fef2e85711ece75, 0x3fef2b4565e27cdd, 0x3fef280e341ddf29, 0x3fef24dfe1f56381, 0x3fef21ba7591bb70, 0x3fef1e9df51fdee1, 0x3fef1b8a66d10f13, 0x3fef187fd0dad990, 0x3fef157e39771b2f, 0x3fef1285a6e4030b, 0x3fef0f961f641589, 0x3fef0cafa93e2f56, 0x3fef09d24abd886b, 0x3fef06fe0a31b715, 0x3fef0432edeeb2fd, 0x3fef0170fc4cd831, 0x3feefeb83ba8ea32, 0x3feefc08b26416ff, 0x3feef96266e3fa2d, 0x3feef6c55f929ff1, 0x3feef431a2de883b, 0x3feef1a7373aa9cb, 0x3feeef26231e754a, 0x3feeecae6d05d866, 0x3feeea401b7140ef, 0x3feee7db34e59ff7, 0x3feee57fbfec6cf4, 0x3feee32dc313a8e5, 0x3feee0e544ede173, 0x3feedea64c123422, 0x3feedc70df1c5175, 0x3feeda4504ac801c, 0x3feed822c367a024, 0x3feed60a21f72e2a, 0x3feed3fb2709468a, 0x3feed1f5d950a897, 0x3feecffa3f84b9d4, 0x3feece086061892d, 0x3feecc2042a7d232, 0x3feeca41ed1d0057, 0x3feec86d668b3237, 
0x3feec6a2b5c13cd0, 0x3feec4e1e192aed2, 0x3feec32af0d7d3de, 0x3feec17dea6db7d7, 0x3feebfdad5362a27, 0x3feebe41b817c114, 0x3feebcb299fddd0d, 0x3feebb2d81d8abff, 0x3feeb9b2769d2ca7, 0x3feeb8417f4531ee, 0x3feeb6daa2cf6642, 0x3feeb57de83f4eef, 0x3feeb42b569d4f82, 0x3feeb2e2f4f6ad27, 0x3feeb1a4ca5d920f, 0x3feeb070dde910d2, 0x3feeaf4736b527da, 0x3feeae27dbe2c4cf, 0x3feead12d497c7fd, 0x3feeac0827ff07cc, 0x3feeab07dd485429, 0x3feeaa11fba87a03, 0x3feea9268a5946b7, 0x3feea84590998b93, 0x3feea76f15ad2148, 0x3feea6a320dceb71, 0x3feea5e1b976dc09, 0x3feea52ae6cdf6f4, 0x3feea47eb03a5585, 0x3feea3dd1d1929fd, 0x3feea34634ccc320, 0x3feea2b9febc8fb7, 0x3feea23882552225, 0x3feea1c1c70833f6, 0x3feea155d44ca973, 0x3feea0f4b19e9538, 0x3feea09e667f3bcd, 0x3feea052fa75173e, 0x3feea012750bdabf, 0x3fee9fdcddd47645, 0x3fee9fb23c651a2f, 0x3fee9f9298593ae5, 0x3fee9f7df9519484, 0x3fee9f7466f42e87, 0x3fee9f75e8ec5f74, 0x3fee9f8286ead08a, 0x3fee9f9a48a58174, 0x3fee9fbd35d7cbfd, 0x3fee9feb564267c9, 0x3feea024b1ab6e09, 0x3feea0694fde5d3f, 0x3feea0b938ac1cf6, 0x3feea11473eb0187, 0x3feea17b0976cfdb, 0x3feea1ed0130c132, 0x3feea26a62ff86f0, 0x3feea2f336cf4e62, 0x3feea3878491c491, 0x3feea427543e1a12, 0x3feea4d2add106d9, 0x3feea589994cce13, 0x3feea64c1eb941f7, 0x3feea71a4623c7ad, 0x3feea7f4179f5b21, 0x3feea8d99b4492ed, 0x3feea9cad931a436, 0x3feeaac7d98a6699, 0x3feeabd0a478580f, 0x3feeace5422aa0db, 0x3feeae05bad61778, 0x3feeaf3216b5448c, 0x3feeb06a5e0866d9, 0x3feeb1ae99157736, 0x3feeb2fed0282c8a, 0x3feeb45b0b91ffc6, 0x3feeb5c353aa2fe2, 0x3feeb737b0cdc5e5, 0x3feeb8b82b5f98e5, 0x3feeba44cbc8520f, 0x3feebbdd9a7670b3, 0x3feebd829fde4e50, 0x3feebf33e47a22a2, 0x3feec0f170ca07ba, 0x3feec2bb4d53fe0d, 0x3feec49182a3f090, 0x3feec674194bb8d5, 0x3feec86319e32323, 0x3feeca5e8d07f29e, 0x3feecc667b5de565, 0x3feece7aed8eb8bb, 0x3feed09bec4a2d33, 0x3feed2c980460ad8, 0x3feed503b23e255d, 0x3feed74a8af46052, 0x3feed99e1330b358, 0x3feedbfe53c12e59, 0x3feede6b5579fdbf, 0x3feee0e521356eba, 0x3feee36bbfd3f37a, 0x3feee5ff3a3c2774, 0x3feee89f995ad3ad, 0x3feeeb4ce622f2ff, 0x3feeee07298db666, 0x3feef0ce6c9a8952, 0x3feef3a2b84f15fb, 0x3feef68415b749b1, 0x3feef9728de5593a, 0x3feefc6e29f1c52a, 0x3feeff76f2fb5e47, 0x3fef028cf22749e4, 0x3fef05b030a1064a, 0x3fef08e0b79a6f1f, 0x3fef0c1e904bc1d2, 0x3fef0f69c3f3a207, 0x3fef12c25bd71e09, 0x3fef16286141b33d, 0x3fef199bdd85529c, 0x3fef1d1cd9fa652c, 0x3fef20ab5fffd07a, 0x3fef244778fafb22, 0x3fef27f12e57d14b, 0x3fef2ba88988c933, 0x3fef2f6d9406e7b5, 0x3fef33405751c4db, 0x3fef3720dcef9069, 0x3fef3b0f2e6d1675, 0x3fef3f0b555dc3fa, 0x3fef43155b5bab74, 0x3fef472d4a07897c, 0x3fef4b532b08c968, 0x3fef4f87080d89f2, 0x3fef53c8eacaa1d6, 0x3fef5818dcfba487, 0x3fef5c76e862e6d3, 0x3fef60e316c98398, 0x3fef655d71ff6075, 0x3fef69e603db3285, 0x3fef6e7cd63a8315, 0x3fef7321f301b460, 0x3fef77d5641c0658, 0x3fef7c97337b9b5f, 0x3fef81676b197d17, 0x3fef864614f5a129, 0x3fef8b333b16ee12, 0x3fef902ee78b3ff6, 0x3fef953924676d76, 0x3fef9a51fbc74c83, 0x3fef9f7977cdb740, 0x3fefa4afa2a490da, 0x3fefa9f4867cca6e, 0x3fefaf482d8e67f1, 0x3fefb4aaa2188510, 0x3fefba1bee615a27, 0x3fefbf9c1cb6412a, 0x3fefc52b376bba97, 0x3fefcac948dd7274, 0x3fefd0765b6e4540, 0x3fefd632798844f8, 0x3fefdbfdad9cbe14, 0x3fefe1d802243c89, 0x3fefe7c1819e90d8, 0x3fefedba3692d514, 0x3feff3c22b8f71f1, 0x3feff9d96b2a23d9, }; diff --git a/contrib/arm-optimized-routines/math/aarch64/v_expf_1u.c b/contrib/arm-optimized-routines/math/aarch64/v_expf_1u.c deleted file mode 100644 index 43d03fa34efa..000000000000 --- a/contrib/arm-optimized-routines/math/aarch64/v_expf_1u.c +++ /dev/null @@ -1,77 
+0,0 @@ -/* - * Single-precision vector e^x function. - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "mathlib.h" -#include "v_math.h" - -static const float Poly[] = { - /* maxerr: 0.36565 +0.5 ulp. */ - 0x1.6a6000p-10f, - 0x1.12718ep-7f, - 0x1.555af0p-5f, - 0x1.555430p-3f, - 0x1.fffff4p-2f, -}; -#define C0 v_f32 (Poly[0]) -#define C1 v_f32 (Poly[1]) -#define C2 v_f32 (Poly[2]) -#define C3 v_f32 (Poly[3]) -#define C4 v_f32 (Poly[4]) - -#define Shift v_f32 (0x1.8p23f) -#define InvLn2 v_f32 (0x1.715476p+0f) -#define Ln2hi v_f32 (0x1.62e4p-1f) -#define Ln2lo v_f32 (0x1.7f7d1cp-20f) - -static float32x4_t VPCS_ATTR NOINLINE -specialcase (float32x4_t poly, float32x4_t n, uint32x4_t e, float32x4_t absn) -{ - /* 2^n may overflow, break it up into s1*s2. */ - uint32x4_t b = (n <= v_f32 (0.0f)) & v_u32 (0x83000000); - float32x4_t s1 = vreinterpretq_f32_u32 (v_u32 (0x7f000000) + b); - float32x4_t s2 = vreinterpretq_f32_u32 (e - b); - uint32x4_t cmp = absn > v_f32 (192.0f); - float32x4_t r1 = s1 * s1; - float32x4_t r0 = poly * s1 * s2; - return vreinterpretq_f32_u32 ((cmp & vreinterpretq_u32_f32 (r1)) - | (~cmp & vreinterpretq_u32_f32 (r0))); -} - -float32x4_t VPCS_ATTR -_ZGVnN4v_expf_1u (float32x4_t x) -{ - float32x4_t n, r, scale, poly, absn, z; - uint32x4_t cmp, e; - - /* exp(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ -#if 1 - z = vfmaq_f32 (Shift, x, InvLn2); - n = z - Shift; - r = vfmaq_f32 (x, n, -Ln2hi); - r = vfmaq_f32 (r, n, -Ln2lo); - e = vreinterpretq_u32_f32 (z) << 23; -#else - z = x * InvLn2; - n = vrndaq_f32 (z); - r = vfmaq_f32 (x, n, -Ln2hi); - r = vfmaq_f32 (r, n, -Ln2lo); - e = vreinterpretq_u32_s32 (vcvtaq_s32_f32 (z)) << 23; -#endif - scale = vreinterpretq_f32_u32 (e + v_u32 (0x3f800000)); - absn = vabsq_f32 (n); - cmp = absn > v_f32 (126.0f); - poly = vfmaq_f32 (C1, C0, r); - poly = vfmaq_f32 (C2, poly, r); - poly = vfmaq_f32 (C3, poly, r); - poly = vfmaq_f32 (C4, poly, r); - poly = vfmaq_f32 (v_f32 (1.0f), poly, r); - poly = vfmaq_f32 (v_f32 (1.0f), poly, r); - if (unlikely (v_any_u32 (cmp))) - return specialcase (poly, n, e, absn); - return scale * poly; -} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_log.c b/contrib/arm-optimized-routines/math/aarch64/v_log.c deleted file mode 100644 index 1d1c1fa62c04..000000000000 --- a/contrib/arm-optimized-routines/math/aarch64/v_log.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Double-precision vector log(x) function. - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "mathlib.h" -#include "v_math.h" - -static const struct data -{ - uint64x2_t min_norm; - uint32x4_t special_bound; - float64x2_t poly[5]; - float64x2_t ln2; - uint64x2_t sign_exp_mask; -} data = { - /* Worst-case error: 1.17 + 0.5 ulp. - Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */ - .poly = { V2 (-0x1.ffffffffffff7p-2), V2 (0x1.55555555170d4p-2), - V2 (-0x1.0000000399c27p-2), V2 (0x1.999b2e90e94cap-3), - V2 (-0x1.554e550bd501ep-3) }, - .ln2 = V2 (0x1.62e42fefa39efp-1), - .min_norm = V2 (0x0010000000000000), - .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. 
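The min_norm/special_bound pair in this removed file implements a one-compare filter for inputs that log must special-case. A scalar equivalent of the trick, for reference:

#include <stdint.h>
#include <string.h>

/* True exactly for zero, subnormals, negatives, infinities and NaNs:
   0x7fe00000 is the high word of asuint64 (inf) - min_norm, and the
   subtraction makes every such input wrap past that bound.  */
static int
log_is_special (double x)
{
  uint64_t ix;
  memcpy (&ix, &x, sizeof ix);
  return (uint32_t) ((ix - 0x0010000000000000ULL) >> 32) >= 0x7fe00000u;
}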
*/ - .sign_exp_mask = V2 (0xfff0000000000000) -}; - -#define A(i) d->poly[i] -#define N (1 << V_LOG_TABLE_BITS) -#define IndexMask (N - 1) -#define Off v_u64 (0x3fe6900900000000) - -struct entry -{ - float64x2_t invc; - float64x2_t logc; -}; - -static inline struct entry -lookup (uint64x2_t i) -{ - /* Since N is a power of 2, n % N = n & (N - 1). */ - struct entry e; - uint64_t i0 = (i[0] >> (52 - V_LOG_TABLE_BITS)) & IndexMask; - uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask; - float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); - float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); - e.invc = vuzp1q_f64 (e0, e1); - e.logc = vuzp2q_f64 (e0, e1); - return e; -} - -static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2, - uint32x2_t cmp) -{ - return v_call_f64 (log, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (cmp)); -} - -float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x) -{ - const struct data *d = ptr_barrier (&data); - float64x2_t z, r, r2, p, y, kd, hi; - uint64x2_t ix, iz, tmp; - uint32x2_t cmp; - int64x2_t k; - struct entry e; - - ix = vreinterpretq_u64_f64 (x); - cmp = vcge_u32 (vsubhn_u64 (ix, d->min_norm), - vget_low_u32 (d->special_bound)); - - /* x = 2^k z; where z is in range [Off,2*Off) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = vsubq_u64 (ix, Off); - k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. */ - iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask)); - z = vreinterpretq_f64_u64 (iz); - e = lookup (tmp); - - /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ - r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); - kd = vcvtq_f64_s64 (k); - - /* hi = r + log(c) + k*Ln2. */ - hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2); - /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ - r2 = vmulq_f64 (r, r); - y = vfmaq_f64 (A (2), A (3), r); - p = vfmaq_f64 (A (0), A (1), r); - y = vfmaq_f64 (y, A (4), r2); - y = vfmaq_f64 (p, y, r2); - - if (unlikely (v_any_u32h (cmp))) - return special_case (x, y, hi, r2, cmp); - return vfmaq_f64 (hi, y, r2); -} diff --git a/contrib/arm-optimized-routines/pl/math/v_log10_data.c b/contrib/arm-optimized-routines/math/aarch64/v_log10_data.c similarity index 99% rename from contrib/arm-optimized-routines/pl/math/v_log10_data.c rename to contrib/arm-optimized-routines/math/aarch64/v_log10_data.c index d9a624dab9ce..bae2685822f6 100644 --- a/contrib/arm-optimized-routines/pl/math/v_log10_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/v_log10_data.c @@ -1,163 +1,163 @@ /* * Lookup table for double-precision log10(x) vector function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" const struct v_log10_data __v_log10_data = { /* Computed from log's coefficients div by log(10) then rounded to double precision. 
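For orientation, a hypothetical scalar rendering of the reduction this table serves, mirroring the deleted v_log.c above. The r^2 * poly(r) correction and special-case handling are elided, and math_config.h is assumed to be the repo-internal header that declares __v_log10_data:

#include <stdint.h>
#include <string.h>
#include "math_config.h"

static double
v_log10_sketch (double x)
{
  uint64_t ix, tmp, iz;
  memcpy (&ix, &x, sizeof ix);
  tmp = ix - 0x3fe6900900000000ULL;	   /* Off: z = x/2^k in [a, 2a) */
  int64_t k = (int64_t) tmp >> 52;	   /* arithmetic shift */
  iz = ix - (tmp & 0xfff0000000000000ULL);
  double z;
  memcpy (&z, &iz, sizeof z);
  int i = (tmp >> (52 - 7)) & 127;	   /* one of N = 128 subintervals */
  double r = z * __v_log10_data.table[i].invc - 1.0;
  /* log10(x) ~ k log10(2) + log10(c) + r/ln(10), plus the elided poly.  */
  return (double) k * __v_log10_data.log10_2
	 + __v_log10_data.table[i].log10c + r * __v_log10_data.invln10;
}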
*/ .poly = { -0x1.bcb7b1526e506p-3, 0x1.287a7636be1d1p-3, -0x1.bcb7b158af938p-4, 0x1.63c78734e6d07p-4, -0x1.287461742fee4p-4 }, .invln10 = 0x1.bcb7b1526e50ep-2, .log10_2 = 0x1.34413509f79ffp-2, /* Algorithm: x = 2^k z log10(x) = k log10(2) + log10(c) + poly(z/c - 1) / log(10) where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1, N=128) and log(c) and 1/c for the ith subinterval comes from lookup tables: table[i].invc = 1/c table[i].log10c = (double)log10(c) where c is near the center of the subinterval and is chosen by trying several floating point invc candidates around 1/center and selecting one for which the error in (double)log(c) is minimized (< 0x1p-74), except the subinterval that contains 1 and the previous one got tweaked to avoid cancellation. NB: invc should be optimized to minimize error in (double)log10(c) instead. */ .table = { { 0x1.6a133d0dec120p+0, -0x1.345825f221684p-3 }, { 0x1.6815f2f3e42edp+0, -0x1.2f71a1f0c554ep-3 }, { 0x1.661e39be1ac9ep+0, -0x1.2a91fdb30b1f4p-3 }, { 0x1.642bfa30ac371p+0, -0x1.25b9260981a04p-3 }, { 0x1.623f1d916f323p+0, -0x1.20e7081762193p-3 }, { 0x1.60578da220f65p+0, -0x1.1c1b914aeefacp-3 }, { 0x1.5e75349dea571p+0, -0x1.1756af5de404dp-3 }, { 0x1.5c97fd387a75ap+0, -0x1.12985059c90bfp-3 }, { 0x1.5abfd2981f200p+0, -0x1.0de0628f63df4p-3 }, { 0x1.58eca051dc99cp+0, -0x1.092ed492e08eep-3 }, { 0x1.571e526d9df12p+0, -0x1.0483954caf1dfp-3 }, { 0x1.5554d555b3fcbp+0, -0x1.ffbd27a9adbcp-4 }, { 0x1.539015e2a20cdp+0, -0x1.f67f7f2e3d1ap-4 }, { 0x1.51d0014ee0164p+0, -0x1.ed4e1071ceebep-4 }, { 0x1.50148538cd9eep+0, -0x1.e428bb47413c4p-4 }, { 0x1.4e5d8f9f698a1p+0, -0x1.db0f6003028d6p-4 }, { 0x1.4cab0edca66bep+0, -0x1.d201df6749831p-4 }, { 0x1.4afcf1a9db874p+0, -0x1.c9001ac5c9672p-4 }, { 0x1.495327136e16fp+0, -0x1.c009f3c78c79p-4 }, { 0x1.47ad9e84af28fp+0, -0x1.b71f4cb642e53p-4 }, { 0x1.460c47b39ae15p+0, -0x1.ae400818526b2p-4 }, { 0x1.446f12b278001p+0, -0x1.a56c091954f87p-4 }, { 0x1.42d5efdd720ecp+0, -0x1.9ca3332f096eep-4 }, { 0x1.4140cfe001a0fp+0, -0x1.93e56a3f23e55p-4 }, { 0x1.3fafa3b421f69p+0, -0x1.8b3292a3903bp-4 }, { 0x1.3e225c9c8ece5p+0, -0x1.828a9112d9618p-4 }, { 0x1.3c98ec29a211ap+0, -0x1.79ed4ac35f5acp-4 }, { 0x1.3b13442a413fep+0, -0x1.715aa51ed28c4p-4 }, { 0x1.399156baa3c54p+0, -0x1.68d2861c999e9p-4 }, { 0x1.38131639b4cdbp+0, -0x1.6054d40ded21p-4 }, { 0x1.36987540fbf53p+0, -0x1.57e17576bc9a2p-4 }, { 0x1.352166b648f61p+0, -0x1.4f7851798bb0bp-4 }, { 0x1.33adddb3eb575p+0, -0x1.47194f5690ae3p-4 }, { 0x1.323dcd99fc1d3p+0, -0x1.3ec456d58ec47p-4 }, { 0x1.30d129fefc7d2p+0, -0x1.36794ff3e5f55p-4 }, { 0x1.2f67e6b72fe7dp+0, -0x1.2e382315725e4p-4 }, { 0x1.2e01f7cf8b187p+0, -0x1.2600b8ed82e91p-4 }, { 0x1.2c9f518ddc86ep+0, -0x1.1dd2fa85efc12p-4 }, { 0x1.2b3fe86e5f413p+0, -0x1.15aed136e3961p-4 }, { 0x1.29e3b1211b25cp+0, -0x1.0d94269d1a30dp-4 }, { 0x1.288aa08b373cfp+0, -0x1.0582e4a7659f5p-4 }, { 0x1.2734abcaa8467p+0, -0x1.faf5eb655742dp-5 }, { 0x1.25e1c82459b81p+0, -0x1.eaf888487e8eep-5 }, { 0x1.2491eb1ad59c5p+0, -0x1.db0d75ef25a82p-5 }, { 0x1.23450a54048b5p+0, -0x1.cb348a49e6431p-5 }, { 0x1.21fb1bb09e578p+0, -0x1.bb6d9c69acdd8p-5 }, { 0x1.20b415346d8f7p+0, -0x1.abb88368aa7ap-5 }, { 0x1.1f6fed179a1acp+0, -0x1.9c1517476af14p-5 }, { 0x1.1e2e99b93c7b3p+0, -0x1.8c833051bfa4dp-5 }, { 0x1.1cf011a7a882ap+0, -0x1.7d02a78e7fb31p-5 }, { 0x1.1bb44b97dba5ap+0, -0x1.6d93565e97c5fp-5 }, { 0x1.1a7b3e66cdd4fp+0, -0x1.5e351695db0c5p-5 }, { 0x1.1944e11dc56cdp+0, -0x1.4ee7c2ba67adcp-5 }, { 0x1.18112aebb1a6ep+0, -0x1.3fab35ba16c01p-5 }, { 0x1.16e013231b7e9p+0, 
-0x1.307f4ad854bc9p-5 }, { 0x1.15b1913f156cfp+0, -0x1.2163ddf4f988cp-5 }, { 0x1.14859cdedde13p+0, -0x1.1258cb5d19e22p-5 }, { 0x1.135c2dc68cfa4p+0, -0x1.035defdba3188p-5 }, { 0x1.12353bdb01684p+0, -0x1.e8e651191bce4p-6 }, { 0x1.1110bf25b85b4p+0, -0x1.cb30a62be444cp-6 }, { 0x1.0feeafd2f8577p+0, -0x1.ad9a9b3043823p-6 }, { 0x1.0ecf062c51c3bp+0, -0x1.9023ecda1ccdep-6 }, { 0x1.0db1baa076c8bp+0, -0x1.72cc592bd82dp-6 }, { 0x1.0c96c5bb3048ep+0, -0x1.55939eb1f9c6ep-6 }, { 0x1.0b7e20263e070p+0, -0x1.38797ca6cc5ap-6 }, { 0x1.0a67c2acd0ce3p+0, -0x1.1b7db35c2c072p-6 }, { 0x1.0953a6391e982p+0, -0x1.fd400812ee9a2p-7 }, { 0x1.0841c3caea380p+0, -0x1.c3c05fb4620f1p-7 }, { 0x1.07321489b13eap+0, -0x1.8a7bf3c40e2e3p-7 }, { 0x1.062491aee9904p+0, -0x1.517249c15a75cp-7 }, { 0x1.05193497a7cc5p+0, -0x1.18a2ea5330c91p-7 }, { 0x1.040ff6b5f5e9fp+0, -0x1.c01abc8cdc4e2p-8 }, { 0x1.0308d19aa6127p+0, -0x1.4f6261750dec9p-8 }, { 0x1.0203beedb0c67p+0, -0x1.be37b6612afa7p-9 }, { 0x1.010037d38bcc2p+0, -0x1.bc3a8398ac26p-10 }, { 1.0, 0.0 }, { 0x1.fc06d493cca10p-1, 0x1.bb796219f30a5p-9 }, { 0x1.f81e6ac3b918fp-1, 0x1.b984fdcba61cep-8 }, { 0x1.f44546ef18996p-1, 0x1.49cf12adf8e8cp-7 }, { 0x1.f07b10382c84bp-1, 0x1.b6075b5217083p-7 }, { 0x1.ecbf7070e59d4p-1, 0x1.10b7466fc30ddp-6 }, { 0x1.e91213f715939p-1, 0x1.4603e4db6a3a1p-6 }, { 0x1.e572a9a75f7b7p-1, 0x1.7aeb10e99e105p-6 }, { 0x1.e1e0e2c530207p-1, 0x1.af6e49b0f0e36p-6 }, { 0x1.de5c72d8a8be3p-1, 0x1.e38f064f41179p-6 }, { 0x1.dae50fa5658ccp-1, 0x1.0ba75abbb7623p-5 }, { 0x1.d77a71145a2dap-1, 0x1.25575ee2dba86p-5 }, { 0x1.d41c51166623ep-1, 0x1.3ed83f477f946p-5 }, { 0x1.d0ca6ba0bb29fp-1, 0x1.582aa79af60efp-5 }, { 0x1.cd847e8e59681p-1, 0x1.714f400fa83aep-5 }, { 0x1.ca4a499693e00p-1, 0x1.8a46ad3901cb9p-5 }, { 0x1.c71b8e399e821p-1, 0x1.a311903b6b87p-5 }, { 0x1.c3f80faf19077p-1, 0x1.bbb086f216911p-5 }, { 0x1.c0df92dc2b0ecp-1, 0x1.d4242bdda648ep-5 }, { 0x1.bdd1de3cbb542p-1, 0x1.ec6d167c2af1p-5 }, { 0x1.baceb9e1007a3p-1, 0x1.0245ed8221426p-4 }, { 0x1.b7d5ef543e55ep-1, 0x1.0e40856c74f64p-4 }, { 0x1.b4e749977d953p-1, 0x1.1a269a31120fep-4 }, { 0x1.b20295155478ep-1, 0x1.25f8718fc076cp-4 }, { 0x1.af279f8e82be2p-1, 0x1.31b64ffc95bfp-4 }, { 0x1.ac5638197fdf3p-1, 0x1.3d60787ca5063p-4 }, { 0x1.a98e2f102e087p-1, 0x1.48f72ccd187fdp-4 }, { 0x1.a6cf5606d05c1p-1, 0x1.547aad6602f1cp-4 }, { 0x1.a4197fc04d746p-1, 0x1.5feb3989d3acbp-4 }, { 0x1.a16c80293dc01p-1, 0x1.6b490f3978c79p-4 }, { 0x1.9ec82c4dc5bc9p-1, 0x1.76946b3f5e703p-4 }, { 0x1.9c2c5a491f534p-1, 0x1.81cd895717c83p-4 }, { 0x1.9998e1480b618p-1, 0x1.8cf4a4055c30ep-4 }, { 0x1.970d9977c6c2dp-1, 0x1.9809f4c48c0ebp-4 }, { 0x1.948a5c023d212p-1, 0x1.a30db3f9899efp-4 }, { 0x1.920f0303d6809p-1, 0x1.ae001905458fcp-4 }, { 0x1.8f9b698a98b45p-1, 0x1.b8e15a2e3a2cdp-4 }, { 0x1.8d2f6b81726f6p-1, 0x1.c3b1ace2b0996p-4 }, { 0x1.8acae5bb55badp-1, 0x1.ce71456edfa62p-4 }, { 0x1.886db5d9275b8p-1, 0x1.d9205759882c4p-4 }, { 0x1.8617ba567c13cp-1, 0x1.e3bf1513af0dfp-4 }, { 0x1.83c8d27487800p-1, 0x1.ee4db0412c414p-4 }, { 0x1.8180de3c5dbe7p-1, 0x1.f8cc5998de3a5p-4 }, { 0x1.7f3fbe71cdb71p-1, 0x1.019da085eaeb1p-3 }, { 0x1.7d055498071c1p-1, 0x1.06cd4acdb4e3dp-3 }, { 0x1.7ad182e54f65ap-1, 0x1.0bf542bef813fp-3 }, { 0x1.78a42c3c90125p-1, 0x1.11159f14da262p-3 }, { 0x1.767d342f76944p-1, 0x1.162e761c10d1cp-3 }, { 0x1.745c7ef26b00ap-1, 0x1.1b3fddc60d43ep-3 }, { 0x1.7241f15769d0fp-1, 0x1.2049ebac86aa6p-3 }, { 0x1.702d70d396e41p-1, 0x1.254cb4fb7836ap-3 }, { 0x1.6e1ee3700cd11p-1, 0x1.2a484e8d0d252p-3 }, { 0x1.6c162fc9cbe02p-1, 0x1.2f3ccce1c860bp-3 } } }; diff --git 
a/contrib/arm-optimized-routines/pl/math/v_log2_data.c b/contrib/arm-optimized-routines/math/aarch64/v_log2_data.c similarity index 99% rename from contrib/arm-optimized-routines/pl/math/v_log2_data.c rename to contrib/arm-optimized-routines/math/aarch64/v_log2_data.c index 50697daff925..fad91d654da8 100644 --- a/contrib/arm-optimized-routines/pl/math/v_log2_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/v_log2_data.c @@ -1,153 +1,153 @@ /* * Coefficients and table entries for vector log2 * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #define N (1 << V_LOG2_TABLE_BITS) const struct v_log2_data __v_log2_data = { /* Each coefficient was generated to approximate log(r) for |r| < 0x1.fp-9 and N = 128, then scaled by log2(e) in extended precision and rounded back to double precision. */ .poly = { -0x1.71547652b83p-1, 0x1.ec709dc340953p-2, -0x1.71547651c8f35p-2, 0x1.2777ebe12dda5p-2, -0x1.ec738d616fe26p-3 }, .invln2 = 0x1.71547652b82fep0, /* Derived from tables in v_log_data.c in a similar way as v_log10_data.c. This means invc is unchanged and log2c was calculated by scaling log(c) by log2(e) in extended precision and rounding back to double precision. */ .table = { { 0x1.6a133d0dec120p+0, -0x1.00130d57f5fadp-1 }, { 0x1.6815f2f3e42edp+0, -0x1.f802661bd725ep-2 }, { 0x1.661e39be1ac9ep+0, -0x1.efea1c6f73a5bp-2 }, { 0x1.642bfa30ac371p+0, -0x1.e7dd1dcd06f05p-2 }, { 0x1.623f1d916f323p+0, -0x1.dfdb4ae024809p-2 }, { 0x1.60578da220f65p+0, -0x1.d7e484d101958p-2 }, { 0x1.5e75349dea571p+0, -0x1.cff8ad452f6ep-2 }, { 0x1.5c97fd387a75ap+0, -0x1.c817a666c997fp-2 }, { 0x1.5abfd2981f200p+0, -0x1.c04152d640419p-2 }, { 0x1.58eca051dc99cp+0, -0x1.b87595a3f64b2p-2 }, { 0x1.571e526d9df12p+0, -0x1.b0b4526c44d07p-2 }, { 0x1.5554d555b3fcbp+0, -0x1.a8fd6d1a90f5ep-2 }, { 0x1.539015e2a20cdp+0, -0x1.a150ca2559fc6p-2 }, { 0x1.51d0014ee0164p+0, -0x1.99ae4e62cca29p-2 }, { 0x1.50148538cd9eep+0, -0x1.9215df1a1e842p-2 }, { 0x1.4e5d8f9f698a1p+0, -0x1.8a8761fe1f0d9p-2 }, { 0x1.4cab0edca66bep+0, -0x1.8302bd1cc9a54p-2 }, { 0x1.4afcf1a9db874p+0, -0x1.7b87d6fb437f6p-2 }, { 0x1.495327136e16fp+0, -0x1.741696673a86dp-2 }, { 0x1.47ad9e84af28fp+0, -0x1.6caee2b3c6fe4p-2 }, { 0x1.460c47b39ae15p+0, -0x1.6550a3666c27ap-2 }, { 0x1.446f12b278001p+0, -0x1.5dfbc08de02a4p-2 }, { 0x1.42d5efdd720ecp+0, -0x1.56b022766c84ap-2 }, { 0x1.4140cfe001a0fp+0, -0x1.4f6db1c955536p-2 }, { 0x1.3fafa3b421f69p+0, -0x1.4834579063054p-2 }, { 0x1.3e225c9c8ece5p+0, -0x1.4103fd2249a76p-2 }, { 0x1.3c98ec29a211ap+0, -0x1.39dc8c3fe6dabp-2 }, { 0x1.3b13442a413fep+0, -0x1.32bdeed4b5c8fp-2 }, { 0x1.399156baa3c54p+0, -0x1.2ba80f41e20ddp-2 }, { 0x1.38131639b4cdbp+0, -0x1.249ad8332f4a7p-2 }, { 0x1.36987540fbf53p+0, -0x1.1d96347e7f3ebp-2 }, { 0x1.352166b648f61p+0, -0x1.169a0f7d6604ap-2 }, { 0x1.33adddb3eb575p+0, -0x1.0fa654a221909p-2 }, { 0x1.323dcd99fc1d3p+0, -0x1.08baefcf8251ap-2 }, { 0x1.30d129fefc7d2p+0, -0x1.01d7cd14deecdp-2 }, { 0x1.2f67e6b72fe7dp+0, -0x1.f5f9b1ad55495p-3 }, { 0x1.2e01f7cf8b187p+0, -0x1.e853ff76a77afp-3 }, { 0x1.2c9f518ddc86ep+0, -0x1.dabe5d624cba1p-3 }, { 0x1.2b3fe86e5f413p+0, -0x1.cd38a5cef4822p-3 }, { 0x1.29e3b1211b25cp+0, -0x1.bfc2b38d315f9p-3 }, { 0x1.288aa08b373cfp+0, -0x1.b25c61f5edd0fp-3 }, { 0x1.2734abcaa8467p+0, -0x1.a5058d18e9cacp-3 }, { 0x1.25e1c82459b81p+0, -0x1.97be1113e47a3p-3 }, { 0x1.2491eb1ad59c5p+0, -0x1.8a85cafdf5e27p-3 }, { 0x1.23450a54048b5p+0, -0x1.7d5c97e8fc45bp-3 }, { 0x1.21fb1bb09e578p+0, 
-0x1.704255d6486e4p-3 }, { 0x1.20b415346d8f7p+0, -0x1.6336e2cedd7bfp-3 }, { 0x1.1f6fed179a1acp+0, -0x1.563a1d9b0cc6ap-3 }, { 0x1.1e2e99b93c7b3p+0, -0x1.494be541aaa6fp-3 }, { 0x1.1cf011a7a882ap+0, -0x1.3c6c1964dd0f2p-3 }, { 0x1.1bb44b97dba5ap+0, -0x1.2f9a99f19a243p-3 }, { 0x1.1a7b3e66cdd4fp+0, -0x1.22d747344446p-3 }, { 0x1.1944e11dc56cdp+0, -0x1.1622020d4f7f5p-3 }, { 0x1.18112aebb1a6ep+0, -0x1.097aabb3553f3p-3 }, { 0x1.16e013231b7e9p+0, -0x1.f9c24b48014c5p-4 }, { 0x1.15b1913f156cfp+0, -0x1.e0aaa3bdc858ap-4 }, { 0x1.14859cdedde13p+0, -0x1.c7ae257c952d6p-4 }, { 0x1.135c2dc68cfa4p+0, -0x1.aecc960a03e58p-4 }, { 0x1.12353bdb01684p+0, -0x1.9605bb724d541p-4 }, { 0x1.1110bf25b85b4p+0, -0x1.7d595ca7147cep-4 }, { 0x1.0feeafd2f8577p+0, -0x1.64c74165002d9p-4 }, { 0x1.0ecf062c51c3bp+0, -0x1.4c4f31c86d344p-4 }, { 0x1.0db1baa076c8bp+0, -0x1.33f0f70388258p-4 }, { 0x1.0c96c5bb3048ep+0, -0x1.1bac5abb3037dp-4 }, { 0x1.0b7e20263e070p+0, -0x1.0381272495f21p-4 }, { 0x1.0a67c2acd0ce3p+0, -0x1.d6de4eba2de2ap-5 }, { 0x1.0953a6391e982p+0, -0x1.a6ec4e8156898p-5 }, { 0x1.0841c3caea380p+0, -0x1.772be542e3e1bp-5 }, { 0x1.07321489b13eap+0, -0x1.479cadcde852dp-5 }, { 0x1.062491aee9904p+0, -0x1.183e4265faa5p-5 }, { 0x1.05193497a7cc5p+0, -0x1.d2207fdaa1b85p-6 }, { 0x1.040ff6b5f5e9fp+0, -0x1.742486cb4a6a2p-6 }, { 0x1.0308d19aa6127p+0, -0x1.1687d77cfc299p-6 }, { 0x1.0203beedb0c67p+0, -0x1.7293623a6b5dep-7 }, { 0x1.010037d38bcc2p+0, -0x1.70ec80ec8f25dp-8 }, { 1.0, 0.0 }, { 0x1.fc06d493cca10p-1, 0x1.704c1ca6b6bc9p-7 }, { 0x1.f81e6ac3b918fp-1, 0x1.6eac8ba664beap-6 }, { 0x1.f44546ef18996p-1, 0x1.11e67d040772dp-5 }, { 0x1.f07b10382c84bp-1, 0x1.6bc665e2105dep-5 }, { 0x1.ecbf7070e59d4p-1, 0x1.c4f8a9772bf1dp-5 }, { 0x1.e91213f715939p-1, 0x1.0ebff10fbb951p-4 }, { 0x1.e572a9a75f7b7p-1, 0x1.3aaf4d7805d11p-4 }, { 0x1.e1e0e2c530207p-1, 0x1.664ba81a4d717p-4 }, { 0x1.de5c72d8a8be3p-1, 0x1.9196387da6de4p-4 }, { 0x1.dae50fa5658ccp-1, 0x1.bc902f2b7796p-4 }, { 0x1.d77a71145a2dap-1, 0x1.e73ab5f584f28p-4 }, { 0x1.d41c51166623ep-1, 0x1.08cb78510d232p-3 }, { 0x1.d0ca6ba0bb29fp-1, 0x1.1dd2fe2f0dcb5p-3 }, { 0x1.cd847e8e59681p-1, 0x1.32b4784400df4p-3 }, { 0x1.ca4a499693e00p-1, 0x1.47706f3d49942p-3 }, { 0x1.c71b8e399e821p-1, 0x1.5c0768ee4a4dcp-3 }, { 0x1.c3f80faf19077p-1, 0x1.7079e86fc7c6dp-3 }, { 0x1.c0df92dc2b0ecp-1, 0x1.84c86e1183467p-3 }, { 0x1.bdd1de3cbb542p-1, 0x1.98f377a34b499p-3 }, { 0x1.baceb9e1007a3p-1, 0x1.acfb803bc924bp-3 }, { 0x1.b7d5ef543e55ep-1, 0x1.c0e10098b025fp-3 }, { 0x1.b4e749977d953p-1, 0x1.d4a46efe103efp-3 }, { 0x1.b20295155478ep-1, 0x1.e8463f45b8d0bp-3 }, { 0x1.af279f8e82be2p-1, 0x1.fbc6e3228997fp-3 }, { 0x1.ac5638197fdf3p-1, 0x1.079364f2e5aa8p-2 }, { 0x1.a98e2f102e087p-1, 0x1.1133306010a63p-2 }, { 0x1.a6cf5606d05c1p-1, 0x1.1ac309631bd17p-2 }, { 0x1.a4197fc04d746p-1, 0x1.24432485370c1p-2 }, { 0x1.a16c80293dc01p-1, 0x1.2db3b5449132fp-2 }, { 0x1.9ec82c4dc5bc9p-1, 0x1.3714ee1d7a32p-2 }, { 0x1.9c2c5a491f534p-1, 0x1.406700ab52c94p-2 }, { 0x1.9998e1480b618p-1, 0x1.49aa1d87522b2p-2 }, { 0x1.970d9977c6c2dp-1, 0x1.52de746d7ecb2p-2 }, { 0x1.948a5c023d212p-1, 0x1.5c0434336b343p-2 }, { 0x1.920f0303d6809p-1, 0x1.651b8ad6c90d1p-2 }, { 0x1.8f9b698a98b45p-1, 0x1.6e24a56ab5831p-2 }, { 0x1.8d2f6b81726f6p-1, 0x1.771fb04ec29b1p-2 }, { 0x1.8acae5bb55badp-1, 0x1.800cd6f19c25ep-2 }, { 0x1.886db5d9275b8p-1, 0x1.88ec441df11dfp-2 }, { 0x1.8617ba567c13cp-1, 0x1.91be21b7c93f5p-2 }, { 0x1.83c8d27487800p-1, 0x1.9a8298f8c7454p-2 }, { 0x1.8180de3c5dbe7p-1, 0x1.a339d255c04ddp-2 }, { 0x1.7f3fbe71cdb71p-1, 0x1.abe3f59f43db7p-2 }, { 0x1.7d055498071c1p-1, 
0x1.b48129deca9efp-2 }, { 0x1.7ad182e54f65ap-1, 0x1.bd119575364c1p-2 }, { 0x1.78a42c3c90125p-1, 0x1.c5955e23ebcbcp-2 }, { 0x1.767d342f76944p-1, 0x1.ce0ca8f4e1557p-2 }, { 0x1.745c7ef26b00ap-1, 0x1.d6779a5a75774p-2 }, { 0x1.7241f15769d0fp-1, 0x1.ded6563550d27p-2 }, { 0x1.702d70d396e41p-1, 0x1.e728ffafd840ep-2 }, { 0x1.6e1ee3700cd11p-1, 0x1.ef6fb96c8d739p-2 }, { 0x1.6c162fc9cbe02p-1, 0x1.f7aaa57907219p-2 } } }; diff --git a/contrib/arm-optimized-routines/math/aarch64/v_log_data.c b/contrib/arm-optimized-routines/math/aarch64/v_log_data.c index 82351bb14766..4f0e6e167381 100644 --- a/contrib/arm-optimized-routines/math/aarch64/v_log_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/v_log_data.c @@ -1,156 +1,161 @@ /* * Lookup table for double-precision log(x) vector function. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "v_math.h" - -#define N (1 << V_LOG_TABLE_BITS) +#include "math_config.h" const struct v_log_data __v_log_data = { + /* Worst-case error: 1.17 + 0.5 ulp. + Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */ + .poly = { -0x1.ffffffffffff7p-2, 0x1.55555555170d4p-2, -0x1.0000000399c27p-2, + 0x1.999b2e90e94cap-3, -0x1.554e550bd501ep-3 }, + .ln2 = 0x1.62e42fefa39efp-1, /* Algorithm: x = 2^k z log(x) = k ln2 + log(c) + poly(z/c - 1) - where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1, - N=128) and log(c) and 1/c for the ith subinterval comes from lookup tables: + where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1, + N=128) and log(c) and 1/c for the ith subinterval comes from two lookup + tables: table[i].invc = 1/c table[i].logc = (double)log(c) - where c is near the center of the subinterval and is chosen by trying several - floating point invc candidates around 1/center and selecting one for which - the error in (double)log(c) is minimized (< 0x1p-74), except the subinterval - that contains 1 and the previous one got tweaked to avoid cancellation. */ + where c is near the center of the subinterval and is chosen by trying + several floating point invc candidates around 1/center and selecting one + for which the error in (double)log(c) is minimized (< 0x1p-74), except the + subinterval that contains 1 and the previous one got tweaked to avoid + cancellation. 
*/ .table = { { 0x1.6a133d0dec120p+0, -0x1.62fe995eb963ap-2 }, { 0x1.6815f2f3e42edp+0, -0x1.5d5a48dad6b67p-2 }, { 0x1.661e39be1ac9ep+0, -0x1.57bde257d2769p-2 }, { 0x1.642bfa30ac371p+0, -0x1.52294fbf2af55p-2 }, { 0x1.623f1d916f323p+0, -0x1.4c9c7b598aa38p-2 }, { 0x1.60578da220f65p+0, -0x1.47174fc5ff560p-2 }, { 0x1.5e75349dea571p+0, -0x1.4199b7fa7b5cap-2 }, { 0x1.5c97fd387a75ap+0, -0x1.3c239f48cfb99p-2 }, { 0x1.5abfd2981f200p+0, -0x1.36b4f154d2aebp-2 }, { 0x1.58eca051dc99cp+0, -0x1.314d9a0ff32fbp-2 }, { 0x1.571e526d9df12p+0, -0x1.2bed85cca3cffp-2 }, { 0x1.5554d555b3fcbp+0, -0x1.2694a11421af9p-2 }, { 0x1.539015e2a20cdp+0, -0x1.2142d8d014fb2p-2 }, { 0x1.51d0014ee0164p+0, -0x1.1bf81a2c77776p-2 }, { 0x1.50148538cd9eep+0, -0x1.16b452a39c6a4p-2 }, { 0x1.4e5d8f9f698a1p+0, -0x1.11776ffa6c67ep-2 }, { 0x1.4cab0edca66bep+0, -0x1.0c416035020e0p-2 }, { 0x1.4afcf1a9db874p+0, -0x1.071211aa10fdap-2 }, { 0x1.495327136e16fp+0, -0x1.01e972e293b1bp-2 }, { 0x1.47ad9e84af28fp+0, -0x1.f98ee587fd434p-3 }, { 0x1.460c47b39ae15p+0, -0x1.ef5800ad716fbp-3 }, { 0x1.446f12b278001p+0, -0x1.e52e160484698p-3 }, { 0x1.42d5efdd720ecp+0, -0x1.db1104b19352ep-3 }, { 0x1.4140cfe001a0fp+0, -0x1.d100ac59e0bd6p-3 }, { 0x1.3fafa3b421f69p+0, -0x1.c6fced287c3bdp-3 }, { 0x1.3e225c9c8ece5p+0, -0x1.bd05a7b317c29p-3 }, { 0x1.3c98ec29a211ap+0, -0x1.b31abd229164fp-3 }, { 0x1.3b13442a413fep+0, -0x1.a93c0edadb0a3p-3 }, { 0x1.399156baa3c54p+0, -0x1.9f697ee30d7ddp-3 }, { 0x1.38131639b4cdbp+0, -0x1.95a2efa9aa40ap-3 }, { 0x1.36987540fbf53p+0, -0x1.8be843d796044p-3 }, { 0x1.352166b648f61p+0, -0x1.82395ecc477edp-3 }, { 0x1.33adddb3eb575p+0, -0x1.7896240966422p-3 }, { 0x1.323dcd99fc1d3p+0, -0x1.6efe77aca8c55p-3 }, { 0x1.30d129fefc7d2p+0, -0x1.65723e117ec5cp-3 }, { 0x1.2f67e6b72fe7dp+0, -0x1.5bf15c0955706p-3 }, { 0x1.2e01f7cf8b187p+0, -0x1.527bb6c111da1p-3 }, { 0x1.2c9f518ddc86ep+0, -0x1.491133c939f8fp-3 }, { 0x1.2b3fe86e5f413p+0, -0x1.3fb1b90c7fc58p-3 }, { 0x1.29e3b1211b25cp+0, -0x1.365d2cc485f8dp-3 }, { 0x1.288aa08b373cfp+0, -0x1.2d13758970de7p-3 }, { 0x1.2734abcaa8467p+0, -0x1.23d47a721fd47p-3 }, { 0x1.25e1c82459b81p+0, -0x1.1aa0229f25ec2p-3 }, { 0x1.2491eb1ad59c5p+0, -0x1.117655ddebc3bp-3 }, { 0x1.23450a54048b5p+0, -0x1.0856fbf83ab6bp-3 }, { 0x1.21fb1bb09e578p+0, -0x1.fe83fabbaa106p-4 }, { 0x1.20b415346d8f7p+0, -0x1.ec6e8507a56cdp-4 }, { 0x1.1f6fed179a1acp+0, -0x1.da6d68c7cc2eap-4 }, { 0x1.1e2e99b93c7b3p+0, -0x1.c88078462be0cp-4 }, { 0x1.1cf011a7a882ap+0, -0x1.b6a786a423565p-4 }, { 0x1.1bb44b97dba5ap+0, -0x1.a4e2676ac7f85p-4 }, { 0x1.1a7b3e66cdd4fp+0, -0x1.9330eea777e76p-4 }, { 0x1.1944e11dc56cdp+0, -0x1.8192f134d5ad9p-4 }, { 0x1.18112aebb1a6ep+0, -0x1.70084464f0538p-4 }, { 0x1.16e013231b7e9p+0, -0x1.5e90bdec5cb1fp-4 }, { 0x1.15b1913f156cfp+0, -0x1.4d2c3433c5536p-4 }, { 0x1.14859cdedde13p+0, -0x1.3bda7e219879ap-4 }, { 0x1.135c2dc68cfa4p+0, -0x1.2a9b732d27194p-4 }, { 0x1.12353bdb01684p+0, -0x1.196eeb2b10807p-4 }, { 0x1.1110bf25b85b4p+0, -0x1.0854be8ef8a7ep-4 }, { 0x1.0feeafd2f8577p+0, -0x1.ee998cb277432p-5 }, { 0x1.0ecf062c51c3bp+0, -0x1.ccadb79919fb9p-5 }, { 0x1.0db1baa076c8bp+0, -0x1.aae5b1d8618b0p-5 }, { 0x1.0c96c5bb3048ep+0, -0x1.89413015d7442p-5 }, { 0x1.0b7e20263e070p+0, -0x1.67bfe7bf158dep-5 }, { 0x1.0a67c2acd0ce3p+0, -0x1.46618f83941bep-5 }, { 0x1.0953a6391e982p+0, -0x1.2525df1b0618ap-5 }, { 0x1.0841c3caea380p+0, -0x1.040c8e2f77c6ap-5 }, { 0x1.07321489b13eap+0, -0x1.c62aad39f738ap-6 }, { 0x1.062491aee9904p+0, -0x1.847fe3bdead9cp-6 }, { 0x1.05193497a7cc5p+0, -0x1.43183683400acp-6 }, { 0x1.040ff6b5f5e9fp+0, -0x1.01f31c4e1d544p-6 }, { 
0x1.0308d19aa6127p+0, -0x1.82201d1e6b69ap-7 }, { 0x1.0203beedb0c67p+0, -0x1.00dd0f3e1bfd6p-7 }, { 0x1.010037d38bcc2p+0, -0x1.ff6fe1feb4e53p-9 }, { 1.0, 0.0 }, { 0x1.fc06d493cca10p-1, 0x1.fe91885ec8e20p-8 }, { 0x1.f81e6ac3b918fp-1, 0x1.fc516f716296dp-7 }, { 0x1.f44546ef18996p-1, 0x1.7bb4dd70a015bp-6 }, { 0x1.f07b10382c84bp-1, 0x1.f84c99b34b674p-6 }, { 0x1.ecbf7070e59d4p-1, 0x1.39f9ce4fb2d71p-5 }, { 0x1.e91213f715939p-1, 0x1.7756c0fd22e78p-5 }, { 0x1.e572a9a75f7b7p-1, 0x1.b43ee82db8f3ap-5 }, { 0x1.e1e0e2c530207p-1, 0x1.f0b3fced60034p-5 }, { 0x1.de5c72d8a8be3p-1, 0x1.165bd78d4878ep-4 }, { 0x1.dae50fa5658ccp-1, 0x1.3425d2715ebe6p-4 }, { 0x1.d77a71145a2dap-1, 0x1.51b8bd91b7915p-4 }, { 0x1.d41c51166623ep-1, 0x1.6f15632c76a47p-4 }, { 0x1.d0ca6ba0bb29fp-1, 0x1.8c3c88ecbe503p-4 }, { 0x1.cd847e8e59681p-1, 0x1.a92ef077625dap-4 }, { 0x1.ca4a499693e00p-1, 0x1.c5ed5745fa006p-4 }, { 0x1.c71b8e399e821p-1, 0x1.e27876de1c993p-4 }, { 0x1.c3f80faf19077p-1, 0x1.fed104fce4cdcp-4 }, { 0x1.c0df92dc2b0ecp-1, 0x1.0d7bd9c17d78bp-3 }, { 0x1.bdd1de3cbb542p-1, 0x1.1b76986cef97bp-3 }, { 0x1.baceb9e1007a3p-1, 0x1.295913d24f750p-3 }, { 0x1.b7d5ef543e55ep-1, 0x1.37239fa295d17p-3 }, { 0x1.b4e749977d953p-1, 0x1.44d68dd78714bp-3 }, { 0x1.b20295155478ep-1, 0x1.52722ebe5d780p-3 }, { 0x1.af279f8e82be2p-1, 0x1.5ff6d12671f98p-3 }, { 0x1.ac5638197fdf3p-1, 0x1.6d64c2389484bp-3 }, { 0x1.a98e2f102e087p-1, 0x1.7abc4da40fddap-3 }, { 0x1.a6cf5606d05c1p-1, 0x1.87fdbda1e8452p-3 }, { 0x1.a4197fc04d746p-1, 0x1.95295b06a5f37p-3 }, { 0x1.a16c80293dc01p-1, 0x1.a23f6d34abbc5p-3 }, { 0x1.9ec82c4dc5bc9p-1, 0x1.af403a28e04f2p-3 }, { 0x1.9c2c5a491f534p-1, 0x1.bc2c06a85721ap-3 }, { 0x1.9998e1480b618p-1, 0x1.c903161240163p-3 }, { 0x1.970d9977c6c2dp-1, 0x1.d5c5aa93287ebp-3 }, { 0x1.948a5c023d212p-1, 0x1.e274051823fa9p-3 }, { 0x1.920f0303d6809p-1, 0x1.ef0e656300c16p-3 }, { 0x1.8f9b698a98b45p-1, 0x1.fb9509f05aa2ap-3 }, { 0x1.8d2f6b81726f6p-1, 0x1.04041821f37afp-2 }, { 0x1.8acae5bb55badp-1, 0x1.0a340a49b3029p-2 }, { 0x1.886db5d9275b8p-1, 0x1.105a7918a126dp-2 }, { 0x1.8617ba567c13cp-1, 0x1.1677819812b84p-2 }, { 0x1.83c8d27487800p-1, 0x1.1c8b405b40c0ep-2 }, { 0x1.8180de3c5dbe7p-1, 0x1.2295d16cfa6b1p-2 }, { 0x1.7f3fbe71cdb71p-1, 0x1.28975066318a2p-2 }, { 0x1.7d055498071c1p-1, 0x1.2e8fd855d86fcp-2 }, { 0x1.7ad182e54f65ap-1, 0x1.347f83d605e59p-2 }, { 0x1.78a42c3c90125p-1, 0x1.3a666d1244588p-2 }, { 0x1.767d342f76944p-1, 0x1.4044adb6f8ec4p-2 }, { 0x1.745c7ef26b00ap-1, 0x1.461a5f077558cp-2 }, { 0x1.7241f15769d0fp-1, 0x1.4be799e20b9c8p-2 }, { 0x1.702d70d396e41p-1, 0x1.51ac76a6b79dfp-2 }, { 0x1.6e1ee3700cd11p-1, 0x1.57690d5744a45p-2 }, { 0x1.6c162fc9cbe02p-1, 0x1.5d1d758e45217p-2 } } }; diff --git a/contrib/arm-optimized-routines/math/aarch64/v_logf.c b/contrib/arm-optimized-routines/math/aarch64/v_logf.c deleted file mode 100644 index 66ebbbcd2b5a..000000000000 --- a/contrib/arm-optimized-routines/math/aarch64/v_logf.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Single-precision vector log function. - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "mathlib.h" -#include "v_math.h" - -static const struct data -{ - uint32x4_t min_norm; - uint16x8_t special_bound; - float32x4_t poly[7]; - float32x4_t ln2, tiny_bound; - uint32x4_t off, mantissa_mask; -} data = { - /* 3.34 ulp error. 
*/ - .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f), - V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f), - V4 (-0x1.ffffc8p-2f) }, - .ln2 = V4 (0x1.62e43p-1f), - .tiny_bound = V4 (0x1p-126), - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ - .off = V4 (0x3f2aaaab), /* 0.666667. */ - .mantissa_mask = V4 (0x007fffff) -}; - -#define P(i) d->poly[7 - i] - -static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, float32x4_t r2, float32x4_t p, - uint16x4_t cmp) -{ - /* Fall back to scalar code. */ - return v_call_f32 (logf, x, vfmaq_f32 (p, y, r2), vmovl_u16 (cmp)); -} - -float32x4_t VPCS_ATTR V_NAME_F1 (log) (float32x4_t x) -{ - const struct data *d = ptr_barrier (&data); - float32x4_t n, p, q, r, r2, y; - uint32x4_t u; - uint16x4_t cmp; - - u = vreinterpretq_u32_f32 (x); - cmp = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); - - /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); - n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* sign extend. */ - u = vandq_u32 (u, d->mantissa_mask); - u = vaddq_u32 (u, d->off); - r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); - - /* y = log(1+r) + n*ln2. */ - r2 = vmulq_f32 (r, r); - /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). */ - p = vfmaq_f32 (P (5), P (6), r); - q = vfmaq_f32 (P (3), P (4), r); - y = vfmaq_f32 (P (1), P (2), r); - p = vfmaq_f32 (p, P (7), r2); - q = vfmaq_f32 (q, p, r2); - y = vfmaq_f32 (y, q, r2); - p = vfmaq_f32 (r, d->ln2, n); - - if (unlikely (v_any_u16h (cmp))) - return special_case (x, y, r2, p, cmp); - return vfmaq_f32 (p, y, r2); -} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_math.h b/contrib/arm-optimized-routines/math/aarch64/v_math.h deleted file mode 100644 index 1dc9916c6fb0..000000000000 --- a/contrib/arm-optimized-routines/math/aarch64/v_math.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Vector math abstractions. - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#ifndef _V_MATH_H -#define _V_MATH_H - -#if !__aarch64__ -# error "Cannot build without AArch64" -#endif - -#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs)) - -#define V_NAME_F1(fun) _ZGVnN4v_##fun##f -#define V_NAME_D1(fun) _ZGVnN2v_##fun -#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f -#define V_NAME_D2(fun) _ZGVnN2vv_##fun - -#include <stdint.h> -#include "../math_config.h" -#include <arm_neon.h> - -/* Shorthand helpers for declaring constants. */ -# define V2(X) { X, X } -# define V4(X) { X, X, X, X } -# define V8(X) { X, X, X, X, X, X, X, X } - -static inline int -v_any_u16h (uint16x4_t x) -{ - return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0; -} - -static inline int -v_lanes32 (void) -{ - return 4; -} - -static inline float32x4_t -v_f32 (float x) -{ - return (float32x4_t) V4 (x); -} -static inline uint32x4_t -v_u32 (uint32_t x) -{ - return (uint32x4_t) V4 (x); -} -/* true if any elements of a v_cond result is non-zero. */ -static inline int -v_any_u32 (uint32x4_t x) -{ - /* assume elements in x are either 0 or -1u.
*/ - return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0; -} -static inline int -v_any_u32h (uint32x2_t x) -{ - return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0; -} -static inline float32x4_t -v_lookup_f32 (const float *tab, uint32x4_t idx) -{ - return (float32x4_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]}; -} -static inline uint32x4_t -v_lookup_u32 (const uint32_t *tab, uint32x4_t idx) -{ - return (uint32x4_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]}; -} -static inline float32x4_t -v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p) -{ - return (float32x4_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1], - p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3]}; -} -static inline float32x4_t -v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2, - float32x4_t y, uint32x4_t p) -{ - return (float32x4_t){p[0] ? f (x1[0], x2[0]) : y[0], - p[1] ? f (x1[1], x2[1]) : y[1], - p[2] ? f (x1[2], x2[2]) : y[2], - p[3] ? f (x1[3], x2[3]) : y[3]}; -} - -static inline int -v_lanes64 (void) -{ - return 2; -} -static inline float64x2_t -v_f64 (double x) -{ - return (float64x2_t) V2 (x); -} -static inline uint64x2_t -v_u64 (uint64_t x) -{ - return (uint64x2_t) V2 (x); -} -/* true if any elements of a v_cond result is non-zero. */ -static inline int -v_any_u64 (uint64x2_t x) -{ - /* assume elements in x are either 0 or -1u. */ - return vpaddd_u64 (x) != 0; -} -static inline float64x2_t -v_lookup_f64 (const double *tab, uint64x2_t idx) -{ - return (float64x2_t){tab[idx[0]], tab[idx[1]]}; -} -static inline uint64x2_t -v_lookup_u64 (const uint64_t *tab, uint64x2_t idx) -{ - return (uint64x2_t){tab[idx[0]], tab[idx[1]]}; -} -static inline float64x2_t -v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p) -{ - double p1 = p[1]; - double x1 = x[1]; - if (likely (p[0])) - y[0] = f (x[0]); - if (likely (p1)) - y[1] = f (x1); - return y; -} - -#endif diff --git a/contrib/arm-optimized-routines/math/aarch64/v_pow.c b/contrib/arm-optimized-routines/math/aarch64/v_pow.c deleted file mode 100644 index 734f1663a283..000000000000 --- a/contrib/arm-optimized-routines/math/aarch64/v_pow.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Double-precision vector pow function. - * - * Copyright (c) 2020-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "mathlib.h" -#include "v_math.h" - -float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y) -{ - float64x2_t z; - for (int lane = 0; lane < v_lanes64 (); lane++) - { - double sx = x[lane]; - double sy = y[lane]; - double sz = pow (sx, sy); - z[lane] = sz; - } - return z; -} diff --git a/contrib/arm-optimized-routines/pl/math/v_pow_exp_data.c b/contrib/arm-optimized-routines/math/aarch64/v_pow_exp_data.c similarity index 99% rename from contrib/arm-optimized-routines/pl/math/v_pow_exp_data.c rename to contrib/arm-optimized-routines/math/aarch64/v_pow_exp_data.c index 5d921ef648a4..db615ce94bd7 100644 --- a/contrib/arm-optimized-routines/pl/math/v_pow_exp_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/v_pow_exp_data.c @@ -1,289 +1,289 @@ /* * Shared data between exp, exp2 and pow. * - * Copyright (c) 2018-2023, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #define N (1 << V_POW_EXP_TABLE_BITS) const struct v_pow_exp_data __v_pow_exp_data = { // exp polynomial coefficients. 
.poly = { // abs error: 1.43*2^-58 // ulp error: 0.549 (0.550 without fma) // if |x| < ln2/512 0x1.fffffffffffd4p-2, 0x1.5555571d6ef9p-3, 0x1.5555576a5adcep-5, }, // N/ln2 .n_over_ln2 = 0x1.71547652b82fep0 * N, // ln2/N .ln2_over_n_hi = 0x1.62e42fefc0000p-9, .ln2_over_n_lo = -0x1.c610ca86c3899p-45, // Used for rounding to nearest integer without using intrinsics. .shift = 0x1.8p52, // 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N) // sbits[k] = asuint64(H[k]) - (k << 52)/N .sbits = { 0x3ff0000000000000, 0x3feffb1afa5abcbf, 0x3feff63da9fb3335, 0x3feff168143b0281, 0x3fefec9a3e778061, 0x3fefe7d42e11bbcc, 0x3fefe315e86e7f85, 0x3fefde5f72f654b1, 0x3fefd9b0d3158574, 0x3fefd50a0e3c1f89, 0x3fefd06b29ddf6de, 0x3fefcbd42b72a836, 0x3fefc74518759bc8, 0x3fefc2bdf66607e0, 0x3fefbe3ecac6f383, 0x3fefb9c79b1f3919, 0x3fefb5586cf9890f, 0x3fefb0f145e46c85, 0x3fefac922b7247f7, 0x3fefa83b23395dec, 0x3fefa3ec32d3d1a2, 0x3fef9fa55fdfa9c5, 0x3fef9b66affed31b, 0x3fef973028d7233e, 0x3fef9301d0125b51, 0x3fef8edbab5e2ab6, 0x3fef8abdc06c31cc, 0x3fef86a814f204ab, 0x3fef829aaea92de0, 0x3fef7e95934f312e, 0x3fef7a98c8a58e51, 0x3fef76a45471c3c2, 0x3fef72b83c7d517b, 0x3fef6ed48695bbc0, 0x3fef6af9388c8dea, 0x3fef672658375d2f, 0x3fef635beb6fcb75, 0x3fef5f99f8138a1c, 0x3fef5be084045cd4, 0x3fef582f95281c6b, 0x3fef54873168b9aa, 0x3fef50e75eb44027, 0x3fef4d5022fcd91d, 0x3fef49c18438ce4d, 0x3fef463b88628cd6, 0x3fef42be3578a819, 0x3fef3f49917ddc96, 0x3fef3bdda27912d1, 0x3fef387a6e756238, 0x3fef351ffb82140a, 0x3fef31ce4fb2a63f, 0x3fef2e85711ece75, 0x3fef2b4565e27cdd, 0x3fef280e341ddf29, 0x3fef24dfe1f56381, 0x3fef21ba7591bb70, 0x3fef1e9df51fdee1, 0x3fef1b8a66d10f13, 0x3fef187fd0dad990, 0x3fef157e39771b2f, 0x3fef1285a6e4030b, 0x3fef0f961f641589, 0x3fef0cafa93e2f56, 0x3fef09d24abd886b, 0x3fef06fe0a31b715, 0x3fef0432edeeb2fd, 0x3fef0170fc4cd831, 0x3feefeb83ba8ea32, 0x3feefc08b26416ff, 0x3feef96266e3fa2d, 0x3feef6c55f929ff1, 0x3feef431a2de883b, 0x3feef1a7373aa9cb, 0x3feeef26231e754a, 0x3feeecae6d05d866, 0x3feeea401b7140ef, 0x3feee7db34e59ff7, 0x3feee57fbfec6cf4, 0x3feee32dc313a8e5, 0x3feee0e544ede173, 0x3feedea64c123422, 0x3feedc70df1c5175, 0x3feeda4504ac801c, 0x3feed822c367a024, 0x3feed60a21f72e2a, 0x3feed3fb2709468a, 0x3feed1f5d950a897, 0x3feecffa3f84b9d4, 0x3feece086061892d, 0x3feecc2042a7d232, 0x3feeca41ed1d0057, 0x3feec86d668b3237, 0x3feec6a2b5c13cd0, 0x3feec4e1e192aed2, 0x3feec32af0d7d3de, 0x3feec17dea6db7d7, 0x3feebfdad5362a27, 0x3feebe41b817c114, 0x3feebcb299fddd0d, 0x3feebb2d81d8abff, 0x3feeb9b2769d2ca7, 0x3feeb8417f4531ee, 0x3feeb6daa2cf6642, 0x3feeb57de83f4eef, 0x3feeb42b569d4f82, 0x3feeb2e2f4f6ad27, 0x3feeb1a4ca5d920f, 0x3feeb070dde910d2, 0x3feeaf4736b527da, 0x3feeae27dbe2c4cf, 0x3feead12d497c7fd, 0x3feeac0827ff07cc, 0x3feeab07dd485429, 0x3feeaa11fba87a03, 0x3feea9268a5946b7, 0x3feea84590998b93, 0x3feea76f15ad2148, 0x3feea6a320dceb71, 0x3feea5e1b976dc09, 0x3feea52ae6cdf6f4, 0x3feea47eb03a5585, 0x3feea3dd1d1929fd, 0x3feea34634ccc320, 0x3feea2b9febc8fb7, 0x3feea23882552225, 0x3feea1c1c70833f6, 0x3feea155d44ca973, 0x3feea0f4b19e9538, 0x3feea09e667f3bcd, 0x3feea052fa75173e, 0x3feea012750bdabf, 0x3fee9fdcddd47645, 0x3fee9fb23c651a2f, 0x3fee9f9298593ae5, 0x3fee9f7df9519484, 0x3fee9f7466f42e87, 0x3fee9f75e8ec5f74, 0x3fee9f8286ead08a, 0x3fee9f9a48a58174, 0x3fee9fbd35d7cbfd, 0x3fee9feb564267c9, 0x3feea024b1ab6e09, 0x3feea0694fde5d3f, 0x3feea0b938ac1cf6, 0x3feea11473eb0187, 0x3feea17b0976cfdb, 0x3feea1ed0130c132, 0x3feea26a62ff86f0, 0x3feea2f336cf4e62, 0x3feea3878491c491, 0x3feea427543e1a12, 0x3feea4d2add106d9, 0x3feea589994cce13, 
0x3feea64c1eb941f7, 0x3feea71a4623c7ad, 0x3feea7f4179f5b21, 0x3feea8d99b4492ed, 0x3feea9cad931a436, 0x3feeaac7d98a6699, 0x3feeabd0a478580f, 0x3feeace5422aa0db, 0x3feeae05bad61778, 0x3feeaf3216b5448c, 0x3feeb06a5e0866d9, 0x3feeb1ae99157736, 0x3feeb2fed0282c8a, 0x3feeb45b0b91ffc6, 0x3feeb5c353aa2fe2, 0x3feeb737b0cdc5e5, 0x3feeb8b82b5f98e5, 0x3feeba44cbc8520f, 0x3feebbdd9a7670b3, 0x3feebd829fde4e50, 0x3feebf33e47a22a2, 0x3feec0f170ca07ba, 0x3feec2bb4d53fe0d, 0x3feec49182a3f090, 0x3feec674194bb8d5, 0x3feec86319e32323, 0x3feeca5e8d07f29e, 0x3feecc667b5de565, 0x3feece7aed8eb8bb, 0x3feed09bec4a2d33, 0x3feed2c980460ad8, 0x3feed503b23e255d, 0x3feed74a8af46052, 0x3feed99e1330b358, 0x3feedbfe53c12e59, 0x3feede6b5579fdbf, 0x3feee0e521356eba, 0x3feee36bbfd3f37a, 0x3feee5ff3a3c2774, 0x3feee89f995ad3ad, 0x3feeeb4ce622f2ff, 0x3feeee07298db666, 0x3feef0ce6c9a8952, 0x3feef3a2b84f15fb, 0x3feef68415b749b1, 0x3feef9728de5593a, 0x3feefc6e29f1c52a, 0x3feeff76f2fb5e47, 0x3fef028cf22749e4, 0x3fef05b030a1064a, 0x3fef08e0b79a6f1f, 0x3fef0c1e904bc1d2, 0x3fef0f69c3f3a207, 0x3fef12c25bd71e09, 0x3fef16286141b33d, 0x3fef199bdd85529c, 0x3fef1d1cd9fa652c, 0x3fef20ab5fffd07a, 0x3fef244778fafb22, 0x3fef27f12e57d14b, 0x3fef2ba88988c933, 0x3fef2f6d9406e7b5, 0x3fef33405751c4db, 0x3fef3720dcef9069, 0x3fef3b0f2e6d1675, 0x3fef3f0b555dc3fa, 0x3fef43155b5bab74, 0x3fef472d4a07897c, 0x3fef4b532b08c968, 0x3fef4f87080d89f2, 0x3fef53c8eacaa1d6, 0x3fef5818dcfba487, 0x3fef5c76e862e6d3, 0x3fef60e316c98398, 0x3fef655d71ff6075, 0x3fef69e603db3285, 0x3fef6e7cd63a8315, 0x3fef7321f301b460, 0x3fef77d5641c0658, 0x3fef7c97337b9b5f, 0x3fef81676b197d17, 0x3fef864614f5a129, 0x3fef8b333b16ee12, 0x3fef902ee78b3ff6, 0x3fef953924676d76, 0x3fef9a51fbc74c83, 0x3fef9f7977cdb740, 0x3fefa4afa2a490da, 0x3fefa9f4867cca6e, 0x3fefaf482d8e67f1, 0x3fefb4aaa2188510, 0x3fefba1bee615a27, 0x3fefbf9c1cb6412a, 0x3fefc52b376bba97, 0x3fefcac948dd7274, 0x3fefd0765b6e4540, 0x3fefd632798844f8, 0x3fefdbfdad9cbe14, 0x3fefe1d802243c89, 0x3fefe7c1819e90d8, 0x3fefedba3692d514, 0x3feff3c22b8f71f1, 0x3feff9d96b2a23d9, }, }; diff --git a/contrib/arm-optimized-routines/pl/math/v_pow_log_data.c b/contrib/arm-optimized-routines/math/aarch64/v_pow_log_data.c similarity index 99% rename from contrib/arm-optimized-routines/pl/math/v_pow_log_data.c rename to contrib/arm-optimized-routines/math/aarch64/v_pow_log_data.c index 036faa5c97c1..7df277f74e4f 100644 --- a/contrib/arm-optimized-routines/pl/math/v_pow_log_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/v_pow_log_data.c @@ -1,174 +1,174 @@ /* * Data for the log part of pow. * - * Copyright (c) 2018-2023, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #define N (1 << V_POW_LOG_TABLE_BITS) /* Algorithm: x = 2^k z log(x) = k ln2 + log(c) + log(z/c) log(z/c) = poly(z/c - 1) where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals and z falls into the ith one, then table entries are computed as tab[i].invc = 1/c tab[i].logc = round(0x1p43*log(c))/0x1p43 tab[i].logctail = (double)(log(c) - logc) where c is chosen near the center of the subinterval such that 1/c has only a few precision bits so z/c - 1 is exactly representible as double: 1/c = center < 1 ? 
round(N/center)/N : round(2*N/center)/N/2 Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97, the last few bits of logc are rounded away so k*ln2hi + logc has no rounding error and the interval for z is selected such that near x == 1, where log(x) is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */ const struct v_pow_log_data __v_pow_log_data = { /* relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8] Coefficients are scaled to match the scaling during evaluation. */ .poly = { -0x1p-1, -0x1.555555555556p-1, 0x1.0000000000006p-1, 0x1.999999959554ep-1, -0x1.555555529a47ap-1, -0x1.2495b9b4845e9p0, 0x1.0002b8b263fc3p0, }, .ln2_hi = 0x1.62e42fefa3800p-1, .ln2_lo = 0x1.ef35793c76730p-45, .invc = { 0x1.6a00000000000p+0, 0x1.6800000000000p+0, 0x1.6600000000000p+0, 0x1.6400000000000p+0, 0x1.6200000000000p+0, 0x1.6000000000000p+0, 0x1.5e00000000000p+0, 0x1.5c00000000000p+0, 0x1.5a00000000000p+0, 0x1.5800000000000p+0, 0x1.5600000000000p+0, 0x1.5600000000000p+0, 0x1.5400000000000p+0, 0x1.5200000000000p+0, 0x1.5000000000000p+0, 0x1.4e00000000000p+0, 0x1.4c00000000000p+0, 0x1.4a00000000000p+0, 0x1.4a00000000000p+0, 0x1.4800000000000p+0, 0x1.4600000000000p+0, 0x1.4400000000000p+0, 0x1.4200000000000p+0, 0x1.4000000000000p+0, 0x1.4000000000000p+0, 0x1.3e00000000000p+0, 0x1.3c00000000000p+0, 0x1.3a00000000000p+0, 0x1.3a00000000000p+0, 0x1.3800000000000p+0, 0x1.3600000000000p+0, 0x1.3400000000000p+0, 0x1.3400000000000p+0, 0x1.3200000000000p+0, 0x1.3000000000000p+0, 0x1.3000000000000p+0, 0x1.2e00000000000p+0, 0x1.2c00000000000p+0, 0x1.2c00000000000p+0, 0x1.2a00000000000p+0, 0x1.2800000000000p+0, 0x1.2600000000000p+0, 0x1.2600000000000p+0, 0x1.2400000000000p+0, 0x1.2400000000000p+0, 0x1.2200000000000p+0, 0x1.2000000000000p+0, 0x1.2000000000000p+0, 0x1.1e00000000000p+0, 0x1.1c00000000000p+0, 0x1.1c00000000000p+0, 0x1.1a00000000000p+0, 0x1.1a00000000000p+0, 0x1.1800000000000p+0, 0x1.1600000000000p+0, 0x1.1600000000000p+0, 0x1.1400000000000p+0, 0x1.1400000000000p+0, 0x1.1200000000000p+0, 0x1.1000000000000p+0, 0x1.1000000000000p+0, 0x1.0e00000000000p+0, 0x1.0e00000000000p+0, 0x1.0c00000000000p+0, 0x1.0c00000000000p+0, 0x1.0a00000000000p+0, 0x1.0a00000000000p+0, 0x1.0800000000000p+0, 0x1.0800000000000p+0, 0x1.0600000000000p+0, 0x1.0400000000000p+0, 0x1.0400000000000p+0, 0x1.0200000000000p+0, 0x1.0200000000000p+0, 0x1.0000000000000p+0, 0x1.0000000000000p+0, 0x1.fc00000000000p-1, 0x1.f800000000000p-1, 0x1.f400000000000p-1, 0x1.f000000000000p-1, 0x1.ec00000000000p-1, 0x1.e800000000000p-1, 0x1.e400000000000p-1, 0x1.e200000000000p-1, 0x1.de00000000000p-1, 0x1.da00000000000p-1, 0x1.d600000000000p-1, 0x1.d400000000000p-1, 0x1.d000000000000p-1, 0x1.cc00000000000p-1, 0x1.ca00000000000p-1, 0x1.c600000000000p-1, 0x1.c400000000000p-1, 0x1.c000000000000p-1, 0x1.be00000000000p-1, 0x1.ba00000000000p-1, 0x1.b800000000000p-1, 0x1.b400000000000p-1, 0x1.b200000000000p-1, 0x1.ae00000000000p-1, 0x1.ac00000000000p-1, 0x1.aa00000000000p-1, 0x1.a600000000000p-1, 0x1.a400000000000p-1, 0x1.a000000000000p-1, 0x1.9e00000000000p-1, 0x1.9c00000000000p-1, 0x1.9a00000000000p-1, 0x1.9600000000000p-1, 0x1.9400000000000p-1, 0x1.9200000000000p-1, 0x1.9000000000000p-1, 0x1.8c00000000000p-1, 0x1.8a00000000000p-1, 0x1.8800000000000p-1, 0x1.8600000000000p-1, 0x1.8400000000000p-1, 0x1.8200000000000p-1, 0x1.7e00000000000p-1, 0x1.7c00000000000p-1, 0x1.7a00000000000p-1, 0x1.7800000000000p-1, 0x1.7600000000000p-1, 0x1.7400000000000p-1, 0x1.7200000000000p-1, 0x1.7000000000000p-1, 0x1.6e00000000000p-1, 
0x1.6c00000000000p-1, }, .logc = { -0x1.62c82f2b9c800p-2, -0x1.5d1bdbf580800p-2, -0x1.5767717455800p-2, -0x1.51aad872df800p-2, -0x1.4be5f95777800p-2, -0x1.4618bc21c6000p-2, -0x1.404308686a800p-2, -0x1.3a64c55694800p-2, -0x1.347dd9a988000p-2, -0x1.2e8e2bae12000p-2, -0x1.2895a13de8800p-2, -0x1.2895a13de8800p-2, -0x1.22941fbcf7800p-2, -0x1.1c898c1699800p-2, -0x1.1675cababa800p-2, -0x1.1058bf9ae4800p-2, -0x1.0a324e2739000p-2, -0x1.0402594b4d000p-2, -0x1.0402594b4d000p-2, -0x1.fb9186d5e4000p-3, -0x1.ef0adcbdc6000p-3, -0x1.e27076e2af000p-3, -0x1.d5c216b4fc000p-3, -0x1.c8ff7c79aa000p-3, -0x1.c8ff7c79aa000p-3, -0x1.bc286742d9000p-3, -0x1.af3c94e80c000p-3, -0x1.a23bc1fe2b000p-3, -0x1.a23bc1fe2b000p-3, -0x1.9525a9cf45000p-3, -0x1.87fa06520d000p-3, -0x1.7ab890210e000p-3, -0x1.7ab890210e000p-3, -0x1.6d60fe719d000p-3, -0x1.5ff3070a79000p-3, -0x1.5ff3070a79000p-3, -0x1.526e5e3a1b000p-3, -0x1.44d2b6ccb8000p-3, -0x1.44d2b6ccb8000p-3, -0x1.371fc201e9000p-3, -0x1.29552f81ff000p-3, -0x1.1b72ad52f6000p-3, -0x1.1b72ad52f6000p-3, -0x1.0d77e7cd09000p-3, -0x1.0d77e7cd09000p-3, -0x1.fec9131dbe000p-4, -0x1.e27076e2b0000p-4, -0x1.e27076e2b0000p-4, -0x1.c5e548f5bc000p-4, -0x1.a926d3a4ae000p-4, -0x1.a926d3a4ae000p-4, -0x1.8c345d631a000p-4, -0x1.8c345d631a000p-4, -0x1.6f0d28ae56000p-4, -0x1.51b073f062000p-4, -0x1.51b073f062000p-4, -0x1.341d7961be000p-4, -0x1.341d7961be000p-4, -0x1.16536eea38000p-4, -0x1.f0a30c0118000p-5, -0x1.f0a30c0118000p-5, -0x1.b42dd71198000p-5, -0x1.b42dd71198000p-5, -0x1.77458f632c000p-5, -0x1.77458f632c000p-5, -0x1.39e87b9fec000p-5, -0x1.39e87b9fec000p-5, -0x1.f829b0e780000p-6, -0x1.f829b0e780000p-6, -0x1.7b91b07d58000p-6, -0x1.fc0a8b0fc0000p-7, -0x1.fc0a8b0fc0000p-7, -0x1.fe02a6b100000p-8, -0x1.fe02a6b100000p-8, 0x0.0000000000000p+0, 0x0.0000000000000p+0, 0x1.0101575890000p-7, 0x1.0205658938000p-6, 0x1.8492528c90000p-6, 0x1.0415d89e74000p-5, 0x1.466aed42e0000p-5, 0x1.894aa149fc000p-5, 0x1.ccb73cdddc000p-5, 0x1.eea31c006c000p-5, 0x1.1973bd1466000p-4, 0x1.3bdf5a7d1e000p-4, 0x1.5e95a4d97a000p-4, 0x1.700d30aeac000p-4, 0x1.9335e5d594000p-4, 0x1.b6ac88dad6000p-4, 0x1.c885801bc4000p-4, 0x1.ec739830a2000p-4, 0x1.fe89139dbe000p-4, 0x1.1178e8227e000p-3, 0x1.1aa2b7e23f000p-3, 0x1.2d1610c868000p-3, 0x1.365fcb0159000p-3, 0x1.4913d8333b000p-3, 0x1.527e5e4a1b000p-3, 0x1.6574ebe8c1000p-3, 0x1.6f0128b757000p-3, 0x1.7898d85445000p-3, 0x1.8beafeb390000p-3, 0x1.95a5adcf70000p-3, 0x1.a93ed3c8ae000p-3, 0x1.b31d8575bd000p-3, 0x1.bd087383be000p-3, 0x1.c6ffbc6f01000p-3, 0x1.db13db0d49000p-3, 0x1.e530effe71000p-3, 0x1.ef5ade4dd0000p-3, 0x1.f991c6cb3b000p-3, 0x1.07138604d5800p-2, 0x1.0c42d67616000p-2, 0x1.1178e8227e800p-2, 0x1.16b5ccbacf800p-2, 0x1.1bf99635a6800p-2, 0x1.214456d0eb800p-2, 0x1.2bef07cdc9000p-2, 0x1.314f1e1d36000p-2, 0x1.36b6776be1000p-2, 0x1.3c25277333000p-2, 0x1.419b423d5e800p-2, 0x1.4718dc271c800p-2, 0x1.4c9e09e173000p-2, 0x1.522ae0738a000p-2, 0x1.57bf753c8d000p-2, 0x1.5d5bddf596000p-2, }, .logctail = { 0x1.ab42428375680p-48, -0x1.ca508d8e0f720p-46, -0x1.362a4d5b6506dp-45, -0x1.684e49eb067d5p-49, -0x1.41b6993293ee0p-47, 0x1.3d82f484c84ccp-46, 0x1.c42f3ed820b3ap-50, 0x1.0b1c686519460p-45, 0x1.5594dd4c58092p-45, 0x1.67b1e99b72bd8p-45, 0x1.5ca14b6cfb03fp-46, 0x1.5ca14b6cfb03fp-46, -0x1.65a242853da76p-46, -0x1.fafbc68e75404p-46, 0x1.f1fc63382a8f0p-46, -0x1.6a8c4fd055a66p-45, -0x1.c6bee7ef4030ep-47, -0x1.036b89ef42d7fp-48, -0x1.036b89ef42d7fp-48, 0x1.d572aab993c87p-47, 0x1.b26b79c86af24p-45, -0x1.72f4f543fff10p-46, 0x1.1ba91bbca681bp-45, 0x1.7794f689f8434p-45, 0x1.7794f689f8434p-45, 0x1.94eb0318bb78fp-46, 
0x1.a4e633fcd9066p-52, -0x1.58c64dc46c1eap-45, -0x1.58c64dc46c1eap-45, -0x1.ad1d904c1d4e3p-45, 0x1.bbdbf7fdbfa09p-45, 0x1.bdb9072534a58p-45, 0x1.bdb9072534a58p-45, -0x1.0e46aa3b2e266p-46, -0x1.e9e439f105039p-46, -0x1.e9e439f105039p-46, -0x1.0de8b90075b8fp-45, 0x1.70cc16135783cp-46, 0x1.70cc16135783cp-46, 0x1.178864d27543ap-48, -0x1.48d301771c408p-45, -0x1.e80a41811a396p-45, -0x1.e80a41811a396p-45, 0x1.a699688e85bf4p-47, 0x1.a699688e85bf4p-47, -0x1.575545ca333f2p-45, 0x1.a342c2af0003cp-45, 0x1.a342c2af0003cp-45, -0x1.d0c57585fbe06p-46, 0x1.53935e85baac8p-45, 0x1.53935e85baac8p-45, 0x1.37c294d2f5668p-46, 0x1.37c294d2f5668p-46, -0x1.69737c93373dap-45, 0x1.f025b61c65e57p-46, 0x1.f025b61c65e57p-46, 0x1.c5edaccf913dfp-45, 0x1.c5edaccf913dfp-45, 0x1.47c5e768fa309p-46, 0x1.d599e83368e91p-45, 0x1.d599e83368e91p-45, 0x1.c827ae5d6704cp-46, 0x1.c827ae5d6704cp-46, -0x1.cfc4634f2a1eep-45, -0x1.cfc4634f2a1eep-45, 0x1.502b7f526feaap-48, 0x1.502b7f526feaap-48, -0x1.980267c7e09e4p-45, -0x1.980267c7e09e4p-45, -0x1.88d5493faa639p-45, -0x1.f1e7cf6d3a69cp-50, -0x1.f1e7cf6d3a69cp-50, -0x1.9e23f0dda40e4p-46, -0x1.9e23f0dda40e4p-46, 0x0.0000000000000p+0, 0x0.0000000000000p+0, -0x1.0c76b999d2be8p-46, -0x1.3dc5b06e2f7d2p-45, -0x1.aa0ba325a0c34p-45, 0x1.111c05cf1d753p-47, -0x1.c167375bdfd28p-45, -0x1.97995d05a267dp-46, -0x1.a68f247d82807p-46, -0x1.e113e4fc93b7bp-47, -0x1.5325d560d9e9bp-45, 0x1.cc85ea5db4ed7p-45, -0x1.c69063c5d1d1ep-45, 0x1.c1e8da99ded32p-49, 0x1.3115c3abd47dap-45, -0x1.390802bf768e5p-46, 0x1.646d1c65aacd3p-45, -0x1.dc068afe645e0p-45, -0x1.534d64fa10afdp-45, 0x1.1ef78ce2d07f2p-45, 0x1.ca78e44389934p-45, 0x1.39d6ccb81b4a1p-47, 0x1.62fa8234b7289p-51, 0x1.5837954fdb678p-45, 0x1.633e8e5697dc7p-45, 0x1.9cf8b2c3c2e78p-46, -0x1.5118de59c21e1p-45, -0x1.c661070914305p-46, -0x1.73d54aae92cd1p-47, 0x1.7f22858a0ff6fp-47, -0x1.8724350562169p-45, -0x1.c358d4eace1aap-47, -0x1.d4bc4595412b6p-45, -0x1.1ec72c5962bd2p-48, -0x1.aff2af715b035p-45, 0x1.212276041f430p-51, -0x1.a211565bb8e11p-51, 0x1.bcbecca0cdf30p-46, 0x1.89cdb16ed4e91p-48, 0x1.7188b163ceae9p-45, -0x1.c210e63a5f01cp-45, 0x1.b9acdf7a51681p-45, 0x1.ca6ed5147bdb7p-45, 0x1.a87deba46baeap-47, 0x1.a9cfa4a5004f4p-45, -0x1.8e27ad3213cb8p-45, 0x1.16ecdb0f177c8p-46, 0x1.83b54b606bd5cp-46, 0x1.8e436ec90e09dp-47, -0x1.f27ce0967d675p-45, -0x1.e20891b0ad8a4p-45, 0x1.ebe708164c759p-45, 0x1.fadedee5d40efp-46, -0x1.a0b2a08a465dcp-47, }, }; diff --git a/contrib/arm-optimized-routines/math/aarch64/v_powf.c b/contrib/arm-optimized-routines/math/aarch64/v_powf.c deleted file mode 100644 index 3a4163ab0558..000000000000 --- a/contrib/arm-optimized-routines/math/aarch64/v_powf.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Single-precision vector powf function. - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" - -#define Min v_u32 (0x00800000) -#define Max v_u32 (0x7f800000) -#define Thresh v_u32 (0x7f000000) /* Max - Min. */ -#define MantissaMask v_u32 (0x007fffff) - -#define A data.log2_poly -#define C data.exp2f_poly - -/* 2.6 ulp ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2). 
*/ -#define Off v_u32 (0x3f35d000) - -#define V_POWF_LOG2_TABLE_BITS 5 -#define V_EXP2F_TABLE_BITS 5 -#define Log2IdxMask v_u32 ((1 << V_POWF_LOG2_TABLE_BITS) - 1) -#define Scale ((double) (1 << V_EXP2F_TABLE_BITS)) - -static const struct -{ - struct - { - double invc, logc; - } log2_tab[1 << V_POWF_LOG2_TABLE_BITS]; - double log2_poly[4]; - uint64_t exp2f_tab[1 << V_EXP2F_TABLE_BITS]; - double exp2f_poly[3]; -} data = { - .log2_tab = {{0x1.6489890582816p+0, -0x1.e960f97b22702p-2 * Scale}, - {0x1.5cf19b35e3472p+0, -0x1.c993406cd4db6p-2 * Scale}, - {0x1.55aac0e956d65p+0, -0x1.aa711d9a7d0f3p-2 * Scale}, - {0x1.4eb0022977e01p+0, -0x1.8bf37bacdce9bp-2 * Scale}, - {0x1.47fcccda1dd1fp+0, -0x1.6e13b3519946ep-2 * Scale}, - {0x1.418ceabab68c1p+0, -0x1.50cb8281e4089p-2 * Scale}, - {0x1.3b5c788f1edb3p+0, -0x1.341504a237e2bp-2 * Scale}, - {0x1.3567de48e9c9ap+0, -0x1.17eaab624ffbbp-2 * Scale}, - {0x1.2fabc80fd19bap+0, -0x1.f88e708f8c853p-3 * Scale}, - {0x1.2a25200ce536bp+0, -0x1.c24b6da113914p-3 * Scale}, - {0x1.24d108e0152e3p+0, -0x1.8d02ee397cb1dp-3 * Scale}, - {0x1.1facd8ab2fbe1p+0, -0x1.58ac1223408b3p-3 * Scale}, - {0x1.1ab614a03efdfp+0, -0x1.253e6fd190e89p-3 * Scale}, - {0x1.15ea6d03af9ffp+0, -0x1.e5641882c12ffp-4 * Scale}, - {0x1.1147b994bb776p+0, -0x1.81fea712926f7p-4 * Scale}, - {0x1.0ccbf650593aap+0, -0x1.203e240de64a3p-4 * Scale}, - {0x1.0875408477302p+0, -0x1.8029b86a78281p-5 * Scale}, - {0x1.0441d42a93328p+0, -0x1.85d713190fb9p-6 * Scale}, - {0x1p+0, 0x0p+0 * Scale}, - {0x1.f1d006c855e86p-1, 0x1.4c1cc07312997p-5 * Scale}, - {0x1.e28c3341aa301p-1, 0x1.5e1848ccec948p-4 * Scale}, - {0x1.d4bdf9aa64747p-1, 0x1.04cfcb7f1196fp-3 * Scale}, - {0x1.c7b45a24e5803p-1, 0x1.582813d463c21p-3 * Scale}, - {0x1.bb5f5eb2ed60ap-1, 0x1.a936fa68760ccp-3 * Scale}, - {0x1.afb0bff8fe6b4p-1, 0x1.f81bc31d6cc4ep-3 * Scale}, - {0x1.a49badf7ab1f5p-1, 0x1.2279a09fae6b1p-2 * Scale}, - {0x1.9a14a111fc4c9p-1, 0x1.47ec0b6df5526p-2 * Scale}, - {0x1.901131f5b2fdcp-1, 0x1.6c71762280f1p-2 * Scale}, - {0x1.8687f73f6d865p-1, 0x1.90155070798dap-2 * Scale}, - {0x1.7d7067eb77986p-1, 0x1.b2e23b1d3068cp-2 * Scale}, - {0x1.74c2c1cf97b65p-1, 0x1.d4e21b0daa86ap-2 * Scale}, - {0x1.6c77f37cff2a1p-1, 0x1.f61e2a2f67f3fp-2 * Scale},}, - .log2_poly = { /* rel err: 1.5 * 2^-30. */ - -0x1.6ff5daa3b3d7cp-2 * Scale, 0x1.ec81d03c01aebp-2 * Scale, - -0x1.71547bb43f101p-1 * Scale, 0x1.7154764a815cbp0 * Scale,}, - .exp2f_tab = {0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, - 0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa, - 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715, - 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, - 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, - 0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, - 0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db, - 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, - 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, - 0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, - 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,}, - .exp2f_poly = { /* rel err: 1.69 * 2^-34. 
*/ - 0x1.c6af84b912394p-5 / Scale / Scale / Scale, - 0x1.ebfce50fac4f3p-3 / Scale / Scale, - 0x1.62e42ff0c52d6p-1 / Scale}}; - -static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, float32x4_t ret, uint32x4_t cmp) -{ - return v_call2_f32 (powf, x, y, ret, cmp); -} - -float32x4_t VPCS_ATTR V_NAME_F2 (pow) (float32x4_t x, float32x4_t y) -{ - uint32x4_t u = vreinterpretq_u32_f32 (x); - uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (u, Min), Thresh); - uint32x4_t tmp = vsubq_u32 (u, Off); - uint32x4_t i = vandq_u32 (vshrq_n_u32 (tmp, (23 - V_POWF_LOG2_TABLE_BITS)), - Log2IdxMask); - uint32x4_t top = vbicq_u32 (tmp, MantissaMask); - uint32x4_t iz = vsubq_u32 (u, top); - int32x4_t k = vshrq_n_s32 (vreinterpretq_s32_u32 (top), - 23 - V_EXP2F_TABLE_BITS); /* arithmetic shift. */ - - float32x4_t ret; - for (int lane = 0; lane < 4; lane++) - { - /* Use double precision for each lane. */ - double invc = data.log2_tab[i[lane]].invc; - double logc = data.log2_tab[i[lane]].logc; - double z = (double) asfloat (iz[lane]); - - /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */ - double r = __builtin_fma (z, invc, -1.0); - double y0 = logc + (double) k[lane]; - - /* Polynomial to approximate log1p(r)/ln2. */ - double logx = A[0]; - logx = r * logx + A[1]; - logx = r * logx + A[2]; - logx = r * logx + A[3]; - logx = r * logx + y0; - double ylogx = y[lane] * logx; - cmp[lane] = (asuint64 (ylogx) >> 47 & 0xffff) - >= asuint64 (126.0 * (1 << V_EXP2F_TABLE_BITS)) >> 47 - ? 1 - : cmp[lane]; - - /* N*x = k + r with r in [-1/2, 1/2]. */ - double kd = round (ylogx); - uint64_t ki = lround (ylogx); - r = ylogx - kd; - - /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */ - uint64_t t = data.exp2f_tab[ki % (1 << V_EXP2F_TABLE_BITS)]; - t += ki << (52 - V_EXP2F_TABLE_BITS); - double s = asdouble (t); - double p = C[0]; - p = __builtin_fma (p, r, C[1]); - p = __builtin_fma (p, r, C[2]); - p = __builtin_fma (p, s * r, s); - - ret[lane] = p; - } - if (unlikely (v_any_u32 (cmp))) - return special_case (x, y, ret, cmp); - return ret; -} diff --git a/contrib/arm-optimized-routines/pl/math/v_powf_data.c b/contrib/arm-optimized-routines/math/aarch64/v_powf_data.c similarity index 98% rename from contrib/arm-optimized-routines/pl/math/v_powf_data.c rename to contrib/arm-optimized-routines/math/aarch64/v_powf_data.c index ded211924b80..5cf1b8769414 100644 --- a/contrib/arm-optimized-routines/pl/math/v_powf_data.c +++ b/contrib/arm-optimized-routines/math/aarch64/v_powf_data.c @@ -1,89 +1,89 @@ /* * Coefficients for single-precision SVE pow(x) function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" const struct v_powf_data __v_powf_data = { .invc = { 0x1.6489890582816p+0, 0x1.5cf19b35e3472p+0, 0x1.55aac0e956d65p+0, 0x1.4eb0022977e01p+0, 0x1.47fcccda1dd1fp+0, 0x1.418ceabab68c1p+0, 0x1.3b5c788f1edb3p+0, 0x1.3567de48e9c9ap+0, 0x1.2fabc80fd19bap+0, 0x1.2a25200ce536bp+0, 0x1.24d108e0152e3p+0, 0x1.1facd8ab2fbe1p+0, 0x1.1ab614a03efdfp+0, 0x1.15ea6d03af9ffp+0, 0x1.1147b994bb776p+0, 0x1.0ccbf650593aap+0, 0x1.0875408477302p+0, 0x1.0441d42a93328p+0, 0x1p+0, 0x1.f1d006c855e86p-1, 0x1.e28c3341aa301p-1, 0x1.d4bdf9aa64747p-1, 0x1.c7b45a24e5803p-1, 0x1.bb5f5eb2ed60ap-1, 0x1.afb0bff8fe6b4p-1, 0x1.a49badf7ab1f5p-1, 0x1.9a14a111fc4c9p-1, 0x1.901131f5b2fdcp-1, 0x1.8687f73f6d865p-1, 0x1.7d7067eb77986p-1, 0x1.74c2c1cf97b65p-1, 0x1.6c77f37cff2a1p-1 }, .logc = { -0x1.e960f97b22702p+3, -0x1.c993406cd4db6p+3, -0x1.aa711d9a7d0f3p+3, -0x1.8bf37bacdce9bp+3, -0x1.6e13b3519946ep+3, -0x1.50cb8281e4089p+3, -0x1.341504a237e2bp+3, -0x1.17eaab624ffbbp+3, -0x1.f88e708f8c853p+2, -0x1.c24b6da113914p+2, -0x1.8d02ee397cb1dp+2, -0x1.58ac1223408b3p+2, -0x1.253e6fd190e89p+2, -0x1.e5641882c12ffp+1, -0x1.81fea712926f7p+1, -0x1.203e240de64a3p+1, -0x1.8029b86a78281p0, -0x1.85d713190fb9p-1, 0x0p+0, 0x1.4c1cc07312997p0, 0x1.5e1848ccec948p+1, 0x1.04cfcb7f1196fp+2, 0x1.582813d463c21p+2, 0x1.a936fa68760ccp+2, 0x1.f81bc31d6cc4ep+2, 0x1.2279a09fae6b1p+3, 0x1.47ec0b6df5526p+3, 0x1.6c71762280f1p+3, 0x1.90155070798dap+3, 0x1.b2e23b1d3068cp+3, 0x1.d4e21b0daa86ap+3, 0x1.f61e2a2f67f3fp+3 }, .scale = { 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, }, }; diff --git a/contrib/arm-optimized-routines/math/cosf.c b/contrib/arm-optimized-routines/math/cosf.c index 6293ce8f1b7d..a9b1f9da16ed 100644 --- a/contrib/arm-optimized-routines/math/cosf.c +++ b/contrib/arm-optimized-routines/math/cosf.c @@ -1,63 +1,71 @@ /* * Single-precision cos function. * - * Copyright (c) 2018-2021, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> #include <stdint.h> #include "math_config.h" #include "sincosf.h" +#include "test_defs.h" +#include "test_sig.h" /* Fast cosf implementation. Worst-case error is 0.5607 ULP, maximum relative error is 0.5303 * 2^-23. A single-step range reduction is used for small values. Large inputs have their range reduced using fast integer arithmetic. */ float cosf (float y) { double x = y; double s; int n; const sincos_t *p = &__sincosf_table[0]; if (abstop12 (y) < abstop12 (pio4f)) { double x2 = x * x; if (unlikely (abstop12 (y) < abstop12 (0x1p-12f))) return 1.0f; return sinf_poly (x, x2, p, 1); } else if (likely (abstop12 (y) < abstop12 (120.0f))) { x = reduce_fast (x, p, &n); /* Set up the signs for sin and cos.
*/ s = p->sign[n & 3]; if (n & 2) p = &__sincosf_table[1]; return sinf_poly (x * s, x * x, p, n ^ 1); } else if (abstop12 (y) < abstop12 (INFINITY)) { uint32_t xi = asuint (y); int sign = xi >> 31; x = reduce_large (xi, &n); /* Set up signs for sin and cos - include original sign. */ s = p->sign[(n + sign) & 3]; if ((n + sign) & 2) p = &__sincosf_table[1]; return sinf_poly (x * s, x * x, p, n ^ 1); } else return __math_invalidf (y); } + +TEST_SIG (S, F, 1, cos, -3.1, 3.1) +TEST_ULP (cosf, 0.06) +TEST_ULP_NONNEAREST (cosf, 0.5) +TEST_INTERVAL (cosf, 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (cosf, 0x1p-14, 0x1p54, 50000) diff --git a/contrib/arm-optimized-routines/math/erf.c b/contrib/arm-optimized-routines/math/erf.c index 5f9f40dda264..2c93a304346a 100644 --- a/contrib/arm-optimized-routines/math/erf.c +++ b/contrib/arm-optimized-routines/math/erf.c @@ -1,244 +1,254 @@ /* * Double-precision erf(x) function. * - * Copyright (c) 2020, Arm Limited. + * Copyright (c) 2020-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" #include <math.h> #include <stdint.h> +#include "test_defs.h" +#include "test_sig.h" #define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3 #define C 0x1.b0ac16p-1 #define PA __erf_data.erf_poly_A #define NA __erf_data.erf_ratio_N_A #define DA __erf_data.erf_ratio_D_A #define NB __erf_data.erf_ratio_N_B #define DB __erf_data.erf_ratio_D_B #define PC __erf_data.erfc_poly_C #define PD __erf_data.erfc_poly_D #define PE __erf_data.erfc_poly_E #define PF __erf_data.erfc_poly_F /* Top 32 bits of a double. */ static inline uint32_t top32 (double x) { return asuint64 (x) >> 32; } /* Fast erf implementation using a mix of rational and polynomial approximations. Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0. */ double erf (double x) { /* Get top word and sign. */ uint32_t ix = top32 (x); uint32_t ia = ix & 0x7fffffff; uint32_t sign = ix >> 31; /* Normalized and subnormal cases */ if (ia < 0x3feb0000) { /* a = |x| < 0.84375. */ if (ia < 0x3e300000) { /* a < 2^(-28). */ if (ia < 0x00800000) { /* a < 2^(-1015). */ double y = fma (TwoOverSqrtPiMinusOne, x, x); return check_uflow (y); } return x + TwoOverSqrtPiMinusOne * x; } double x2 = x * x; if (ia < 0x3fe00000) { /* a < 0.5 - Use polynomial approximation. */ double r1 = fma (x2, PA[1], PA[0]); double r2 = fma (x2, PA[3], PA[2]); double r3 = fma (x2, PA[5], PA[4]); double r4 = fma (x2, PA[7], PA[6]); double r5 = fma (x2, PA[9], PA[8]); double x4 = x2 * x2; double r = r5; r = fma (x4, r, r4); r = fma (x4, r, r3); r = fma (x4, r, r2); r = fma (x4, r, r1); return fma (r, x, x); /* This fma is crucial for accuracy. */ } else { /* 0.5 <= a < 0.84375 - Use rational approximation. */ double x4, x8, r1n, r2n, r1d, r2d, r3d; r1n = fma (x2, NA[1], NA[0]); x4 = x2 * x2; r2n = fma (x2, NA[3], NA[2]); x8 = x4 * x4; r1d = fma (x2, DA[0], 1.0); r2d = fma (x2, DA[2], DA[1]); r3d = fma (x2, DA[4], DA[3]); double P = r1n + x4 * r2n + x8 * NA[4]; double Q = r1d + x4 * r2d + x8 * r3d; return fma (P / Q, x, x); } } else if (ia < 0x3ff40000) { /* 0.84375 <= |x| < 1.25.
*/ double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d; double a = fabs (x) - 1.0; r1n = fma (a, NB[1], NB[0]); a2 = a * a; r1d = fma (a, DB[0], 1.0); a4 = a2 * a2; r2n = fma (a, NB[3], NB[2]); a6 = a4 * a2; r2d = fma (a, DB[2], DB[1]); r3n = fma (a, NB[5], NB[4]); r3d = fma (a, DB[4], DB[3]); r4n = NB[6]; r4d = DB[5]; double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n; double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d; if (sign) return -C - P / Q; else return C + P / Q; } else if (ia < 0x40000000) { /* 1.25 <= |x| < 2.0. */ double a = fabs (x); a = a - 1.25; double r1 = fma (a, PC[1], PC[0]); double r2 = fma (a, PC[3], PC[2]); double r3 = fma (a, PC[5], PC[4]); double r4 = fma (a, PC[7], PC[6]); double r5 = fma (a, PC[9], PC[8]); double r6 = fma (a, PC[11], PC[10]); double r7 = fma (a, PC[13], PC[12]); double r8 = fma (a, PC[15], PC[14]); double a2 = a * a; double r = r8; r = fma (a2, r, r7); r = fma (a2, r, r6); r = fma (a2, r, r5); r = fma (a2, r, r4); r = fma (a2, r, r3); r = fma (a2, r, r2); r = fma (a2, r, r1); if (sign) return -1.0 + r; else return 1.0 - r; } else if (ia < 0x400a0000) { /* 2 <= |x| < 3.25. */ double a = fabs (x); a = fma (0.5, a, -1.0); double r1 = fma (a, PD[1], PD[0]); double r2 = fma (a, PD[3], PD[2]); double r3 = fma (a, PD[5], PD[4]); double r4 = fma (a, PD[7], PD[6]); double r5 = fma (a, PD[9], PD[8]); double r6 = fma (a, PD[11], PD[10]); double r7 = fma (a, PD[13], PD[12]); double r8 = fma (a, PD[15], PD[14]); double r9 = fma (a, PD[17], PD[16]); double a2 = a * a; double r = r9; r = fma (a2, r, r8); r = fma (a2, r, r7); r = fma (a2, r, r6); r = fma (a2, r, r5); r = fma (a2, r, r4); r = fma (a2, r, r3); r = fma (a2, r, r2); r = fma (a2, r, r1); if (sign) return -1.0 + r; else return 1.0 - r; } else if (ia < 0x40100000) { /* 3.25 <= |x| < 4.0. */ double a = fabs (x); a = a - 3.25; double r1 = fma (a, PE[1], PE[0]); double r2 = fma (a, PE[3], PE[2]); double r3 = fma (a, PE[5], PE[4]); double r4 = fma (a, PE[7], PE[6]); double r5 = fma (a, PE[9], PE[8]); double r6 = fma (a, PE[11], PE[10]); double r7 = fma (a, PE[13], PE[12]); double a2 = a * a; double r = r7; r = fma (a2, r, r6); r = fma (a2, r, r5); r = fma (a2, r, r4); r = fma (a2, r, r3); r = fma (a2, r, r2); r = fma (a2, r, r1); if (sign) return -1.0 + r; else return 1.0 - r; } else if (ia < 0x4017a000) { /* 4 <= |x| < 5.90625. */ double a = fabs (x); a = fma (0.5, a, -2.0); double r1 = fma (a, PF[1], PF[0]); double r2 = fma (a, PF[3], PF[2]); double r3 = fma (a, PF[5], PF[4]); double r4 = fma (a, PF[7], PF[6]); double r5 = fma (a, PF[9], PF[8]); double r6 = fma (a, PF[11], PF[10]); double r7 = fma (a, PF[13], PF[12]); double r8 = fma (a, PF[15], PF[14]); double r9 = PF[16]; double a2 = a * a; double r = r9; r = fma (a2, r, r8); r = fma (a2, r, r7); r = fma (a2, r, r6); r = fma (a2, r, r5); r = fma (a2, r, r4); r = fma (a2, r, r3); r = fma (a2, r, r2); r = fma (a2, r, r1); if (sign) return -1.0 + r; else return 1.0 - r; } else { /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. 
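The single return in the branch below covers all three cases at once: (1.0 - (sign << 1)) evaluates to +-1.0 from the sign bit, and adding 1.0/x contributes +-0.0 for x = +-inf (leaving the result exactly +-1.0) while propagating a NaN input. In isolation:

    #include <stdint.h>

    /* erf(+inf) = 1, erf(-inf) = -1, erf(nan) = nan, one expression. */
    static double erf_special (double x, uint32_t sign)
    {
      /* x = +-inf: 1.0/x is +-0.0, so the sum is exactly +-1.0.
         x = nan:   1.0/x is nan and propagates to the result.  */
      return (double) (1.0 - (sign << 1)) + 1.0 / x;
    }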
*/ if (unlikely (ia >= 0x7ff00000)) return (double) (1.0 - (sign << 1)) + 1.0 / x; if (sign) return -1.0; else return 1.0; } } + +TEST_SIG (S, D, 1, erf, -6.0, 6.0) +TEST_ULP (erf, 0.51) +TEST_ULP_NONNEAREST (erf, 0.9) +TEST_INTERVAL (erf, 0, 0xffff000000000000, 10000) +TEST_SYM_INTERVAL (erf, 0x1p-1022, 0x1p-26, 40000) +TEST_SYM_INTERVAL (erf, 0x1p-26, 0x1p3, 40000) +TEST_INTERVAL (erf, 0, inf, 40000) diff --git a/contrib/arm-optimized-routines/math/erff.c b/contrib/arm-optimized-routines/math/erff.c index 9fa476dbbab2..fd64f40a2d22 100644 --- a/contrib/arm-optimized-routines/math/erff.c +++ b/contrib/arm-optimized-routines/math/erff.c @@ -1,104 +1,114 @@ /* * Single-precision erf(x) function. * - * Copyright (c) 2020, Arm Limited. + * Copyright (c) 2020-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include "math_config.h" +#include "test_defs.h" +#include "test_sig.h" #define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f #define A __erff_data.erff_poly_A #define B __erff_data.erff_poly_B /* Top 12 bits of a float. */ static inline uint32_t top12 (float x) { return asuint (x) >> 20; } /* Efficient implementation of erff using either a pure polynomial approximation or the exponential of a polynomial. Worst-case error is 1.09ulps at 0x1.c111acp-1. */ float erff (float x) { float r, x2, u; /* Get top word. */ uint32_t ix = asuint (x); uint32_t sign = ix >> 31; uint32_t ia12 = top12 (x) & 0x7ff; /* Limit of both intervals is 0.875 for performance reasons but coefficients computed on [0.0, 0.921875] and [0.921875, 4.0], which brought accuracy from 0.94 to 1.1ulps. */ if (ia12 < 0x3f6) { /* a = |x| < 0.875. */ /* Tiny and subnormal cases. */ if (unlikely (ia12 < 0x318)) { /* |x| < 2^(-28). */ if (unlikely (ia12 < 0x040)) { /* |x| < 2^(-119). */ float y = fmaf (TwoOverSqrtPiMinusOne, x, x); return check_uflowf (y); } return x + TwoOverSqrtPiMinusOne * x; } x2 = x * x; /* Normalized cases (|x| < 0.921875). Use Horner scheme for x+x*P(x^2). */ r = A[5]; r = fmaf (r, x2, A[4]); r = fmaf (r, x2, A[3]); r = fmaf (r, x2, A[2]); r = fmaf (r, x2, A[1]); r = fmaf (r, x2, A[0]); r = fmaf (r, x, x); } else if (ia12 < 0x408) { /* |x| < 4.0 - Use a custom Estrin scheme. */ float a = fabsf (x); /* Start with Estrin scheme on high order (small magnitude) coefficients. */ r = fmaf (B[6], a, B[5]); u = fmaf (B[4], a, B[3]); x2 = x * x; r = fmaf (r, x2, u); /* Then switch to pure Horner scheme. */ r = fmaf (r, a, B[2]); r = fmaf (r, a, B[1]); r = fmaf (r, a, B[0]); r = fmaf (r, a, a); /* Single precision exponential with ~0.5ulps, ensures erff has max. rel. error < 1ulp on [0.921875, 4.0], < 1.1ulps on [0.875, 4.0]. */ r = expf (-r); /* Explicit copysign (calling copysignf increases latency). */ if (sign) r = -1.0f + r; else r = 1.0f - r; } else { /* |x| >= 4.0. */ /* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1. */ if (unlikely (ia12 >= 0x7f8)) return (1.f - (float) ((ix >> 31) << 1)) + 1.f / x; /* Explicit copysign (calling copysignf increases latency). 
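As the comment notes, the sign is applied with a plain branch instead of copysignf: the sign bit is already available as an integer, and per the comment a copysignf call would only add latency here. The same pattern, which also folds in the 1 - r complement used for the erff tail:

    #include <stdint.h>

    /* erff tail: r = expf(-poly); result is -1 + r or 1 - r by sign. */
    static float apply_sign_fold (float r, uint32_t sign)
    {
      return sign ? -1.0f + r : 1.0f - r;
    }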
*/ if (sign) r = -1.0f; else r = 1.0f; } return r; } + +TEST_SIG (S, F, 1, erf, -6.0, 6.0) +TEST_ULP (erff, 0.6) +TEST_ULP_NONNEAREST (erff, 0.9) +TEST_INTERVAL (erff, 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (erff, 0x1p-127, 0x1p-26, 40000) +TEST_SYM_INTERVAL (erff, 0x1p-26, 0x1p3, 40000) +TEST_INTERVAL (erff, 0, inf, 40000) diff --git a/contrib/arm-optimized-routines/math/exp.c b/contrib/arm-optimized-routines/math/exp.c index 1de500c31f3e..3b08d44688a8 100644 --- a/contrib/arm-optimized-routines/math/exp.c +++ b/contrib/arm-optimized-routines/math/exp.c @@ -1,176 +1,177 @@ /* * Double-precision e^x function. * - * Copyright (c) 2018-2019, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include #include "math_config.h" +#include "test_defs.h" +#include "test_sig.h" #define N (1 << EXP_TABLE_BITS) #define InvLn2N __exp_data.invln2N #define NegLn2hiN __exp_data.negln2hiN #define NegLn2loN __exp_data.negln2loN #define Shift __exp_data.shift #define T __exp_data.tab #define C2 __exp_data.poly[5 - EXP_POLY_ORDER] #define C3 __exp_data.poly[6 - EXP_POLY_ORDER] #define C4 __exp_data.poly[7 - EXP_POLY_ORDER] #define C5 __exp_data.poly[8 - EXP_POLY_ORDER] #define C6 __exp_data.poly[9 - EXP_POLY_ORDER] /* Handle cases that may overflow or underflow when computing the result that is scale*(1+TMP) without intermediate rounding. The bit representation of scale is in SBITS, however it has a computed exponent that may have overflown into the sign bit so that needs to be adjusted before using it as a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ static inline double specialcase (double_t tmp, uint64_t sbits, uint64_t ki) { double_t scale, y; if ((ki & 0x80000000) == 0) { /* k > 0, the exponent of scale might have overflowed by <= 460. */ sbits -= 1009ull << 52; scale = asdouble (sbits); y = 0x1p1009 * (scale + scale * tmp); return check_oflow (eval_as_double (y)); } /* k < 0, need special care in the subnormal range. */ sbits += 1022ull << 52; scale = asdouble (sbits); y = scale + scale * tmp; if (y < 1.0) { /* Round y to the right precision before scaling it into the subnormal range to avoid double rounding that can cause 0.5+E/2 ulp error where E is the worst-case ulp error outside the subnormal range. So this is only useful if the goal is better than 1 ulp worst-case error. */ double_t hi, lo; lo = scale - y + scale * tmp; hi = 1.0 + y; lo = 1.0 - hi + y + lo; y = eval_as_double (hi + lo) - 1.0; /* Avoid -0.0 with downward rounding. */ if (WANT_ROUNDING && y == 0.0) y = 0.0; /* The underflow exception needs to be signaled explicitly. */ force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); } y = 0x1p-1022 * y; return check_uflow (eval_as_double (y)); } /* Top 12 bits of a double (sign and exponent bits). */ static inline uint32_t top12 (double x) { return asuint64 (x) >> 52; } /* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. If hastail is 0 then xtail is assumed to be 0 too. */ static inline double -exp_inline (double x, double xtail, int hastail) +exp_inline (double x, double xtail) { uint32_t abstop; uint64_t ki, idx, top, sbits; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. 
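The double_t temporaries matter on targets where FLT_EVAL_METHOD == 2 (classic x87): there double arithmetic is carried out in long double anyway, and plain double locals would force a narrowing store on every assignment. With double_t the intermediates stay in evaluation width until eval_as_double() performs one explicit final rounding; on AArch64, where FLT_EVAL_METHOD is 0, double_t is simply double and the annotation is free. Roughly:

    #include <math.h>   /* double_t: double when FLT_EVAL_METHOD == 0,
                           long double when it is 2 */

    static double poly_step (double_t r, double_t c0, double_t c1)
    {
      double_t t = c0 + r * c1;   /* kept in evaluation width */
      return (double) t;          /* one explicit narrowing   */
    }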
*/ double_t kd, z, r, r2, scale, tail, tmp; abstop = top12 (x) & 0x7ff; if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54))) { if (abstop - top12 (0x1p-54) >= 0x80000000) /* Avoid spurious underflow for tiny x. */ /* Note: 0 is common input. */ return WANT_ROUNDING ? 1.0 + x : 1.0; if (abstop >= top12 (1024.0)) { if (asuint64 (x) == asuint64 (-INFINITY)) return 0.0; if (abstop >= top12 (INFINITY)) return 1.0 + x; if (asuint64 (x) >> 63) return __math_uflow (0); else return __math_oflow (0); } /* Large x is special cased below. */ abstop = 0; } /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ z = InvLn2N * x; #if TOINT_INTRINSICS kd = roundtoint (z); ki = converttoint (z); #elif EXP_USE_TOINT_NARROW /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ kd = eval_as_double (z + Shift); ki = asuint64 (kd) >> 16; kd = (double_t) (int32_t) ki; #else /* z - kd is in [-1, 1] in non-nearest rounding modes. */ kd = eval_as_double (z + Shift); ki = asuint64 (kd); kd -= Shift; #endif r = x + kd * NegLn2hiN + kd * NegLn2loN; /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ - if (hastail) + if (!__builtin_constant_p (xtail) || xtail != 0.0) r += xtail; /* 2^(k/N) ~= scale * (1 + tail). */ idx = 2 * (ki % N); top = ki << (52 - EXP_TABLE_BITS); tail = asdouble (T[idx]); /* This is only a valid scale when -1023*N < k < 1024*N. */ sbits = T[idx + 1] + top; /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ /* Evaluation is optimized assuming superscalar pipelined execution. */ r2 = r * r; /* Without fma the worst case error is 0.25/N ulp larger. */ /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ #if EXP_POLY_ORDER == 4 tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4); #elif EXP_POLY_ORDER == 5 tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); #elif EXP_POLY_ORDER == 6 tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6); #endif if (unlikely (abstop == 0)) return specialcase (tmp, sbits, ki); scale = asdouble (sbits); /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there is no spurious underflow here even without fma. */ return eval_as_double (scale + scale * tmp); } double exp (double x) { - return exp_inline (x, 0, 0); + return exp_inline (x, 0); } -/* May be useful for implementing pow where more than double - precision input is needed. */ -double -__exp_dd (double x, double xtail) -{ - return exp_inline (x, xtail, 1); -} #if USE_GLIBC_ABI strong_alias (exp, __exp_finite) hidden_alias (exp, __ieee754_exp) -hidden_alias (__exp_dd, __exp1) # if LDBL_MANT_DIG == 53 long double expl (long double x) { return exp (x); } # endif #endif + +TEST_SIG (S, D, 1, exp, -9.9, 9.9) +TEST_ULP (exp, 0.01) +TEST_ULP_NONNEAREST (exp, 0.5) +TEST_INTERVAL (exp, 0, 0xffff000000000000, 10000) +TEST_SYM_INTERVAL (exp, 0x1p-6, 0x1p6, 400000) +TEST_SYM_INTERVAL (exp, 633.3, 733.3, 10000) diff --git a/contrib/arm-optimized-routines/math/exp10.c b/contrib/arm-optimized-routines/math/exp10.c index 0fbec4c694ca..de8ece42e09e 100644 --- a/contrib/arm-optimized-routines/math/exp10.c +++ b/contrib/arm-optimized-routines/math/exp10.c @@ -1,129 +1,143 @@ /* * Double-precision 10^x function. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. 
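Dropping the old hastail flag of exp_inline in favour of __builtin_constant_p lets one helper serve both callers: when exp() inlines it with a literal 0, the condition folds to false at compile time and the r += xtail add vanishes, while a caller passing a genuine tail still gets the correction. The idiom in isolation (GCC/Clang builtin):

    /* Skip work for a compile-time-zero argument.  After inlining,
       add_tail (r, 0.0) compiles to just 'return r'. */
    static inline double add_tail (double r, double xtail)
    {
      if (!__builtin_constant_p (xtail) || xtail != 0.0)
        r += xtail;
      return r;
    }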
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" +#include "test_defs.h" +#include "test_sig.h" #define N (1 << EXP_TABLE_BITS) #define IndexMask (N - 1) #define OFlowBound 0x1.34413509f79ffp8 /* log10(DBL_MAX). */ #define UFlowBound -0x1.5ep+8 /* -350. */ #define SmallTop 0x3c6 /* top12(0x1p-57). */ #define BigTop 0x407 /* top12(0x1p8). */ #define Thresh 0x41 /* BigTop - SmallTop. */ #define Shift __exp_data.shift #define C(i) __exp_data.exp10_poly[i] static double special_case (uint64_t sbits, double_t tmp, uint64_t ki) { double_t scale, y; - if (ki - (1ull << 16) < 0x80000000) + if ((ki & 0x80000000) == 0) { /* The exponent of scale might have overflowed by 1. */ sbits -= 1ull << 52; scale = asdouble (sbits); y = 2 * (scale + scale * tmp); return check_oflow (eval_as_double (y)); } /* n < 0, need special care in the subnormal range. */ sbits += 1022ull << 52; scale = asdouble (sbits); y = scale + scale * tmp; if (y < 1.0) { /* Round y to the right precision before scaling it into the subnormal range to avoid double rounding that can cause 0.5+E/2 ulp error where E is the worst-case ulp error outside the subnormal range. So this is only useful if the goal is better than 1 ulp worst-case error. */ double_t lo = scale - y + scale * tmp; double_t hi = 1.0 + y; lo = 1.0 - hi + y + lo; y = eval_as_double (hi + lo) - 1.0; /* Avoid -0.0 with downward rounding. */ if (WANT_ROUNDING && y == 0.0) y = 0.0; /* The underflow exception needs to be signaled explicitly. */ force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); } y = 0x1p-1022 * y; return check_uflow (y); } /* Double-precision 10^x approximation. Largest observed error is ~0.513 ULP. */ double exp10 (double x) { uint64_t ix = asuint64 (x); uint32_t abstop = (ix >> 52) & 0x7ff; if (unlikely (abstop - SmallTop >= Thresh)) { if (abstop - SmallTop >= 0x80000000) /* Avoid spurious underflow for tiny x. Note: 0 is common input. */ return x + 1; if (abstop == 0x7ff) return ix == asuint64 (-INFINITY) ? 0.0 : x + 1.0; if (x >= OFlowBound) return __math_oflow (0); if (x < UFlowBound) return __math_uflow (0); /* Large x is special-cased below. */ abstop = 0; } /* Reduce x: z = x * N / log10(2), k = round(z). */ double_t z = __exp_data.invlog10_2N * x; double_t kd; - int64_t ki; + uint64_t ki; #if TOINT_INTRINSICS kd = roundtoint (z); ki = converttoint (z); #else kd = eval_as_double (z + Shift); + ki = asuint64 (kd); kd -= Shift; - ki = kd; #endif /* r = x - k * log10(2), r in [-0.5, 0.5]. */ double_t r = x; r = __exp_data.neglog10_2hiN * kd + r; r = __exp_data.neglog10_2loN * kd + r; /* exp10(x) = 2^(k/N) * 2^(r/N). Approximate the two components separately. */ /* s = 2^(k/N), using lookup table. */ uint64_t e = ki << (52 - EXP_TABLE_BITS); uint64_t i = (ki & IndexMask) * 2; uint64_t u = __exp_data.tab[i + 1]; uint64_t sbits = u + e; double_t tail = asdouble (__exp_data.tab[i]); /* 2^(r/N) ~= 1 + r * Poly(r). */ double_t r2 = r * r; double_t p = C (0) + r * C (1); double_t y = C (2) + r * C (3); y = y + r2 * C (4); y = p + r2 * y; y = tail + y * r; if (unlikely (abstop == 0)) return special_case (sbits, y, ki); /* Assemble components: y = 2^(r/N) * 2^(k/N) ~= (y + 1) * s. 
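The assembly step is written as s * y + s rather than s * (y + 1) so the compiler can contract it to a single fma, and so y + 1 is never rounded on its own, which would discard low-order bits of the small correction y. Equivalent explicit form:

    #include <math.h>

    /* exp10(x) ~= s * (y + 1), computed with one fused rounding.
       The source spells it s * y + s under eval_as_double. */
    static double reconstruct (double s, double y)
    {
      return fma (s, y, s);
    }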
*/ double_t s = asdouble (sbits); return eval_as_double (s * y + s); } + +#if WANT_EXP10_TESTS +TEST_SIG (S, D, 1, exp10, -9.9, 9.9) +TEST_ULP (exp10, 0.02) +TEST_ULP_NONNEAREST (exp10, 0.5) +TEST_SYM_INTERVAL (exp10, 0, 0x1p-47, 5000) +TEST_SYM_INTERVAL (exp10, 0x1p47, 1, 50000) +TEST_INTERVAL (exp10, 1, OFlowBound, 50000) +TEST_INTERVAL (exp10, -1, UFlowBound, 50000) +TEST_INTERVAL (exp10, OFlowBound, inf, 5000) +TEST_INTERVAL (exp10, UFlowBound, -inf, 5000) +#endif diff --git a/contrib/arm-optimized-routines/math/exp2.c b/contrib/arm-optimized-routines/math/exp2.c index a1eee44f1f48..f26ac3cda2cc 100644 --- a/contrib/arm-optimized-routines/math/exp2.c +++ b/contrib/arm-optimized-routines/math/exp2.c @@ -1,143 +1,152 @@ /* * Double-precision 2^x function. * - * Copyright (c) 2018-2019, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include #include "math_config.h" +#include "test_defs.h" +#include "test_sig.h" #define N (1 << EXP_TABLE_BITS) #define Shift __exp_data.exp2_shift #define T __exp_data.tab #define C1 __exp_data.exp2_poly[0] #define C2 __exp_data.exp2_poly[1] #define C3 __exp_data.exp2_poly[2] #define C4 __exp_data.exp2_poly[3] #define C5 __exp_data.exp2_poly[4] #define C6 __exp_data.exp2_poly[5] /* Handle cases that may overflow or underflow when computing the result that is scale*(1+TMP) without intermediate rounding. The bit representation of scale is in SBITS, however it has a computed exponent that may have overflown into the sign bit so that needs to be adjusted before using it as a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ static inline double specialcase (double_t tmp, uint64_t sbits, uint64_t ki) { double_t scale, y; if ((ki & 0x80000000) == 0) { /* k > 0, the exponent of scale might have overflowed by 1. */ sbits -= 1ull << 52; scale = asdouble (sbits); y = 2 * (scale + scale * tmp); return check_oflow (eval_as_double (y)); } /* k < 0, need special care in the subnormal range. */ sbits += 1022ull << 52; scale = asdouble (sbits); y = scale + scale * tmp; if (y < 1.0) { /* Round y to the right precision before scaling it into the subnormal range to avoid double rounding that can cause 0.5+E/2 ulp error where E is the worst-case ulp error outside the subnormal range. So this is only useful if the goal is better than 1 ulp worst-case error. */ double_t hi, lo; lo = scale - y + scale * tmp; hi = 1.0 + y; lo = 1.0 - hi + y + lo; y = eval_as_double (hi + lo) - 1.0; /* Avoid -0.0 with downward rounding. */ if (WANT_ROUNDING && y == 0.0) y = 0.0; /* The underflow exception needs to be signaled explicitly. */ force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); } y = 0x1p-1022 * y; return check_uflow (eval_as_double (y)); } /* Top 12 bits of a double (sign and exponent bits). */ static inline uint32_t top12 (double x) { return asuint64 (x) >> 52; } double exp2 (double x) { uint32_t abstop; uint64_t ki, idx, top, sbits; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t kd, r, r2, scale, tail, tmp; abstop = top12 (x) & 0x7ff; if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54))) { if (abstop - top12 (0x1p-54) >= 0x80000000) /* Avoid spurious underflow for tiny x. */ /* Note: 0 is common input. */ return WANT_ROUNDING ? 
1.0 + x : 1.0; if (abstop >= top12 (1024.0)) { if (asuint64 (x) == asuint64 (-INFINITY)) return 0.0; if (abstop >= top12 (INFINITY)) return 1.0 + x; if (!(asuint64 (x) >> 63)) return __math_oflow (0); else if (asuint64 (x) >= asuint64 (-1075.0)) return __math_uflow (0); } if (2 * asuint64 (x) > 2 * asuint64 (928.0)) /* Large x is special cased below. */ abstop = 0; } /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */ /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */ kd = eval_as_double (x + Shift); ki = asuint64 (kd); /* k. */ kd -= Shift; /* k/N for int k. */ r = x - kd; /* 2^(k/N) ~= scale * (1 + tail). */ idx = 2 * (ki % N); top = ki << (52 - EXP_TABLE_BITS); tail = asdouble (T[idx]); /* This is only a valid scale when -1023*N < k < 1024*N. */ sbits = T[idx + 1] + top; /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */ /* Evaluation is optimized assuming superscalar pipelined execution. */ r2 = r * r; /* Without fma the worst case error is 0.5/N ulp larger. */ /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */ #if EXP2_POLY_ORDER == 4 tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4); #elif EXP2_POLY_ORDER == 5 tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); #elif EXP2_POLY_ORDER == 6 tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6); #endif if (unlikely (abstop == 0)) return specialcase (tmp, sbits, ki); scale = asdouble (sbits); /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there is no spurious underflow here even without fma. */ return eval_as_double (scale + scale * tmp); } #if USE_GLIBC_ABI strong_alias (exp2, __exp2_finite) hidden_alias (exp2, __ieee754_exp2) # if LDBL_MANT_DIG == 53 long double exp2l (long double x) { return exp2 (x); } # endif #endif + +TEST_SIG (S, D, 1, exp2, -9.9, 9.9) +TEST_ULP (exp2, 0.01) +TEST_ULP_NONNEAREST (exp2, 0.5) +TEST_INTERVAL (exp2, 0, 0xffff000000000000, 10000) +TEST_SYM_INTERVAL (exp2, 0x1p-6, 0x1p6, 40000) +TEST_SYM_INTERVAL (exp2, 633.3, 733.3, 10000) diff --git a/contrib/arm-optimized-routines/math/exp2f.c b/contrib/arm-optimized-routines/math/exp2f.c index 776c3ddf7663..3202f41377ad 100644 --- a/contrib/arm-optimized-routines/math/exp2f.c +++ b/contrib/arm-optimized-routines/math/exp2f.c @@ -1,80 +1,88 @@ /* * Single-precision 2^x function. * - * Copyright (c) 2017-2018, Arm Limited. + * Copyright (c) 2017-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include "math_config.h" +#include "test_defs.h" +#include "test_sig.h" /* EXP2F_TABLE_BITS = 5 EXP2F_POLY_ORDER = 3 ULP error: 0.502 (nearest rounding.) Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.) Wrong count: 168353 (all nearest rounding wrong results with fma.) Non-nearest ULP error: 1 (rounded ULP error) */ #define N (1 << EXP2F_TABLE_BITS) #define T __exp2f_data.tab #define C __exp2f_data.poly #define SHIFT __exp2f_data.shift_scaled static inline uint32_t top12 (float x) { return asuint (x) >> 20; } float exp2f (float x) { uint32_t abstop; uint64_t ki, t; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t kd, xd, z, r, r2, y, s; xd = (double_t) x; abstop = top12 (x) & 0x7ff; if (unlikely (abstop >= top12 (128.0f))) { /* |x| >= 128 or x is nan. 
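The 128 and -150 cut-offs checked below are exactly the limits of the binary32 format: 2^127 is the largest representable power of two (FLT_MAX sits just below 2^128), 2^-149 is the smallest subnormal, and 2^-150 rounds to zero under round-to-nearest. A standalone check (hypothetical test program):

    #include <math.h>
    #include <stdio.h>

    int main (void)
    {
      printf ("%a\n", ldexpf (1.0f, 127));   /* 0x1p+127: finite        */
      printf ("%a\n", ldexpf (1.0f, 128));   /* inf: overflow           */
      printf ("%a\n", ldexpf (1.0f, -149));  /* 0x1p-149: last denormal */
      printf ("%a\n", ldexpf (1.0f, -150));  /* 0: underflows to zero   */
      return 0;
    }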
*/ if (asuint (x) == asuint (-INFINITY)) return 0.0f; if (abstop >= top12 (INFINITY)) return x + x; if (x > 0.0f) return __math_oflowf (0); if (x <= -150.0f) return __math_uflowf (0); #if WANT_ERRNO_UFLOW if (x < -149.0f) return __math_may_uflowf (0); #endif } /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */ kd = eval_as_double (xd + SHIFT); ki = asuint64 (kd); kd -= SHIFT; /* k/N for int k. */ r = xd - kd; /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ t = T[ki % N]; t += ki << (52 - EXP2F_TABLE_BITS); s = asdouble (t); z = C[0] * r + C[1]; r2 = r * r; y = C[2] * r + 1; y = z * r2 + y; y = y * s; return eval_as_float (y); } #if USE_GLIBC_ABI strong_alias (exp2f, __exp2f_finite) hidden_alias (exp2f, __ieee754_exp2f) #endif + +TEST_SIG (S, F, 1, exp2, -9.9, 9.9) +TEST_ULP (exp2f, 0.01) +TEST_ULP_NONNEAREST (exp2f, 0.5) +TEST_INTERVAL (exp2f, 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (exp2f, 0x1p-14, 0x1p8, 50000) diff --git a/contrib/arm-optimized-routines/math/expf.c b/contrib/arm-optimized-routines/math/expf.c index 08a20d59e491..6572b99a1e68 100644 --- a/contrib/arm-optimized-routines/math/expf.c +++ b/contrib/arm-optimized-routines/math/expf.c @@ -1,91 +1,99 @@ /* * Single-precision e^x function. * - * Copyright (c) 2017-2019, Arm Limited. + * Copyright (c) 2017-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include "math_config.h" +#include "test_defs.h" +#include "test_sig.h" /* EXP2F_TABLE_BITS = 5 EXP2F_POLY_ORDER = 3 ULP error: 0.502 (nearest rounding.) Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.) Wrong count: 170635 (all nearest rounding wrong results with fma.) Non-nearest ULP error: 1 (rounded ULP error) */ #define N (1 << EXP2F_TABLE_BITS) #define InvLn2N __exp2f_data.invln2_scaled #define T __exp2f_data.tab #define C __exp2f_data.poly_scaled static inline uint32_t top12 (float x) { return asuint (x) >> 20; } float expf (float x) { uint32_t abstop; uint64_t ki, t; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t kd, xd, z, r, r2, y, s; xd = (double_t) x; abstop = top12 (x) & 0x7ff; if (unlikely (abstop >= top12 (88.0f))) { /* |x| >= 88 or x is nan. */ if (asuint (x) == asuint (-INFINITY)) return 0.0f; if (abstop >= top12 (INFINITY)) return x + x; if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */ return __math_oflowf (0); if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */ return __math_uflowf (0); #if WANT_ERRNO_UFLOW if (x < -0x1.9d1d9ep6f) /* x < log(0x1p-149) ~= -103.28 */ return __math_may_uflowf (0); #endif } /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */ z = InvLn2N * xd; /* Round and convert z to int, the result is in [-150*N, 128*N] and ideally nearest int is used, otherwise the magnitude of r can be bigger which gives larger approximation error. 
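When TOINT_INTRINSICS is unavailable, the fallback below rounds via the classic shift trick: adding a large constant (here __exp2f_data.shift) pushes z's fraction bits off the end of the significand, so the floating-point add itself performs the round-to-int, and the low bits of the result's representation double as the integer k. A sketch, assuming |z| is small enough that the integer part stays exact (the real code goes through eval_as_double and asuint64):

    #include <stdint.h>
    #include <string.h>

    static double round_via_shift (double z, uint64_t *ki)
    {
      const double Shift = 0x1.8p52;
      double kd = z + Shift;       /* FP add rounds z to an integer */
      uint64_t bits;
      memcpy (&bits, &kd, sizeof bits);
      *ki = bits;                  /* low significand bits encode k */
      return kd - Shift;           /* the rounded value, exactly    */
    }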
*/ #if TOINT_INTRINSICS kd = roundtoint (z); ki = converttoint (z); #else # define SHIFT __exp2f_data.shift kd = eval_as_double (z + SHIFT); ki = asuint64 (kd); kd -= SHIFT; #endif r = z - kd; /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ t = T[ki % N]; t += ki << (52 - EXP2F_TABLE_BITS); s = asdouble (t); z = C[0] * r + C[1]; r2 = r * r; y = C[2] * r + 1; y = z * r2 + y; y = y * s; return eval_as_float (y); } #if USE_GLIBC_ABI strong_alias (expf, __expf_finite) hidden_alias (expf, __ieee754_expf) #endif + +TEST_SIG (S, F, 1, exp, -9.9, 9.9) +TEST_ULP (expf, 0.01) +TEST_ULP_NONNEAREST (expf, 0.5) +TEST_INTERVAL (expf, 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (expf, 0x1p-14, 0x1p8, 500000) diff --git a/contrib/arm-optimized-routines/math/include/mathlib.h b/contrib/arm-optimized-routines/math/include/mathlib.h index 64cbb9c1f850..23d04da99d93 100644 --- a/contrib/arm-optimized-routines/math/include/mathlib.h +++ b/contrib/arm-optimized-routines/math/include/mathlib.h @@ -1,59 +1,269 @@ /* * Public API. * - * Copyright (c) 2015-2023, Arm Limited. + * Copyright (c) 2015-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef _MATHLIB_H #define _MATHLIB_H -float expf (float); -float exp2f (float); -float logf (float); -float log2f (float); -float powf (float, float); -float sinf (float); -float cosf (float); -void sincosf (float, float*, float*); - -double exp (double); -double exp10 (double); -double exp2 (double); -double log (double); -double log2 (double); -double pow (double, double); - #if __aarch64__ -# if __GNUC__ >= 5 -typedef __Float32x4_t __f32x4_t; -typedef __Float64x2_t __f64x2_t; -# elif __clang_major__*100+__clang_minor__ >= 305 -typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t; -typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t; -# else -# error Unsupported compiler -# endif +/* Low-accuracy scalar implementations of C23 routines. */ +float arm_math_cospif (float); +double arm_math_cospi (double); +float arm_math_sinpif (float); +double arm_math_sinpi (double); +float arm_math_tanpif (float); +double arm_math_tanpi (double); +void arm_math_sincospif (float, float *, float *); +void arm_math_sincospi (double, double *, double *); +#endif + +/* SIMD declaration for autovectorisation with fast-math enabled. Only GCC is + supported, and vector routines are only supported on Linux on AArch64. */ +#if defined __aarch64__ && __linux__ && defined(__GNUC__) \ + && !defined(__clang__) && defined(__FAST_MATH__) +# define DECL_SIMD_aarch64 __attribute__ ((__simd__ ("notinbranch"), const)) +#else +# define DECL_SIMD_aarch64 +#endif + +#if WANT_EXPERIMENTAL_MATH + +float arm_math_erff (float); +DECL_SIMD_aarch64 float cospif (float); +DECL_SIMD_aarch64 float erfinvf (float); +DECL_SIMD_aarch64 float sinpif (float); +DECL_SIMD_aarch64 float tanpif (float); + +double arm_math_erf (double); +DECL_SIMD_aarch64 double cospi (double); +DECL_SIMD_aarch64 double erfinv (double); +DECL_SIMD_aarch64 double sinpi (double); +DECL_SIMD_aarch64 double tanpi (double); + +long double erfinvl (long double); + +#endif -# if __GNUC__ >= 9 || __clang_major__ >= 8 -# undef __vpcs -# define __vpcs __attribute__((__aarch64_vector_pcs__)) +/* Note these routines may not be provided by AOR (some are only available with + WANT_EXPERIMENTAL_MATH, some are not provided at all. Redeclare them here to + add vector annotations. 
*/ +DECL_SIMD_aarch64 float acosf (float); +DECL_SIMD_aarch64 float acoshf (float); +DECL_SIMD_aarch64 float asinf (float); +DECL_SIMD_aarch64 float asinhf (float); +DECL_SIMD_aarch64 float atan2f (float, float); +DECL_SIMD_aarch64 float atanf (float); +DECL_SIMD_aarch64 float atanhf (float); +DECL_SIMD_aarch64 float cbrtf (float); +DECL_SIMD_aarch64 float cosf (float); +DECL_SIMD_aarch64 float coshf (float); +DECL_SIMD_aarch64 float erfcf (float); +DECL_SIMD_aarch64 float erff (float); +DECL_SIMD_aarch64 float exp10f (float); +DECL_SIMD_aarch64 float exp2f (float); +DECL_SIMD_aarch64 float expf (float); +DECL_SIMD_aarch64 float expm1f (float); +DECL_SIMD_aarch64 float hypotf (float, float); +DECL_SIMD_aarch64 float log10f (float); +DECL_SIMD_aarch64 float log1pf (float); +DECL_SIMD_aarch64 float log2f (float); +DECL_SIMD_aarch64 float logf (float); +DECL_SIMD_aarch64 float powf (float, float); +DECL_SIMD_aarch64 float sinf (float); +void sincosf (float, float *, float *); +DECL_SIMD_aarch64 float sinhf (float); +DECL_SIMD_aarch64 float tanf (float); +DECL_SIMD_aarch64 float tanhf (float); + +DECL_SIMD_aarch64 double acos (double); +DECL_SIMD_aarch64 double acosh (double); +DECL_SIMD_aarch64 double asin (double); +DECL_SIMD_aarch64 double asinh (double); +DECL_SIMD_aarch64 double atan2 (double, double); +DECL_SIMD_aarch64 double atan (double); +DECL_SIMD_aarch64 double atanh (double); +DECL_SIMD_aarch64 double cbrt (double); +DECL_SIMD_aarch64 double cos (double); +DECL_SIMD_aarch64 double cosh (double); +DECL_SIMD_aarch64 double erfc (double); +DECL_SIMD_aarch64 double erf (double); +DECL_SIMD_aarch64 double exp10 (double); +DECL_SIMD_aarch64 double exp2 (double); +DECL_SIMD_aarch64 double exp (double); +DECL_SIMD_aarch64 double expm1 (double); +DECL_SIMD_aarch64 double hypot (double, double); +DECL_SIMD_aarch64 double log10 (double); +DECL_SIMD_aarch64 double log1p (double); +DECL_SIMD_aarch64 double log2 (double); +DECL_SIMD_aarch64 double log (double); +DECL_SIMD_aarch64 double pow (double, double); +DECL_SIMD_aarch64 double sin (double); +DECL_SIMD_aarch64 double sinh (double); +DECL_SIMD_aarch64 double tan (double); +DECL_SIMD_aarch64 double tanh (double); + +#if __aarch64__ && __linux__ +# include +# undef __vpcs +# define __vpcs __attribute__((__aarch64_vector_pcs__)) /* Vector functions following the vector PCS using ABI names. 
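These names follow the AArch64 vector function ABI mangling: _ZGV, an ISA letter ('n' AdvSIMD, 's' SVE), 'N' or 'M' for unmasked/masked, the lane count (or 'x' for scalable), one letter per parameter ('v' vector, 'l<n>' linear pointer), then the scalar name. Compilers emit calls to them when vectorising loops, but they can equally be called by hand:

    #include <arm_neon.h>

    __attribute__ ((__aarch64_vector_pcs__))
    float32x4_t _ZGVnN4v_sinf (float32x4_t);

    /* Four sines in one call, using the ABI name directly. */
    static float32x4_t sin4 (float32x4_t x)
    {
      return _ZGVnN4v_sinf (x);
    }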
*/ -__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_expf_1u (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_exp2f_1u (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); -__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t); -__vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t); -__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t); -__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t); +__vpcs float32x4_t _ZGVnN4v_acosf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_acoshf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_asinf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_asinhf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_atanf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_atanhf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_cbrtf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_cosf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_coshf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_cospif (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_erfcf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_erff (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_exp10f (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_exp2f (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_exp2f_1u (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_expf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_expf_1u (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_expm1f (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_log10f (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_log1pf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_log2f (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_logf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_sinf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_sinhf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_sinpif (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_tanf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_tanhf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_tanpif (float32x4_t); +__vpcs float32x4_t _ZGVnN4vl4_modff (float32x4_t, float *); +__vpcs float32x4_t _ZGVnN4vv_atan2f (float32x4_t, float32x4_t); +__vpcs float32x4_t _ZGVnN4vv_hypotf (float32x4_t, float32x4_t); +__vpcs float32x4_t _ZGVnN4vv_powf (float32x4_t, float32x4_t); +__vpcs float32x4x2_t _ZGVnN4v_cexpif (float32x4_t); +__vpcs void _ZGVnN4vl4l4_sincosf (float32x4_t, float *, float *); +__vpcs void _ZGVnN4vl4l4_sincospif (float32x4_t, float *, float *); + +__vpcs float64x2_t _ZGVnN2v_acos (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_acosh (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_asin (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_asinh (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_atan (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_atanh (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_cbrt (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_cos (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_cosh (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_cospi (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_erf (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_erfc (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_exp (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_exp10 (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_exp2 (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_expm1 (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_log (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_log10 (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_log1p (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_log2 (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_sin (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_sinh (float64x2_t); 
+__vpcs float64x2_t _ZGVnN2v_sinpi (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_tan (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_tanh (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_tanpi (float64x2_t); +__vpcs float64x2_t _ZGVnN2vl8_modf (float64x2_t, double *); +__vpcs float64x2_t _ZGVnN2vv_atan2 (float64x2_t, float64x2_t); +__vpcs float64x2_t _ZGVnN2vv_hypot (float64x2_t, float64x2_t); +__vpcs float64x2_t _ZGVnN2vv_pow (float64x2_t, float64x2_t); +__vpcs float64x2x2_t _ZGVnN2v_cexpi (float64x2_t); +__vpcs void _ZGVnN2vl8l8_sincos (float64x2_t, double *, double *); +__vpcs void _ZGVnN2vl8l8_sincospi (float64x2_t, double *, double *); + +# if WANT_EXPERIMENTAL_MATH +__vpcs float32x4_t _ZGVnN4v_erfinvf (float32x4_t); +__vpcs float64x2_t _ZGVnN2v_erfinv (float64x2_t); +# endif + +# include +svfloat32_t _ZGVsMxv_acosf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_acoshf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_asinf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_asinhf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_atanf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_atanhf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_cbrtf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_cosf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_coshf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_cospif (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_erfcf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_erff (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_exp10f (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_exp2f (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_expf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_expm1f (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_log10f (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_log1pf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_log2f (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_logf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_sinf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_sinhf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_sinpif (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_tanf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_tanhf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_tanpif (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxvl4_modff (svfloat32_t, float *, svbool_t); +svfloat32_t _ZGVsMxvv_atan2f (svfloat32_t, svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxvv_hypotf (svfloat32_t, svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxvv_powf (svfloat32_t, svfloat32_t, svbool_t); +svfloat32x2_t _ZGVsMxv_cexpif (svfloat32_t, svbool_t); +void _ZGVsMxvl4l4_sincosf (svfloat32_t, float *, float *, svbool_t); +void _ZGVsMxvl4l4_sincospif (svfloat32_t, float *, float *, svbool_t); + +svfloat64_t _ZGVsMxv_acos (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_acosh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_asin (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_asinh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_atan (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_atanh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_cbrt (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_cos (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_cosh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_cospi (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_erf (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_erfc (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_exp (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_exp10 (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_exp2 (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_expm1 (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_log (svfloat64_t, svbool_t); +svfloat64_t 
_ZGVsMxv_log10 (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_log1p (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_log2 (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_sin (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_sinh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_sinpi (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_tan (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_tanh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_tanpi (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxvl8_modf (svfloat64_t, double *, svbool_t); +svfloat64_t _ZGVsMxvv_atan2 (svfloat64_t, svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxvv_hypot (svfloat64_t, svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxvv_pow (svfloat64_t, svfloat64_t, svbool_t); +svfloat64x2_t _ZGVsMxv_cexpi (svfloat64_t, svbool_t); +void _ZGVsMxvl8l8_sincos (svfloat64_t, double *, double *, svbool_t); +void _ZGVsMxvl8l8_sincospi (svfloat64_t, double *, double *, svbool_t); + +# if WANT_EXPERIMENTAL_MATH + +svfloat32_t _ZGVsMxv_erfinvf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxvv_powi (svfloat32_t, svint32_t, svbool_t); + +svfloat64_t _ZGVsMxvv_powk (svfloat64_t, svint64_t, svbool_t); +svfloat64_t _ZGVsMxv_erfinv (svfloat64_t, svbool_t); + # endif #endif #endif diff --git a/contrib/arm-optimized-routines/math/include/test_defs.h b/contrib/arm-optimized-routines/math/include/test_defs.h new file mode 100644 index 000000000000..2fe66fa6f14c --- /dev/null +++ b/contrib/arm-optimized-routines/math/include/test_defs.h @@ -0,0 +1,21 @@ +/* + * Helper macros for emitting various details about routines for consumption by + * runulp.sh. This version of the file is for inclusion when building routines, + * so expansions are empty - see math/test/test_defs for versions used by the + * build system. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception. + */ + +#define TEST_ULP(f, l) +#define TEST_ULP_NONNEAREST(f, l) + +#define TEST_DISABLE_FENV(f) +#define TEST_DISABLE_FENV_IF_NOT(f, e) + +#define TEST_INTERVAL(f, lo, hi, n) +#define TEST_SYM_INTERVAL(f, lo, hi, n) +#define TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n) + +#define TEST_CONTROL_VALUE(f, c) diff --git a/contrib/arm-optimized-routines/math/include/test_sig.h b/contrib/arm-optimized-routines/math/include/test_sig.h new file mode 100644 index 000000000000..a967829098d6 --- /dev/null +++ b/contrib/arm-optimized-routines/math/include/test_sig.h @@ -0,0 +1,47 @@ +/* + * Macros for emitting various ulp/bench entries based on function signature + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception. 
+ */ + +#define TEST_DECL_SF1(fun) float fun##f (float); +#define TEST_DECL_SF2(fun) float fun##f (float, float); +#define TEST_DECL_SD1(fun) double fun (double); +#define TEST_DECL_SD2(fun) double fun (double, double); + +#define TEST_DECL_VF1(fun) \ + float32x4_t VPCS_ATTR V_NAME_F1 (fun##f) (float32x4_t); +#define TEST_DECL_VF2(fun) \ + float32x4_t VPCS_ATTR V_NAME_F2 (fun##f) (float32x4_t, float32x4_t); +#define TEST_DECL_VD1(fun) VPCS_ATTR float64x2_t V_NAME_D1 (fun) (float64x2_t); +#define TEST_DECL_VD2(fun) \ + VPCS_ATTR float64x2_t V_NAME_D2 (fun) (float64x2_t, float64x2_t); + +#define TEST_DECL_SVF1(fun) \ + svfloat32_t SV_NAME_F1 (fun) (svfloat32_t, svbool_t); +#define TEST_DECL_SVF2(fun) \ + svfloat32_t SV_NAME_F2 (fun) (svfloat32_t, svfloat32_t, svbool_t); +#define TEST_DECL_SVD1(fun) \ + svfloat64_t SV_NAME_D1 (fun) (svfloat64_t, svbool_t); +#define TEST_DECL_SVD2(fun) \ + svfloat64_t SV_NAME_D2 (fun) (svfloat64_t, svfloat64_t, svbool_t); + +/* For building the routines, emit function prototype from TEST_SIG. This + ensures that the correct signature has been chosen (wrong one will be a + compile error). TEST_SIG is defined differently by various components of the + build system to emit entries in the wrappers and entries for mathbench and + ulp. */ +#ifndef _TEST_SIG +# if defined(EMIT_ULP_FUNCS) +# define _TEST_SIG(v, t, a, f, ...) TEST_SIG _Z##v##t##a (f) +# elif defined(EMIT_ULP_WRAPPERS) +# define _TEST_SIG(v, t, a, f, ...) TEST_SIG Z##v##N##t##a##_WRAP (f) +# elif defined(EMIT_MATHBENCH_FUNCS) +# define _TEST_SIG(v, t, a, f, ...) TEST_SIG _Z##v##t##a (f, ##__VA_ARGS__) +# else +# define _TEST_SIG(v, t, a, f, ...) TEST_DECL_##v##t##a (f) +# endif +#endif + +#define TEST_SIG(...) _TEST_SIG (__VA_ARGS__) diff --git a/contrib/arm-optimized-routines/math/log.c b/contrib/arm-optimized-routines/math/log.c index 43dfc2a744f0..1d6244c30b79 100644 --- a/contrib/arm-optimized-routines/math/log.c +++ b/contrib/arm-optimized-routines/math/log.c @@ -1,162 +1,171 @@ /* * Double-precision log(x) function. * - * Copyright (c) 2018-2019, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include #include "math_config.h" +#include "test_defs.h" +#include "test_sig.h" #define T __log_data.tab #define T2 __log_data.tab2 #define B __log_data.poly1 #define A __log_data.poly #define Ln2hi __log_data.ln2hi #define Ln2lo __log_data.ln2lo #define N (1 << LOG_TABLE_BITS) #define OFF 0x3fe6000000000000 /* Top 16 bits of a double. */ static inline uint32_t top16 (double x) { return asuint64 (x) >> 48; } double log (double x) { /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo; uint64_t ix, iz, tmp; uint32_t top; int k, i; ix = asuint64 (x); top = top16 (x); #if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11 # define LO asuint64 (1.0 - 0x1p-5) # define HI asuint64 (1.0 + 0x1.1p-5) #elif LOG_POLY1_ORDER == 12 # define LO asuint64 (1.0 - 0x1p-4) # define HI asuint64 (1.0 + 0x1.09p-4) #endif if (unlikely (ix - LO < HI - LO)) { /* Handle close to 1.0 inputs separately. */ /* Fix sign of zero with downward rounding when x==1. */ if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0))) return 0; r = x - 1.0; r2 = r * r; r3 = r * r2; #if LOG_POLY1_ORDER == 10 /* Worst-case error is around 0.516 ULP. */ y = r3 * (B[1] + r * B[2] + r2 * B[3] + r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8]))); w = B[0] * r2; /* B[0] == -0.5. 
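The three statements that follow are a Fast2Sum step: hi = r + w is the rounded sum, and r - hi + w recovers the addition's exact rounding error (valid because |r| >= |w| on this band, w being -r^2/2). Folding that error into the polynomial tail effectively computes the dominant r - r^2/2 part of log(1+r) in doubled precision. In isolation:

    /* Fast2Sum: requires |a| >= |b|.  Then hi + err == a + b exactly,
       and both operations below are free of rounding error. */
    static void fast_two_sum (double a, double b, double *hi, double *err)
    {
      *hi = a + b;
      *err = a - *hi + b;
    }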
*/ hi = r + w; y += r - hi + w; y += hi; #elif LOG_POLY1_ORDER == 11 /* Worst-case error is around 0.516 ULP. */ y = r3 * (B[1] + r * B[2] + r2 * (B[3] + r * B[4] + r2 * B[5] + r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9]))); w = B[0] * r2; /* B[0] == -0.5. */ hi = r + w; y += r - hi + w; y += hi; #elif LOG_POLY1_ORDER == 12 y = r3 * (B[1] + r * B[2] + r2 * B[3] + r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); # if N <= 64 /* Worst-case error is around 0.532 ULP. */ w = B[0] * r2; /* B[0] == -0.5. */ hi = r + w; y += r - hi + w; y += hi; # else /* Worst-case error is around 0.507 ULP. */ w = r * 0x1p27; double_t rhi = r + w - w; double_t rlo = r - rhi; w = rhi * rhi * B[0]; /* B[0] == -0.5. */ hi = r + w; lo = r - hi + w; lo += B[0] * rlo * (rhi + r); y += lo; y += hi; # endif #endif return eval_as_double (y); } if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010)) { /* x < 0x1p-1022 or inf or nan. */ if (ix * 2 == 0) return __math_divzero (1); if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */ return x; if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) return __math_invalid (x); /* x is subnormal, normalize it. */ ix = asuint64 (x * 0x1p52); ix -= 52ULL << 52; } /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ tmp = ix - OFF; i = (tmp >> (52 - LOG_TABLE_BITS)) % N; k = (int64_t) tmp >> 52; /* arithmetic shift */ iz = ix - (tmp & 0xfffULL << 52); invc = T[i].invc; logc = T[i].logc; z = asdouble (iz); /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ /* r ~= z/c - 1, |r| < 1/(2*N). */ #if HAVE_FAST_FMA /* rounding error: 0x1p-55/N. */ r = fma (z, invc, -1.0); #else /* rounding error: 0x1p-55/N + 0x1p-66. */ r = (z - T2[i].chi - T2[i].clo) * invc; #endif kd = (double_t) k; /* hi + lo = r + log(c) + k*Ln2. */ w = kd * Ln2hi + logc; hi = w + r; lo = w - hi + r + kd * Ln2lo; /* log(x) = lo + (log1p(r) - r) + hi. */ r2 = r * r; /* rounding error: 0x1p-54/N^2. */ /* Worst case error if |y| > 0x1p-5: 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma) Worst case error if |y| > 0x1p-4: 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */ #if LOG_POLY_ORDER == 6 y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi; #elif LOG_POLY_ORDER == 7 y = lo + r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r2 * r2 * (A[4] + r * A[5])) + hi; #endif return eval_as_double (y); } #if USE_GLIBC_ABI strong_alias (log, __log_finite) hidden_alias (log, __ieee754_log) # if LDBL_MANT_DIG == 53 long double logl (long double x) { return log (x); } # endif #endif + +TEST_SIG (S, D, 1, log, 0.01, 11.1) +TEST_ULP (log, 0.02) +TEST_ULP_NONNEAREST (log, 0.5) +TEST_INTERVAL (log, 0, 0xffff000000000000, 10000) +TEST_INTERVAL (log, 0x1p-4, 0x1p4, 400000) +TEST_INTERVAL (log, 0, inf, 400000) diff --git a/contrib/arm-optimized-routines/pl/math/log10f.c b/contrib/arm-optimized-routines/math/log10f.c similarity index 84% rename from contrib/arm-optimized-routines/pl/math/log10f.c rename to contrib/arm-optimized-routines/math/log10f.c index 5c80008e4e57..f8561d063107 100644 --- a/contrib/arm-optimized-routines/pl/math/log10f.c +++ b/contrib/arm-optimized-routines/math/log10f.c @@ -1,97 +1,99 @@ /* * Single-precision log10 function. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include "math_config.h" -#include "pl_sig.h" -#include "pl_test.h" +#include "test_sig.h" +#include "test_defs.h" /* Data associated to logf: LOGF_TABLE_BITS = 4 LOGF_POLY_ORDER = 4 ULP error: 0.818 (nearest rounding.) Relative error: 1.957 * 2^-26 (before rounding.). */ #define T __logf_data.tab #define A __logf_data.poly #define Ln2 __logf_data.ln2 #define InvLn10 __logf_data.invln10 #define N (1 << LOGF_TABLE_BITS) #define OFF 0x3f330000 /* This naive implementation of log10f mimics that of log then simply scales the result by 1/log(10) to switch from base e to base 10. Hence, most computations are carried out in double precision. - Scaling before rounding to single precision is both faster and more accurate. + Scaling before rounding to single precision is both faster and more + accurate. ULP error: 0.797 ulp (nearest rounding.). */ float log10f (float x) { /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t z, r, r2, y, y0, invc, logc; uint32_t ix, iz, tmp; int k, i; ix = asuint (x); #if WANT_ROUNDING /* Fix sign of zero with downward rounding when x==1. */ if (unlikely (ix == 0x3f800000)) return 0; #endif if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { /* x < 0x1p-126 or inf or nan. */ if (ix * 2 == 0) return __math_divzerof (1); if (ix == 0x7f800000) /* log(inf) == inf. */ return x; if ((ix & 0x80000000) || ix * 2 >= 0xff000000) return __math_invalidf (x); /* x is subnormal, normalize it. */ ix = asuint (x * 0x1p23f); ix -= 23 << 23; } /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ tmp = ix - OFF; i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; k = (int32_t) tmp >> 23; /* arithmetic shift. */ iz = ix - (tmp & 0xff800000); invc = T[i].invc; logc = T[i].logc; z = (double_t) asfloat (iz); /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ r = z * invc - 1; y0 = logc + (double_t) k * Ln2; /* Pipelined polynomial evaluation to approximate log1p(r). */ r2 = r * r; y = A[1] * r + A[2]; y = A[0] * r2 + y; y = y * r2 + (y0 + r); /* Multiply by 1/log(10). */ y = y * InvLn10; return eval_as_float (y); } -PL_SIG (S, F, 1, log10, 0.01, 11.1) -PL_TEST_ULP (log10f, 0.30) -PL_TEST_INTERVAL (log10f, 0, 0xffff0000, 10000) -PL_TEST_INTERVAL (log10f, 0x1p-127, 0x1p-26, 50000) -PL_TEST_INTERVAL (log10f, 0x1p-26, 0x1p3, 50000) -PL_TEST_INTERVAL (log10f, 0x1p-4, 0x1p4, 50000) -PL_TEST_INTERVAL (log10f, 0, inf, 50000) +TEST_SIG (S, F, 1, log10, 0.01, 11.1) +TEST_ULP (log10f, 0.30) +TEST_ULP_NONNEAREST (log10f, 0.5) +TEST_INTERVAL (log10f, 0, 0xffff0000, 10000) +TEST_INTERVAL (log10f, 0x1p-127, 0x1p-26, 50000) +TEST_INTERVAL (log10f, 0x1p-26, 0x1p3, 50000) +TEST_INTERVAL (log10f, 0x1p-4, 0x1p4, 50000) +TEST_INTERVAL (log10f, 0, inf, 50000) diff --git a/contrib/arm-optimized-routines/math/log2.c b/contrib/arm-optimized-routines/math/log2.c index 3f9c21b03962..6462915a24f0 100644 --- a/contrib/arm-optimized-routines/math/log2.c +++ b/contrib/arm-optimized-routines/math/log2.c @@ -1,141 +1,150 @@ /* * Double-precision log2(x) function. * - * Copyright (c) 2018-2019, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include #include "math_config.h" +#include "test_defs.h" +#include "test_sig.h" #define T __log2_data.tab #define T2 __log2_data.tab2 #define B __log2_data.poly1 #define A __log2_data.poly #define InvLn2hi __log2_data.invln2hi #define InvLn2lo __log2_data.invln2lo #define N (1 << LOG2_TABLE_BITS) #define OFF 0x3fe6000000000000 /* Top 16 bits of a double. */ static inline uint32_t top16 (double x) { return asuint64 (x) >> 48; } double log2 (double x) { /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p; uint64_t ix, iz, tmp; uint32_t top; int k, i; ix = asuint64 (x); top = top16 (x); #if LOG2_POLY1_ORDER == 11 # define LO asuint64 (1.0 - 0x1.5b51p-5) # define HI asuint64 (1.0 + 0x1.6ab2p-5) #endif if (unlikely (ix - LO < HI - LO)) { /* Handle close to 1.0 inputs separately. */ /* Fix sign of zero with downward rounding when x==1. */ if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0))) return 0; r = x - 1.0; #if HAVE_FAST_FMA hi = r * InvLn2hi; lo = r * InvLn2lo + fma (r, InvLn2hi, -hi); #else double_t rhi, rlo; rhi = asdouble (asuint64 (r) & -1ULL << 32); rlo = r - rhi; hi = rhi * InvLn2hi; lo = rlo * InvLn2hi + r * InvLn2lo; #endif r2 = r * r; /* rounding error: 0x1p-62. */ r4 = r2 * r2; #if LOG2_POLY1_ORDER == 11 /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */ p = r2 * (B[0] + r * B[1]); y = hi + p; lo += hi - y + p; lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) + r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9]))); y += lo; #endif return eval_as_double (y); } if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010)) { /* x < 0x1p-1022 or inf or nan. */ if (ix * 2 == 0) return __math_divzero (1); if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */ return x; if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) return __math_invalid (x); /* x is subnormal, normalize it. */ ix = asuint64 (x * 0x1p52); ix -= 52ULL << 52; } /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ tmp = ix - OFF; i = (tmp >> (52 - LOG2_TABLE_BITS)) % N; k = (int64_t) tmp >> 52; /* arithmetic shift */ iz = ix - (tmp & 0xfffULL << 52); invc = T[i].invc; logc = T[i].logc; z = asdouble (iz); kd = (double_t) k; /* log2(x) = log2(z/c) + log2(c) + k. */ /* r ~= z/c - 1, |r| < 1/(2*N). */ #if HAVE_FAST_FMA /* rounding error: 0x1p-55/N. */ r = fma (z, invc, -1.0); t1 = r * InvLn2hi; t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1); #else double_t rhi, rlo; /* rounding error: 0x1p-55/N + 0x1p-65. */ r = (z - T2[i].chi - T2[i].clo) * invc; rhi = asdouble (asuint64 (r) & -1ULL << 32); rlo = r - rhi; t1 = rhi * InvLn2hi; t2 = rlo * InvLn2hi + r * InvLn2lo; #endif /* hi + lo = r/ln2 + log2(c) + k. */ t3 = kd + logc; hi = t3 + t1; lo = t3 - hi + t1 + t2; /* log2(r+1) = r/ln2 + r^2*poly(r). */ /* Evaluation is optimized assuming superscalar pipelined execution. */ r2 = r * r; /* rounding error: 0x1p-54/N^2. */ r4 = r2 * r2; #if LOG2_POLY_ORDER == 7 /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma). ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). 
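The polynomial below is evaluated Estrin-style: the three pairs A[2i] + r*A[2i+1] are independent, then combined with the precomputed r2 and r4, cutting the dependency chain to roughly three multiply-adds versus five for Horner; that is what "optimized assuming superscalar pipelined execution" refers to. The same shape in isolation:

    /* Degree-5 Estrin: independent pairs, two-level combine. */
    static double estrin_deg5 (double r, const double A[6])
    {
      double r2 = r * r;
      double r4 = r2 * r2;
      return (A[0] + r * A[1]) + r2 * (A[2] + r * A[3])
             + r4 * (A[4] + r * A[5]);
    }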
*/ p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]); y = lo + r2 * p + hi; #endif return eval_as_double (y); } #if USE_GLIBC_ABI strong_alias (log2, __log2_finite) hidden_alias (log2, __ieee754_log2) # if LDBL_MANT_DIG == 53 long double log2l (long double x) { return log2 (x); } # endif #endif + +TEST_SIG (S, D, 1, log2, 0.01, 11.1) +TEST_ULP (log2, 0.05) +TEST_ULP_NONNEAREST (log2, 0.5) +TEST_INTERVAL (log2, 0, 0xffff000000000000, 10000) +TEST_INTERVAL (log2, 0x1p-4, 0x1p4, 40000) +TEST_INTERVAL (log2, 0, inf, 40000) diff --git a/contrib/arm-optimized-routines/math/log2f.c b/contrib/arm-optimized-routines/math/log2f.c index 0a44fa2024f6..7d47379b41cb 100644 --- a/contrib/arm-optimized-routines/math/log2f.c +++ b/contrib/arm-optimized-routines/math/log2f.c @@ -1,80 +1,89 @@ /* * Single-precision log2 function. * - * Copyright (c) 2017-2018, Arm Limited. + * Copyright (c) 2017-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include "math_config.h" +#include "test_defs.h" +#include "test_sig.h" /* LOG2F_TABLE_BITS = 4 LOG2F_POLY_ORDER = 4 ULP error: 0.752 (nearest rounding.) Relative error: 1.9 * 2^-26 (before rounding.) */ #define N (1 << LOG2F_TABLE_BITS) #define T __log2f_data.tab #define A __log2f_data.poly #define OFF 0x3f330000 float log2f (float x) { /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t z, r, r2, p, y, y0, invc, logc; uint32_t ix, iz, top, tmp; int k, i; ix = asuint (x); #if WANT_ROUNDING /* Fix sign of zero with downward rounding when x==1. */ if (unlikely (ix == 0x3f800000)) return 0; #endif if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { /* x < 0x1p-126 or inf or nan. */ if (ix * 2 == 0) return __math_divzerof (1); if (ix == 0x7f800000) /* log2(inf) == inf. */ return x; if ((ix & 0x80000000) || ix * 2 >= 0xff000000) return __math_invalidf (x); /* x is subnormal, normalize it. */ ix = asuint (x * 0x1p23f); ix -= 23 << 23; } /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ tmp = ix - OFF; i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N; top = tmp & 0xff800000; iz = ix - top; k = (int32_t) tmp >> 23; /* arithmetic shift */ invc = T[i].invc; logc = T[i].logc; z = (double_t) asfloat (iz); /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ r = z * invc - 1; y0 = logc + (double_t) k; /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ r2 = r * r; y = A[1] * r + A[2]; y = A[0] * r2 + y; p = A[3] * r + y0; y = y * r2 + p; return eval_as_float (y); } #if USE_GLIBC_ABI strong_alias (log2f, __log2f_finite) hidden_alias (log2f, __ieee754_log2f) #endif + +TEST_SIG (S, F, 1, log2, 0.01, 11.1) +TEST_ULP (log2f, 0.26) +TEST_ULP_NONNEAREST (log2f, 0.5) +TEST_INTERVAL (log2f, 0, 0xffff0000, 10000) +TEST_INTERVAL (log2f, 0x1p-4, 0x1p4, 50000) +TEST_INTERVAL (log2f, 0, inf, 50000) diff --git a/contrib/arm-optimized-routines/math/logf.c b/contrib/arm-optimized-routines/math/logf.c index 820f74c3e66a..f2c26deaff19 100644 --- a/contrib/arm-optimized-routines/math/logf.c +++ b/contrib/arm-optimized-routines/math/logf.c @@ -1,79 +1,88 @@ /* * Single-precision log function. * - * Copyright (c) 2017-2023, Arm Limited. + * Copyright (c) 2017-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include "math_config.h" +#include "test_defs.h" +#include "test_sig.h" /* LOGF_TABLE_BITS = 4 LOGF_POLY_ORDER = 4 ULP error: 0.818 (nearest rounding.) Relative error: 1.957 * 2^-26 (before rounding.) */ #define T __logf_data.tab #define A __logf_data.poly #define Ln2 __logf_data.ln2 #define N (1 << LOGF_TABLE_BITS) #define OFF 0x3f330000 float logf (float x) { /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t z, r, r2, y, y0, invc, logc; uint32_t ix, iz, tmp; int k, i; ix = asuint (x); #if WANT_ROUNDING /* Fix sign of zero with downward rounding when x==1. */ if (unlikely (ix == 0x3f800000)) return 0; #endif if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { /* x < 0x1p-126 or inf or nan. */ if (ix * 2 == 0) return __math_divzerof (1); if (ix == 0x7f800000) /* log(inf) == inf. */ return x; if ((ix & 0x80000000) || ix * 2 >= 0xff000000) return __math_invalidf (x); /* x is subnormal, normalize it. */ ix = asuint (x * 0x1p23f); ix -= 23 << 23; } /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ tmp = ix - OFF; i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; k = (int32_t) tmp >> 23; /* arithmetic shift */ iz = ix - (tmp & 0xff800000); invc = T[i].invc; logc = T[i].logc; z = (double_t) asfloat (iz); /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */ r = z * invc - 1; y0 = logc + (double_t) k * Ln2; /* Pipelined polynomial evaluation to approximate log1p(r). */ r2 = r * r; y = A[1] * r + A[2]; y = A[0] * r2 + y; y = y * r2 + (y0 + r); return eval_as_float (y); } #if USE_GLIBC_ABI strong_alias (logf, __logf_finite) hidden_alias (logf, __ieee754_logf) #endif + +TEST_SIG (S, F, 1, log, 0.01, 11.1) +TEST_ULP (logf, 0.32) +TEST_ULP_NONNEAREST (logf, 0.5) +TEST_INTERVAL (logf, 0, 0xffff0000, 10000) +TEST_INTERVAL (logf, 0x1p-4, 0x1p4, 500000) +TEST_INTERVAL (logf, 0, inf, 50000) diff --git a/contrib/arm-optimized-routines/math/logf_data.c b/contrib/arm-optimized-routines/math/logf_data.c index 04247684755f..5c301a90af8e 100644 --- a/contrib/arm-optimized-routines/math/logf_data.c +++ b/contrib/arm-optimized-routines/math/logf_data.c @@ -1,33 +1,34 @@ /* * Data definition for logf. * - * Copyright (c) 2017-2019, Arm Limited. + * Copyright (c) 2017-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" const struct logf_data __logf_data = { .tab = { { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 }, { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 }, { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 }, { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 }, { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 }, { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 }, { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 }, { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 }, { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 }, { 0x1p+0, 0x0p+0 }, { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 }, { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 }, { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 }, { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 }, { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 }, { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }, }, .ln2 = 0x1.62e42fefa39efp-1, + .invln10 = 0x1.bcb7b1526e50ep-2, .poly = { -0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2, } }; diff --git a/contrib/arm-optimized-routines/math/math_config.h b/contrib/arm-optimized-routines/math/math_config.h index faf77b31fc99..0fc653f93761 100644 --- a/contrib/arm-optimized-routines/math/math_config.h +++ b/contrib/arm-optimized-routines/math/math_config.h @@ -1,521 +1,764 @@ /* * Configuration for math routines. * - * Copyright (c) 2017-2023, Arm Limited. + * Copyright (c) 2017-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef _MATH_CONFIG_H #define _MATH_CONFIG_H #include #include #ifndef WANT_ROUNDING /* If defined to 1, return correct results for special cases in non-nearest - rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f). - This may be set to 0 if there is no fenv support or if math functions only - get called in round to nearest mode. */ + rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than + -0.0f). This may be set to 0 if there is no fenv support or if math + functions only get called in round to nearest mode. */ # define WANT_ROUNDING 1 #endif #ifndef WANT_ERRNO /* If defined to 1, set errno in math functions according to ISO C. Many math libraries do not set errno, so this is 0 by default. It may need to be set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */ # define WANT_ERRNO 0 #endif #ifndef WANT_ERRNO_UFLOW /* Set errno to ERANGE if result underflows to 0 (in all rounding modes). */ # define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO) #endif /* Compiler can inline round as a single instruction. */ #ifndef HAVE_FAST_ROUND # if __aarch64__ # define HAVE_FAST_ROUND 1 # else # define HAVE_FAST_ROUND 0 # endif #endif /* Compiler can inline lround, but not (long)round(x). */ #ifndef HAVE_FAST_LROUND # if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__ # define HAVE_FAST_LROUND 1 # else # define HAVE_FAST_LROUND 0 # endif #endif /* Compiler can inline fma as a single instruction. */ #ifndef HAVE_FAST_FMA # if defined FP_FAST_FMA || __aarch64__ # define HAVE_FAST_FMA 1 # else # define HAVE_FAST_FMA 0 # endif #endif /* Provide *_finite symbols and some of the glibc hidden symbols so libmathlib can be used with binaries compiled against glibc to interpose math functions with both static and dynamic linking. */ #ifndef USE_GLIBC_ABI # if __GNUC__ # define USE_GLIBC_ABI 1 # else # define USE_GLIBC_ABI 0 # endif #endif /* Optionally used extensions. 
*/ #ifdef __GNUC__ # define HIDDEN __attribute__ ((__visibility__ ("hidden"))) # define NOINLINE __attribute__ ((noinline)) # define UNUSED __attribute__ ((unused)) # define likely(x) __builtin_expect (!!(x), 1) # define unlikely(x) __builtin_expect (x, 0) # if __GNUC__ >= 9 # define attribute_copy(f) __attribute__ ((copy (f))) # else # define attribute_copy(f) # endif # define strong_alias(f, a) \ extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f); # define hidden_alias(f, a) \ extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \ attribute_copy (f); #else # define HIDDEN # define NOINLINE # define UNUSED # define likely(x) (x) # define unlikely(x) (x) #endif /* Return ptr but hide its value from the compiler so accesses through it cannot be optimized based on the contents. */ #define ptr_barrier(ptr) \ ({ \ __typeof (ptr) __ptr = (ptr); \ __asm("" : "+r"(__ptr)); \ __ptr; \ }) /* Symbol renames to avoid libc conflicts. */ #define __math_oflowf arm_math_oflowf #define __math_uflowf arm_math_uflowf #define __math_may_uflowf arm_math_may_uflowf #define __math_divzerof arm_math_divzerof #define __math_oflow arm_math_oflow #define __math_uflow arm_math_uflow #define __math_may_uflow arm_math_may_uflow #define __math_divzero arm_math_divzero #define __math_invalidf arm_math_invalidf #define __math_invalid arm_math_invalid #define __math_check_oflow arm_math_check_oflow #define __math_check_uflow arm_math_check_uflow #define __math_check_oflowf arm_math_check_oflowf #define __math_check_uflowf arm_math_check_uflowf +#define __exp_data arm_math_exp_data +#define __asin_poly arm_math_asin_poly +#define __asinf_poly arm_math_asinf_poly +#define __asinh_data arm_math_asinh_data +#define __asinhf_data arm_math_asinhf_data +#define __atan_poly_data arm_math_atan_poly_data +#define __atanf_poly_data arm_math_atanf_poly_data +#define __cbrt_data arm_math_cbrt_data +#define __cbrtf_data arm_math_cbrtf_data +#define __erf_data arm_math_erf_data +#define __expf_data arm_math_expf_data +#define __expm1_poly arm_math_expm1_poly +#define __expm1f_poly arm_math_expm1f_poly +#define __log10_data arm_math_log10_data +#define __log1p_data arm_math_log1p_data +#define __log1pf_data arm_math_log1pf_data +#define __log_data arm_math_log_data +#define __tanf_poly_data arm_math_tanf_poly_data +#define __v_log_data arm_math_v_log_data #define __sincosf_table arm_math_sincosf_table #define __inv_pio4 arm_math_inv_pio4 #define __exp2f_data arm_math_exp2f_data #define __logf_data arm_math_logf_data #define __log2f_data arm_math_log2f_data #define __powf_log2_data arm_math_powf_log2_data #define __exp_data arm_math_exp_data #define __log_data arm_math_log_data #define __log2_data arm_math_log2_data #define __pow_log_data arm_math_pow_log_data #define __erff_data arm_math_erff_data #define __erf_data arm_math_erf_data #define __v_exp_data arm_math_v_exp_data #define __v_log_data arm_math_v_log_data +#define __v_erf_data arm_math_v_erf_data +#define __v_erfc_data arm_math_v_erfc_data +#define __v_erfcf_data arm_math_v_erfcf_data +#define __v_erff_data arm_math_v_erff_data +#define __v_exp_tail_data arm_math_v_exp_tail_data +#define __v_log10_data arm_math_v_log10_data +#define __v_log2_data arm_math_v_log2_data +#define __v_pow_exp_data arm_math_v_pow_exp_data +#define __v_pow_log_data arm_math_v_pow_log_data +#define __v_powf_data arm_math_v_powf_data + +/* On some platforms (in particular Windows) INFINITY and HUGE_VAL might + be defined in such a way that might not produce the 
expected bit pattern, + therefore we enforce the glibc math.h definition using a builtin that is + supported in both gcc and clang. */ +#if defined (_WIN32) && (defined (__GNUC__) || defined (__clang__)) +# undef INFINITY +# define INFINITY __builtin_inff() +#endif #if HAVE_FAST_ROUND /* When set, the roundtoint and converttoint functions are provided with the semantics documented below. */ # define TOINT_INTRINSICS 1 /* Round x to nearest int in all rounding modes, ties have to be rounded consistently with converttoint so the results match. If the result would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */ static inline double_t roundtoint (double_t x) { return round (x); } /* Convert x to nearest int in all rounding modes, ties have to be rounded consistently with roundtoint. If the result is not representable in an int32_t then the semantics is unspecified. */ static inline int32_t converttoint (double_t x) { # if HAVE_FAST_LROUND return lround (x); # else return (long) round (x); # endif } #endif static inline uint32_t asuint (float f) { union { float f; uint32_t i; } u = {f}; return u.i; } static inline float asfloat (uint32_t i) { union { uint32_t i; float f; } u = {i}; return u.f; } static inline uint64_t asuint64 (double f) { union { double f; uint64_t i; } u = {f}; return u.i; } static inline double asdouble (uint64_t i) { union { uint64_t i; double f; } u = {i}; return u.f; } #ifndef IEEE_754_2008_SNAN # define IEEE_754_2008_SNAN 1 #endif static inline int issignalingf_inline (float x) { uint32_t ix = asuint (x); if (!IEEE_754_2008_SNAN) return (ix & 0x7fc00000) == 0x7fc00000; return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000; } static inline int issignaling_inline (double x) { uint64_t ix = asuint64 (x); if (!IEEE_754_2008_SNAN) return (ix & 0x7ff8000000000000) == 0x7ff8000000000000; return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL; } #if __aarch64__ && __GNUC__ /* Prevent the optimization of a floating-point expression. */ static inline float opt_barrier_float (float x) { __asm__ __volatile__ ("" : "+w" (x)); return x; } static inline double opt_barrier_double (double x) { __asm__ __volatile__ ("" : "+w" (x)); return x; } /* Force the evaluation of a floating-point expression for its side-effect. */ static inline void force_eval_float (float x) { __asm__ __volatile__ ("" : "+w" (x)); } static inline void force_eval_double (double x) { __asm__ __volatile__ ("" : "+w" (x)); } #else static inline float opt_barrier_float (float x) { volatile float y = x; return y; } static inline double opt_barrier_double (double x) { volatile double y = x; return y; } static inline void force_eval_float (float x) { volatile float y UNUSED = x; } static inline void force_eval_double (double x) { volatile double y UNUSED = x; } #endif /* Evaluate an expression as the specified type, normally a type cast should be enough, but compilers implement non-standard excess-precision handling, so when FLT_EVAL_METHOD != 0 then these functions may need to be customized. */ static inline float eval_as_float (float x) { return x; } static inline double eval_as_double (double x) { return x; } /* Error handling tail calls for special cases, with a sign argument. The sign of the return value is set if the argument is non-zero. */ /* The result overflows. */ HIDDEN float __math_oflowf (uint32_t); /* The result underflows to 0 in nearest rounding mode. */ HIDDEN float __math_uflowf (uint32_t); /* The result underflows to 0 in some directed rounding mode only. */ HIDDEN float __math_may_uflowf (uint32_t);
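The asuint/asfloat family above reinterprets bits through a union, which C guarantees to work (unlike pointer casts, which break strict aliasing), and the barrier/force_eval helpers keep deliberately "useless" computations alive so they can still raise fenv exceptions. A minimal self-contained illustration of both idioms (as_u32 and the constants are ours, for the example only):

#include <stdint.h>
#include <stdio.h>

static inline uint32_t
as_u32 (float f)
{
  union { float f; uint32_t i; } u = { f };
  return u.i;
}

int
main (void)
{
  printf ("%08x\n", (unsigned) as_u32 (1.0f)); /* prints 3f800000.  */

  /* Poor man's force_eval_float: the volatile store keeps the subnormal
     product alive, so FE_UNDERFLOW is raised instead of being folded away.  */
  volatile float sink = 0x1p-126f * 0x1p-20f;
  (void) sink;
  return 0;
}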
/* Division by zero. */ HIDDEN float __math_divzerof (uint32_t); /* The result overflows. */ HIDDEN double __math_oflow (uint32_t); /* The result underflows to 0 in nearest rounding mode. */ HIDDEN double __math_uflow (uint32_t); /* The result underflows to 0 in some directed rounding mode only. */ HIDDEN double __math_may_uflow (uint32_t); /* Division by zero. */ HIDDEN double __math_divzero (uint32_t); /* Error handling using input checking. */ /* Invalid input unless it is a quiet NaN. */ HIDDEN float __math_invalidf (float); /* Invalid input unless it is a quiet NaN. */ HIDDEN double __math_invalid (double); /* Error handling using output checking, only for errno setting. */ /* Check if the result overflowed to infinity. */ HIDDEN double __math_check_oflow (double); /* Check if the result underflowed to 0. */ HIDDEN double __math_check_uflow (double); /* Check if the result overflowed to infinity. */ static inline double check_oflow (double x) { return WANT_ERRNO ? __math_check_oflow (x) : x; } /* Check if the result underflowed to 0. */ static inline double check_uflow (double x) { return WANT_ERRNO ? __math_check_uflow (x) : x; } /* Check if the result overflowed to infinity. */ HIDDEN float __math_check_oflowf (float); /* Check if the result underflowed to 0. */ HIDDEN float __math_check_uflowf (float); /* Check if the result overflowed to infinity. */ static inline float check_oflowf (float x) { return WANT_ERRNO ? __math_check_oflowf (x) : x; } /* Check if the result underflowed to 0. */ static inline float check_uflowf (float x) { return WANT_ERRNO ? __math_check_uflowf (x) : x; } /* Shared between expf, exp2f and powf. */ #define EXP2F_TABLE_BITS 5 #define EXP2F_POLY_ORDER 3 extern const struct exp2f_data { uint64_t tab[1 << EXP2F_TABLE_BITS]; double shift_scaled; double poly[EXP2F_POLY_ORDER]; - double shift; double invln2_scaled; double poly_scaled[EXP2F_POLY_ORDER]; + double shift; } __exp2f_data HIDDEN; +/* Data for logf and log10f. */ #define LOGF_TABLE_BITS 4 #define LOGF_POLY_ORDER 4 extern const struct logf_data { struct { double invc, logc; } tab[1 << LOGF_TABLE_BITS]; double ln2; + double invln10; double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */ } __logf_data HIDDEN; #define LOG2F_TABLE_BITS 4 #define LOG2F_POLY_ORDER 4 extern const struct log2f_data { struct { double invc, logc; } tab[1 << LOG2F_TABLE_BITS]; double poly[LOG2F_POLY_ORDER]; } __log2f_data HIDDEN; #define POWF_LOG2_TABLE_BITS 4 #define POWF_LOG2_POLY_ORDER 5 #if TOINT_INTRINSICS # define POWF_SCALE_BITS EXP2F_TABLE_BITS #else # define POWF_SCALE_BITS 0 #endif #define POWF_SCALE ((double) (1 << POWF_SCALE_BITS)) extern const struct powf_log2_data { struct { double invc, logc; } tab[1 << POWF_LOG2_TABLE_BITS]; double poly[POWF_LOG2_POLY_ORDER]; } __powf_log2_data HIDDEN; #define EXP_TABLE_BITS 7 #define EXP_POLY_ORDER 5 /* Use polynomial that is optimized for a wider input range. This may be needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */ #define EXP_POLY_WIDE 0 /* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be needed for good precision in non-nearest rounding and !EXP_POLY_WIDE. */ #define EXP_USE_TOINT_NARROW 0 #define EXP2_POLY_ORDER 5 #define EXP2_POLY_WIDE 0 /* Wider exp10 polynomial necessary for good precision in non-nearest rounding and !TOINT_INTRINSICS.
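Several of these layouts reserve a shift constant (struct exp_data below keeps one too). It drives the round-to-nearest-int trick used throughout the exp code when TOINT_INTRINSICS is unavailable: adding 0x1.8p52 pushes the value into a binade where the ULP is 1, so the FPU's own rounding produces the integer, which then sits in the low bits of the representation. A simplified sketch of the idiom (the real code wraps the addition in eval_as_double to defeat excess precision; valid only for moderate |z|):

#include <stdint.h>
#include <string.h>

static inline int32_t
toint_shift (double z, double *kd)
{
  const double Shift = 0x1.8p52;
  double t = z + Shift; /* rounds z to an integer in the current mode.  */
  uint64_t ki;
  memcpy (&ki, &t, sizeof ki);
  *kd = t - Shift;      /* the rounded value as a double.  */
  return (int32_t) ki;  /* low bits hold the integer, two's complement.  */
}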
*/ #define EXP10_POLY_WIDE 0 extern const struct exp_data { double invln2N; - double invlog10_2N; - double shift; double negln2hiN; double negln2loN; - double neglog10_2hiN; - double neglog10_2loN; double poly[4]; /* Last four coefficients. */ + double shift; + double exp2_shift; double exp2_poly[EXP2_POLY_ORDER]; + + double neglog10_2hiN; + double neglog10_2loN; double exp10_poly[5]; uint64_t tab[2*(1 << EXP_TABLE_BITS)]; + double invlog10_2N; } __exp_data HIDDEN; #define LOG_TABLE_BITS 7 #define LOG_POLY_ORDER 6 #define LOG_POLY1_ORDER 12 extern const struct log_data { double ln2hi; double ln2lo; double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ double poly1[LOG_POLY1_ORDER - 1]; struct {double invc, logc;} tab[1 << LOG_TABLE_BITS]; #if !HAVE_FAST_FMA struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS]; #endif } __log_data HIDDEN; #define LOG2_TABLE_BITS 6 #define LOG2_POLY_ORDER 7 #define LOG2_POLY1_ORDER 11 extern const struct log2_data { double invln2hi; double invln2lo; double poly[LOG2_POLY_ORDER - 1]; double poly1[LOG2_POLY1_ORDER - 1]; struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS]; #if !HAVE_FAST_FMA struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS]; #endif } __log2_data HIDDEN; #define POW_LOG_TABLE_BITS 7 #define POW_LOG_POLY_ORDER 8 extern const struct pow_log_data { double ln2hi; double ln2lo; double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ /* Note: the pad field is unused, but allows slightly faster indexing. */ struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS]; } __pow_log_data HIDDEN; extern const struct erff_data { float erff_poly_A[6]; float erff_poly_B[7]; } __erff_data HIDDEN; #define ERF_POLY_A_ORDER 19 #define ERF_POLY_A_NCOEFFS 10 #define ERFC_POLY_C_NCOEFFS 16 #define ERFC_POLY_D_NCOEFFS 18 #define ERFC_POLY_E_NCOEFFS 14 #define ERFC_POLY_F_NCOEFFS 17 extern const struct erf_data { double erf_poly_A[ERF_POLY_A_NCOEFFS]; double erf_ratio_N_A[5]; double erf_ratio_D_A[5]; double erf_ratio_N_B[7]; double erf_ratio_D_B[6]; double erfc_poly_C[ERFC_POLY_C_NCOEFFS]; double erfc_poly_D[ERFC_POLY_D_NCOEFFS]; double erfc_poly_E[ERFC_POLY_E_NCOEFFS]; double erfc_poly_F[ERFC_POLY_F_NCOEFFS]; } __erf_data HIDDEN; #define V_EXP_TABLE_BITS 7 extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN; +#define V_LOG_POLY_ORDER 6 #define V_LOG_TABLE_BITS 7 extern const struct v_log_data { + /* Shared data for vector log and log-derived routines (e.g. asinh). */ + double poly[V_LOG_POLY_ORDER - 1]; + double ln2; struct { double invc, logc; } table[1 << V_LOG_TABLE_BITS]; } __v_log_data HIDDEN; +/* Some data for SVE powf's internal exp and log. */ +#define V_POWF_EXP2_TABLE_BITS 5 +#define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS) +#define V_POWF_LOG2_TABLE_BITS 5 +#define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS) +extern const struct v_powf_data +{ + double invc[V_POWF_LOG2_N]; + double logc[V_POWF_LOG2_N]; + uint64_t scale[V_POWF_EXP2_N]; +} __v_powf_data HIDDEN; + +/* Some data for AdvSIMD and SVE pow's internal exp and log. */ +#define V_POW_EXP_TABLE_BITS 8 +extern const struct v_pow_exp_data +{ + double poly[3]; + double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift; + uint64_t sbits[1 << V_POW_EXP_TABLE_BITS]; +} __v_pow_exp_data HIDDEN; + +#define V_POW_LOG_TABLE_BITS 7 +extern const struct v_pow_log_data +{ + double poly[7]; /* First coefficient is 1. 
*/ + double ln2_hi, ln2_lo; + double invc[1 << V_POW_LOG_TABLE_BITS]; + double logc[1 << V_POW_LOG_TABLE_BITS]; + double logctail[1 << V_POW_LOG_TABLE_BITS]; +} __v_pow_log_data HIDDEN; + +#define V_LOG2_TABLE_BITS 7 +extern const struct v_log2_data +{ + double poly[5]; + double invln2; + struct + { + double invc, log2c; + } table[1 << V_LOG2_TABLE_BITS]; +} __v_log2_data HIDDEN; + +#define V_LOG10_TABLE_BITS 7 +extern const struct v_log10_data +{ + double poly[5]; + double invln10, log10_2; + struct + { + double invc, log10c; + } table[1 << V_LOG10_TABLE_BITS]; +} __v_log10_data HIDDEN; + +#define V_EXP_TAIL_TABLE_BITS 8 +extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN; + +extern const struct v_erff_data +{ + struct + { + float erf, scale; + } tab[513]; +} __v_erff_data HIDDEN; + +extern const struct v_erfcf_data +{ + struct + { + float erfc, scale; + } tab[645]; +} __v_erfcf_data HIDDEN; + +extern const struct v_erf_data +{ + struct + { + double erf, scale; + } tab[769]; +} __v_erf_data HIDDEN; + +extern const struct v_erfc_data +{ + struct + { + double erfc, scale; + } tab[3488]; +} __v_erfc_data HIDDEN; + +/* Table with 4/PI to 192 bit precision. */ +extern const uint32_t __inv_pio4[] HIDDEN; + +#if WANT_EXPERIMENTAL_MATH + +# define LOG1P_NCOEFFS 19 +extern const struct log1p_data +{ + double coeffs[LOG1P_NCOEFFS]; +} __log1p_data HIDDEN; + +# define LOG1PF_2U5 +# define LOG1PF_NCOEFFS 9 +extern const struct log1pf_data +{ + float coeffs[LOG1PF_NCOEFFS]; +} __log1pf_data HIDDEN; + +# define ASINF_POLY_ORDER 4 +extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN; + +# define ASIN_POLY_ORDER 11 +extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN; + +# define ASINHF_NCOEFFS 8 +extern const struct asinhf_data +{ + float coeffs[ASINHF_NCOEFFS]; +} __asinhf_data HIDDEN; + +# define ASINH_NCOEFFS 18 +extern const struct asinh_data +{ + double poly[ASINH_NCOEFFS]; +} __asinh_data HIDDEN; + +# define ATAN_POLY_NCOEFFS 20 +extern const struct atan_poly_data +{ + double poly[ATAN_POLY_NCOEFFS]; +} __atan_poly_data HIDDEN; + +# define ATANF_POLY_NCOEFFS 8 +extern const struct atanf_poly_data +{ + float poly[ATANF_POLY_NCOEFFS]; +} __atanf_poly_data HIDDEN; + +extern const struct cbrtf_data +{ + float poly[4]; + float table[5]; +} __cbrtf_data HIDDEN; + +extern const struct cbrt_data +{ + double poly[4]; + double table[5]; +} __cbrt_data HIDDEN; + +# define EXPF_TABLE_BITS 5 +# define EXPF_POLY_ORDER 3 +extern const struct expf_data +{ + uint64_t tab[1 << EXPF_TABLE_BITS]; + double invln2_scaled; + double poly_scaled[EXPF_POLY_ORDER]; +} __expf_data HIDDEN; + +# define EXPM1F_POLY_ORDER 5 +extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN; + +# define EXPM1_POLY_ORDER 11 +extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN; + +/* Data for low accuracy log10 (with 1/ln(10) included in coefficients). */ +# define LOG10_TABLE_BITS 7 +# define LOG10_POLY_ORDER 6 +# define LOG10_POLY1_ORDER 12 +extern const struct log10_data +{ + double ln2hi; + double ln2lo; + double invln10; + double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10). */ + double poly1[LOG10_POLY1_ORDER - 1]; + struct + { + double invc, logc; + } tab[1 << LOG10_TABLE_BITS]; +# if !HAVE_FAST_FMA + struct + { + double chi, clo; + } tab2[1 << LOG10_TABLE_BITS]; +# endif +} __log10_data HIDDEN; + +# define TANF_P_POLY_NCOEFFS 6 +/* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps. 
*/ +# define TANF_Q_POLY_NCOEFFS 4 +extern const struct tanf_poly_data +{ + float poly_tan[TANF_P_POLY_NCOEFFS]; + float poly_cotan[TANF_Q_POLY_NCOEFFS]; +} __tanf_poly_data HIDDEN; + +#endif /* WANT_EXPERIMENTAL_MATH. */ + #endif diff --git a/contrib/arm-optimized-routines/pl/math/poly_generic.h b/contrib/arm-optimized-routines/math/poly_generic.h similarity index 99% rename from contrib/arm-optimized-routines/pl/math/poly_generic.h rename to contrib/arm-optimized-routines/math/poly_generic.h index 3fc25f8762f2..c21b61aad4c3 100644 --- a/contrib/arm-optimized-routines/pl/math/poly_generic.h +++ b/contrib/arm-optimized-routines/math/poly_generic.h @@ -1,277 +1,277 @@ /* * Generic helpers for evaluating polynomials with various schemes. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef VTYPE # error Cannot use poly_generic without defining VTYPE #endif #ifndef VWRAP # error Cannot use poly_generic without defining VWRAP #endif #ifndef FMA # error Cannot use poly_generic without defining FMA #endif static inline VTYPE VWRAP (pairwise_poly_3) (VTYPE x, VTYPE x2, const VTYPE *poly) { /* At order 3, Estrin and Pairwise Horner are identical. */ VTYPE p01 = FMA (poly[1], x, poly[0]); VTYPE p23 = FMA (poly[3], x, poly[2]); return FMA (p23, x2, p01); } static inline VTYPE VWRAP (estrin_4) (VTYPE x, VTYPE x2, VTYPE x4, const VTYPE *poly) { VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); return FMA (poly[4], x4, p03); } static inline VTYPE VWRAP (estrin_5) (VTYPE x, VTYPE x2, VTYPE x4, const VTYPE *poly) { VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); VTYPE p45 = FMA (poly[5], x, poly[4]); return FMA (p45, x4, p03); } static inline VTYPE VWRAP (estrin_6) (VTYPE x, VTYPE x2, VTYPE x4, const VTYPE *poly) { VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); VTYPE p45 = FMA (poly[5], x, poly[4]); VTYPE p46 = FMA (poly[6], x2, p45); return FMA (p46, x4, p03); } static inline VTYPE VWRAP (estrin_7) (VTYPE x, VTYPE x2, VTYPE x4, const VTYPE *poly) { VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); VTYPE p47 = VWRAP (pairwise_poly_3) (x, x2, poly + 4); return FMA (p47, x4, p03); } static inline VTYPE VWRAP (estrin_8) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const VTYPE *poly) { return FMA (poly[8], x8, VWRAP (estrin_7) (x, x2, x4, poly)); } static inline VTYPE VWRAP (estrin_9) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const VTYPE *poly) { VTYPE p89 = FMA (poly[9], x, poly[8]); return FMA (p89, x8, VWRAP (estrin_7) (x, x2, x4, poly)); } static inline VTYPE VWRAP (estrin_10) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const VTYPE *poly) { VTYPE p89 = FMA (poly[9], x, poly[8]); VTYPE p8_10 = FMA (poly[10], x2, p89); return FMA (p8_10, x8, VWRAP (estrin_7) (x, x2, x4, poly)); } static inline VTYPE VWRAP (estrin_11) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const VTYPE *poly) { VTYPE p8_11 = VWRAP (pairwise_poly_3) (x, x2, poly + 8); return FMA (p8_11, x8, VWRAP (estrin_7) (x, x2, x4, poly)); } static inline VTYPE VWRAP (estrin_12) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const VTYPE *poly) { return FMA (VWRAP (estrin_4) (x, x2, x4, poly + 8), x8, VWRAP (estrin_7) (x, x2, x4, poly)); } static inline VTYPE VWRAP (estrin_13) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const VTYPE *poly) { return FMA (VWRAP (estrin_5) (x, x2, x4, poly + 8), x8, VWRAP (estrin_7) (x, x2, x4, poly)); } static inline VTYPE VWRAP (estrin_14) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const VTYPE *poly) { return FMA (VWRAP (estrin_6) (x, 
x2, x4, poly + 8), x8, VWRAP (estrin_7) (x, x2, x4, poly)); } static inline VTYPE VWRAP (estrin_15) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, const VTYPE *poly) { return FMA (VWRAP (estrin_7) (x, x2, x4, poly + 8), x8, VWRAP (estrin_7) (x, x2, x4, poly)); } static inline VTYPE VWRAP (estrin_16) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, VTYPE x16, const VTYPE *poly) { return FMA (poly[16], x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); } static inline VTYPE VWRAP (estrin_17) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, VTYPE x16, const VTYPE *poly) { VTYPE p16_17 = FMA (poly[17], x, poly[16]); return FMA (p16_17, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); } static inline VTYPE VWRAP (estrin_18) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, VTYPE x16, const VTYPE *poly) { VTYPE p16_17 = FMA (poly[17], x, poly[16]); VTYPE p16_18 = FMA (poly[18], x2, p16_17); return FMA (p16_18, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); } static inline VTYPE VWRAP (estrin_19) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, VTYPE x16, const VTYPE *poly) { VTYPE p16_19 = VWRAP (pairwise_poly_3) (x, x2, poly + 16); return FMA (p16_19, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); } static inline VTYPE VWRAP (horner_2) (VTYPE x, const VTYPE *poly) { VTYPE p = FMA (poly[2], x, poly[1]); return FMA (x, p, poly[0]); } static inline VTYPE VWRAP (horner_3) (VTYPE x, const VTYPE *poly) { VTYPE p = FMA (poly[3], x, poly[2]); p = FMA (x, p, poly[1]); p = FMA (x, p, poly[0]); return p; } static inline VTYPE VWRAP (horner_4) (VTYPE x, const VTYPE *poly) { VTYPE p = FMA (poly[4], x, poly[3]); p = FMA (x, p, poly[2]); p = FMA (x, p, poly[1]); p = FMA (x, p, poly[0]); return p; } static inline VTYPE VWRAP (horner_5) (VTYPE x, const VTYPE *poly) { return FMA (x, VWRAP (horner_4) (x, poly + 1), poly[0]); } static inline VTYPE VWRAP (horner_6) (VTYPE x, const VTYPE *poly) { return FMA (x, VWRAP (horner_5) (x, poly + 1), poly[0]); } static inline VTYPE VWRAP (horner_7) (VTYPE x, const VTYPE *poly) { return FMA (x, VWRAP (horner_6) (x, poly + 1), poly[0]); } static inline VTYPE VWRAP (horner_8) (VTYPE x, const VTYPE *poly) { return FMA (x, VWRAP (horner_7) (x, poly + 1), poly[0]); } static inline VTYPE VWRAP (horner_9) (VTYPE x, const VTYPE *poly) { return FMA (x, VWRAP (horner_8) (x, poly + 1), poly[0]); } static inline VTYPE VWRAP (horner_10) (VTYPE x, const VTYPE *poly) { return FMA (x, VWRAP (horner_9) (x, poly + 1), poly[0]); } static inline VTYPE VWRAP (horner_11) (VTYPE x, const VTYPE *poly) { return FMA (x, VWRAP (horner_10) (x, poly + 1), poly[0]); } static inline VTYPE VWRAP (horner_12) (VTYPE x, const VTYPE *poly) { return FMA (x, VWRAP (horner_11) (x, poly + 1), poly[0]); } static inline VTYPE VWRAP (pw_horner_4) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p01 = FMA (poly[1], x, poly[0]); VTYPE p23 = FMA (poly[3], x, poly[2]); VTYPE p; p = FMA (x2, poly[4], p23); p = FMA (x2, p, p01); return p; } static inline VTYPE VWRAP (pw_horner_5) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p01 = FMA (poly[1], x, poly[0]); VTYPE p23 = FMA (poly[3], x, poly[2]); VTYPE p45 = FMA (poly[5], x, poly[4]); VTYPE p; p = FMA (x2, p45, p23); p = FMA (x2, p, p01); return p; } static inline VTYPE VWRAP (pw_horner_6) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p26 = VWRAP (pw_horner_4) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p26, p01); } static inline VTYPE VWRAP (pw_horner_7) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p27 = VWRAP (pw_horner_5) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return 
FMA (x2, p27, p01); } static inline VTYPE VWRAP (pw_horner_8) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p28 = VWRAP (pw_horner_6) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p28, p01); } static inline VTYPE VWRAP (pw_horner_9) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p29 = VWRAP (pw_horner_7) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p29, p01); } static inline VTYPE VWRAP (pw_horner_10) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p2_10 = VWRAP (pw_horner_8) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p2_10, p01); } static inline VTYPE VWRAP (pw_horner_11) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p2_11 = VWRAP (pw_horner_9) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p2_11, p01); } static inline VTYPE VWRAP (pw_horner_12) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p2_12 = VWRAP (pw_horner_10) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p2_12, p01); } static inline VTYPE VWRAP (pw_horner_13) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p2_13 = VWRAP (pw_horner_11) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p2_13, p01); } static inline VTYPE VWRAP (pw_horner_14) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p2_14 = VWRAP (pw_horner_12) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p2_14, p01); } static inline VTYPE VWRAP (pw_horner_15) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p2_15 = VWRAP (pw_horner_13) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p2_15, p01); } static inline VTYPE VWRAP (pw_horner_16) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p2_16 = VWRAP (pw_horner_14) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p2_16, p01); } static inline VTYPE VWRAP (pw_horner_17) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p2_17 = VWRAP (pw_horner_15) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p2_17, p01); } static inline VTYPE VWRAP (pw_horner_18) (VTYPE x, VTYPE x2, const VTYPE *poly) { VTYPE p2_18 = VWRAP (pw_horner_16) (x, x2, poly + 2); VTYPE p01 = FMA (poly[1], x, poly[0]); return FMA (x2, p2_18, p01); } diff --git a/contrib/arm-optimized-routines/pl/math/poly_scalar_f32.h b/contrib/arm-optimized-routines/math/poly_scalar_f32.h similarity index 80% rename from contrib/arm-optimized-routines/pl/math/poly_scalar_f32.h rename to contrib/arm-optimized-routines/math/poly_scalar_f32.h index a9b1c5544494..198e5801938a 100644 --- a/contrib/arm-optimized-routines/pl/math/poly_scalar_f32.h +++ b/contrib/arm-optimized-routines/math/poly_scalar_f32.h @@ -1,24 +1,24 @@ /* * Helpers for evaluating polynomials on single-precision scalar input, using * various schemes. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#ifndef PL_MATH_POLY_SCALAR_F32_H -#define PL_MATH_POLY_SCALAR_F32_H +#ifndef MATH_POLY_SCALAR_F32_H +#define MATH_POLY_SCALAR_F32_H #include <math.h> /* Wrap scalar f32 helpers: evaluation of some scheme/order has form: [scheme]_[order]_f32. */
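The wrapper headers instantiate these helpers purely textually: each defines VTYPE, FMA and VWRAP and then includes poly_generic.h, so one set of bodies serves float, double and the vector types alike. Client code then calls the suffixed names; for example (the cubic and its coefficients are made up for illustration):

#include <math.h>
#include <stdio.h>

#include "poly_scalar_f32.h"

int
main (void)
{
  /* p(x) = 1 + x + x^2/2 + x^3/6, a truncated expf series.  */
  const float poly[] = { 1.0f, 1.0f, 0.5f, 0x1.555556p-3f };
  printf ("%a\n", horner_3_f32 (0.25f, poly));
  return 0;
}

The vector headers follow the same pattern with a SIMD VTYPE and fma intrinsic, which is why the generic bodies never name a concrete type.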
#define VTYPE float #define FMA fmaf #define VWRAP(f) f##_f32 #include "poly_generic.h" #undef VWRAP #undef FMA #undef VTYPE #endif diff --git a/contrib/arm-optimized-routines/pl/math/poly_scalar_f64.h b/contrib/arm-optimized-routines/math/poly_scalar_f64.h similarity index 80% rename from contrib/arm-optimized-routines/pl/math/poly_scalar_f64.h rename to contrib/arm-optimized-routines/math/poly_scalar_f64.h index 207dccee30ad..6fbebe05d1df 100644 --- a/contrib/arm-optimized-routines/pl/math/poly_scalar_f64.h +++ b/contrib/arm-optimized-routines/math/poly_scalar_f64.h @@ -1,24 +1,24 @@ /* * Helpers for evaluating polynomials on double-precision scalar input, using * various schemes. * - * Copyright (c) 2023, Arm Limited. + * Copyright (c) 2023-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#ifndef PL_MATH_POLY_SCALAR_F64_H -#define PL_MATH_POLY_SCALAR_F64_H +#ifndef MATH_POLY_SCALAR_F64_H +#define MATH_POLY_SCALAR_F64_H #include <math.h> /* Wrap scalar f64 helpers: evaluation of some scheme/order has form: [scheme]_[order]_f64. */ #define VTYPE double #define FMA fma #define VWRAP(f) f##_f64 #include "poly_generic.h" #undef VWRAP #undef FMA #undef VTYPE #endif diff --git a/contrib/arm-optimized-routines/math/pow.c b/contrib/arm-optimized-routines/math/pow.c index af719fe5ab10..1983bb2bbeba 100644 --- a/contrib/arm-optimized-routines/math/pow.c +++ b/contrib/arm-optimized-routines/math/pow.c @@ -1,380 +1,400 @@ /* * Double-precision x^y function. * - * Copyright (c) 2018-2020, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <float.h> #include <math.h> #include <stdint.h> #include "math_config.h" +#include "test_defs.h" /* Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53) relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma) ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma) */ #define T __pow_log_data.tab #define A __pow_log_data.poly #define Ln2hi __pow_log_data.ln2hi #define Ln2lo __pow_log_data.ln2lo #define N (1 << POW_LOG_TABLE_BITS) #define OFF 0x3fe6955500000000 /* Top 12 bits of a double (sign and exponent bits). */ static inline uint32_t top12 (double x) { return asuint64 (x) >> 52; } /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about additional 15 bits precision. IX is the bit representation of x, but normalized in the subnormal range using the sign bit for the exponent. */ static inline double_t log_inline (uint64_t ix, double_t *tail) { /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p; uint64_t iz, tmp; int k, i; /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ tmp = ix - OFF; i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N; k = (int64_t) tmp >> 52; /* arithmetic shift */ iz = ix - (tmp & 0xfffULL << 52); z = asdouble (iz); kd = (double_t) k; /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ invc = T[i].invc; logc = T[i].logc; logctail = T[i].logctail; /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */ #if HAVE_FAST_FMA r = fma (z, invc, -1.0); #else /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|.
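Where fast fma is unavailable, exactness is manufactured by splitting one operand on a 32-bit boundary: clearing the low mantissa bits yields an rhi whose products with short table constants round to themselves, and rlo is just the discarded remainder. The split in isolation (a sketch with memcpy standing in for asuint64/asdouble):

#include <stdint.h>
#include <string.h>

static inline void
split32 (double r, double *rhi, double *rlo)
{
  uint64_t i;
  memcpy (&i, &r, sizeof i);
  i &= -1ULL << 32;       /* clear the low 32 bits of the mantissa.  */
  memcpy (rhi, &i, sizeof *rhi);
  *rlo = r - *rhi;        /* exact: rlo is exactly the dropped tail.  */
}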
*/ double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32)); double_t zlo = z - zhi; double_t rhi = zhi * invc - 1.0; double_t rlo = zlo * invc; r = rhi + rlo; #endif /* k*Ln2 + log(c) + r. */ t1 = kd * Ln2hi + logc; t2 = t1 + r; lo1 = kd * Ln2lo + logctail; lo2 = t1 - t2 + r; /* Evaluation is optimized assuming superscalar pipelined execution. */ double_t ar, ar2, ar3, lo3, lo4; ar = A[0] * r; /* A[0] = -0.5. */ ar2 = r * ar; ar3 = r * ar2; /* k*Ln2 + log(c) + r + A[0]*r*r. */ #if HAVE_FAST_FMA hi = t2 + ar2; lo3 = fma (ar, r, -ar2); lo4 = t2 - hi + ar2; #else double_t arhi = A[0] * rhi; double_t arhi2 = rhi * arhi; hi = t2 + arhi2; lo3 = rlo * (ar + arhi); lo4 = t2 - hi + arhi2; #endif /* p = log1p(r) - r - A[0]*r*r. */ #if POW_LOG_POLY_ORDER == 8 p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6])))); #endif lo = lo1 + lo2 + lo3 + lo4 + p; y = hi + lo; *tail = hi - y + lo; return y; } #undef N #undef T #define N (1 << EXP_TABLE_BITS) #define InvLn2N __exp_data.invln2N #define NegLn2hiN __exp_data.negln2hiN #define NegLn2loN __exp_data.negln2loN #define Shift __exp_data.shift #define T __exp_data.tab #define C2 __exp_data.poly[5 - EXP_POLY_ORDER] #define C3 __exp_data.poly[6 - EXP_POLY_ORDER] #define C4 __exp_data.poly[7 - EXP_POLY_ORDER] #define C5 __exp_data.poly[8 - EXP_POLY_ORDER] #define C6 __exp_data.poly[9 - EXP_POLY_ORDER] /* Handle cases that may overflow or underflow when computing the result that is scale*(1+TMP) without intermediate rounding. The bit representation of scale is in SBITS, however it has a computed exponent that may have overflown into the sign bit so that needs to be adjusted before using it as a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ static inline double specialcase (double_t tmp, uint64_t sbits, uint64_t ki) { double_t scale, y; if ((ki & 0x80000000) == 0) { /* k > 0, the exponent of scale might have overflowed by <= 460. */ sbits -= 1009ull << 52; scale = asdouble (sbits); y = 0x1p1009 * (scale + scale * tmp); return check_oflow (eval_as_double (y)); } /* k < 0, need special care in the subnormal range. */ sbits += 1022ull << 52; /* Note: sbits is signed scale. */ scale = asdouble (sbits); y = scale + scale * tmp; if (fabs (y) < 1.0) { /* Round y to the right precision before scaling it into the subnormal range to avoid double rounding that can cause 0.5+E/2 ulp error where E is the worst-case ulp error outside the subnormal range. So this is only useful if the goal is better than 1 ulp worst-case error. */ double_t hi, lo, one = 1.0; if (y < 0.0) one = -1.0; lo = scale - y + scale * tmp; hi = one + y; lo = one - hi + y + lo; y = eval_as_double (hi + lo) - one; /* Fix the sign of 0. */ if (y == 0.0) y = asdouble (sbits & 0x8000000000000000); /* The underflow exception needs to be signaled explicitly. */ force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); } y = 0x1p-1022 * y; return check_uflow (eval_as_double (y)); } #define SIGN_BIAS (0x800 << EXP_TABLE_BITS) /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */ static inline double exp_inline (double_t x, double_t xtail, uint32_t sign_bias) { uint32_t abstop; uint64_t ki, idx, top, sbits; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. 
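specialcase above dodges overflow and underflow in the exponent field by adjusting the raw bits of the precomputed scale (sbits +- k << 52) and compensating with an exact power-of-two multiply. The underlying move in isolation (a hedged sketch; only valid while the input and result stay normal, which is precisely what the surrounding checks establish):

#include <stdint.h>
#include <string.h>

/* Multiply a normal double by 2^k by adding k to the biased exponent.  */
static inline double
scale_pow2 (double x, int k)
{
  uint64_t i;
  memcpy (&i, &x, sizeof i);
  i += (uint64_t) k << 52; /* wraps correctly for negative k as well.  */
  memcpy (&x, &i, sizeof x);
  return x;
}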
*/ double_t kd, z, r, r2, scale, tail, tmp; abstop = top12 (x) & 0x7ff; if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54))) { if (abstop - top12 (0x1p-54) >= 0x80000000) { /* Avoid spurious underflow for tiny x. */ /* Note: 0 is common input. */ double_t one = WANT_ROUNDING ? 1.0 + x : 1.0; return sign_bias ? -one : one; } if (abstop >= top12 (1024.0)) { /* Note: inf and nan are already handled. */ if (asuint64 (x) >> 63) return __math_uflow (sign_bias); else return __math_oflow (sign_bias); } /* Large x is special cased below. */ abstop = 0; } /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ z = InvLn2N * x; #if TOINT_INTRINSICS kd = roundtoint (z); ki = converttoint (z); #elif EXP_USE_TOINT_NARROW /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ kd = eval_as_double (z + Shift); ki = asuint64 (kd) >> 16; kd = (double_t) (int32_t) ki; #else /* z - kd is in [-1, 1] in non-nearest rounding modes. */ kd = eval_as_double (z + Shift); ki = asuint64 (kd); kd -= Shift; #endif r = x + kd * NegLn2hiN + kd * NegLn2loN; /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ r += xtail; /* 2^(k/N) ~= scale * (1 + tail). */ idx = 2 * (ki % N); top = (ki + sign_bias) << (52 - EXP_TABLE_BITS); tail = asdouble (T[idx]); /* This is only a valid scale when -1023*N < k < 1024*N. */ sbits = T[idx + 1] + top; /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ /* Evaluation is optimized assuming superscalar pipelined execution. */ r2 = r * r; /* Without fma the worst case error is 0.25/N ulp larger. */ /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ #if EXP_POLY_ORDER == 4 tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4); #elif EXP_POLY_ORDER == 5 tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); #elif EXP_POLY_ORDER == 6 tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6); #endif if (unlikely (abstop == 0)) return specialcase (tmp, sbits, ki); scale = asdouble (sbits); /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there is no spurious underflow here even without fma. */ return eval_as_double (scale + scale * tmp); } /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is the bit representation of a non-zero finite floating-point value. */ static inline int checkint (uint64_t iy) { int e = iy >> 52 & 0x7ff; if (e < 0x3ff) return 0; if (e > 0x3ff + 52) return 2; if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) return 0; if (iy & (1ULL << (0x3ff + 52 - e))) return 1; return 2; } /* Returns 1 if input is the bit representation of 0, infinity or nan. */ static inline int zeroinfnan (uint64_t i) { return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1; } double pow (double x, double y) { uint32_t sign_bias = 0; uint64_t ix, iy; uint32_t topx, topy; ix = asuint64 (x); iy = asuint64 (y); topx = top12 (x); topy = top12 (y); if (unlikely (topx - 0x001 >= 0x7ff - 0x001 || (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) { /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0 and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */ /* Special cases: (x < 0x1p-126 or inf or nan) or (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */ if (unlikely (zeroinfnan (iy))) { if (2 * iy == 0) return issignaling_inline (x) ? x + y : 1.0; if (ix == asuint64 (1.0)) return issignaling_inline (y) ? 
x + y : 1.0; if (2 * ix > 2 * asuint64 (INFINITY) || 2 * iy > 2 * asuint64 (INFINITY)) return x + y; if (2 * ix == 2 * asuint64 (1.0)) return 1.0; if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63)) return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ return y * y; } if (unlikely (zeroinfnan (ix))) { double_t x2 = x * x; if (ix >> 63 && checkint (iy) == 1) { x2 = -x2; sign_bias = 1; } if (WANT_ERRNO && 2 * ix == 0 && iy >> 63) return __math_divzero (sign_bias); /* Without the barrier some versions of clang hoist the 1/x2 and thus division by zero exception can be signaled spuriously. */ return iy >> 63 ? opt_barrier_double (1 / x2) : x2; } /* Here x and y are non-zero finite. */ if (ix >> 63) { /* Finite x < 0. */ int yint = checkint (iy); if (yint == 0) return __math_invalid (x); if (yint == 1) sign_bias = SIGN_BIAS; ix &= 0x7fffffffffffffff; topx &= 0x7ff; } if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) { /* Note: sign_bias == 0 here because y is not odd. */ if (ix == asuint64 (1.0)) return 1.0; if ((topy & 0x7ff) < 0x3be) { /* |y| < 2^-65, x^y ~= 1 + y*log(x). */ if (WANT_ROUNDING) return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y; else return 1.0; } return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0) : __math_uflow (0); } if (topx == 0) { /* Normalize subnormal x so exponent becomes negative. */ /* Without the barrier some versions of clang evaluate the mul unconditionally causing spurious overflow exceptions. */ ix = asuint64 (opt_barrier_double (x) * 0x1p52); ix &= 0x7fffffffffffffff; ix -= 52ULL << 52; } } double_t lo; double_t hi = log_inline (ix, &lo); double_t ehi, elo; #if HAVE_FAST_FMA ehi = y * hi; elo = y * lo + fma (y, hi, -ehi); #else double_t yhi = asdouble (iy & -1ULL << 27); double_t ylo = y - yhi; double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27); double_t llo = hi - lhi + lo; ehi = yhi * lhi; elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */ #endif return exp_inline (ehi, elo, sign_bias); } #if USE_GLIBC_ABI strong_alias (pow, __pow_finite) hidden_alias (pow, __ieee754_pow) # if LDBL_MANT_DIG == 53 long double powl (long double x, long double y) { return pow (x, y); } # endif #endif + +TEST_ULP (pow, 0.05) +TEST_ULP_NONNEAREST (pow, 0.5) +TEST_INTERVAL2 (pow, 0.5, 2.0, 0, inf, 20000) +TEST_INTERVAL2 (pow, -0.5, -2.0, 0, inf, 20000) +TEST_INTERVAL2 (pow, 0.5, 2.0, -0, -inf, 20000) +TEST_INTERVAL2 (pow, -0.5, -2.0, -0, -inf, 20000) +TEST_INTERVAL2 (pow, 0.5, 2.0, 0x1p-10, 0x1p10, 40000) +TEST_INTERVAL2 (pow, 0.5, 2.0, -0x1p-10, -0x1p10, 40000) +TEST_INTERVAL2 (pow, 0, inf, 0.5, 2.0, 80000) +TEST_INTERVAL2 (pow, 0, inf, -0.5, -2.0, 80000) +TEST_INTERVAL2 (pow, 0x1.fp-1, 0x1.08p0, 0x1p8, 0x1p17, 80000) +TEST_INTERVAL2 (pow, 0x1.fp-1, 0x1.08p0, -0x1p8, -0x1p17, 80000) +TEST_INTERVAL2 (pow, 0, 0x1p-1000, 0, 1.0, 50000) +TEST_INTERVAL2 (pow, 0x1p1000, inf, 0, 1.0, 50000) +TEST_INTERVAL2 (pow, 0x1.ffffffffffff0p-1, 0x1.0000000000008p0, 0x1p60, 0x1p68, + 50000) +TEST_INTERVAL2 (pow, 0x1.ffffffffff000p-1, 0x1p0, 0x1p50, 0x1p52, 50000) +TEST_INTERVAL2 (pow, -0x1.ffffffffff000p-1, -0x1p0, 0x1p50, 0x1p52, 50000) diff --git a/contrib/arm-optimized-routines/math/powf.c b/contrib/arm-optimized-routines/math/powf.c index 05c80bb2eb67..3f3f41ca276a 100644 --- a/contrib/arm-optimized-routines/math/powf.c +++ b/contrib/arm-optimized-routines/math/powf.c @@ -1,221 +1,231 @@ /* * Single-precision pow function. * - * Copyright (c) 2017-2019, Arm Limited. + * Copyright (c) 2017-2024, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> #include <stdint.h> #include "math_config.h" +#include "test_defs.h" /* POWF_LOG2_POLY_ORDER = 5 EXP2F_TABLE_BITS = 5 ULP error: 0.82 (~ 0.5 + relerr*2^24) relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2) relerr_log2: 1.83 * 2^-33 (Relative error of logx.) relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).) */ #define N (1 << POWF_LOG2_TABLE_BITS) #define T __powf_log2_data.tab #define A __powf_log2_data.poly #define OFF 0x3f330000 /* Subnormal input is normalized so ix has negative biased exponent. Output is multiplied by N (POWF_SCALE) if TOINT_INTRINSICS is set. */ static inline double_t log2_inline (uint32_t ix) { /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t z, r, r2, r4, p, q, y, y0, invc, logc; uint32_t iz, top, tmp; int k, i; /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ tmp = ix - OFF; i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N; top = tmp & 0xff800000; iz = ix - top; k = (int32_t) top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */ invc = T[i].invc; logc = T[i].logc; z = (double_t) asfloat (iz); /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ r = z * invc - 1; y0 = logc + (double_t) k; /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ r2 = r * r; y = A[0] * r + A[1]; p = A[2] * r + A[3]; r4 = r2 * r2; q = A[4] * r + y0; q = p * r2 + q; y = y * r4 + q; return y; } #undef N #undef T #define N (1 << EXP2F_TABLE_BITS) #define T __exp2f_data.tab #define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11)) /* The output of log2 and thus the input of exp2 is either scaled by N (in case of fast toint intrinsics) or not. The unscaled xd must be in [-1021,1023], sign_bias sets the sign of the result. */ static inline float exp2_inline (double_t xd, uint32_t sign_bias) { uint64_t ki, ski, t; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ double_t kd, z, r, r2, y, s; #if TOINT_INTRINSICS # define C __exp2f_data.poly_scaled /* N*x = k + r with r in [-1/2, 1/2] */ kd = roundtoint (xd); /* k */ ki = converttoint (xd); #else # define C __exp2f_data.poly # define SHIFT __exp2f_data.shift_scaled /* x = k/N + r with r in [-1/(2N), 1/(2N)] */ kd = eval_as_double (xd + SHIFT); ki = asuint64 (kd); kd -= SHIFT; /* k/N */ #endif r = xd - kd; /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ t = T[ki % N]; ski = ki + sign_bias; t += ski << (52 - EXP2F_TABLE_BITS); s = asdouble (t); z = C[0] * r + C[1]; r2 = r * r; y = C[2] * r + 1; y = z * r2 + y; y = y * s; return eval_as_float (y); } /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is the bit representation of a non-zero finite floating-point value. */ static inline int checkint (uint32_t iy) { int e = iy >> 23 & 0xff; if (e < 0x7f) return 0; if (e > 0x7f + 23) return 2; if (iy & ((1 << (0x7f + 23 - e)) - 1)) return 0; if (iy & (1 << (0x7f + 23 - e))) return 1; return 2; } static inline int zeroinfnan (uint32_t ix) { return 2 * ix - 1 >= 2u * 0x7f800000 - 1; } float powf (float x, float y) { uint32_t sign_bias = 0; uint32_t ix, iy; ix = asuint (x); iy = asuint (y); if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || zeroinfnan (iy))) { /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */ if (unlikely (zeroinfnan (iy))) { if (2 * iy == 0) return issignalingf_inline (x) ?
x + y : 1.0f; if (ix == 0x3f800000) return issignalingf_inline (y) ? x + y : 1.0f; if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000) return x + y; if (2 * ix == 2 * 0x3f800000) return 1.0f; if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000)) return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ return y * y; } if (unlikely (zeroinfnan (ix))) { float_t x2 = x * x; if (ix & 0x80000000 && checkint (iy) == 1) { x2 = -x2; sign_bias = 1; } #if WANT_ERRNO if (2 * ix == 0 && iy & 0x80000000) return __math_divzerof (sign_bias); #endif /* Without the barrier some versions of clang hoist the 1/x2 and thus division by zero exception can be signaled spuriously. */ return iy & 0x80000000 ? opt_barrier_float (1 / x2) : x2; } /* x and y are non-zero finite. */ if (ix & 0x80000000) { /* Finite x < 0. */ int yint = checkint (iy); if (yint == 0) return __math_invalidf (x); if (yint == 1) sign_bias = SIGN_BIAS; ix &= 0x7fffffff; } if (ix < 0x00800000) { /* Normalize subnormal x so exponent becomes negative. */ ix = asuint (x * 0x1p23f); ix &= 0x7fffffff; ix -= 23 << 23; } } double_t logx = log2_inline (ix); double_t ylogx = y * logx; /* Note: cannot overflow, y is single prec. */ if (unlikely ((asuint64 (ylogx) >> 47 & 0xffff) >= asuint64 (126.0 * POWF_SCALE) >> 47)) { /* |y*log(x)| >= 126. */ if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE) /* |x^y| > 0x1.ffffffp127. */ return __math_oflowf (sign_bias); if (WANT_ROUNDING && WANT_ERRNO && ylogx > 0x1.fffffffa3aae2p+6 * POWF_SCALE) /* |x^y| > 0x1.fffffep127, check if we round away from 0. */ if ((!sign_bias && eval_as_float (1.0f + opt_barrier_float (0x1p-25f)) != 1.0f) || (sign_bias && eval_as_float (-1.0f - opt_barrier_float (0x1p-25f)) != -1.0f)) return __math_oflowf (sign_bias); if (ylogx <= -150.0 * POWF_SCALE) return __math_uflowf (sign_bias); #if WANT_ERRNO_UFLOW if (ylogx < -149.0 * POWF_SCALE) return __math_may_uflowf (sign_bias); #endif } return exp2_inline (ylogx, sign_bias); } #if USE_GLIBC_ABI strong_alias (powf, __powf_finite) hidden_alias (powf, __ieee754_powf) #endif + +TEST_ULP (powf, 0.4) +TEST_ULP_NONNEAREST (powf, 0.5) +TEST_INTERVAL2 (powf, 0x1p-1, 0x1p1, 0x1p-7, 0x1p7, 50000) +TEST_INTERVAL2 (powf, 0x1p-1, 0x1p1, -0x1p-7, -0x1p7, 50000) +TEST_INTERVAL2 (powf, 0x1p-70, 0x1p70, 0x1p-1, 0x1p1, 50000) +TEST_INTERVAL2 (powf, 0x1p-70, 0x1p70, -0x1p-1, -0x1p1, 50000) +TEST_INTERVAL2 (powf, 0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p14, 50000) +TEST_INTERVAL2 (powf, 0x1.ep-1, 0x1.1p0, -0x1p8, -0x1p14, 50000) diff --git a/contrib/arm-optimized-routines/math/sincosf.c b/contrib/arm-optimized-routines/math/sincosf.c index 446f21d60faf..05a71d78bb1e 100644 --- a/contrib/arm-optimized-routines/math/sincosf.c +++ b/contrib/arm-optimized-routines/math/sincosf.c @@ -1,79 +1,89 @@ /* * Single-precision sin/cos function. * - * Copyright (c) 2018-2021, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include "math_config.h" #include "sincosf.h" +#include "test_defs.h" /* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative error is 0.5303 * 2^-23. A single-step range reduction is used for small values. Large inputs have their range reduced using fast integer arithmetic. 
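Every fast-path decision in sincosf and sinf is made by comparing abstop12 values, the top 12 bits of the float with the sign cleared. That quantity is monotonic in |x|, so magnitude thresholds such as pi/4 or 120.0f become plain integer compares with no floating-point comparison on the hot path. Recreated in isolation (mirrors abstop12 from sincosf.h below):

#include <stdint.h>
#include <string.h>

static inline uint32_t
abstop12 (float x)
{
  uint32_t i;
  memcpy (&i, &x, sizeof i);
  return (i >> 20) & 0x7ff;
}

/* E.g. abstop12 (y) < abstop12 (0x1.921fb6p-1f) selects the single-step
   pi/4 path, exactly as in sincosf above.  */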
*/ void sincosf (float y, float *sinp, float *cosp) { double x = y; double s; int n; const sincos_t *p = &__sincosf_table[0]; if (abstop12 (y) < abstop12 (pio4f)) { double x2 = x * x; if (unlikely (abstop12 (y) < abstop12 (0x1p-12f))) { if (unlikely (abstop12 (y) < abstop12 (0x1p-126f))) /* Force underflow for tiny y. */ force_eval_float (x2); *sinp = y; *cosp = 1.0f; return; } sincosf_poly (x, x2, p, 0, sinp, cosp); } else if (abstop12 (y) < abstop12 (120.0f)) { x = reduce_fast (x, p, &n); /* Setup the signs for sin and cos. */ s = p->sign[n & 3]; if (n & 2) p = &__sincosf_table[1]; sincosf_poly (x * s, x * x, p, n, sinp, cosp); } else if (likely (abstop12 (y) < abstop12 (INFINITY))) { uint32_t xi = asuint (y); int sign = xi >> 31; x = reduce_large (xi, &n); /* Setup signs for sin and cos - include original sign. */ s = p->sign[(n + sign) & 3]; if ((n + sign) & 2) p = &__sincosf_table[1]; sincosf_poly (x * s, x * x, p, n, sinp, cosp); } else { /* Return NaN if Inf or NaN for both sin and cos. */ *sinp = *cosp = y - y; #if WANT_ERRNO /* Needed to set errno for +-Inf, the add is a hack to work around a gcc register allocation issue: just passing y affects code generation in the fast path. */ __math_invalidf (y + y); #endif } } + +TEST_ULP (sincosf_sinf, 0.06) +TEST_ULP (sincosf_cosf, 0.06) +TEST_ULP_NONNEAREST (sincosf_sinf, 0.5) +TEST_ULP_NONNEAREST (sincosf_cosf, 0.5) +TEST_INTERVAL (sincosf_sinf, 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (sincosf_sinf, 0x1p-14, 0x1p54, 50000) +TEST_INTERVAL (sincosf_cosf, 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (sincosf_cosf, 0x1p-14, 0x1p54, 50000) diff --git a/contrib/arm-optimized-routines/math/sincosf.h b/contrib/arm-optimized-routines/math/sincosf.h index ec23ed7aeb26..912def33d295 100644 --- a/contrib/arm-optimized-routines/math/sincosf.h +++ b/contrib/arm-optimized-routines/math/sincosf.h @@ -1,153 +1,150 @@ /* * Header for sinf, cosf and sincosf. * - * Copyright (c) 2018-2021, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include "math_config.h" /* 2PI * 2^-64. */ static const double pi63 = 0x1.921FB54442D18p-62; /* PI / 4. */ static const float pio4f = 0x1.921FB6p-1f; /* The constants and polynomials for sine and cosine. */ typedef struct { double sign[4]; /* Sign of sine in quadrants 0..3. */ double hpi_inv; /* 2 / PI ( * 2^24 if !TOINT_INTRINSICS). */ double hpi; /* PI / 2. */ double c0, c1, c2, c3, c4; /* Cosine polynomial. */ double s1, s2, s3; /* Sine polynomial. */ } sincos_t; /* Polynomial data (the cosine polynomial is negated in the 2nd entry). */ extern const sincos_t __sincosf_table[2] HIDDEN; -/* Table with 4/PI to 192 bit precision. */ -extern const uint32_t __inv_pio4[] HIDDEN; - /* Top 12 bits of the float representation with the sign bit cleared. */ static inline uint32_t abstop12 (float x) { return (asuint (x) >> 20) & 0x7ff; } /* Compute the sine and cosine of inputs X and X2 (X squared), using the polynomial P and store the results in SINP and COSP. N is the quadrant, if odd the cosine and sine polynomials are swapped. */ static inline void sincosf_poly (double x, double x2, const sincos_t *p, int n, float *sinp, float *cosp) { double x3, x4, x5, x6, s, c, c1, c2, s1; x4 = x2 * x2; x3 = x2 * x; c2 = p->c3 + x2 * p->c4; s1 = p->s2 + x2 * p->s3; /* Swap sin/cos result based on quadrant. */ float *tmp = (n & 1 ? cosp : sinp); cosp = (n & 1 ? 
sinp : cosp); sinp = tmp; c1 = p->c0 + x2 * p->c1; x5 = x3 * x2; x6 = x4 * x2; s = x + x3 * p->s1; c = c1 + x4 * p->c2; *sinp = s + x5 * s1; *cosp = c + x6 * c2; } /* Return the sine of inputs X and X2 (X squared) using the polynomial P. N is the quadrant, and if odd the cosine polynomial is used. */ static inline float sinf_poly (double x, double x2, const sincos_t *p, int n) { double x3, x4, x6, x7, s, c, c1, c2, s1; if ((n & 1) == 0) { x3 = x * x2; s1 = p->s2 + x2 * p->s3; x7 = x3 * x2; s = x + x3 * p->s1; return s + x7 * s1; } else { x4 = x2 * x2; c2 = p->c3 + x2 * p->c4; c1 = p->c0 + x2 * p->c1; x6 = x4 * x2; c = c1 + x4 * p->c2; return c + x6 * c2; } } /* Fast range reduction using single multiply-subtract. Return the modulo of X as a value between -PI/4 and PI/4 and store the quadrant in NP. The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4, the result is accurate for |X| <= 120.0. */ static inline double reduce_fast (double x, const sincos_t *p, int *np) { double r; #if TOINT_INTRINSICS /* Use fast round and lround instructions when available. */ r = x * p->hpi_inv; *np = converttoint (r); return x - roundtoint (r) * p->hpi; #else /* Use scaled float to int conversion with explicit rounding. hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31. This avoids inaccuracies introduced by truncating negative values. */ r = x * p->hpi_inv; int n = ((int32_t)r + 0x800000) >> 24; *np = n; return x - n * p->hpi; #endif } /* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic. XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored). Return the modulo between -PI/4 and PI/4 and store the quadrant in NP. Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit multiply computes the exact 2.62-bit fixed-point modulo. Since the result can have at most 29 leading zeros after the binary point, the double precision result is accurate to 33 bits. */ static inline double reduce_large (uint32_t xi, int *np) { const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15]; int shift = (xi >> 23) & 7; uint64_t n, res0, res1, res2; xi = (xi & 0xffffff) | 0x800000; xi <<= shift; res0 = xi * arr[0]; res1 = (uint64_t)xi * arr[4]; res2 = (uint64_t)xi * arr[8]; res0 = (res2 >> 32) | (res0 << 32); res0 += res1; n = (res0 + (1ULL << 61)) >> 62; res0 -= n << 62; double x = (int64_t)res0; *np = n; return x * pi63; } diff --git a/contrib/arm-optimized-routines/math/sinf.c b/contrib/arm-optimized-routines/math/sinf.c index 8dd8ae458794..e244e115d32b 100644 --- a/contrib/arm-optimized-routines/math/sinf.c +++ b/contrib/arm-optimized-routines/math/sinf.c @@ -1,67 +1,75 @@ /* * Single-precision sin function. * - * Copyright (c) 2018-2021, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include "math_config.h" #include "sincosf.h" +#include "test_defs.h" +#include "test_sig.h" /* Fast sinf implementation. Worst-case ULP is 0.5607, maximum relative error is 0.5303 * 2^-23. A single-step range reduction is used for small values. Large inputs have their range reduced using fast integer arithmetic. */ float sinf (float y) { double x = y; double s; int n; const sincos_t *p = &__sincosf_table[0]; if (abstop12 (y) < abstop12 (pio4f)) { s = x * x; if (unlikely (abstop12 (y) < abstop12 (0x1p-12f))) { if (unlikely (abstop12 (y) < abstop12 (0x1p-126f))) /* Force underflow for tiny y. 
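The last few lines of reduce_large above do the quadrant split in 2.62 fixed point: after rounding, the top two bits of res0 are the quadrant and the low 62 bits are the signed fraction of a quadrant, rescaled by pi63 = 2*PI * 2^-64 (equivalently pi/2 * 2^-62). A restatement of just that step under a hypothetical name:

```c
#include <stdint.h>

/* Split a 2.62 fixed-point multiple of pi/2 into quadrant and
   remainder, exactly as at the end of reduce_large.  */
static double
quadrant_split (uint64_t res0, int *np)
{
  const double pi63 = 0x1.921FB54442D18p-62; /* pi/2 * 2^-62 */
  uint64_t n = (res0 + (1ULL << 61)) >> 62;  /* round to nearest quadrant */
  res0 -= n << 62;                           /* signed remainder */
  *np = (int) n;
  return (double) (int64_t) res0 * pi63;
}
```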
*/ force_eval_float (s); return y; } return sinf_poly (x, s, p, 0); } else if (likely (abstop12 (y) < abstop12 (120.0f))) { x = reduce_fast (x, p, &n); /* Setup the signs for sin and cos. */ s = p->sign[n & 3]; if (n & 2) p = &__sincosf_table[1]; return sinf_poly (x * s, x * x, p, n); } else if (abstop12 (y) < abstop12 (INFINITY)) { uint32_t xi = asuint (y); int sign = xi >> 31; x = reduce_large (xi, &n); /* Setup signs for sin and cos - include original sign. */ s = p->sign[(n + sign) & 3]; if ((n + sign) & 2) p = &__sincosf_table[1]; return sinf_poly (x * s, x * x, p, n); } else return __math_invalidf (y); } + +TEST_SIG (S, F, 1, sin, -3.1, 3.1) +TEST_ULP (sinf, 0.06) +TEST_ULP_NONNEAREST (sinf, 0.5) +TEST_INTERVAL (sinf, 0, 0xffff0000, 10000) +TEST_SYM_INTERVAL (sinf, 0x1p-14, 0x1p54, 50000) diff --git a/contrib/arm-optimized-routines/math/test/mathbench.c b/contrib/arm-optimized-routines/math/test/mathbench.c index ed7e89bb7710..653c58fbc484 100644 --- a/contrib/arm-optimized-routines/math/test/mathbench.c +++ b/contrib/arm-optimized-routines/math/test/mathbench.c @@ -1,642 +1,581 @@ /* * Microbenchmark for math functions. * - * Copyright (c) 2018-2023, Arm Limited. + * Copyright (c) 2018-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +#if WANT_SVE_TESTS +# if __aarch64__ && __linux__ +# ifdef __clang__ +# pragma clang attribute push(__attribute__((target("sve"))), \ + apply_to = any(function)) +# else +# pragma GCC target("+sve") +# endif +# else +# error "SVE not supported - please disable WANT_SVE_TESTS" +# endif +#endif + #undef _GNU_SOURCE #define _GNU_SOURCE 1 #include #include #include #include #include #include #include "mathlib.h" /* Number of measurements, best result is reported. */ #define MEASURE 60 /* Array size. */ #define N 8000 /* Iterations over the array. */ #define ITER 125 static double *Trace; static size_t trace_size; static double A[N]; static float Af[N]; static long measurecount = MEASURE; static long itercount = ITER; -#ifdef __vpcs -#include -typedef float64x2_t v_double; - -#define v_double_len() 2 - -static inline v_double -v_double_load (const double *p) -{ - return (v_double){p[0], p[1]}; -} - -static inline v_double -v_double_dup (double x) -{ - return (v_double){x, x}; -} - -typedef float32x4_t v_float; - -#define v_float_len() 4 - -static inline v_float -v_float_load (const float *p) -{ - return (v_float){p[0], p[1], p[2], p[3]}; -} - -static inline v_float -v_float_dup (float x) -{ - return (v_float){x, x, x, x}; -} -#else -/* dummy definitions to make things compile. */ -typedef double v_double; -typedef float v_float; -#define v_double_len(x) 1 -#define v_double_load(x) (x)[0] -#define v_double_dup(x) (x) -#define v_float_len(x) 1 -#define v_float_load(x) (x)[0] -#define v_float_dup(x) (x) - -#endif - -#if WANT_SVE_MATH -#include -typedef svbool_t sv_bool; -typedef svfloat64_t sv_double; - -#define sv_double_len() svcntd() - -static inline sv_double -sv_double_load (const double *p) -{ - svbool_t pg = svptrue_b64(); - return svld1(pg, p); -} - -static inline sv_double -sv_double_dup (double x) -{ - return svdup_n_f64(x); -} - -typedef svfloat32_t sv_float; - -#define sv_float_len() svcntw() - -static inline sv_float -sv_float_load (const float *p) -{ - svbool_t pg = svptrue_b32(); - return svld1(pg, p); -} - -static inline sv_float -sv_float_dup (float x) -{ - return svdup_n_f32(x); -} -#else -/* dummy definitions to make things compile. 
*/ -#define sv_double_len(x) 1 -#define sv_float_len(x) 1 -#endif - static double dummy (double x) { return x; } static float dummyf (float x) { return x; } -#ifdef __vpcs -__vpcs static v_double -__vn_dummy (v_double x) +#if __aarch64__ && __linux__ +__vpcs static float64x2_t +__vn_dummy (float64x2_t x) { return x; } -__vpcs static v_float -__vn_dummyf (v_float x) +__vpcs static float32x4_t +__vn_dummyf (float32x4_t x) { return x; } #endif -#if WANT_SVE_MATH -static sv_double -__sv_dummy (sv_double x, sv_bool pg) +#if WANT_SVE_TESTS +static svfloat64_t +__sv_dummy (svfloat64_t x, svbool_t pg) { return x; } -static sv_float -__sv_dummyf (sv_float x, sv_bool pg) +static svfloat32_t +__sv_dummyf (svfloat32_t x, svbool_t pg) { return x; } #endif #include "test/mathbench_wrappers.h" static const struct fun { const char *name; int prec; int vec; double lo; double hi; union { double (*d) (double); float (*f) (float); -#ifdef __vpcs - __vpcs v_double (*vnd) (v_double); - __vpcs v_float (*vnf) (v_float); +#if __aarch64__ && __linux__ + __vpcs float64x2_t (*vnd) (float64x2_t); + __vpcs float32x4_t (*vnf) (float32x4_t); #endif -#if WANT_SVE_MATH - sv_double (*svd) (sv_double, sv_bool); - sv_float (*svf) (sv_float, sv_bool); +#if WANT_SVE_TESTS + svfloat64_t (*svd) (svfloat64_t, svbool_t); + svfloat32_t (*svf) (svfloat32_t, svbool_t); #endif } fun; } funtab[] = { +// clang-format off #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}}, #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}}, #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}}, #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}}, #define SVD(func, lo, hi) {#func, 'd', 's', lo, hi, {.svd = func}}, #define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}}, D (dummy, 1.0, 2.0) F (dummyf, 1.0, 2.0) -#ifdef __vpcs +#if __aarch64__ && __linux__ VND (__vn_dummy, 1.0, 2.0) VNF (__vn_dummyf, 1.0, 2.0) #endif -#if WANT_SVE_MATH +#if WANT_SVE_TESTS SVD (__sv_dummy, 1.0, 2.0) SVF (__sv_dummyf, 1.0, 2.0) #endif #include "test/mathbench_funcs.h" {0}, #undef F #undef D #undef VNF #undef VND #undef SVF #undef SVD + // clang-format on }; static void gen_linear (double lo, double hi) { for (int i = 0; i < N; i++) A[i] = (lo * (N - i) + hi * i) / N; } static void genf_linear (double lo, double hi) { for (int i = 0; i < N; i++) Af[i] = (float)(lo * (N - i) + hi * i) / N; } static inline double asdouble (uint64_t i) { union { uint64_t i; double f; } u = {i}; return u.f; } static uint64_t seed = 0x0123456789abcdef; static double frand (double lo, double hi) { seed = 6364136223846793005ULL * seed + 1; return lo + (hi - lo) * (asdouble (seed >> 12 | 0x3ffULL << 52) - 1.0); } static void gen_rand (double lo, double hi) { for (int i = 0; i < N; i++) A[i] = frand (lo, hi); } static void genf_rand (double lo, double hi) { for (int i = 0; i < N; i++) Af[i] = (float)frand (lo, hi); } static void gen_trace (int index) { for (int i = 0; i < N; i++) A[i] = Trace[index + i]; } static void genf_trace (int index) { for (int i = 0; i < N; i++) Af[i] = (float)Trace[index + i]; } static void run_thruput (double f (double)) { for (int i = 0; i < N; i++) f (A[i]); } static void runf_thruput (float f (float)) { for (int i = 0; i < N; i++) f (Af[i]); } volatile double zero = 0; static void run_latency (double f (double)) { double z = zero; double prev = z; for (int i = 0; i < N; i++) prev = f (A[i] + prev * z); } static void runf_latency (float f (float)) { float z = (float)zero; float prev = z; for (int i = 0; i < N; i++) 
prev = f (Af[i] + prev * z); } -#ifdef __vpcs +#if __aarch64__ && __linux__ static void -run_vn_thruput (__vpcs v_double f (v_double)) +run_vn_thruput (__vpcs float64x2_t f (float64x2_t)) { - for (int i = 0; i < N; i += v_double_len ()) - f (v_double_load (A+i)); + for (int i = 0; i < N; i += 2) + f (vld1q_f64 (A + i)); } static void -runf_vn_thruput (__vpcs v_float f (v_float)) +runf_vn_thruput (__vpcs float32x4_t f (float32x4_t)) { - for (int i = 0; i < N; i += v_float_len ()) - f (v_float_load (Af+i)); + for (int i = 0; i < N; i += 4) + f (vld1q_f32 (Af + i)); } static void -run_vn_latency (__vpcs v_double f (v_double)) +run_vn_latency (__vpcs float64x2_t f (float64x2_t)) { volatile uint64x2_t vsel = (uint64x2_t) { 0, 0 }; uint64x2_t sel = vsel; - v_double prev = v_double_dup (0); - for (int i = 0; i < N; i += v_double_len ()) - prev = f (vbslq_f64 (sel, prev, v_double_load (A+i))); + float64x2_t prev = vdupq_n_f64 (0); + for (int i = 0; i < N; i += 2) + prev = f (vbslq_f64 (sel, prev, vld1q_f64 (A + i))); } static void -runf_vn_latency (__vpcs v_float f (v_float)) +runf_vn_latency (__vpcs float32x4_t f (float32x4_t)) { volatile uint32x4_t vsel = (uint32x4_t) { 0, 0, 0, 0 }; uint32x4_t sel = vsel; - v_float prev = v_float_dup (0); - for (int i = 0; i < N; i += v_float_len ()) - prev = f (vbslq_f32 (sel, prev, v_float_load (Af+i))); + float32x4_t prev = vdupq_n_f32 (0); + for (int i = 0; i < N; i += 4) + prev = f (vbslq_f32 (sel, prev, vld1q_f32 (Af + i))); } #endif -#if WANT_SVE_MATH +#if WANT_SVE_TESTS static void -run_sv_thruput (sv_double f (sv_double, sv_bool)) +run_sv_thruput (svfloat64_t f (svfloat64_t, svbool_t)) { - for (int i = 0; i < N; i += sv_double_len ()) - f (sv_double_load (A+i), svptrue_b64 ()); + for (int i = 0; i < N; i += svcntd ()) + f (svld1_f64 (svptrue_b64 (), A + i), svptrue_b64 ()); } static void -runf_sv_thruput (sv_float f (sv_float, sv_bool)) +runf_sv_thruput (svfloat32_t f (svfloat32_t, svbool_t)) { - for (int i = 0; i < N; i += sv_float_len ()) - f (sv_float_load (Af+i), svptrue_b32 ()); + for (int i = 0; i < N; i += svcntw ()) + f (svld1_f32 (svptrue_b32 (), Af + i), svptrue_b32 ()); } static void -run_sv_latency (sv_double f (sv_double, sv_bool)) +run_sv_latency (svfloat64_t f (svfloat64_t, svbool_t)) { - volatile sv_bool vsel = svptrue_b64 (); - sv_bool sel = vsel; - sv_double prev = sv_double_dup (0); - for (int i = 0; i < N; i += sv_double_len ()) - prev = f (svsel_f64 (sel, sv_double_load (A+i), prev), svptrue_b64 ()); + volatile svbool_t vsel = svptrue_b64 (); + svbool_t sel = vsel; + svfloat64_t prev = svdup_f64 (0); + for (int i = 0; i < N; i += svcntd ()) + prev = f (svsel_f64 (sel, svld1_f64 (svptrue_b64 (), A + i), prev), + svptrue_b64 ()); } static void -runf_sv_latency (sv_float f (sv_float, sv_bool)) +runf_sv_latency (svfloat32_t f (svfloat32_t, svbool_t)) { - volatile sv_bool vsel = svptrue_b32 (); - sv_bool sel = vsel; - sv_float prev = sv_float_dup (0); - for (int i = 0; i < N; i += sv_float_len ()) - prev = f (svsel_f32 (sel, sv_float_load (Af+i), prev), svptrue_b32 ()); + volatile svbool_t vsel = svptrue_b32 (); + svbool_t sel = vsel; + svfloat32_t prev = svdup_f32 (0); + for (int i = 0; i < N; i += svcntw ()) + prev = f (svsel_f32 (sel, svld1_f32 (svptrue_b32 (), Af + i), prev), + svptrue_b32 ()); } #endif static uint64_t tic (void) { struct timespec ts; +#if defined(_MSC_VER) + if (!timespec_get (&ts, TIME_UTC)) +#else if (clock_gettime (CLOCK_REALTIME, &ts)) +#endif abort (); return ts.tv_sec * 1000000000ULL + ts.tv_nsec; } #define 
TIMEIT(run, f) do { \ dt = -1; \ run (f); /* Warm up. */ \ for (int j = 0; j < measurecount; j++) \ { \ uint64_t t0 = tic (); \ for (int i = 0; i < itercount; i++) \ run (f); \ uint64_t t1 = tic (); \ if (t1 - t0 < dt) \ dt = t1 - t0; \ } \ } while (0) static void bench1 (const struct fun *f, int type, double lo, double hi) { uint64_t dt = 0; uint64_t ns100; const char *s = type == 't' ? "rthruput" : "latency"; int vlen = 1; if (f->vec == 'n') - vlen = f->prec == 'd' ? v_double_len() : v_float_len(); + vlen = f->prec == 'd' ? 2 : 4; +#if WANT_SVE_TESTS else if (f->vec == 's') - vlen = f->prec == 'd' ? sv_double_len() : sv_float_len(); + vlen = f->prec == 'd' ? svcntd () : svcntw (); +#endif if (f->prec == 'd' && type == 't' && f->vec == 0) TIMEIT (run_thruput, f->fun.d); else if (f->prec == 'd' && type == 'l' && f->vec == 0) TIMEIT (run_latency, f->fun.d); else if (f->prec == 'f' && type == 't' && f->vec == 0) TIMEIT (runf_thruput, f->fun.f); else if (f->prec == 'f' && type == 'l' && f->vec == 0) TIMEIT (runf_latency, f->fun.f); -#ifdef __vpcs +#if __aarch64__ && __linux__ else if (f->prec == 'd' && type == 't' && f->vec == 'n') TIMEIT (run_vn_thruput, f->fun.vnd); else if (f->prec == 'd' && type == 'l' && f->vec == 'n') TIMEIT (run_vn_latency, f->fun.vnd); else if (f->prec == 'f' && type == 't' && f->vec == 'n') TIMEIT (runf_vn_thruput, f->fun.vnf); else if (f->prec == 'f' && type == 'l' && f->vec == 'n') TIMEIT (runf_vn_latency, f->fun.vnf); #endif -#if WANT_SVE_MATH +#if WANT_SVE_TESTS else if (f->prec == 'd' && type == 't' && f->vec == 's') TIMEIT (run_sv_thruput, f->fun.svd); else if (f->prec == 'd' && type == 'l' && f->vec == 's') TIMEIT (run_sv_latency, f->fun.svd); else if (f->prec == 'f' && type == 't' && f->vec == 's') TIMEIT (runf_sv_thruput, f->fun.svf); else if (f->prec == 'f' && type == 'l' && f->vec == 's') TIMEIT (runf_sv_latency, f->fun.svf); #endif if (type == 't') { ns100 = (100 * dt + itercount * N / 2) / (itercount * N); printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g] vlen %d\n", f->name, s, (unsigned) (ns100 / 100), (unsigned) (ns100 % 100), (unsigned long long) dt, lo, hi, vlen); } else if (type == 'l') { ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen); printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g] vlen %d\n", f->name, s, (unsigned) (ns100 / 100), (unsigned) (ns100 % 100), (unsigned long long) dt, lo, hi, vlen); } fflush (stdout); } static void bench (const struct fun *f, double lo, double hi, int type, int gen) { if (f->prec == 'd' && gen == 'r') gen_rand (lo, hi); else if (f->prec == 'd' && gen == 'l') gen_linear (lo, hi); else if (f->prec == 'd' && gen == 't') gen_trace (0); else if (f->prec == 'f' && gen == 'r') genf_rand (lo, hi); else if (f->prec == 'f' && gen == 'l') genf_linear (lo, hi); else if (f->prec == 'f' && gen == 't') genf_trace (0); if (gen == 't') hi = trace_size / N; if (type == 'b' || type == 't') bench1 (f, 't', lo, hi); if (type == 'b' || type == 'l') bench1 (f, 'l', lo, hi); for (int i = N; i < trace_size; i += N) { if (f->prec == 'd') gen_trace (i); else genf_trace (i); lo = i / N; if (type == 'b' || type == 't') bench1 (f, 't', lo, hi); if (type == 'b' || type == 'l') bench1 (f, 'l', lo, hi); } } static void readtrace (const char *name) { int n = 0; FILE *f = strcmp (name, "-") == 0 ? 
stdin : fopen (name, "r"); if (!f) { printf ("openning \"%s\" failed: %m\n", name); exit (1); } for (;;) { if (n >= trace_size) { trace_size += N; Trace = realloc (Trace, trace_size * sizeof (Trace[0])); if (Trace == NULL) { printf ("out of memory\n"); exit (1); } } if (fscanf (f, "%lf", Trace + n) != 1) break; n++; } if (ferror (f) || n == 0) { printf ("reading \"%s\" failed: %m\n", name); exit (1); } fclose (f); if (n % N == 0) trace_size = n; for (int i = 0; n < trace_size; n++, i++) Trace[n] = Trace[i]; } static void usage (void) { printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] " "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func " "[func2 ..]\n"); printf ("func:\n"); printf ("%7s [run all benchmarks]\n", "all"); for (const struct fun *f = funtab; f->name; f++) printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi); exit (1); } int main (int argc, char *argv[]) { int usergen = 0, gen = 'r', type = 'b', all = 0; double lo = 0, hi = 0; const char *tracefile = "-"; argv++; argc--; for (;;) { if (argc <= 0) usage (); if (argv[0][0] != '-') break; else if (argc >= 3 && strcmp (argv[0], "-i") == 0) { usergen = 1; lo = strtod (argv[1], 0); hi = strtod (argv[2], 0); argv += 3; argc -= 3; } else if (argc >= 2 && strcmp (argv[0], "-m") == 0) { measurecount = strtol (argv[1], 0, 0); argv += 2; argc -= 2; } else if (argc >= 2 && strcmp (argv[0], "-c") == 0) { itercount = strtol (argv[1], 0, 0); argv += 2; argc -= 2; } else if (argc >= 2 && strcmp (argv[0], "-g") == 0) { gen = argv[1][0]; if (strchr ("rlt", gen) == 0) usage (); argv += 2; argc -= 2; } else if (argc >= 2 && strcmp (argv[0], "-f") == 0) { gen = 't'; /* -f implies -g trace. */ tracefile = argv[1]; argv += 2; argc -= 2; } else if (argc >= 2 && strcmp (argv[0], "-t") == 0) { type = argv[1][0]; if (strchr ("ltb", type) == 0) usage (); argv += 2; argc -= 2; } else usage (); } if (gen == 't') { readtrace (tracefile); lo = hi = 0; usergen = 1; } while (argc > 0) { int found = 0; all = strcmp (argv[0], "all") == 0; for (const struct fun *f = funtab; f->name; f++) if (all || strcmp (argv[0], f->name) == 0) { found = 1; if (!usergen) { lo = f->lo; hi = f->hi; } bench (f, lo, hi, type, gen); if (usergen && !all) break; } if (!found) printf ("unknown function: %s\n", argv[0]); argv++; argc--; } return 0; } + +#if __aarch64__ && __linux__ && WANT_SVE_TESTS && defined(__clang__) +# pragma clang attribute pop +#endif diff --git a/contrib/arm-optimized-routines/math/test/mathbench_funcs.h b/contrib/arm-optimized-routines/math/test/mathbench_funcs.h index 84c4e68650ac..261ab02f55c3 100644 --- a/contrib/arm-optimized-routines/math/test/mathbench_funcs.h +++ b/contrib/arm-optimized-routines/math/test/mathbench_funcs.h @@ -1,62 +1,121 @@ /* * Function entries for mathbench. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* clang-format off */ -D (exp, -9.9, 9.9) -D (exp, 0.5, 1.0) -D (exp10, -9.9, 9.9) -D (exp2, -9.9, 9.9) -D (log, 0.01, 11.1) -D (log, 0.999, 1.001) -D (log2, 0.01, 11.1) -D (log2, 0.999, 1.001) {"pow", 'd', 0, 0.01, 11.1, {.d = xypow}}, D (xpow, 0.01, 11.1) D (ypow, -9.9, 9.9) -D (erf, -6.0, 6.0) - -F (expf, -9.9, 9.9) -F (exp2f, -9.9, 9.9) -F (logf, 0.01, 11.1) -F (log2f, 0.01, 11.1) {"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}}, F (xpowf, 0.01, 11.1) F (ypowf, -9.9, 9.9) {"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}}, {"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}}, {"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}}, {"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}}, {"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}}, {"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}}, -F (sinf, 0.1, 0.7) -F (sinf, 0.8, 3.1) -F (sinf, -3.1, 3.1) -F (sinf, 3.3, 33.3) -F (sinf, 100, 1000) -F (sinf, 1e6, 1e32) -F (cosf, 0.1, 0.7) -F (cosf, 0.8, 3.1) -F (cosf, -3.1, 3.1) -F (cosf, 3.3, 33.3) -F (cosf, 100, 1000) -F (cosf, 1e6, 1e32) -F (erff, -4.0, 4.0) -#ifdef __vpcs -VND (_ZGVnN2v_exp, -9.9, 9.9) -VND (_ZGVnN2v_log, 0.01, 11.1) -{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}}, -VND (_ZGVnN2v_sin, -3.1, 3.1) -VND (_ZGVnN2v_cos, -3.1, 3.1) -VNF (_ZGVnN4v_expf, -9.9, 9.9) +#if WANT_TRIGPI_TESTS +F (arm_math_cospif, -0.9, 0.9) +D (arm_math_cospi, -0.9, 0.9) +F (arm_math_sinpif, -0.9, 0.9) +D (arm_math_sinpi, -0.9, 0.9) +F (arm_math_tanpif, -0.9, 0.9) +D (arm_math_tanpi, -0.9, 0.9) +{"sincospif", 'f', 0, -0.9, 0.9, {.f = sincospif_wrap}}, +{"sincospi", 'd', 0, -0.9, 0.9, {.d = sincospi_wrap}}, +#endif +#if WANT_EXPERIMENTAL_MATH +D (arm_math_erf, -6.0, 6.0) +F (arm_math_erff, -4.0, 4.0) +{"atan2f", 'f', 0, -10.0, 10.0, {.f = atan2f_wrap}}, +{"atan2", 'd', 0, -10.0, 10.0, {.d = atan2_wrap}}, +{"powi", 'd', 0, 0.01, 11.1, {.d = powi_wrap}}, +#endif +#if __aarch64__ && __linux__ +{"_ZGVnN4vv_atan2f", 'f', 'n', -10.0, 10.0, {.vnf = _Z_atan2f_wrap}}, +{"_ZGVnN2vv_atan2", 'd', 'n', -10.0, 10.0, {.vnd = _Z_atan2_wrap}}, +{"_ZGVnN4vv_hypotf", 'f', 'n', -10.0, 10.0, {.vnf = _Z_hypotf_wrap}}, +{"_ZGVnN2vv_hypot", 'd', 'n', -10.0, 10.0, {.vnd = _Z_hypot_wrap}}, +{"_ZGVnN2vv_pow", 'd', 'n', -10.0, 10.0, {.vnd = xy_Z_pow}}, +{"x_ZGVnN2vv_pow", 'd', 'n', -10.0, 10.0, {.vnd = x_Z_pow}}, +{"y_ZGVnN2vv_pow", 'd', 'n', -10.0, 10.0, {.vnd = y_Z_pow}}, +{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}}, +{"x_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = x_Z_powf}}, +{"y_ZGVnN4vv_powf", 'f', 'n', -10.0, 10.0, {.vnf = y_Z_powf}}, +{"_ZGVnN4vl4_modff", 'f', 'n', -10.0, 10.0, {.vnf = _Z_modff_wrap}}, +{"_ZGVnN2vl8_modf", 'd', 'n', -10.0, 10.0, {.vnd = _Z_modf_wrap}}, +{"_ZGVnN4vl4l4_sincosf", 'f', 'n', -3.1, 3.1, {.vnf = _Z_sincosf_wrap}}, +{"_ZGVnN2vl8l8_sincos", 'd', 'n', -3.1, 3.1, {.vnd = _Z_sincos_wrap}}, +{"_ZGVnN4v_cexpif", 'f', 'n', -3.1, 3.1, {.vnf = _Z_cexpif_wrap}}, +{"_ZGVnN2v_cexpi", 'd', 'n', -3.1, 3.1, {.vnd = _Z_cexpi_wrap}}, VNF (_ZGVnN4v_expf_1u, -9.9, 9.9) -VNF (_ZGVnN4v_exp2f, -9.9, 9.9) VNF (_ZGVnN4v_exp2f_1u, -9.9, 9.9) -VNF (_ZGVnN4v_logf, 0.01, 11.1) -{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}}, -VNF (_ZGVnN4v_sinf, -3.1, 3.1) -VNF (_ZGVnN4v_cosf, -3.1, 3.1) +# if WANT_TRIGPI_TESTS +VNF (_ZGVnN4v_cospif, -0.9, 0.9) +VND (_ZGVnN2v_cospi, -0.9, 0.9) +VNF (_ZGVnN4v_sinpif, -0.9, 0.9) +VND (_ZGVnN2v_sinpi, -0.9, 0.9) +VNF (_ZGVnN4v_tanpif, -0.9, 0.9) +VND (_ZGVnN2v_tanpi, -0.9, 0.9) 
+{"_ZGVnN4vl4l4_sincospif", 'f', 'n', -0.9, 0.9, {.vnf = _Z_sincospif_wrap}}, +{"_ZGVnN2vl8l8_sincospi", 'd', 'n', -0.9, 0.9, {.vnd = _Z_sincospi_wrap}}, +# endif +#endif + +#if WANT_SVE_TESTS +{ "_ZGVsMxvv_atan2f", 'f', 's', -10.0, 10.0, { .svf = _Z_sv_atan2f_wrap } }, +{ "_ZGVsMxvv_atan2", 'd', 's', -10.0, 10.0, { .svd = _Z_sv_atan2_wrap } }, +{ "_ZGVsMxvv_hypotf", 'f', 's', -10.0, 10.0, { .svf = _Z_sv_hypotf_wrap } }, +{ "_ZGVsMxvv_hypot", 'd', 's', -10.0, 10.0, { .svd = _Z_sv_hypot_wrap } }, +{"_ZGVsMxvv_powf", 'f', 's', -10.0, 10.0, {.svf = xy_Z_sv_powf}}, +{"x_ZGVsMxvv_powf", 'f', 's', -10.0, 10.0, {.svf = x_Z_sv_powf}}, +{"y_ZGVsMxvv_powf", 'f', 's', -10.0, 10.0, {.svf = y_Z_sv_powf}}, +{"_ZGVsMxvv_pow", 'd', 's', -10.0, 10.0, {.svd = xy_Z_sv_pow}}, +{"x_ZGVsMxvv_pow", 'd', 's', -10.0, 10.0, {.svd = x_Z_sv_pow}}, +{"y_ZGVsMxvv_pow", 'd', 's', -10.0, 10.0, {.svd = y_Z_sv_pow}}, +{"_ZGVsMxvl4_modff", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_modff_wrap}}, +{"_ZGVsMxvl8_modf", 'd', 's', -10.0, 10.0, {.svd = _Z_sv_modf_wrap}}, +{"_ZGVsMxvl4l4_sincosf", 'f', 's', -3.1, 3.1, {.svf = _Z_sv_sincosf_wrap}}, +{"_ZGVsMxvl8l8_sincos", 'd', 's', -3.1, 3.1, {.svd = _Z_sv_sincos_wrap}}, +{"_ZGVsMxv_cexpif", 'f', 's', -3.1, 3.1, {.svf = _Z_sv_cexpif_wrap}}, +{"_ZGVsMxv_cexpi", 'd', 's', -3.1, 3.1, {.svd = _Z_sv_cexpi_wrap}}, +# if WANT_TRIGPI_TESTS +SVF (_ZGVsMxv_cospif, -0.9, 0.9) +SVD (_ZGVsMxv_cospi, -0.9, 0.9) +SVF (_ZGVsMxv_sinpif, -0.9, 0.9) +SVD (_ZGVsMxv_sinpi, -0.9, 0.9) +SVF (_ZGVsMxv_tanpif, -0.9, 0.9) +SVD (_ZGVsMxv_tanpi, -0.9, 0.9) +{"_ZGVsMxvl4l4_sincospif", 'f', 's', -0.9, 0.9, {.svf = _Z_sv_sincospif_wrap}}, +{"_ZGVsMxvl8l8_sincospi", 'd', 's', -0.9, 0.9, {.svd = _Z_sv_sincospi_wrap}}, +# endif +# if WANT_EXPERIMENTAL_MATH +{"_ZGVsMxvv_powi", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_powi_wrap}}, +{"_ZGVsMxvv_powk", 'd', 's', -10.0, 10.0, {.svd = _Z_sv_powk_wrap}}, +# endif #endif - /* clang-format on */ + /* clang-format on */ + +#define _ZSF1(fun, a, b) F (fun##f, a, b) +#define _ZSD1(f, a, b) D (f, a, b) + +#define _ZVF1(fun, a, b) VNF (_ZGVnN4v_##fun##f, a, b) +#define _ZVD1(f, a, b) VND (_ZGVnN2v_##f, a, b) + +#define _ZSVF1(fun, a, b) SVF (_ZGVsMxv_##fun##f, a, b) +#define _ZSVD1(f, a, b) SVD (_ZGVsMxv_##f, a, b) + +/* No auto-generated wrappers for binary functions - they have be + manually defined in mathbench_wrappers.h. We have to define silent + macros for them anyway as they will be emitted by TEST_SIG. */ +#define _ZSF2(...) +#define _ZSD2(...) +#define _ZVF2(...) +#define _ZVD2(...) +#define _ZSVF2(...) +#define _ZSVD2(...) + +#include "test/mathbench_funcs_gen.h" diff --git a/contrib/arm-optimized-routines/math/test/mathbench_wrappers.h b/contrib/arm-optimized-routines/math/test/mathbench_wrappers.h index 062b9db56de5..32dcee36530a 100644 --- a/contrib/arm-optimized-routines/math/test/mathbench_wrappers.h +++ b/contrib/arm-optimized-routines/math/test/mathbench_wrappers.h @@ -1,66 +1,356 @@ /* * Function wrappers for mathbench. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#ifdef __vpcs +#if WANT_EXPERIMENTAL_MATH +static double +atan2_wrap (double x) +{ + return atan2 (5.0, x); +} + +static float +atan2f_wrap (float x) +{ + return atan2f (5.0f, x); +} + +static double +powi_wrap (double x) +{ + return __builtin_powi (x, (int) round (x)); +} +#endif /* WANT_EXPERIMENTAL_MATH. 
*/ + +#if __aarch64__ && __linux__ + +__vpcs static float32x4_t +_Z_sincospif_wrap (float32x4_t x) +{ + float s[4], c[4]; + _ZGVnN4vl4l4_sincospif (x, s, c); + return vld1q_f32 (s) + vld1q_f32 (c); +} + +__vpcs static float64x2_t +_Z_sincospi_wrap (float64x2_t x) +{ + double s[2], c[2]; + _ZGVnN2vl8l8_sincospi (x, s, c); + return vld1q_f64 (s) + vld1q_f64 (c); +} -__vpcs static v_float -xy_Z_powf (v_float x) +__vpcs static float64x2_t +_Z_atan2_wrap (float64x2_t x) +{ + return _ZGVnN2vv_atan2 (vdupq_n_f64 (5.0), x); +} + +__vpcs static float32x4_t +_Z_atan2f_wrap (float32x4_t x) +{ + return _ZGVnN4vv_atan2f (vdupq_n_f32 (5.0f), x); +} + +__vpcs static float32x4_t +_Z_hypotf_wrap (float32x4_t x) +{ + return _ZGVnN4vv_hypotf (vdupq_n_f32 (5.0f), x); +} + +__vpcs static float64x2_t +_Z_hypot_wrap (float64x2_t x) +{ + return _ZGVnN2vv_hypot (vdupq_n_f64 (5.0), x); +} + +__vpcs static float32x4_t +xy_Z_powf (float32x4_t x) { return _ZGVnN4vv_powf (x, x); } -__vpcs static v_double -xy_Z_pow (v_double x) +__vpcs static float32x4_t +x_Z_powf (float32x4_t x) +{ + return _ZGVnN4vv_powf (x, vdupq_n_f32 (23.4)); +} + +__vpcs static float32x4_t +y_Z_powf (float32x4_t x) +{ + return _ZGVnN4vv_powf (vdupq_n_f32 (2.34), x); +} + +__vpcs static float64x2_t +xy_Z_pow (float64x2_t x) { return _ZGVnN2vv_pow (x, x); } +__vpcs static float64x2_t +x_Z_pow (float64x2_t x) +{ + return _ZGVnN2vv_pow (x, vdupq_n_f64 (23.4)); +} + +__vpcs static float64x2_t +y_Z_pow (float64x2_t x) +{ + return _ZGVnN2vv_pow (vdupq_n_f64 (2.34), x); +} + +__vpcs static float32x4_t +_Z_modff_wrap (float32x4_t x) +{ + float y[4]; + float32x4_t ret = _ZGVnN4vl4_modff (x, y); + return ret + vld1q_f32 (y); +} + +__vpcs static float64x2_t +_Z_modf_wrap (float64x2_t x) +{ + double y[2]; + float64x2_t ret = _ZGVnN2vl8_modf (x, y); + return ret + vld1q_f64 (y); +} + +__vpcs static float32x4_t +_Z_sincosf_wrap (float32x4_t x) +{ + float s[4], c[4]; + _ZGVnN4vl4l4_sincosf (x, s, c); + return vld1q_f32 (s) + vld1q_f32 (c); +} + +__vpcs static float32x4_t +_Z_cexpif_wrap (float32x4_t x) +{ + float32x4x2_t sc = _ZGVnN4v_cexpif (x); + return sc.val[0] + sc.val[1]; +} + +__vpcs static float64x2_t +_Z_sincos_wrap (float64x2_t x) +{ + double s[2], c[2]; + _ZGVnN2vl8l8_sincos (x, s, c); + return vld1q_f64 (s) + vld1q_f64 (c); +} + +__vpcs static float64x2_t +_Z_cexpi_wrap (float64x2_t x) +{ + float64x2x2_t sc = _ZGVnN2v_cexpi (x); + return sc.val[0] + sc.val[1]; +} + +#endif + +#if WANT_SVE_TESTS + +static svfloat32_t +_Z_sv_atan2f_wrap (svfloat32_t x, svbool_t pg) +{ + return _ZGVsMxvv_atan2f (x, svdup_f32 (5.0f), pg); +} + +static svfloat64_t +_Z_sv_atan2_wrap (svfloat64_t x, svbool_t pg) +{ + return _ZGVsMxvv_atan2 (x, svdup_f64 (5.0), pg); +} + +static svfloat32_t +_Z_sv_hypotf_wrap (svfloat32_t x, svbool_t pg) +{ + return _ZGVsMxvv_hypotf (x, svdup_f32 (5.0), pg); +} + +static svfloat64_t +_Z_sv_hypot_wrap (svfloat64_t x, svbool_t pg) +{ + return _ZGVsMxvv_hypot (x, svdup_f64 (5.0), pg); +} + +static svfloat32_t +xy_Z_sv_powf (svfloat32_t x, svbool_t pg) +{ + return _ZGVsMxvv_powf (x, x, pg); +} + +static svfloat32_t +x_Z_sv_powf (svfloat32_t x, svbool_t pg) +{ + return _ZGVsMxvv_powf (x, svdup_f32 (23.4f), pg); +} + +static svfloat32_t +y_Z_sv_powf (svfloat32_t x, svbool_t pg) +{ + return _ZGVsMxvv_powf (svdup_f32 (2.34f), x, pg); +} + +static svfloat64_t +xy_Z_sv_pow (svfloat64_t x, svbool_t pg) +{ + return _ZGVsMxvv_pow (x, x, pg); +} + +static svfloat64_t +x_Z_sv_pow (svfloat64_t x, svbool_t pg) +{ + return _ZGVsMxvv_pow (x, svdup_f64 (23.4), 
pg); +} + +static svfloat64_t +y_Z_sv_pow (svfloat64_t x, svbool_t pg) +{ + return _ZGVsMxvv_pow (svdup_f64 (2.34), x, pg); +} + +static svfloat32_t +_Z_sv_sincospif_wrap (svfloat32_t x, svbool_t pg) +{ + float s[svcntw ()], c[svcntw ()]; + _ZGVsMxvl4l4_sincospif (x, s, c, pg); + return svadd_x (pg, svld1 (pg, s), svld1 (pg, c)); +} + +static svfloat64_t +_Z_sv_sincospi_wrap (svfloat64_t x, svbool_t pg) +{ + double s[svcntd ()], c[svcntd ()]; + _ZGVsMxvl8l8_sincospi (x, s, c, pg); + return svadd_x (pg, svld1 (pg, s), svld1 (pg, c)); +} + +static svfloat32_t +_Z_sv_modff_wrap (svfloat32_t x, svbool_t pg) +{ + float i[svcntw ()]; + svfloat32_t r = _ZGVsMxvl4_modff (x, i, pg); + return svadd_x (pg, r, svld1 (pg, i)); +} + +static svfloat64_t +_Z_sv_modf_wrap (svfloat64_t x, svbool_t pg) +{ + double i[svcntd ()]; + svfloat64_t r = _ZGVsMxvl8_modf (x, i, pg); + return svadd_x (pg, r, svld1 (pg, i)); +} + +static svfloat32_t +_Z_sv_sincosf_wrap (svfloat32_t x, svbool_t pg) +{ + float s[svcntw ()], c[svcntw ()]; + _ZGVsMxvl4l4_sincosf (x, s, c, pg); + return svadd_x (pg, svld1 (pg, s), svld1 (pg, s)); +} + +static svfloat32_t +_Z_sv_cexpif_wrap (svfloat32_t x, svbool_t pg) +{ + svfloat32x2_t sc = _ZGVsMxv_cexpif (x, pg); + return svadd_x (pg, svget2 (sc, 0), svget2 (sc, 1)); +} + +static svfloat64_t +_Z_sv_sincos_wrap (svfloat64_t x, svbool_t pg) +{ + double s[svcntd ()], c[svcntd ()]; + _ZGVsMxvl8l8_sincos (x, s, c, pg); + return svadd_x (pg, svld1 (pg, s), svld1 (pg, s)); +} + +static svfloat64_t +_Z_sv_cexpi_wrap (svfloat64_t x, svbool_t pg) +{ + svfloat64x2_t sc = _ZGVsMxv_cexpi (x, pg); + return svadd_x (pg, svget2 (sc, 0), svget2 (sc, 1)); +} + +# if WANT_EXPERIMENTAL_MATH + +static svfloat32_t +_Z_sv_powi_wrap (svfloat32_t x, svbool_t pg) +{ + return _ZGVsMxvv_powi (x, svcvt_s32_f32_x (pg, x), pg); +} + +static svfloat64_t +_Z_sv_powk_wrap (svfloat64_t x, svbool_t pg) +{ + return _ZGVsMxvv_powk (x, svcvt_s64_f64_x (pg, x), pg); +} + +# endif + +#endif + +#if __aarch64__ +static float +sincospif_wrap (float x) +{ + float s, c; + arm_math_sincospif (x, &s, &c); + return s + c; +} + +static double +sincospi_wrap (double x) +{ + double s, c; + arm_math_sincospi (x, &s, &c); + return s + c; +} #endif static double xypow (double x) { return pow (x, x); } static float xypowf (float x) { return powf (x, x); } static double xpow (double x) { return pow (x, 23.4); } static float xpowf (float x) { return powf (x, 23.4f); } static double ypow (double x) { return pow (2.34, x); } static float ypowf (float x) { return powf (2.34f, x); } static float sincosf_wrap (float x) { float s, c; sincosf (x, &s, &c); return s + c; } diff --git a/contrib/arm-optimized-routines/math/test/mathtest.c b/contrib/arm-optimized-routines/math/test/mathtest.c index 834233fdde9d..6e81f0d7b634 100644 --- a/contrib/arm-optimized-routines/math/test/mathtest.c +++ b/contrib/arm-optimized-routines/math/test/mathtest.c @@ -1,1709 +1,1711 @@ /* * mathtest.c - test rig for mathlib * - * Copyright (c) 1998-2023, Arm Limited. + * Copyright (c) 1998-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +/* clang-format off */ +#define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include "mathlib.h" #ifndef math_errhandling # define math_errhandling 0 #endif #ifdef __cplusplus #define EXTERN_C extern "C" #else #define EXTERN_C extern #endif #ifndef TRUE #define TRUE 1 #endif #ifndef FALSE #define FALSE 0 #endif #ifdef IMPORT_SYMBOL #define STR2(x) #x #define STR(x) STR2(x) _Pragma(STR(import IMPORT_SYMBOL)) #endif int dmsd, dlsd; int quiet = 0; int doround = 0; unsigned statusmask = FE_ALL_EXCEPT; #define EXTRABITS (12) #define ULPUNIT (1<name, ((test_func*)b)->name); } int is_double_argtype(int argtype) { switch(argtype) { case at_d: case at_d2: case at_dc: case at_dc2: return 1; default: return 0; } } int is_single_argtype(int argtype) { switch(argtype) { case at_s: case at_s2: case at_sc: case at_sc2: return 1; default: return 0; } } int is_double_rettype(int rettype) { switch(rettype) { case rt_d: case rt_dc: case rt_d2: return 1; default: return 0; } } int is_single_rettype(int rettype) { switch(rettype) { case rt_s: case rt_sc: case rt_s2: return 1; default: return 0; } } int is_complex_argtype(int argtype) { switch(argtype) { case at_dc: case at_sc: case at_dc2: case at_sc2: return 1; default: return 0; } } int is_complex_rettype(int rettype) { switch(rettype) { case rt_dc: case rt_sc: return 1; default: return 0; } } /* * Special-case flags indicating that some functions' error * tolerance handling is more complicated than a fixed relative * error bound. */ #define ABSLOWERBOUND 0x4000000000000000LL #define PLUSMINUSPIO2 0x1000000000000000LL #define ARM_PREFIX(x) x #define TFUNC(arg,ret,name,tolerance) { t_func, arg, ret, (void*)&name, m_none, tolerance, #name } #define TFUNCARM(arg,ret,name,tolerance) { t_func, arg, ret, (void*)& ARM_PREFIX(name), m_none, tolerance, #name } #define MFUNC(arg,ret,name,tolerance) { t_macro, arg, ret, NULL, m_##name, tolerance, #name } -#ifndef PL /* sincosf wrappers for easier testing. 
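The tolerances in the function table below are fixed-point ulp counts: assuming ULPUNIT is 1 << EXTRABITS (i.e. 4096, consistent with the strict-mode `ulptolerance = 4096; /* one ulp */` later in this file), an entry such as 3*ULPUNIT/4 means 0.75 ulp. A self-contained illustration:

```c
#include <stdio.h>

#define EXTRABITS 12
#define ULPUNIT (1 << EXTRABITS)	/* 4096 fixed-point steps per ulp */

int
main (void)
{
  long long sinf_tol = 3 * ULPUNIT / 4;	/* table entry for sinf */
  /* Prints: sinf tolerance = 3072 fixed = 0.750 ulp  */
  printf ("sinf tolerance = %lld fixed = %.3f ulp\n", sinf_tol,
	  (double) sinf_tol / ULPUNIT);
  return 0;
}
```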
*/ static float sincosf_sinf(float x) { float s,c; sincosf(x, &s, &c); return s; } static float sincosf_cosf(float x) { float s,c; sincosf(x, &s, &c); return c; } -#endif test_func tfuncs[] = { /* trigonometric */ TFUNC(at_d,rt_d, acos, 4*ULPUNIT), TFUNC(at_d,rt_d, asin, 4*ULPUNIT), TFUNC(at_d,rt_d, atan, 4*ULPUNIT), TFUNC(at_d2,rt_d, atan2, 4*ULPUNIT), TFUNC(at_d,rt_d, tan, 2*ULPUNIT), TFUNC(at_d,rt_d, sin, 2*ULPUNIT), TFUNC(at_d,rt_d, cos, 2*ULPUNIT), TFUNC(at_s,rt_s, acosf, 4*ULPUNIT), TFUNC(at_s,rt_s, asinf, 4*ULPUNIT), TFUNC(at_s,rt_s, atanf, 4*ULPUNIT), TFUNC(at_s2,rt_s, atan2f, 4*ULPUNIT), TFUNCARM(at_s,rt_s, tanf, 4*ULPUNIT), TFUNCARM(at_s,rt_s, sinf, 3*ULPUNIT/4), TFUNCARM(at_s,rt_s, cosf, 3*ULPUNIT/4), -#ifndef PL TFUNCARM(at_s,rt_s, sincosf_sinf, 3*ULPUNIT/4), TFUNCARM(at_s,rt_s, sincosf_cosf, 3*ULPUNIT/4), -#endif + /* hyperbolic */ TFUNC(at_d, rt_d, atanh, 4*ULPUNIT), TFUNC(at_d, rt_d, asinh, 4*ULPUNIT), TFUNC(at_d, rt_d, acosh, 4*ULPUNIT), TFUNC(at_d,rt_d, tanh, 4*ULPUNIT), TFUNC(at_d,rt_d, sinh, 4*ULPUNIT), TFUNC(at_d,rt_d, cosh, 4*ULPUNIT), TFUNC(at_s, rt_s, atanhf, 4*ULPUNIT), TFUNC(at_s, rt_s, asinhf, 4*ULPUNIT), TFUNC(at_s, rt_s, acoshf, 4*ULPUNIT), TFUNC(at_s,rt_s, tanhf, 4*ULPUNIT), TFUNC(at_s,rt_s, sinhf, 4*ULPUNIT), TFUNC(at_s,rt_s, coshf, 4*ULPUNIT), /* exponential and logarithmic */ TFUNC(at_d,rt_d, log, 3*ULPUNIT/4), TFUNC(at_d,rt_d, log10, 3*ULPUNIT), TFUNC(at_d,rt_d, log2, 3*ULPUNIT/4), TFUNC(at_d,rt_d, log1p, 2*ULPUNIT), TFUNC(at_d,rt_d, exp, 3*ULPUNIT/4), TFUNC(at_d,rt_d, exp2, 3*ULPUNIT/4), TFUNC(at_d,rt_d, expm1, ULPUNIT), TFUNCARM(at_s,rt_s, logf, ULPUNIT), TFUNC(at_s,rt_s, log10f, 3*ULPUNIT), TFUNCARM(at_s,rt_s, log2f, ULPUNIT), TFUNC(at_s,rt_s, log1pf, 2*ULPUNIT), TFUNCARM(at_s,rt_s, expf, 3*ULPUNIT/4), TFUNCARM(at_s,rt_s, exp2f, 3*ULPUNIT/4), TFUNC(at_s,rt_s, expm1f, ULPUNIT), +#if WANT_EXP10_TESTS TFUNC(at_d,rt_d, exp10, ULPUNIT), +#endif /* power */ TFUNC(at_d2,rt_d, pow, 3*ULPUNIT/4), TFUNC(at_d,rt_d, sqrt, ULPUNIT/2), TFUNC(at_d,rt_d, cbrt, 2*ULPUNIT), TFUNC(at_d2, rt_d, hypot, 4*ULPUNIT), TFUNCARM(at_s2,rt_s, powf, ULPUNIT), TFUNC(at_s,rt_s, sqrtf, ULPUNIT/2), TFUNC(at_s,rt_s, cbrtf, 2*ULPUNIT), TFUNC(at_s2, rt_s, hypotf, 4*ULPUNIT), /* error function */ TFUNC(at_d,rt_d, erf, 16*ULPUNIT), TFUNC(at_s,rt_s, erff, 16*ULPUNIT), TFUNC(at_d,rt_d, erfc, 16*ULPUNIT), TFUNC(at_s,rt_s, erfcf, 16*ULPUNIT), /* gamma functions */ TFUNC(at_d,rt_d, tgamma, 16*ULPUNIT), TFUNC(at_s,rt_s, tgammaf, 16*ULPUNIT), TFUNC(at_d,rt_d, lgamma, 16*ULPUNIT | ABSLOWERBOUND), TFUNC(at_s,rt_s, lgammaf, 16*ULPUNIT | ABSLOWERBOUND), TFUNC(at_d,rt_d, ceil, 0), TFUNC(at_s,rt_s, ceilf, 0), TFUNC(at_d2,rt_d, copysign, 0), TFUNC(at_s2,rt_s, copysignf, 0), TFUNC(at_d,rt_d, floor, 0), TFUNC(at_s,rt_s, floorf, 0), TFUNC(at_d2,rt_d, fmax, 0), TFUNC(at_s2,rt_s, fmaxf, 0), TFUNC(at_d2,rt_d, fmin, 0), TFUNC(at_s2,rt_s, fminf, 0), TFUNC(at_d2,rt_d, fmod, 0), TFUNC(at_s2,rt_s, fmodf, 0), MFUNC(at_d, rt_i, fpclassify, 0), MFUNC(at_s, rt_i, fpclassifyf, 0), TFUNC(at_dip,rt_d, frexp, 0), TFUNC(at_sip,rt_s, frexpf, 0), MFUNC(at_d, rt_i, isfinite, 0), MFUNC(at_s, rt_i, isfinitef, 0), MFUNC(at_d, rt_i, isgreater, 0), MFUNC(at_d, rt_i, isgreaterequal, 0), MFUNC(at_s, rt_i, isgreaterequalf, 0), MFUNC(at_s, rt_i, isgreaterf, 0), MFUNC(at_d, rt_i, isinf, 0), MFUNC(at_s, rt_i, isinff, 0), MFUNC(at_d, rt_i, isless, 0), MFUNC(at_d, rt_i, islessequal, 0), MFUNC(at_s, rt_i, islessequalf, 0), MFUNC(at_s, rt_i, islessf, 0), MFUNC(at_d, rt_i, islessgreater, 0), MFUNC(at_s, rt_i, islessgreaterf, 0), MFUNC(at_d, 
rt_i, isnan, 0), MFUNC(at_s, rt_i, isnanf, 0), MFUNC(at_d, rt_i, isnormal, 0), MFUNC(at_s, rt_i, isnormalf, 0), MFUNC(at_d, rt_i, isunordered, 0), MFUNC(at_s, rt_i, isunorderedf, 0), TFUNC(at_di,rt_d, ldexp, 0), TFUNC(at_si,rt_s, ldexpf, 0), TFUNC(at_ddp,rt_d2, modf, 0), TFUNC(at_ssp,rt_s2, modff, 0), #ifndef BIGRANGERED MFUNC(at_d, rt_d, rred, 2*ULPUNIT), #else MFUNC(at_d, rt_d, m_rred, ULPUNIT), #endif MFUNC(at_d, rt_i, signbit, 0), MFUNC(at_s, rt_i, signbitf, 0), }; /* * keywords are: func size op1 op2 result res2 errno op1r op1i op2r op2i resultr resulti * also we ignore: wrongresult wrongres2 wrongerrno * op1 equivalent to op1r, same with op2 and result */ typedef struct { test_func *func; unsigned op1r[2]; /* real part, also used for non-complex numbers */ unsigned op1i[2]; /* imaginary part */ unsigned op2r[2]; unsigned op2i[2]; unsigned resultr[3]; unsigned resulti[3]; enum { rc_none, rc_zero, rc_infinity, rc_nan, rc_finite } resultc; /* special complex results, rc_none means use resultr and resulti as normal */ unsigned res2[2]; unsigned status; /* IEEE status return, if any */ unsigned maybestatus; /* for optional status, or allowance for spurious */ int nresult; /* number of result words */ int in_err, in_err_limit; int err; int maybeerr; int valid; int comment; int random; } testdetail; enum { /* keywords */ k_errno, k_errno_in, k_error, k_func, k_maybeerror, k_maybestatus, k_op1, k_op1i, k_op1r, k_op2, k_op2i, k_op2r, k_random, k_res2, k_result, k_resultc, k_resulti, k_resultr, k_status, k_wrongres2, k_wrongresult, k_wrongstatus, k_wrongerrno }; char *keywords[] = { "errno", "errno_in", "error", "func", "maybeerror", "maybestatus", "op1", "op1i", "op1r", "op2", "op2i", "op2r", "random", "res2", "result", "resultc", "resulti", "resultr", "status", "wrongres2", "wrongresult", "wrongstatus", "wrongerrno" }; enum { e_0, e_EDOM, e_ERANGE, /* * This enum makes sure that we have the right number of errnos in the * errno[] array */ e_number_of_errnos }; char *errnos[] = { "0", "EDOM", "ERANGE" }; enum { e_none, e_divbyzero, e_domain, e_overflow, e_underflow }; char *errors[] = { "0", "divbyzero", "domain", "overflow", "underflow" }; static int verbose, fo, strict; /* state toggled by random=on / random=off */ static int randomstate; /* Canonify a double NaN: SNaNs all become 7FF00000.00000001 and QNaNs * all become 7FF80000.00000001 */ void canon_dNaN(unsigned a[2]) { if ((a[0] & 0x7FF00000) != 0x7FF00000) return; /* not Inf or NaN */ if (!(a[0] & 0xFFFFF) && !a[1]) return; /* Inf */ a[0] &= 0x7FF80000; /* canonify top word */ a[1] = 0x00000001; /* canonify bottom word */ } /* Canonify a single NaN: SNaNs all become 7F800001 and QNaNs * all become 7FC00001. Returns classification of the NaN. */ void canon_sNaN(unsigned a[1]) { if ((a[0] & 0x7F800000) != 0x7F800000) return; /* not Inf or NaN */ if (!(a[0] & 0x7FFFFF)) return; /* Inf */ a[0] &= 0x7FC00000; /* canonify most bits */ a[0] |= 0x00000001; /* canonify bottom bit */ } /* * Detect difficult operands for FO mode. */ int is_dhard(unsigned a[2]) { if ((a[0] & 0x7FF00000) == 0x7FF00000) return TRUE; /* inf or NaN */ if ((a[0] & 0x7FF00000) == 0 && ((a[0] & 0x7FFFFFFF) | a[1]) != 0) return TRUE; /* denormal */ return FALSE; } int is_shard(unsigned a[1]) { if ((a[0] & 0x7F800000) == 0x7F800000) return TRUE; /* inf or NaN */ if ((a[0] & 0x7F800000) == 0 && (a[0] & 0x7FFFFFFF) != 0) return TRUE; /* denormal */ return FALSE; } /* * Normalise all zeroes into +0, for FO mode. 
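canon_dNaN above works on the two 32-bit halves of a double; the same transformation on a single 64-bit view may make the intent clearer (a restatement, not library code):

```c
#include <stdint.h>

static uint64_t
canon_nan64 (uint64_t u)
{
  if ((u & 0x7FF0000000000000ULL) != 0x7FF0000000000000ULL)
    return u;				  /* not Inf or NaN */
  if ((u & 0x000FFFFFFFFFFFFFULL) == 0)
    return u;				  /* Inf */
  return (u & 0x7FF8000000000000ULL) | 1; /* SNaN -> 7FF00000.00000001,
					     QNaN -> 7FF80000.00000001 */
}
```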
*/ void dnormzero(unsigned a[2]) { if (a[0] == 0x80000000 && a[1] == 0) a[0] = 0; } void snormzero(unsigned a[1]) { if (a[0] == 0x80000000) a[0] = 0; } static int find(char *word, char **array, int asize) { int i, j; asize /= sizeof(char *); i = -1; j = asize; /* strictly between i and j */ while (j-i > 1) { int k = (i+j) / 2; int c = strcmp(word, array[k]); if (c > 0) i = k; else if (c < 0) j = k; else /* found it! */ return k; } return -1; /* not found */ } static test_func* find_testfunc(char *word) { int i, j, asize; asize = sizeof(tfuncs)/sizeof(test_func); i = -1; j = asize; /* strictly between i and j */ while (j-i > 1) { int k = (i+j) / 2; int c = strcmp(word, tfuncs[k].name); if (c > 0) i = k; else if (c < 0) j = k; else /* found it! */ return tfuncs + k; } return NULL; /* not found */ } static long long calc_error(unsigned a[2], unsigned b[3], int shift, int rettype) { unsigned r0, r1, r2; int sign, carry; long long result; /* * If either number is infinite, require exact equality. If * either number is NaN, require that both are NaN. If either * of these requirements is broken, return INT_MAX. */ if (is_double_rettype(rettype)) { if ((a[0] & 0x7FF00000) == 0x7FF00000 || (b[0] & 0x7FF00000) == 0x7FF00000) { if (((a[0] & 0x800FFFFF) || a[1]) && ((b[0] & 0x800FFFFF) || b[1]) && (a[0] & 0x7FF00000) == 0x7FF00000 && (b[0] & 0x7FF00000) == 0x7FF00000) return 0; /* both NaN - OK */ if (!((a[0] & 0xFFFFF) || a[1]) && !((b[0] & 0xFFFFF) || b[1]) && a[0] == b[0]) return 0; /* both same sign of Inf - OK */ return LLONG_MAX; } } else { if ((a[0] & 0x7F800000) == 0x7F800000 || (b[0] & 0x7F800000) == 0x7F800000) { if ((a[0] & 0x807FFFFF) && (b[0] & 0x807FFFFF) && (a[0] & 0x7F800000) == 0x7F800000 && (b[0] & 0x7F800000) == 0x7F800000) return 0; /* both NaN - OK */ if (!(a[0] & 0x7FFFFF) && !(b[0] & 0x7FFFFF) && a[0] == b[0]) return 0; /* both same sign of Inf - OK */ return LLONG_MAX; } } /* * Both finite. Return INT_MAX if the signs differ. */ if ((a[0] ^ b[0]) & 0x80000000) return LLONG_MAX; /* * Now it's just straight multiple-word subtraction. */ if (is_double_rettype(rettype)) { r2 = -b[2]; carry = (r2 == 0); r1 = a[1] + ~b[1] + carry; carry = (r1 < a[1] || (carry && r1 == a[1])); r0 = a[0] + ~b[0] + carry; } else { r2 = -b[1]; carry = (r2 == 0); r1 = a[0] + ~b[0] + carry; carry = (r1 < a[0] || (carry && r1 == a[0])); r0 = ~0 + carry; } /* * Forgive larger errors in specialised cases. */ if (shift > 0) { if (shift > 32*3) return 0; /* all errors are forgiven! 
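For same-signed finite values, the multi-word subtraction above is just the difference of the bit patterns, scaled into ULPUNIT fixed point. A simplified single-precision reference (positive finite values only, whole ulps, no EXTRABITS fraction) that can be used to sanity-check it:

```c
#include <stdint.h>
#include <string.h>

static long long
ulp_diff_f (float got, float want)
{
  uint32_t g, w;
  memcpy (&g, &got, sizeof g);
  memcpy (&w, &want, sizeof w);
  return (long long) g - (long long) w; /* +1 means one ulp too high */
}
```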
*/ while (shift >= 32) { r2 = r1; r1 = r0; r0 = -(r0 >> 31); shift -= 32; } if (shift > 0) { r2 = (r2 >> shift) | (r1 << (32-shift)); r1 = (r1 >> shift) | (r0 << (32-shift)); r0 = (r0 >> shift) | ((-(r0 >> 31)) << (32-shift)); } } if (r0 & 0x80000000) { sign = 1; r2 = ~r2; carry = (r2 == 0); r1 = 0 + ~r1 + carry; carry = (carry && (r2 == 0)); r0 = 0 + ~r0 + carry; } else { sign = 0; } if (r0 >= (1LL<<(31-EXTRABITS))) return LLONG_MAX; /* many ulps out */ result = (r2 >> (32-EXTRABITS)) & (ULPUNIT-1); result |= r1 << EXTRABITS; result |= (long long)r0 << (32+EXTRABITS); if (sign) result = -result; return result; } /* special named operands */ typedef struct { unsigned op1, op2; char* name; } special_op; static special_op special_ops_double[] = { {0x00000000,0x00000000,"0"}, {0x3FF00000,0x00000000,"1"}, {0x7FF00000,0x00000000,"inf"}, {0x7FF80000,0x00000001,"qnan"}, {0x7FF00000,0x00000001,"snan"}, {0x3ff921fb,0x54442d18,"pi2"}, {0x400921fb,0x54442d18,"pi"}, {0x3fe921fb,0x54442d18,"pi4"}, {0x4002d97c,0x7f3321d2,"3pi4"}, }; static special_op special_ops_float[] = { {0x00000000,0,"0"}, {0x3f800000,0,"1"}, {0x7f800000,0,"inf"}, {0x7fc00000,0,"qnan"}, {0x7f800001,0,"snan"}, {0x3fc90fdb,0,"pi2"}, {0x40490fdb,0,"pi"}, {0x3f490fdb,0,"pi4"}, {0x4016cbe4,0,"3pi4"}, }; /* This is what is returned by the below functions. We need it to handle the sign of the number */ static special_op tmp_op = {0,0,0}; special_op* find_special_op_from_op(unsigned op1, unsigned op2, int is_double) { int i; special_op* sop; if(is_double) { sop = special_ops_double; } else { sop = special_ops_float; } for(i = 0; i < sizeof(special_ops_double)/sizeof(special_op); i++) { if(sop->op1 == (op1&0x7fffffff) && sop->op2 == op2) { if(tmp_op.name) free(tmp_op.name); tmp_op.name = malloc(strlen(sop->name)+2); if(op1>>31) { sprintf(tmp_op.name,"-%s",sop->name); } else { strcpy(tmp_op.name,sop->name); } return &tmp_op; } sop++; } return NULL; } special_op* find_special_op_from_name(const char* name, int is_double) { int i, neg=0; special_op* sop; if(is_double) { sop = special_ops_double; } else { sop = special_ops_float; } if(*name=='-') { neg=1; name++; } else if(*name=='+') { name++; } for(i = 0; i < sizeof(special_ops_double)/sizeof(special_op); i++) { if(0 == strcmp(name,sop->name)) { tmp_op.op1 = sop->op1; if(neg) { tmp_op.op1 |= 0x80000000; } tmp_op.op2 = sop->op2; return &tmp_op; } sop++; } return NULL; } /* helper function for the below type=0 for single, 1 for double, 2 for no sop */ int do_op(char* q, unsigned* op, const char* name, int num, int sop_type) { int i; int n=num; special_op* sop = NULL; for(i = 0; i < num; i++) { op[i] = 0; } if(sop_type<2) { sop = find_special_op_from_name(q,sop_type); } if(sop != NULL) { op[0] = sop->op1; op[1] = sop->op2; } else { switch(num) { case 1: n = sscanf(q, "%x", &op[0]); break; case 2: n = sscanf(q, "%x.%x", &op[0], &op[1]); break; case 3: n = sscanf(q, "%x.%x.%x", &op[0], &op[1], &op[2]); break; default: return -1; } } if (verbose) { printf("%s=",name); for (i = 0; (i < n); ++i) printf("%x.", op[i]); printf(" (n=%d)\n", n); } return n; } testdetail parsetest(char *testbuf, testdetail oldtest) { char *p; /* Current part of line: Option name */ char *q; /* Current part of line: Option value */ testdetail ret; /* What we return */ int k; /* Function enum from k_* */ int n; /* Used as returns for scanfs */ int argtype=2, rettype=2; /* for do_op */ /* clear ret */ memset(&ret, 0, sizeof(ret)); if (verbose) printf("Parsing line: %s\n", testbuf); while (*testbuf && isspace(*testbuf)) 
testbuf++; if (testbuf[0] == ';' || testbuf[0] == '#' || testbuf[0] == '!' || testbuf[0] == '>' || testbuf[0] == '\0') { ret.comment = 1; if (verbose) printf("Line is a comment\n"); return ret; } ret.comment = 0; if (*testbuf == '+') { if (oldtest.valid) { ret = oldtest; /* structure copy */ } else { fprintf(stderr, "copy from invalid: ignored\n"); } testbuf++; } ret.random = randomstate; ret.in_err = 0; ret.in_err_limit = e_number_of_errnos; p = strtok(testbuf, " \t"); while (p != NULL) { q = strchr(p, '='); if (!q) goto balderdash; *q++ = '\0'; k = find(p, keywords, sizeof(keywords)); switch (k) { case k_random: randomstate = (!strcmp(q, "on")); ret.comment = 1; return ret; /* otherwise ignore this line */ case k_func: if (verbose) printf("func=%s ", q); //ret.func = find(q, funcs, sizeof(funcs)); ret.func = find_testfunc(q); if (ret.func == NULL) { if (verbose) printf("(id=unknown)\n"); goto balderdash; } if(is_single_argtype(ret.func->argtype)) argtype = 0; else if(is_double_argtype(ret.func->argtype)) argtype = 1; if(is_single_rettype(ret.func->rettype)) rettype = 0; else if(is_double_rettype(ret.func->rettype)) rettype = 1; //ret.size = sizes[ret.func]; if (verbose) printf("(name=%s) (size=%d)\n", ret.func->name, ret.func->argtype); break; case k_op1: case k_op1r: n = do_op(q,ret.op1r,"op1r",2,argtype); if (n < 1) goto balderdash; break; case k_op1i: n = do_op(q,ret.op1i,"op1i",2,argtype); if (n < 1) goto balderdash; break; case k_op2: case k_op2r: n = do_op(q,ret.op2r,"op2r",2,argtype); if (n < 1) goto balderdash; break; case k_op2i: n = do_op(q,ret.op2i,"op2i",2,argtype); if (n < 1) goto balderdash; break; case k_resultc: puts(q); if(strncmp(q,"inf",3)==0) { ret.resultc = rc_infinity; } else if(strcmp(q,"zero")==0) { ret.resultc = rc_zero; } else if(strcmp(q,"nan")==0) { ret.resultc = rc_nan; } else if(strcmp(q,"finite")==0) { ret.resultc = rc_finite; } else { goto balderdash; } break; case k_result: case k_resultr: n = (do_op)(q,ret.resultr,"resultr",3,rettype); if (n < 1) goto balderdash; ret.nresult = n; /* assume real and imaginary have same no. 
words */ break; case k_resulti: n = do_op(q,ret.resulti,"resulti",3,rettype); if (n < 1) goto balderdash; break; case k_res2: n = do_op(q,ret.res2,"res2",2,rettype); if (n < 1) goto balderdash; break; case k_status: while (*q) { if (*q == 'i') ret.status |= FE_INVALID; if (*q == 'z') ret.status |= FE_DIVBYZERO; if (*q == 'o') ret.status |= FE_OVERFLOW; if (*q == 'u') ret.status |= FE_UNDERFLOW; q++; } break; case k_maybeerror: n = find(q, errors, sizeof(errors)); if (n < 0) goto balderdash; if(math_errhandling&MATH_ERREXCEPT) { switch(n) { case e_domain: ret.maybestatus |= FE_INVALID; break; case e_divbyzero: ret.maybestatus |= FE_DIVBYZERO; break; case e_overflow: ret.maybestatus |= FE_OVERFLOW; break; case e_underflow: ret.maybestatus |= FE_UNDERFLOW; break; } } { switch(n) { case e_domain: ret.maybeerr = e_EDOM; break; case e_divbyzero: case e_overflow: case e_underflow: ret.maybeerr = e_ERANGE; break; } } case k_maybestatus: while (*q) { if (*q == 'i') ret.maybestatus |= FE_INVALID; if (*q == 'z') ret.maybestatus |= FE_DIVBYZERO; if (*q == 'o') ret.maybestatus |= FE_OVERFLOW; if (*q == 'u') ret.maybestatus |= FE_UNDERFLOW; q++; } break; case k_error: n = find(q, errors, sizeof(errors)); if (n < 0) goto balderdash; if(math_errhandling&MATH_ERREXCEPT) { switch(n) { case e_domain: ret.status |= FE_INVALID; break; case e_divbyzero: ret.status |= FE_DIVBYZERO; break; case e_overflow: ret.status |= FE_OVERFLOW; break; case e_underflow: ret.status |= FE_UNDERFLOW; break; } } if(math_errhandling&MATH_ERRNO) { switch(n) { case e_domain: ret.err = e_EDOM; break; case e_divbyzero: case e_overflow: case e_underflow: ret.err = e_ERANGE; break; } } if(!(math_errhandling&MATH_ERRNO)) { switch(n) { case e_domain: ret.maybeerr = e_EDOM; break; case e_divbyzero: case e_overflow: case e_underflow: ret.maybeerr = e_ERANGE; break; } } break; case k_errno: ret.err = find(q, errnos, sizeof(errnos)); if (ret.err < 0) goto balderdash; break; case k_errno_in: ret.in_err = find(q, errnos, sizeof(errnos)); if (ret.err < 0) goto balderdash; ret.in_err_limit = ret.in_err + 1; break; case k_wrongresult: case k_wrongstatus: case k_wrongres2: case k_wrongerrno: /* quietly ignore these keys */ break; default: goto balderdash; } p = strtok(NULL, " \t"); } ret.valid = 1; return ret; /* come here from almost any error */ balderdash: ret.valid = 0; return ret; } typedef enum { test_comment, /* deliberately not a test */ test_invalid, /* accidentally not a test */ test_decline, /* was a test, and wasn't run */ test_fail, /* was a test, and failed */ test_pass /* was a test, and passed */ } testresult; char failtext[512]; typedef union { unsigned i[2]; double f; double da[2]; } dbl; typedef union { unsigned i; float f; float da[2]; } sgl; /* helper function for runtest */ void print_error(int rettype, unsigned *result, char* text, char** failp) { special_op *sop; char *str; if(result) { *failp += sprintf(*failp," %s=",text); sop = find_special_op_from_op(result[0],result[1],is_double_rettype(rettype)); if(sop) { *failp += sprintf(*failp,"%s",sop->name); } else { if(is_double_rettype(rettype)) { str="%08x.%08x"; } else { str="%08x"; } *failp += sprintf(*failp,str,result[0],result[1]); } } } void print_ulps_helper(const char *name, long long ulps, char** failp) { if(ulps == LLONG_MAX) { *failp += sprintf(*failp, " %s=HUGE", name); } else { *failp += sprintf(*failp, " %s=%.3f", name, (double)ulps / ULPUNIT); } } /* for complex args make ulpsr or ulpsri = 0 to not print */ void print_ulps(int rettype, long long ulpsr, long long 
ulpsi, char** failp) { if(is_complex_rettype(rettype)) { if (ulpsr) print_ulps_helper("ulpsr",ulpsr,failp); if (ulpsi) print_ulps_helper("ulpsi",ulpsi,failp); } else { if (ulpsr) print_ulps_helper("ulps",ulpsr,failp); } } int runtest(testdetail t) { int err, status; dbl d_arg1, d_arg2, d_res, d_res2; sgl s_arg1, s_arg2, s_res, s_res2; int deferred_decline = FALSE; char *failp = failtext; unsigned int intres=0; int res2_adjust = 0; if (t.comment) return test_comment; if (!t.valid) return test_invalid; /* Set IEEE status to mathlib-normal */ feclearexcept(FE_ALL_EXCEPT); /* Deal with operands */ #define DO_DOP(arg,op) arg.i[dmsd] = t.op[0]; arg.i[dlsd] = t.op[1] DO_DOP(d_arg1,op1r); DO_DOP(d_arg2,op2r); s_arg1.i = t.op1r[0]; s_arg2.i = t.op2r[0]; s_res.i = 0; /* * Detect NaNs, infinities and denormals on input, and set a * deferred decline flag if we're in FO mode. * * (We defer the decline rather than doing it immediately * because even in FO mode the operation is not permitted to * crash or tight-loop; so we _run_ the test, and then ignore * all the results.) */ if (fo) { if (is_double_argtype(t.func->argtype) && is_dhard(t.op1r)) deferred_decline = TRUE; if (t.func->argtype==at_d2 && is_dhard(t.op2r)) deferred_decline = TRUE; if (is_single_argtype(t.func->argtype) && is_shard(t.op1r)) deferred_decline = TRUE; if (t.func->argtype==at_s2 && is_shard(t.op2r)) deferred_decline = TRUE; if (is_double_rettype(t.func->rettype) && is_dhard(t.resultr)) deferred_decline = TRUE; if (t.func->rettype==rt_d2 && is_dhard(t.res2)) deferred_decline = TRUE; if (is_single_argtype(t.func->rettype) && is_shard(t.resultr)) deferred_decline = TRUE; if (t.func->rettype==rt_s2 && is_shard(t.res2)) deferred_decline = TRUE; if (t.err == e_ERANGE) deferred_decline = TRUE; } /* * Perform the operation */ errno = t.in_err == e_EDOM ? EDOM : t.in_err == e_ERANGE ? 
ERANGE : 0; if (t.err == e_0) t.err = t.in_err; if (t.maybeerr == e_0) t.maybeerr = t.in_err; if(t.func->type == t_func) { switch(t.func->argtype) { case at_d: d_res.f = t.func->func.d_d_ptr(d_arg1.f); break; case at_s: s_res.f = t.func->func.s_s_ptr(s_arg1.f); break; case at_d2: d_res.f = t.func->func.d2_d_ptr(d_arg1.f, d_arg2.f); break; case at_s2: s_res.f = t.func->func.s2_s_ptr(s_arg1.f, s_arg2.f); break; case at_di: d_res.f = t.func->func.di_d_ptr(d_arg1.f, d_arg2.i[dmsd]); break; case at_si: s_res.f = t.func->func.si_s_ptr(s_arg1.f, s_arg2.i); break; case at_dip: d_res.f = t.func->func.dip_d_ptr(d_arg1.f, (int*)&intres); break; case at_sip: s_res.f = t.func->func.sip_s_ptr(s_arg1.f, (int*)&intres); break; case at_ddp: d_res.f = t.func->func.ddp_d_ptr(d_arg1.f, &d_res2.f); break; case at_ssp: s_res.f = t.func->func.ssp_s_ptr(s_arg1.f, &s_res2.f); break; default: printf("unhandled function: %s\n",t.func->name); return test_fail; } } else { /* printf("macro: name=%s, num=%i, s1.i=0x%08x s1.f=%f\n",t.func->name, t.func->macro_name, s_arg1.i, (double)s_arg1.f); */ switch(t.func->macro_name) { case m_isfinite: intres = isfinite(d_arg1.f); break; case m_isinf: intres = isinf(d_arg1.f); break; case m_isnan: intres = isnan(d_arg1.f); break; case m_isnormal: intres = isnormal(d_arg1.f); break; case m_signbit: intres = signbit(d_arg1.f); break; case m_fpclassify: intres = fpclassify(d_arg1.f); break; case m_isgreater: intres = isgreater(d_arg1.f, d_arg2.f); break; case m_isgreaterequal: intres = isgreaterequal(d_arg1.f, d_arg2.f); break; case m_isless: intres = isless(d_arg1.f, d_arg2.f); break; case m_islessequal: intres = islessequal(d_arg1.f, d_arg2.f); break; case m_islessgreater: intres = islessgreater(d_arg1.f, d_arg2.f); break; case m_isunordered: intres = isunordered(d_arg1.f, d_arg2.f); break; case m_isfinitef: intres = isfinite(s_arg1.f); break; case m_isinff: intres = isinf(s_arg1.f); break; case m_isnanf: intres = isnan(s_arg1.f); break; case m_isnormalf: intres = isnormal(s_arg1.f); break; case m_signbitf: intres = signbit(s_arg1.f); break; case m_fpclassifyf: intres = fpclassify(s_arg1.f); break; case m_isgreaterf: intres = isgreater(s_arg1.f, s_arg2.f); break; case m_isgreaterequalf: intres = isgreaterequal(s_arg1.f, s_arg2.f); break; case m_islessf: intres = isless(s_arg1.f, s_arg2.f); break; case m_islessequalf: intres = islessequal(s_arg1.f, s_arg2.f); break; case m_islessgreaterf: intres = islessgreater(s_arg1.f, s_arg2.f); break; case m_isunorderedf: intres = isunordered(s_arg1.f, s_arg2.f); break; default: printf("unhandled macro: %s\n",t.func->name); return test_fail; } } /* * Decline the test if the deferred decline flag was set above. */ if (deferred_decline) return test_decline; /* printf("intres=%i\n",intres); */ /* Clear the fail text (indicating a pass unless we change it) */ failp[0] = '\0'; /* Check the IEEE status bits (except INX, which we disregard). * We don't bother with this for complex numbers, because the * complex functions are hard to get exactly right and we don't * have to anyway (C99 annex G is only informative). */ if (!(is_complex_argtype(t.func->argtype) || is_complex_rettype(t.func->rettype))) { status = fetestexcept(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW); if ((status|t.maybestatus|~statusmask) != (t.status|t.maybestatus|~statusmask)) { if (quiet) failtext[0]='x'; else { failp += sprintf(failp, " wrongstatus=%s%s%s%s%s", (status & FE_INVALID ? "i" : ""), (status & FE_DIVBYZERO ? "z" : ""), (status & FE_OVERFLOW ? 
"o" : ""), (status & FE_UNDERFLOW ? "u" : ""), (status ? "" : "OK")); } } } /* Check the result */ { unsigned resultr[2], resulti[2]; unsigned tresultr[3], tresulti[3], wres; switch(t.func->rettype) { case rt_d: case rt_d2: tresultr[0] = t.resultr[0]; tresultr[1] = t.resultr[1]; resultr[0] = d_res.i[dmsd]; resultr[1] = d_res.i[dlsd]; resulti[0] = resulti[1] = 0; wres = 2; break; case rt_i: tresultr[0] = t.resultr[0]; resultr[0] = intres; resulti[0] = 0; wres = 1; break; case rt_s: case rt_s2: tresultr[0] = t.resultr[0]; resultr[0] = s_res.i; resulti[0] = 0; wres = 1; break; default: puts("unhandled rettype in runtest"); abort (); } if(t.resultc != rc_none) { int err = 0; switch(t.resultc) { case rc_zero: if(resultr[0] != 0 || resulti[0] != 0 || (wres==2 && (resultr[1] != 0 || resulti[1] != 0))) { err = 1; } break; case rc_infinity: if(wres==1) { if(!((resultr[0]&0x7fffffff)==0x7f800000 || (resulti[0]&0x7fffffff)==0x7f800000)) { err = 1; } } else { if(!(((resultr[0]&0x7fffffff)==0x7ff00000 && resultr[1]==0) || ((resulti[0]&0x7fffffff)==0x7ff00000 && resulti[1]==0))) { err = 1; } } break; case rc_nan: if(wres==1) { if(!((resultr[0]&0x7fffffff)>0x7f800000 || (resulti[0]&0x7fffffff)>0x7f800000)) { err = 1; } } else { canon_dNaN(resultr); canon_dNaN(resulti); if(!(((resultr[0]&0x7fffffff)>0x7ff00000 && resultr[1]==1) || ((resulti[0]&0x7fffffff)>0x7ff00000 && resulti[1]==1))) { err = 1; } } break; case rc_finite: if(wres==1) { if(!((resultr[0]&0x7fffffff)<0x7f800000 || (resulti[0]&0x7fffffff)<0x7f800000)) { err = 1; } } else { if(!((resultr[0]&0x7fffffff)<0x7ff00000 || (resulti[0]&0x7fffffff)<0x7ff00000)) { err = 1; } } break; default: break; } if(err) { print_error(t.func->rettype,resultr,"wrongresultr",&failp); print_error(t.func->rettype,resulti,"wrongresulti",&failp); } } else if (t.nresult > wres) { /* * The test case data has provided the result to more * than double precision. Instead of testing exact * equality, we test against our maximum error * tolerance. */ int rshift, ishift; long long ulpsr, ulpsi, ulptolerance; tresultr[wres] = t.resultr[wres] << (32-EXTRABITS); tresulti[wres] = t.resulti[wres] << (32-EXTRABITS); if(strict) { ulptolerance = 4096; /* one ulp */ } else { ulptolerance = t.func->tolerance; } rshift = ishift = 0; if (ulptolerance & ABSLOWERBOUND) { /* * Hack for the lgamma functions, which have an * error behaviour that can't conveniently be * characterised in pure ULPs. Really, we want to * say that the error in lgamma is "at most N ULPs, * or at most an absolute error of X, whichever is * larger", for appropriately chosen N,X. But since * these two functions are the only cases where it * arises, I haven't bothered to do it in a nice way * in the function table above. * * (The difficult cases arise with negative input * values such that |gamma(x)| is very near to 1; in * this situation implementations tend to separately * compute lgamma(|x|) and the log of the correction * term from the Euler reflection formula, and * subtract - which catastrophically loses * significance.) * * As far as I can tell, nobody cares about this: * GNU libm doesn't get those cases right either, * and OpenCL explicitly doesn't state a ULP error * limit for lgamma. So my guess is that this is * simply considered acceptable error behaviour for * this particular function, and hence I feel free * to allow for it here. 
*/ ulptolerance &= ~ABSLOWERBOUND; if (t.op1r[0] & 0x80000000) { if (t.func->rettype == rt_d) rshift = 0x400 - ((tresultr[0] >> 20) & 0x7ff); else if (t.func->rettype == rt_s) rshift = 0x80 - ((tresultr[0] >> 23) & 0xff); if (rshift < 0) rshift = 0; } } if (ulptolerance & PLUSMINUSPIO2) { ulptolerance &= ~PLUSMINUSPIO2; /* * Hack for range reduction, which can reduce * borderline cases in the wrong direction, i.e. * return a value just outside one end of the interval * [-pi/4,+pi/4] when it could have returned a value * just inside the other end by subtracting an * adjacent multiple of pi/2. * * We tolerate this, up to a point, because the * trigonometric functions making use of the output of * rred can cope and because making the range reducer * do the exactly right thing in every case would be * more expensive. */ if (wres == 1) { /* Upper bound of overshoot derived in rredf.h */ if ((resultr[0]&0x7FFFFFFF) <= 0x3f494b02 && (resultr[0]&0x7FFFFFFF) > 0x3f490fda && (resultr[0]&0x80000000) != (tresultr[0]&0x80000000)) { unsigned long long val; val = tresultr[0]; val = (val << 32) | tresultr[1]; /* * Compute the alternative permitted result by * subtracting from the sum of the extended * single-precision bit patterns of +pi/4 and * -pi/4. This is a horrible hack which only * works because we can be confident that * numbers in this range all have the same * exponent! */ val = 0xfe921fb54442d184ULL - val; tresultr[0] = val >> 32; tresultr[1] = (val >> (32-EXTRABITS)) << (32-EXTRABITS); /* * Also, expect a correspondingly different * value of res2 as a result of this change. * The adjustment depends on whether we just * flipped the result from + to - or vice * versa. */ if (resultr[0] & 0x80000000) { res2_adjust = +1; } else { res2_adjust = -1; } } } } ulpsr = calc_error(resultr, tresultr, rshift, t.func->rettype); if(is_complex_rettype(t.func->rettype)) { ulpsi = calc_error(resulti, tresulti, ishift, t.func->rettype); } else { ulpsi = 0; } unsigned *rr = (ulpsr > ulptolerance || ulpsr < -ulptolerance) ? resultr : NULL; unsigned *ri = (ulpsi > ulptolerance || ulpsi < -ulptolerance) ? resulti : NULL; /* printf("tolerance=%i, ulpsr=%i, ulpsi=%i, rr=%p, ri=%p\n",ulptolerance,ulpsr,ulpsi,rr,ri); */ if (rr || ri) { if (quiet) failtext[0]='x'; else { print_error(t.func->rettype,rr,"wrongresultr",&failp); print_error(t.func->rettype,ri,"wrongresulti",&failp); print_ulps(t.func->rettype,rr ? ulpsr : 0, ri ? ulpsi : 0,&failp); } } } else { if(is_complex_rettype(t.func->rettype)) /* * Complex functions are not fully supported, * this is unreachable, but prevents warnings. */ abort(); /* * The test case data has provided the result in * exactly the output precision. Therefore we must * complain about _any_ violation. 
*/ switch(t.func->rettype) { case rt_dc: canon_dNaN(tresulti); canon_dNaN(resulti); if (fo) { dnormzero(tresulti); dnormzero(resulti); } /* deliberate fall-through */ case rt_d: canon_dNaN(tresultr); canon_dNaN(resultr); if (fo) { dnormzero(tresultr); dnormzero(resultr); } break; case rt_sc: canon_sNaN(tresulti); canon_sNaN(resulti); if (fo) { snormzero(tresulti); snormzero(resulti); } /* deliberate fall-through */ case rt_s: canon_sNaN(tresultr); canon_sNaN(resultr); if (fo) { snormzero(tresultr); snormzero(resultr); } break; default: break; } if(is_complex_rettype(t.func->rettype)) { unsigned *rr, *ri; if(resultr[0] != tresultr[0] || (wres > 1 && resultr[1] != tresultr[1])) { rr = resultr; } else { rr = NULL; } if(resulti[0] != tresulti[0] || (wres > 1 && resulti[1] != tresulti[1])) { ri = resulti; } else { ri = NULL; } if(rr || ri) { if (quiet) failtext[0]='x'; print_error(t.func->rettype,rr,"wrongresultr",&failp); print_error(t.func->rettype,ri,"wrongresulti",&failp); } } else if (resultr[0] != tresultr[0] || (wres > 1 && resultr[1] != tresultr[1])) { if (quiet) failtext[0]='x'; print_error(t.func->rettype,resultr,"wrongresult",&failp); } } /* * Now test res2, for those functions (frexp, modf, rred) * which use it. */ if (t.func->func.ptr == &frexp || t.func->func.ptr == &frexpf || t.func->macro_name == m_rred || t.func->macro_name == m_rredf) { unsigned tres2 = t.res2[0]; if (res2_adjust) { /* Fix for range reduction, propagated from further up */ tres2 = (tres2 + res2_adjust) & 3; } if (tres2 != intres) { if (quiet) failtext[0]='x'; else { failp += sprintf(failp, " wrongres2=%08x", intres); } } } else if (t.func->func.ptr == &modf || t.func->func.ptr == &modff) { tresultr[0] = t.res2[0]; tresultr[1] = t.res2[1]; if (is_double_rettype(t.func->rettype)) { canon_dNaN(tresultr); resultr[0] = d_res2.i[dmsd]; resultr[1] = d_res2.i[dlsd]; canon_dNaN(resultr); if (fo) { dnormzero(tresultr); dnormzero(resultr); } } else { canon_sNaN(tresultr); resultr[0] = s_res2.i; resultr[1] = s_res2.i; canon_sNaN(resultr); if (fo) { snormzero(tresultr); snormzero(resultr); } } if (resultr[0] != tresultr[0] || (wres > 1 && resultr[1] != tresultr[1])) { if (quiet) failtext[0]='x'; else { if (is_double_rettype(t.func->rettype)) failp += sprintf(failp, " wrongres2=%08x.%08x", resultr[0], resultr[1]); else failp += sprintf(failp, " wrongres2=%08x", resultr[0]); } } } } /* Check errno */ err = (errno == EDOM ? e_EDOM : errno == ERANGE ? e_ERANGE : e_0); if (err != t.err && err != t.maybeerr) { if (quiet) failtext[0]='x'; else { failp += sprintf(failp, " wrongerrno=%s expecterrno=%s ", errnos[err], errnos[t.err]); } } return *failtext ? test_fail : test_pass; } int passed, failed, declined; void runtests(char *name, FILE *fp) { char testbuf[512], linebuf[512]; int lineno = 1; testdetail test; test.valid = 0; if (verbose) printf("runtests: %s\n", name); while (fgets(testbuf, sizeof(testbuf), fp)) { int res, print_errno; testbuf[strcspn(testbuf, "\r\n")] = '\0'; strcpy(linebuf, testbuf); test = parsetest(testbuf, test); print_errno = 0; while (test.in_err < test.in_err_limit) { res = runtest(test); if (res == test_pass) { if (verbose) printf("%s:%d: pass\n", name, lineno); ++passed; } else if (res == test_decline) { if (verbose) printf("%s:%d: declined\n", name, lineno); ++declined; } else if (res == test_fail) { if (!quiet) printf("%s:%d: FAIL%s: %s%s%s%s\n", name, lineno, test.random ? " (random)" : "", linebuf, print_errno ? " errno_in=" : "", print_errno ? 
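/*
 * The "(tres2 + res2_adjust) & 3" fixup above relies on the rred
 * quadrant being a two-bit value: masking with 3 makes the +/-1
 * adjustment wrap around. A tiny sketch, assuming only ISO C;
 * wrap_quadrant is a hypothetical helper:
 *
 *   static unsigned wrap_quadrant(unsigned q, int adjust)
 *   {
 *       return (q + (unsigned)adjust) & 3u;   // stays in 0..3
 *   }
 *
 *   // wrap_quadrant(0, -1) == 3, wrap_quadrant(3, +1) == 0
 */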
errnos[test.in_err] : "", failtext); ++failed; } else if (res == test_invalid) { printf("%s:%d: malformed: %s\n", name, lineno, linebuf); ++failed; } test.in_err++; print_errno = 1; } lineno++; } } int main(int ac, char **av) { char **files; int i, nfiles = 0; dbl d; #ifdef MICROLIB /* * Invent argc and argv ourselves. */ char *argv[256]; char args[256]; { int sargs[2]; char *p; ac = 0; sargs[0]=(int)args; sargs[1]=(int)sizeof(args); if (!__semihost(0x15, sargs)) { args[sizeof(args)-1] = '\0'; /* just in case */ p = args; while (1) { while (*p == ' ' || *p == '\t') p++; if (!*p) break; argv[ac++] = p; while (*p && *p != ' ' && *p != '\t') p++; if (*p) *p++ = '\0'; } } av = argv; } #endif /* Sort tfuncs */ qsort(tfuncs, sizeof(tfuncs)/sizeof(test_func), sizeof(test_func), &compare_tfuncs); /* * Autodetect the `double' endianness. */ dmsd = 0; d.f = 1.0; /* 0x3ff00000 / 0x00000000 */ if (d.i[dmsd] == 0) { dmsd = 1; } /* * Now dmsd denotes what the compiler thinks we're at. Let's * check that it agrees with what the runtime thinks. */ d.i[0] = d.i[1] = 0x11111111;/* a random +ve number */ d.f /= d.f; /* must now be one */ if (d.i[dmsd] == 0) { fprintf(stderr, "YIKES! Compiler and runtime disagree on endianness" " of `double'. Bailing out\n"); return 1; } dlsd = !dmsd; /* default is terse */ verbose = 0; fo = 0; strict = 0; files = (char **)malloc((ac+1) * sizeof(char *)); if (!files) { fprintf(stderr, "initial malloc failed!\n"); return 1; } #ifdef NOCMDLINE files[nfiles++] = "testfile"; #endif while (--ac) { char *p = *++av; if (*p == '-') { static char *options[] = { "-fo", #if 0 "-noinexact", "-noround", #endif "-nostatus", "-quiet", "-strict", "-v", "-verbose", }; enum { op_fo, #if 0 op_noinexact, op_noround, #endif op_nostatus, op_quiet, op_strict, op_v, op_verbose, }; switch (find(p, options, sizeof(options))) { case op_quiet: quiet = 1; break; #if 0 case op_noinexact: statusmask &= 0x0F; /* remove bit 4 */ break; case op_noround: doround = 0; break; #endif case op_nostatus: /* no status word => noinx,noround */ statusmask = 0; doround = 0; break; case op_v: case op_verbose: verbose = 1; break; case op_fo: fo = 1; break; case op_strict: /* tolerance is 1 ulp */ strict = 1; break; default: fprintf(stderr, "unrecognised option: %s\n", p); break; } } else { files[nfiles++] = p; } } passed = failed = declined = 0; if (nfiles) { for (i = 0; i < nfiles; i++) { FILE *fp = fopen(files[i], "r"); if (!fp) { fprintf(stderr, "Couldn't open %s\n", files[i]); } else runtests(files[i], fp); } } else runtests("(stdin)", stdin); printf("Completed. Passed %d, failed %d (total %d", passed, failed, passed+failed); if (declined) printf(" plus %d declined", declined); printf(")\n"); if (failed || passed == 0) return 1; printf("** TEST PASSED OK **\n"); return 0; } void undef_func() { failed++; puts("ERROR: undefined function called"); } +/* clang-format on */ diff --git a/contrib/arm-optimized-routines/math/test/rtest/dotest.c b/contrib/arm-optimized-routines/math/test/rtest/dotest.c index 5b3e9b4f18e4..dd8ceb068141 100644 --- a/contrib/arm-optimized-routines/math/test/rtest/dotest.c +++ b/contrib/arm-optimized-routines/math/test/rtest/dotest.c @@ -1,2167 +1,2210 @@ /* * dotest.c - actually generate mathlib test cases * - * Copyright (c) 1999-2019, Arm Limited. + * Copyright (c) 1999-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <assert.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <mpfr.h> #include <mpc.h> #include "semi.h" #include "intern.h" #include "random.h" #define MPFR_PREC 96 /* good enough for float or double + a few extra bits */ +#if MPFR_VERSION < MPFR_VERSION_NUM(4, 2, 0) +int +mpfr_tanpi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd) +{ + MPFR_DECL_INIT (frd, MPFR_PREC); + mpfr_const_pi (frd, GMP_RNDN); + mpfr_mul (frd, frd, arg, GMP_RNDN); + return mpfr_tan (ret, frd, GMP_RNDN); +} + +int +mpfr_sinpi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd) +{ + MPFR_DECL_INIT (frd, MPFR_PREC); + mpfr_const_pi (frd, GMP_RNDN); + mpfr_mul (frd, frd, arg, GMP_RNDN); + return mpfr_sin (ret, frd, GMP_RNDN); +} + +int +mpfr_cospi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd) +{ + MPFR_DECL_INIT (frd, MPFR_PREC); + mpfr_const_pi (frd, GMP_RNDN); + mpfr_mul (frd, frd, arg, GMP_RNDN); + return mpfr_cos (ret, frd, GMP_RNDN); +} +#endif + extern int lib_fo, lib_no_arith, ntests; /* * Prototypes. */ static void cases_biased(uint32 *, uint32, uint32); static void cases_biased_positive(uint32 *, uint32, uint32); static void cases_biased_float(uint32 *, uint32, uint32); static void cases_uniform(uint32 *, uint32, uint32); static void cases_uniform_positive(uint32 *, uint32, uint32); static void cases_uniform_float(uint32 *, uint32, uint32); static void cases_uniform_float_positive(uint32 *, uint32, uint32); static void log_cases(uint32 *, uint32, uint32); static void log_cases_float(uint32 *, uint32, uint32); static void log1p_cases(uint32 *, uint32, uint32); static void log1p_cases_float(uint32 *, uint32, uint32); static void minmax_cases(uint32 *, uint32, uint32); static void minmax_cases_float(uint32 *, uint32, uint32); static void atan2_cases(uint32 *, uint32, uint32); static void atan2_cases_float(uint32 *, uint32, uint32); static void pow_cases(uint32 *, uint32, uint32); static void pow_cases_float(uint32 *, uint32, uint32); static void rred_cases(uint32 *, uint32, uint32); static void rred_cases_float(uint32 *, uint32, uint32); static void cases_semi1(uint32 *, uint32, uint32); static void cases_semi1_float(uint32 *, uint32, uint32); static void cases_semi2(uint32 *, uint32, uint32); static void cases_semi2_float(uint32 *, uint32, uint32); static void cases_ldexp(uint32 *, uint32, uint32); static void cases_ldexp_float(uint32 *, uint32, uint32); static void complex_cases_uniform(uint32 *, uint32, uint32); static void complex_cases_uniform_float(uint32 *, uint32, uint32); static void complex_cases_biased(uint32 *, uint32, uint32); static void complex_cases_biased_float(uint32 *, uint32, uint32); static void complex_log_cases(uint32 *, uint32, uint32); static void complex_log_cases_float(uint32 *, uint32, uint32); static void complex_pow_cases(uint32 *, uint32, uint32); static void complex_pow_cases_float(uint32 *, uint32, uint32); static void complex_arithmetic_cases(uint32 *, uint32, uint32); static void complex_arithmetic_cases_float(uint32 *, uint32, uint32); static uint32 doubletop(int x, int scale); static uint32 floatval(int x, int scale); /* * Convert back and forth between IEEE bit patterns and the * mpfr_t/mpc_t types. */ static void set_mpfr_d(mpfr_t x, uint32 h, uint32 l) { uint64_t hl = ((uint64_t)h << 32) | l; uint32 exp = (hl >> 52) & 0x7ff; int64_t mantissa = hl & (((uint64_t)1 << 52) - 1); int sign = (hl >> 63) ?
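/*
 * For the normal-number path below, mpfr_set_sj_2exp() reconstructs
 * the value as mantissa * 2^(exp - 0x3ff - 52) once the implicit
 * leading bit has been OR-ed in. The same arithmetic in plain C99
 * using ldexp(), restricted to finite normals for brevity;
 * decode_normal is a hypothetical helper, not used by the harness:
 *
 *   #include <math.h>
 *   #include <stdint.h>
 *
 *   static double decode_normal(uint32_t h, uint32_t l)
 *   {
 *       uint64_t hl = ((uint64_t)h << 32) | l;
 *       int exp = (int)((hl >> 52) & 0x7ff);
 *       int64_t m = (int64_t)(hl & (((uint64_t)1 << 52) - 1));
 *       m |= (int64_t)1 << 52;               // implicit leading bit
 *       return ldexp((hl >> 63) ? -(double)m : (double)m,
 *                    exp - 0x3ff - 52);
 *   }
 *
 *   // decode_normal(0x3ff00000, 0) == 1.0
 */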
-1 : +1; if (exp == 0x7ff) { if (mantissa == 0) mpfr_set_inf(x, sign); else mpfr_set_nan(x); } else if (exp == 0 && mantissa == 0) { mpfr_set_ui(x, 0, GMP_RNDN); mpfr_setsign(x, x, sign < 0, GMP_RNDN); } else { if (exp != 0) mantissa |= ((uint64_t)1 << 52); else exp++; mpfr_set_sj_2exp(x, mantissa * sign, (int)exp - 0x3ff - 52, GMP_RNDN); } } static void set_mpfr_f(mpfr_t x, uint32 f) { uint32 exp = (f >> 23) & 0xff; int32 mantissa = f & ((1 << 23) - 1); int sign = (f >> 31) ? -1 : +1; if (exp == 0xff) { if (mantissa == 0) mpfr_set_inf(x, sign); else mpfr_set_nan(x); } else if (exp == 0 && mantissa == 0) { mpfr_set_ui(x, 0, GMP_RNDN); mpfr_setsign(x, x, sign < 0, GMP_RNDN); } else { if (exp != 0) mantissa |= (1 << 23); else exp++; mpfr_set_sj_2exp(x, mantissa * sign, (int)exp - 0x7f - 23, GMP_RNDN); } } static void set_mpc_d(mpc_t z, uint32 rh, uint32 rl, uint32 ih, uint32 il) { mpfr_t x, y; mpfr_init2(x, MPFR_PREC); mpfr_init2(y, MPFR_PREC); set_mpfr_d(x, rh, rl); set_mpfr_d(y, ih, il); mpc_set_fr_fr(z, x, y, MPC_RNDNN); mpfr_clear(x); mpfr_clear(y); } static void set_mpc_f(mpc_t z, uint32 r, uint32 i) { mpfr_t x, y; mpfr_init2(x, MPFR_PREC); mpfr_init2(y, MPFR_PREC); set_mpfr_f(x, r); set_mpfr_f(y, i); mpc_set_fr_fr(z, x, y, MPC_RNDNN); mpfr_clear(x); mpfr_clear(y); } static void get_mpfr_d(const mpfr_t x, uint32 *h, uint32 *l, uint32 *extra) { uint32_t sign, expfield, mantfield; mpfr_t significand; int exp; if (mpfr_nan_p(x)) { *h = 0x7ff80000; *l = 0; *extra = 0; return; } sign = mpfr_signbit(x) ? 0x80000000U : 0; if (mpfr_inf_p(x)) { *h = 0x7ff00000 | sign; *l = 0; *extra = 0; return; } if (mpfr_zero_p(x)) { *h = 0x00000000 | sign; *l = 0; *extra = 0; return; } mpfr_init2(significand, MPFR_PREC); mpfr_set(significand, x, GMP_RNDN); exp = mpfr_get_exp(significand); mpfr_set_exp(significand, 0); /* Now significand is in [1/2,1), and significand * 2^exp == x. * So the IEEE exponent corresponding to exp==0 is 0x3fe. */ if (exp > 0x400) { /* overflow to infinity anyway */ *h = 0x7ff00000 | sign; *l = 0; *extra = 0; mpfr_clear(significand); return; } if (exp <= -0x3fe || mpfr_zero_p(x)) exp = -0x3fd; /* denormalise */ expfield = exp + 0x3fd; /* offset to cancel leading mantissa bit */ mpfr_div_2si(significand, x, exp - 21, GMP_RNDN); mpfr_abs(significand, significand, GMP_RNDN); mantfield = mpfr_get_ui(significand, GMP_RNDZ); *h = sign + ((uint64_t)expfield << 20) + mantfield; mpfr_sub_ui(significand, significand, mantfield, GMP_RNDN); mpfr_mul_2ui(significand, significand, 32, GMP_RNDN); mantfield = mpfr_get_ui(significand, GMP_RNDZ); *l = mantfield; mpfr_sub_ui(significand, significand, mantfield, GMP_RNDN); mpfr_mul_2ui(significand, significand, 32, GMP_RNDN); mantfield = mpfr_get_ui(significand, GMP_RNDZ); *extra = mantfield; mpfr_clear(significand); } static void get_mpfr_f(const mpfr_t x, uint32 *f, uint32 *extra) { uint32_t sign, expfield, mantfield; mpfr_t significand; int exp; if (mpfr_nan_p(x)) { *f = 0x7fc00000; *extra = 0; return; } sign = mpfr_signbit(x) ? 0x80000000U : 0; if (mpfr_inf_p(x)) { *f = 0x7f800000 | sign; *extra = 0; return; } if (mpfr_zero_p(x)) { *f = 0x00000000 | sign; *extra = 0; return; } mpfr_init2(significand, MPFR_PREC); mpfr_set(significand, x, GMP_RNDN); exp = mpfr_get_exp(significand); mpfr_set_exp(significand, 0); /* Now significand is in [1/2,1), and significand * 2^exp == x. * So the IEEE exponent corresponding to exp==0 is 0x7e. 
*/ if (exp > 0x80) { /* overflow to infinity anyway */ *f = 0x7f800000 | sign; *extra = 0; mpfr_clear(significand); return; } if (exp <= -0x7e || mpfr_zero_p(x)) exp = -0x7d; /* denormalise */ expfield = exp + 0x7d; /* offset to cancel leading mantissa bit */ mpfr_div_2si(significand, x, exp - 24, GMP_RNDN); mpfr_abs(significand, significand, GMP_RNDN); mantfield = mpfr_get_ui(significand, GMP_RNDZ); *f = sign + ((uint64_t)expfield << 23) + mantfield; mpfr_sub_ui(significand, significand, mantfield, GMP_RNDN); mpfr_mul_2ui(significand, significand, 32, GMP_RNDN); mantfield = mpfr_get_ui(significand, GMP_RNDZ); *extra = mantfield; mpfr_clear(significand); } static void get_mpc_d(const mpc_t z, uint32 *rh, uint32 *rl, uint32 *rextra, uint32 *ih, uint32 *il, uint32 *iextra) { mpfr_t x, y; mpfr_init2(x, MPFR_PREC); mpfr_init2(y, MPFR_PREC); mpc_real(x, z, GMP_RNDN); mpc_imag(y, z, GMP_RNDN); get_mpfr_d(x, rh, rl, rextra); get_mpfr_d(y, ih, il, iextra); mpfr_clear(x); mpfr_clear(y); } static void get_mpc_f(const mpc_t z, uint32 *r, uint32 *rextra, uint32 *i, uint32 *iextra) { mpfr_t x, y; mpfr_init2(x, MPFR_PREC); mpfr_init2(y, MPFR_PREC); mpc_real(x, z, GMP_RNDN); mpc_imag(y, z, GMP_RNDN); get_mpfr_f(x, r, rextra); get_mpfr_f(y, i, iextra); mpfr_clear(x); mpfr_clear(y); } /* * Implementation of mathlib functions that aren't trivially * implementable using an existing mpfr or mpc function. */ int test_rred(mpfr_t ret, const mpfr_t x, int *quadrant) { mpfr_t halfpi; long quo; int status; /* * In the worst case of range reduction, we get an input of size * around 2^1024, and must find its remainder mod pi, which means * we need 1024 bits of pi at least. Plus, the remainder might * happen to come out very very small if we're unlucky. How * unlucky can we be? Well, conveniently, I once went through and * actually worked that out using Paxson's modular minimisation * algorithm, and it turns out that the smallest exponent you can * get out of a nontrivial[1] double precision range reduction is * 0x3c2, i.e. of the order of 2^-61. So we need 1024 bits of pi * to get us down to the units digit, another 61 or so bits (say * 64) to get down to the highest set bit of the output, and then * some bits to make the actual mantissa big enough. * * [1] of course the output of range reduction can have an * arbitrarily small exponent in the trivial case, where the * input is so small that it's the identity function. That * doesn't count. */ mpfr_init2(halfpi, MPFR_PREC + 1024 + 64); mpfr_const_pi(halfpi, GMP_RNDN); mpfr_div_ui(halfpi, halfpi, 2, GMP_RNDN); status = mpfr_remquo(ret, &quo, x, halfpi, GMP_RNDN); *quadrant = quo & 3; mpfr_clear(halfpi); return status; } int test_lgamma(mpfr_t ret, const mpfr_t x, mpfr_rnd_t rnd) { /* * mpfr_lgamma takes an extra int * parameter to hold the output * sign. We don't bother testing that, so this wrapper throws away * the sign and hence fits into the same function prototype as all * the other real->real mpfr functions. * * There is also mpfr_lngamma which has no sign output and hence * has the right prototype already, but unfortunately it returns * NaN in cases where gamma(x) < 0, so it's no use to us. */ int sign; return mpfr_lgamma(ret, &sign, x, rnd); } int test_cpow(mpc_t ret, const mpc_t x, const mpc_t y, mpc_rnd_t rnd) { /* * For complex pow, we must bump up the precision by a huge amount * if we want it to get the really difficult cases right. 
(Not * that we expect the library under test to be getting those cases * right itself, but we'd at least like the test suite to report * them as wrong for the _right reason_.) * * This works around a bug in mpc_pow(), fixed by r1455 in the MPC * svn repository (2014-10-14) and expected to be in any MPC * release after 1.0.2 (which was the latest release already made * at the time of the fix). So as and when we update to an MPC * with the fix in it, we could remove this workaround. * * For the reasons for choosing this amount of extra precision, * see analysis in complex/cpownotes.txt for the rationale for the * amount. */ mpc_t xbig, ybig, retbig; int status; mpc_init2(xbig, 1034 + 53 + 60 + MPFR_PREC); mpc_init2(ybig, 1034 + 53 + 60 + MPFR_PREC); mpc_init2(retbig, 1034 + 53 + 60 + MPFR_PREC); mpc_set(xbig, x, MPC_RNDNN); mpc_set(ybig, y, MPC_RNDNN); status = mpc_pow(retbig, xbig, ybig, rnd); mpc_set(ret, retbig, rnd); mpc_clear(xbig); mpc_clear(ybig); mpc_clear(retbig); return status; } /* * Identify 'hard' values (NaN, Inf, nonzero denormal) for deciding * whether microlib will decline to run a test. */ #define is_shard(in) ( \ (((in)[0] & 0x7F800000) == 0x7F800000 || \ (((in)[0] & 0x7F800000) == 0 && ((in)[0]&0x7FFFFFFF) != 0))) #define is_dhard(in) ( \ (((in)[0] & 0x7FF00000) == 0x7FF00000 || \ (((in)[0] & 0x7FF00000) == 0 && (((in)[0] & 0xFFFFF) | (in)[1]) != 0))) /* * Identify integers. */ int is_dinteger(uint32 *in) { uint32 out[3]; if ((0x7FF00000 & ~in[0]) == 0) return 0; /* not finite, hence not integer */ test_ceil(in, out); return in[0] == out[0] && in[1] == out[1]; } int is_sinteger(uint32 *in) { uint32 out[3]; if ((0x7F800000 & ~in[0]) == 0) return 0; /* not finite, hence not integer */ test_ceilf(in, out); return in[0] == out[0]; } /* * Identify signalling NaNs. */ int is_dsnan(const uint32 *in) { if ((in[0] & 0x7FF00000) != 0x7FF00000) return 0; /* not the inf/nan exponent */ if ((in[0] << 12) == 0 && in[1] == 0) return 0; /* inf */ if (in[0] & 0x00080000) return 0; /* qnan */ return 1; } int is_ssnan(const uint32 *in) { if ((in[0] & 0x7F800000) != 0x7F800000) return 0; /* not the inf/nan exponent */ if ((in[0] << 9) == 0) return 0; /* inf */ if (in[0] & 0x00400000) return 0; /* qnan */ return 1; } int is_snan(const uint32 *in, int size) { return size == 2 ? is_dsnan(in) : is_ssnan(in); } /* * Wrapper functions called to fix up unusual results after the main * test function has run. */ void universal_wrapper(wrapperctx *ctx) { /* * Any SNaN input gives rise to a QNaN output. */ int op; for (op = 0; op < wrapper_get_nops(ctx); op++) { int size = wrapper_get_size(ctx, op); if (!wrapper_is_complex(ctx, op) && is_snan(wrapper_get_ieee(ctx, op), size)) { wrapper_set_nan(ctx); } } } +/* clang-format off */ Testable functions[] = { /* * Trig functions: sin, cos, tan. We test the core function * between -16 and +16: we assume that range reduction exists * and will be used for larger arguments, and we'll test that * separately. Also we only go down to 2^-27 in magnitude, * because below that sin(x)=tan(x)=x and cos(x)=1 as far as * double precision can tell, which is boring. 
*/ {"sin", (funcptr)mpfr_sin, args1, {NULL}, cases_uniform, 0x3e400000, 0x40300000}, {"sinf", (funcptr)mpfr_sin, args1f, {NULL}, cases_uniform_float, 0x39800000, 0x41800000}, {"cos", (funcptr)mpfr_cos, args1, {NULL}, cases_uniform, 0x3e400000, 0x40300000}, {"cosf", (funcptr)mpfr_cos, args1f, {NULL}, cases_uniform_float, 0x39800000, 0x41800000}, {"tan", (funcptr)mpfr_tan, args1, {NULL}, cases_uniform, 0x3e400000, 0x40300000}, {"tanf", (funcptr)mpfr_tan, args1f, {NULL}, cases_uniform_float, 0x39800000, 0x41800000}, {"sincosf_sinf", (funcptr)mpfr_sin, args1f, {NULL}, cases_uniform_float, 0x39800000, 0x41800000}, {"sincosf_cosf", (funcptr)mpfr_cos, args1f, {NULL}, cases_uniform_float, 0x39800000, 0x41800000}, + {"sinpi", (funcptr)mpfr_sinpi, args1, {NULL}, + cases_uniform, 0x3e400000, 0x40300000}, + {"sinpif", (funcptr)mpfr_sinpi, args1f, {NULL}, + cases_uniform_float, 0x39800000, 0x41800000}, + {"cospi", (funcptr)mpfr_cospi, args1, {NULL}, + cases_uniform, 0x3e400000, 0x40300000}, + {"cospif", (funcptr)mpfr_cospi, args1f, {NULL}, + cases_uniform_float, 0x39800000, 0x41800000}, + {"tanpi", (funcptr)mpfr_tanpi, args1, {NULL}, + cases_uniform, 0x3e400000, 0x40300000}, + {"tanpif", (funcptr)mpfr_tanpi, args1f, {NULL}, + cases_uniform_float, 0x39800000, 0x41800000}, /* * Inverse trig: asin, acos. Between 1 and -1, of course. acos * goes down to 2^-54, asin to 2^-27. */ {"asin", (funcptr)mpfr_asin, args1, {NULL}, cases_uniform, 0x3e400000, 0x3fefffff}, {"asinf", (funcptr)mpfr_asin, args1f, {NULL}, cases_uniform_float, 0x39800000, 0x3f7fffff}, {"acos", (funcptr)mpfr_acos, args1, {NULL}, cases_uniform, 0x3c900000, 0x3fefffff}, {"acosf", (funcptr)mpfr_acos, args1f, {NULL}, cases_uniform_float, 0x33800000, 0x3f7fffff}, /* * Inverse trig: atan. atan is stable (in double prec) with * argument magnitude past 2^53, so we'll test up to there. * atan(x) is boringly just x below 2^-27. */ {"atan", (funcptr)mpfr_atan, args1, {NULL}, cases_uniform, 0x3e400000, 0x43400000}, {"atanf", (funcptr)mpfr_atan, args1f, {NULL}, cases_uniform_float, 0x39800000, 0x4b800000}, /* * atan2. Interesting cases arise when the exponents of the * arguments differ by at most about 50. */ {"atan2", (funcptr)mpfr_atan2, args2, {NULL}, atan2_cases, 0}, {"atan2f", (funcptr)mpfr_atan2, args2f, {NULL}, atan2_cases_float, 0}, /* * The exponentials: exp, sinh, cosh. They overflow at around * 710. exp and sinh are boring below 2^-54, cosh below 2^-27. */ {"exp", (funcptr)mpfr_exp, args1, {NULL}, cases_uniform, 0x3c900000, 0x40878000}, {"expf", (funcptr)mpfr_exp, args1f, {NULL}, cases_uniform_float, 0x33800000, 0x42dc0000}, {"sinh", (funcptr)mpfr_sinh, args1, {NULL}, cases_uniform, 0x3c900000, 0x40878000}, {"sinhf", (funcptr)mpfr_sinh, args1f, {NULL}, cases_uniform_float, 0x33800000, 0x42dc0000}, {"cosh", (funcptr)mpfr_cosh, args1, {NULL}, cases_uniform, 0x3e400000, 0x40878000}, {"coshf", (funcptr)mpfr_cosh, args1f, {NULL}, cases_uniform_float, 0x39800000, 0x42dc0000}, /* * tanh is stable past around 20. It's boring below 2^-27. */ {"tanh", (funcptr)mpfr_tanh, args1, {NULL}, cases_uniform, 0x3e400000, 0x40340000}, {"tanhf", (funcptr)mpfr_tanh, args1f, {NULL}, cases_uniform, 0x39800000, 0x41100000}, /* * log must be tested only on positive numbers, but can cover * the whole range of positive nonzero finite numbers. It never * gets boring. 
*/ {"log", (funcptr)mpfr_log, args1, {NULL}, log_cases, 0}, {"logf", (funcptr)mpfr_log, args1f, {NULL}, log_cases_float, 0}, {"log10", (funcptr)mpfr_log10, args1, {NULL}, log_cases, 0}, {"log10f", (funcptr)mpfr_log10, args1f, {NULL}, log_cases_float, 0}, /* * pow. */ {"pow", (funcptr)mpfr_pow, args2, {NULL}, pow_cases, 0}, {"powf", (funcptr)mpfr_pow, args2f, {NULL}, pow_cases_float, 0}, /* * Trig range reduction. We are able to test this for all * finite values, but will only bother for things between 2^-3 * and 2^+52. */ {"rred", (funcptr)test_rred, rred, {NULL}, rred_cases, 0}, {"rredf", (funcptr)test_rred, rredf, {NULL}, rred_cases_float, 0}, /* * Square and cube root. */ {"sqrt", (funcptr)mpfr_sqrt, args1, {NULL}, log_cases, 0}, {"sqrtf", (funcptr)mpfr_sqrt, args1f, {NULL}, log_cases_float, 0}, {"cbrt", (funcptr)mpfr_cbrt, args1, {NULL}, log_cases, 0}, {"cbrtf", (funcptr)mpfr_cbrt, args1f, {NULL}, log_cases_float, 0}, {"hypot", (funcptr)mpfr_hypot, args2, {NULL}, atan2_cases, 0}, {"hypotf", (funcptr)mpfr_hypot, args2f, {NULL}, atan2_cases_float, 0}, /* * Seminumerical functions. */ {"ceil", (funcptr)test_ceil, semi1, {NULL}, cases_semi1}, {"ceilf", (funcptr)test_ceilf, semi1f, {NULL}, cases_semi1_float}, {"floor", (funcptr)test_floor, semi1, {NULL}, cases_semi1}, {"floorf", (funcptr)test_floorf, semi1f, {NULL}, cases_semi1_float}, {"fmod", (funcptr)test_fmod, semi2, {NULL}, cases_semi2}, {"fmodf", (funcptr)test_fmodf, semi2f, {NULL}, cases_semi2_float}, {"ldexp", (funcptr)test_ldexp, t_ldexp, {NULL}, cases_ldexp}, {"ldexpf", (funcptr)test_ldexpf, t_ldexpf, {NULL}, cases_ldexp_float}, {"frexp", (funcptr)test_frexp, t_frexp, {NULL}, cases_semi1}, {"frexpf", (funcptr)test_frexpf, t_frexpf, {NULL}, cases_semi1_float}, {"modf", (funcptr)test_modf, t_modf, {NULL}, cases_semi1}, {"modff", (funcptr)test_modff, t_modff, {NULL}, cases_semi1_float}, /* * Classification and more semi-numericals */ {"copysign", (funcptr)test_copysign, semi2, {NULL}, cases_semi2}, {"copysignf", (funcptr)test_copysignf, semi2f, {NULL}, cases_semi2_float}, {"isfinite", (funcptr)test_isfinite, classify, {NULL}, cases_uniform, 0, 0x7fffffff}, {"isfinitef", (funcptr)test_isfinitef, classifyf, {NULL}, cases_uniform_float, 0, 0x7fffffff}, {"isinf", (funcptr)test_isinf, classify, {NULL}, cases_uniform, 0, 0x7fffffff}, {"isinff", (funcptr)test_isinff, classifyf, {NULL}, cases_uniform_float, 0, 0x7fffffff}, {"isnan", (funcptr)test_isnan, classify, {NULL}, cases_uniform, 0, 0x7fffffff}, {"isnanf", (funcptr)test_isnanf, classifyf, {NULL}, cases_uniform_float, 0, 0x7fffffff}, {"isnormal", (funcptr)test_isnormal, classify, {NULL}, cases_uniform, 0, 0x7fffffff}, {"isnormalf", (funcptr)test_isnormalf, classifyf, {NULL}, cases_uniform_float, 0, 0x7fffffff}, {"signbit", (funcptr)test_signbit, classify, {NULL}, cases_uniform, 0, 0x7fffffff}, {"signbitf", (funcptr)test_signbitf, classifyf, {NULL}, cases_uniform_float, 0, 0x7fffffff}, {"fpclassify", (funcptr)test_fpclassify, classify, {NULL}, cases_uniform, 0, 0x7fffffff}, {"fpclassifyf", (funcptr)test_fpclassifyf, classifyf, {NULL}, cases_uniform_float, 0, 0x7fffffff}, /* * Comparisons */ {"isgreater", (funcptr)test_isgreater, compare, {NULL}, cases_uniform, 0, 0x7fffffff}, {"isgreaterequal", (funcptr)test_isgreaterequal, compare, {NULL}, cases_uniform, 0, 0x7fffffff}, {"isless", (funcptr)test_isless, compare, {NULL}, cases_uniform, 0, 0x7fffffff}, {"islessequal", (funcptr)test_islessequal, compare, {NULL}, cases_uniform, 0, 0x7fffffff}, {"islessgreater", (funcptr)test_islessgreater, 
compare, {NULL}, cases_uniform, 0, 0x7fffffff}, {"isunordered", (funcptr)test_isunordered, compare, {NULL}, cases_uniform, 0, 0x7fffffff}, {"isgreaterf", (funcptr)test_isgreaterf, comparef, {NULL}, cases_uniform_float, 0, 0x7fffffff}, {"isgreaterequalf", (funcptr)test_isgreaterequalf, comparef, {NULL}, cases_uniform_float, 0, 0x7fffffff}, {"islessf", (funcptr)test_islessf, comparef, {NULL}, cases_uniform_float, 0, 0x7fffffff}, {"islessequalf", (funcptr)test_islessequalf, comparef, {NULL}, cases_uniform_float, 0, 0x7fffffff}, {"islessgreaterf", (funcptr)test_islessgreaterf, comparef, {NULL}, cases_uniform_float, 0, 0x7fffffff}, {"isunorderedf", (funcptr)test_isunorderedf, comparef, {NULL}, cases_uniform_float, 0, 0x7fffffff}, /* * Inverse Hyperbolic functions */ {"atanh", (funcptr)mpfr_atanh, args1, {NULL}, cases_uniform, 0x3e400000, 0x3fefffff}, {"asinh", (funcptr)mpfr_asinh, args1, {NULL}, cases_uniform, 0x3e400000, 0x3fefffff}, {"acosh", (funcptr)mpfr_acosh, args1, {NULL}, cases_uniform_positive, 0x3ff00000, 0x7fefffff}, {"atanhf", (funcptr)mpfr_atanh, args1f, {NULL}, cases_uniform_float, 0x32000000, 0x3f7fffff}, {"asinhf", (funcptr)mpfr_asinh, args1f, {NULL}, cases_uniform_float, 0x32000000, 0x3f7fffff}, {"acoshf", (funcptr)mpfr_acosh, args1f, {NULL}, cases_uniform_float_positive, 0x3f800000, 0x7f800000}, /* * Everything else (sitting in a section down here at the bottom * because historically they were not tested because we didn't * have reference implementations for them) */ {"csin", (funcptr)mpc_sin, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"csinf", (funcptr)mpc_sin, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"ccos", (funcptr)mpc_cos, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"ccosf", (funcptr)mpc_cos, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"ctan", (funcptr)mpc_tan, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"ctanf", (funcptr)mpc_tan, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"casin", (funcptr)mpc_asin, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"casinf", (funcptr)mpc_asin, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"cacos", (funcptr)mpc_acos, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"cacosf", (funcptr)mpc_acos, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"catan", (funcptr)mpc_atan, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"catanf", (funcptr)mpc_atan, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"csinh", (funcptr)mpc_sinh, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"csinhf", (funcptr)mpc_sinh, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"ccosh", (funcptr)mpc_cosh, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"ccoshf", (funcptr)mpc_cosh, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"ctanh", (funcptr)mpc_tanh, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"ctanhf", (funcptr)mpc_tanh, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"casinh", (funcptr)mpc_asinh, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"casinhf", (funcptr)mpc_asinh, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"cacosh", (funcptr)mpc_acosh, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"cacoshf", 
(funcptr)mpc_acosh, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"catanh", (funcptr)mpc_atanh, args1c, {NULL}, complex_cases_uniform, 0x3f000000, 0x40300000}, {"catanhf", (funcptr)mpc_atanh, args1fc, {NULL}, complex_cases_uniform_float, 0x38000000, 0x41800000}, {"cexp", (funcptr)mpc_exp, args1c, {NULL}, complex_cases_uniform, 0x3c900000, 0x40862000}, {"cpow", (funcptr)test_cpow, args2c, {NULL}, complex_pow_cases, 0x3fc00000, 0x40000000}, {"clog", (funcptr)mpc_log, args1c, {NULL}, complex_log_cases, 0, 0}, {"csqrt", (funcptr)mpc_sqrt, args1c, {NULL}, complex_log_cases, 0, 0}, {"cexpf", (funcptr)mpc_exp, args1fc, {NULL}, complex_cases_uniform_float, 0x24800000, 0x42b00000}, {"cpowf", (funcptr)test_cpow, args2fc, {NULL}, complex_pow_cases_float, 0x3e000000, 0x41000000}, {"clogf", (funcptr)mpc_log, args1fc, {NULL}, complex_log_cases_float, 0, 0}, {"csqrtf", (funcptr)mpc_sqrt, args1fc, {NULL}, complex_log_cases_float, 0, 0}, {"cdiv", (funcptr)mpc_div, args2c, {NULL}, complex_arithmetic_cases, 0, 0}, {"cmul", (funcptr)mpc_mul, args2c, {NULL}, complex_arithmetic_cases, 0, 0}, {"cadd", (funcptr)mpc_add, args2c, {NULL}, complex_arithmetic_cases, 0, 0}, {"csub", (funcptr)mpc_sub, args2c, {NULL}, complex_arithmetic_cases, 0, 0}, {"cdivf", (funcptr)mpc_div, args2fc, {NULL}, complex_arithmetic_cases_float, 0, 0}, {"cmulf", (funcptr)mpc_mul, args2fc, {NULL}, complex_arithmetic_cases_float, 0, 0}, {"caddf", (funcptr)mpc_add, args2fc, {NULL}, complex_arithmetic_cases_float, 0, 0}, {"csubf", (funcptr)mpc_sub, args2fc, {NULL}, complex_arithmetic_cases_float, 0, 0}, {"cabsf", (funcptr)mpc_abs, args1fcr, {NULL}, complex_arithmetic_cases_float, 0, 0}, {"cabs", (funcptr)mpc_abs, args1cr, {NULL}, complex_arithmetic_cases, 0, 0}, {"cargf", (funcptr)mpc_arg, args1fcr, {NULL}, complex_arithmetic_cases_float, 0, 0}, {"carg", (funcptr)mpc_arg, args1cr, {NULL}, complex_arithmetic_cases, 0, 0}, {"cimagf", (funcptr)mpc_imag, args1fcr, {NULL}, complex_arithmetic_cases_float, 0, 0}, {"cimag", (funcptr)mpc_imag, args1cr, {NULL}, complex_arithmetic_cases, 0, 0}, {"conjf", (funcptr)mpc_conj, args1fc, {NULL}, complex_arithmetic_cases_float, 0, 0}, {"conj", (funcptr)mpc_conj, args1c, {NULL}, complex_arithmetic_cases, 0, 0}, {"cprojf", (funcptr)mpc_proj, args1fc, {NULL}, complex_arithmetic_cases_float, 0, 0}, {"cproj", (funcptr)mpc_proj, args1c, {NULL}, complex_arithmetic_cases, 0, 0}, {"crealf", (funcptr)mpc_real, args1fcr, {NULL}, complex_arithmetic_cases_float, 0, 0}, {"creal", (funcptr)mpc_real, args1cr, {NULL}, complex_arithmetic_cases, 0, 0}, {"erfcf", (funcptr)mpfr_erfc, args1f, {NULL}, cases_biased_float, 0x1e800000, 0x41000000}, {"erfc", (funcptr)mpfr_erfc, args1, {NULL}, cases_biased, 0x3bd00000, 0x403c0000}, {"erff", (funcptr)mpfr_erf, args1f, {NULL}, cases_biased_float, 0x03800000, 0x40700000}, {"erf", (funcptr)mpfr_erf, args1, {NULL}, cases_biased, 0x00800000, 0x40200000}, {"exp2f", (funcptr)mpfr_exp2, args1f, {NULL}, cases_uniform_float, 0x33800000, 0x43c00000}, {"exp2", (funcptr)mpfr_exp2, args1, {NULL}, cases_uniform, 0x3ca00000, 0x40a00000}, {"expm1f", (funcptr)mpfr_expm1, args1f, {NULL}, cases_uniform_float, 0x33000000, 0x43800000}, {"expm1", (funcptr)mpfr_expm1, args1, {NULL}, cases_uniform, 0x3c900000, 0x409c0000}, {"fmaxf", (funcptr)mpfr_max, args2f, {NULL}, minmax_cases_float, 0, 0x7f7fffff}, {"fmax", (funcptr)mpfr_max, args2, {NULL}, minmax_cases, 0, 0x7fefffff}, {"fminf", (funcptr)mpfr_min, args2f, {NULL}, minmax_cases_float, 0, 0x7f7fffff}, {"fmin", (funcptr)mpfr_min, args2, 
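/*
 * Each initializer in this table pairs a name with a reference
 * implementation, an argument-layout tag (args1, args2f, ...), a
 * wrapper list (usually {NULL}), a random-case generator, and the
 * two bit-pattern bounds passed to that generator. The Testable
 * declaration itself lives in a header outside this hunk; a
 * hypothetical sketch of its shape, inferred from the initializers
 * and from the fn->name/func/type/wrappers uses in docase() below:
 *
 *   typedef struct {
 *       const char *name;      // e.g. "fmin"
 *       funcptr func;          // mpfr/mpc/local reference
 *       int type;              // layout tag such as args2
 *       wrapper_fn wrappers[MAXWRAPPERS];  // often just {NULL}
 *       void (*cases)(uint32 *out, uint32 lo, uint32 hi);
 *       uint32 param1, param2; // generator bounds (bit patterns)
 *   } Testable_sketch;
 *
 * so a driver (not shown in this hunk) can generate inputs with
 * fn->cases(args, fn->param1, fn->param2) and hand them to
 * docase(fn, args).
 */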
{NULL}, minmax_cases, 0, 0x7fefffff}, {"lgammaf", (funcptr)test_lgamma, args1f, {NULL}, cases_uniform_float, 0x01800000, 0x7f800000}, {"lgamma", (funcptr)test_lgamma, args1, {NULL}, cases_uniform, 0x00100000, 0x7ff00000}, {"log1pf", (funcptr)mpfr_log1p, args1f, {NULL}, log1p_cases_float, 0, 0}, {"log1p", (funcptr)mpfr_log1p, args1, {NULL}, log1p_cases, 0, 0}, {"log2f", (funcptr)mpfr_log2, args1f, {NULL}, log_cases_float, 0, 0}, {"log2", (funcptr)mpfr_log2, args1, {NULL}, log_cases, 0, 0}, {"tgammaf", (funcptr)mpfr_gamma, args1f, {NULL}, cases_uniform_float, 0x2f800000, 0x43000000}, {"tgamma", (funcptr)mpfr_gamma, args1, {NULL}, cases_uniform, 0x3c000000, 0x40800000}, }; +/* clang-format on */ const int nfunctions = ( sizeof(functions)/sizeof(*functions) ); #define random_sign ( random_upto(1) ? 0x80000000 : 0 ) static int iszero(uint32 *x) { return !((x[0] & 0x7FFFFFFF) || x[1]); } static void complex_log_cases(uint32 *out, uint32 param1, uint32 param2) { cases_uniform(out,0x00100000,0x7fefffff); cases_uniform(out+2,0x00100000,0x7fefffff); } static void complex_log_cases_float(uint32 *out, uint32 param1, uint32 param2) { cases_uniform_float(out,0x00800000,0x7f7fffff); cases_uniform_float(out+2,0x00800000,0x7f7fffff); } static void complex_cases_biased(uint32 *out, uint32 lowbound, uint32 highbound) { cases_biased(out,lowbound,highbound); cases_biased(out+2,lowbound,highbound); } static void complex_cases_biased_float(uint32 *out, uint32 lowbound, uint32 highbound) { cases_biased_float(out,lowbound,highbound); cases_biased_float(out+2,lowbound,highbound); } static void complex_cases_uniform(uint32 *out, uint32 lowbound, uint32 highbound) { cases_uniform(out,lowbound,highbound); cases_uniform(out+2,lowbound,highbound); } static void complex_cases_uniform_float(uint32 *out, uint32 lowbound, uint32 highbound) { cases_uniform_float(out,lowbound,highbound); cases_uniform(out+2,lowbound,highbound); } static void complex_pow_cases(uint32 *out, uint32 lowbound, uint32 highbound) { /* * Generating non-overflowing cases for complex pow: * * Our base has both parts within the range [1/2,2], and hence * its magnitude is within [1/2,2*sqrt(2)]. The magnitude of its * logarithm in base 2 is therefore at most the magnitude of * (log2(2*sqrt(2)) + i*pi/log(2)), or in other words * hypot(3/2,pi/log(2)) = 4.77. So the magnitude of the exponent * input must be at most our output magnitude limit (as a power * of two) divided by that. * * I also set the output magnitude limit a bit low, because we * don't guarantee (and neither does glibc) to prevent internal * overflow in cases where the output _magnitude_ overflows but * scaling it back down by cos and sin of the argument brings it * back in range. */ cases_uniform(out,0x3fe00000, 0x40000000); cases_uniform(out+2,0x3fe00000, 0x40000000); cases_uniform(out+4,0x3f800000, 0x40600000); cases_uniform(out+6,0x3f800000, 0x40600000); } static void complex_pow_cases_float(uint32 *out, uint32 lowbound, uint32 highbound) { /* * Reasoning as above, though of course the detailed numbers are * all different. 
*/ cases_uniform_float(out,0x3f000000, 0x40000000); cases_uniform_float(out+2,0x3f000000, 0x40000000); cases_uniform_float(out+4,0x3d600000, 0x41900000); cases_uniform_float(out+6,0x3d600000, 0x41900000); } static void complex_arithmetic_cases(uint32 *out, uint32 lowbound, uint32 highbound) { cases_uniform(out,0,0x7fefffff); cases_uniform(out+2,0,0x7fefffff); cases_uniform(out+4,0,0x7fefffff); cases_uniform(out+6,0,0x7fefffff); } static void complex_arithmetic_cases_float(uint32 *out, uint32 lowbound, uint32 highbound) { cases_uniform_float(out,0,0x7f7fffff); cases_uniform_float(out+2,0,0x7f7fffff); cases_uniform_float(out+4,0,0x7f7fffff); cases_uniform_float(out+6,0,0x7f7fffff); } /* * Included from fplib test suite, in a compact self-contained * form. */ void float32_case(uint32 *ret) { int n, bits; uint32 f; static int premax, preptr; static uint32 *specifics = NULL; if (!ret) { if (specifics) free(specifics); specifics = NULL; premax = preptr = 0; return; } if (!specifics) { int exps[] = { -127, -126, -125, -24, -4, -3, -2, -1, 0, 1, 2, 3, 4, 24, 29, 30, 31, 32, 61, 62, 63, 64, 126, 127, 128 }; int sign, eptr; uint32 se, j; /* * We want a cross product of: * - each of two sign bits (2) * - each of the above (unbiased) exponents (25) * - the following list of fraction parts: * * zero (1) * * all bits (1) * * one-bit-set (23) * * one-bit-clear (23) * * one-bit-and-above (20: 3 are duplicates) * * one-bit-and-below (20: 3 are duplicates) * (total 88) * (total 4400) */ specifics = malloc(4400 * sizeof(*specifics)); preptr = 0; for (sign = 0; sign <= 1; sign++) { for (eptr = 0; eptr < sizeof(exps)/sizeof(*exps); eptr++) { se = (sign ? 0x80000000 : 0) | ((exps[eptr]+127) << 23); /* * Zero. */ specifics[preptr++] = se | 0; /* * All bits. */ specifics[preptr++] = se | 0x7FFFFF; /* * One-bit-set. */ for (j = 1; j && j <= 0x400000; j <<= 1) specifics[preptr++] = se | j; /* * One-bit-clear. */ for (j = 1; j && j <= 0x400000; j <<= 1) specifics[preptr++] = se | (0x7FFFFF ^ j); /* * One-bit-and-everything-below. */ for (j = 2; j && j <= 0x100000; j <<= 1) specifics[preptr++] = se | (2*j-1); /* * One-bit-and-everything-above. */ for (j = 4; j && j <= 0x200000; j <<= 1) specifics[preptr++] = se | (0x7FFFFF ^ (j-1)); /* * Done. */ } } assert(preptr == 4400); premax = preptr; } /* * Decide whether to return a pre or a random case. */ n = random32() % (premax+1); if (n < preptr) { /* * Return pre[n]. */ uint32 t; t = specifics[n]; specifics[n] = specifics[preptr-1]; specifics[preptr-1] = t; /* (not really needed) */ preptr--; *ret = t; } else { /* * Random case. * Sign and exponent: * - FIXME * Significand: * - with prob 1/5, a totally random bit pattern * - with prob 1/5, all 1s down to some point and then random * - with prob 1/5, all 1s up to some point and then random * - with prob 1/5, all 0s down to some point and then random * - with prob 1/5, all 0s up to some point and then random */ n = random32() % 5; f = random32(); /* some random bits */ bits = random32() % 22 + 1; /* 1-22 */ switch (n) { case 0: break; /* leave f alone */ case 1: f |= (1 << bits) - 1; break; case 2: f |= ~((1 << bits) - 1); break; case 3: f &= ~((1 << bits) - 1); break; case 4: f &= (1 << bits) - 1; break; } *ret = f; } } static void pow_cases(uint32 *out, uint32 param1, uint32 param2) { /* * Pick an exponent e (-0x33 to +0x7FE) for x, and here's the * range of numbers we can use as y: * * For e < 0x3FE, the range is [-0x400/(0x3FE-e),+0x432/(0x3FE-e)] * For e > 0x3FF, the range is [-0x432/(e-0x3FF),+0x400/(e-0x3FF)] * * For e == 0x3FE or e == 0x3FF, the range gets infinite at one * end or the other, so we have to be cleverer: pick a number n * of useful bits in the mantissa (1 thru 52, so 1 must imply * 0x3ff00000.00000001 whereas 52 is anything at least as big * as 0x3ff80000.00000000; for e == 0x3fe, 1 necessarily means * 0x3fefffff.ffffffff and 52 is anything at most as big as * 0x3fe80000.00000000).
Then, as it happens, a sensible * maximum power is 2^(63-n) for e == 0x3fe, and 2^(62-n) for * e == 0x3ff. * * We inevitably get some overflows in approximating the log * curves by these nasty step functions, but that's all right - * we do want _some_ overflows to be tested. * * Having got that, then, it's just a matter of inventing a * probability distribution for all of this. */ int e, n; uint32 dmin, dmax; const uint32 pmin = 0x3e100000; /* * Generate exponents in a slightly biased fashion. */ e = (random_upto(1) ? /* is exponent small or big? */ 0x3FE - random_upto_biased(0x431,2) : /* small */ 0x3FF + random_upto_biased(0x3FF,2)); /* big */ /* * Now split into cases. */ if (e < 0x3FE || e > 0x3FF) { uint32 imin, imax; if (e < 0x3FE) imin = 0x40000 / (0x3FE - e), imax = 0x43200 / (0x3FE - e); else imin = 0x43200 / (e - 0x3FF), imax = 0x40000 / (e - 0x3FF); /* Power range runs from -imin to imax. Now convert to doubles */ dmin = doubletop(imin, -8); dmax = doubletop(imax, -8); /* Compute the number of mantissa bits. */ n = (e > 0 ? 53 : 52+e); } else { /* Critical exponents. Generate a top bit index. */ n = 52 - random_upto_biased(51, 4); if (e == 0x3FE) dmax = 63 - n; else dmax = 62 - n; dmax = (dmax << 20) + 0x3FF00000; dmin = dmax; } /* Generate a mantissa. */ if (n <= 32) { out[0] = 0; out[1] = random_upto((1 << (n-1)) - 1) + (1 << (n-1)); } else if (n == 33) { out[0] = 1; out[1] = random_upto(0xFFFFFFFF); } else if (n > 33) { out[0] = random_upto((1 << (n-33)) - 1) + (1 << (n-33)); out[1] = random_upto(0xFFFFFFFF); } /* Negate the mantissa if e == 0x3FE. */ if (e == 0x3FE) { out[1] = -out[1]; out[0] = -out[0]; if (out[1]) out[0]--; } /* Put the exponent on. */ out[0] &= 0xFFFFF; out[0] |= ((e > 0 ? e : 0) << 20); /* Generate a power. Powers don't go below 2^-30. */ if (random_upto(1)) { /* Positive power */ out[2] = dmax - random_upto_biased(dmax-pmin, 10); } else { /* Negative power */ out[2] = (dmin - random_upto_biased(dmin-pmin, 10)) | 0x80000000; } out[3] = random_upto(0xFFFFFFFF); } static void pow_cases_float(uint32 *out, uint32 param1, uint32 param2) { /* * Pick an exponent e (-0x16 to +0xFE) for x, and here's the * range of numbers we can use as y: * * For e < 0x7E, the range is [-0x80/(0x7E-e),+0x95/(0x7E-e)] * For e > 0x7F, the range is [-0x95/(e-0x7F),+0x80/(e-0x7F)] * * For e == 0x7E or e == 0x7F, the range gets infinite at one * end or the other, so we have to be cleverer: pick a number n * of useful bits in the mantissa (1 thru 23, so 1 must imply * 0x3f800001 whereas 23 is anything at least as big as * 0x3fc00000; for e == 0x7e, 1 necessarily means 0x3f7fffff * and 23 is anything at most as big as 0x3f400000). Then, as * it happens, a sensible maximum power is 2^(31-n) for e == * 0x7e, and 2^(30-n) for e == 0x7f. * * We inevitably get some overflows in approximating the log * curves by these nasty step functions, but that's all right - * we do want _some_ overflows to be tested. * * Having got that, then, it's just a matter of inventing a * probability distribution for all of this. */ int e, n; uint32 dmin, dmax; const uint32 pmin = 0x38000000; /* * Generate exponents in a slightly biased fashion. */ e = (random_upto(1) ? /* is exponent small or big? */ 0x7E - random_upto_biased(0x94,2) : /* small */ 0x7F + random_upto_biased(0x7f,2)); /* big */ /* * Now split into cases. 
*/ if (e < 0x7E || e > 0x7F) { uint32 imin, imax; if (e < 0x7E) imin = 0x8000 / (0x7e - e), imax = 0x9500 / (0x7e - e); else imin = 0x9500 / (e - 0x7f), imax = 0x8000 / (e - 0x7f); /* Power range runs from -imin to imax. Now convert to doubles */ dmin = floatval(imin, -8); dmax = floatval(imax, -8); /* Compute the number of mantissa bits. */ n = (e > 0 ? 24 : 23+e); } else { /* Critical exponents. Generate a top bit index. */ n = 23 - random_upto_biased(22, 4); if (e == 0x7E) dmax = 31 - n; else dmax = 30 - n; dmax = (dmax << 23) + 0x3F800000; dmin = dmax; } /* Generate a mantissa. */ out[0] = random_upto((1 << (n-1)) - 1) + (1 << (n-1)); out[1] = 0; /* Negate the mantissa if e == 0x7E. */ if (e == 0x7E) { out[0] = -out[0]; } /* Put the exponent on. */ out[0] &= 0x7FFFFF; out[0] |= ((e > 0 ? e : 0) << 23); /* Generate a power. Powers don't go below 2^-15. */ if (random_upto(1)) { /* Positive power */ out[2] = dmax - random_upto_biased(dmax-pmin, 10); } else { /* Negative power */ out[2] = (dmin - random_upto_biased(dmin-pmin, 10)) | 0x80000000; } out[3] = 0; } void vet_for_decline(Testable *fn, uint32 *args, uint32 *result, int got_errno_in) { int declined = 0; switch (fn->type) { case args1: case rred: case semi1: case t_frexp: case t_modf: case classify: case t_ldexp: declined |= lib_fo && is_dhard(args+0); break; case args1f: case rredf: case semi1f: case t_frexpf: case t_modff: case classifyf: declined |= lib_fo && is_shard(args+0); break; case args2: case semi2: case args1c: case args1cr: case compare: declined |= lib_fo && is_dhard(args+0); declined |= lib_fo && is_dhard(args+2); break; case args2f: case semi2f: case t_ldexpf: case comparef: case args1fc: case args1fcr: declined |= lib_fo && is_shard(args+0); declined |= lib_fo && is_shard(args+2); break; case args2c: declined |= lib_fo && is_dhard(args+0); declined |= lib_fo && is_dhard(args+2); declined |= lib_fo && is_dhard(args+4); declined |= lib_fo && is_dhard(args+6); break; case args2fc: declined |= lib_fo && is_shard(args+0); declined |= lib_fo && is_shard(args+2); declined |= lib_fo && is_shard(args+4); declined |= lib_fo && is_shard(args+6); break; } switch (fn->type) { case args1: /* return an extra-precise result */ case args2: case rred: case semi1: /* return a double result */ case semi2: case t_ldexp: case t_frexp: /* return double * int */ case args1cr: declined |= lib_fo && is_dhard(result); break; case args1f: case args2f: case rredf: case semi1f: case semi2f: case t_ldexpf: case args1fcr: declined |= lib_fo && is_shard(result); break; case t_modf: /* return double * double */ declined |= lib_fo && is_dhard(result+0); declined |= lib_fo && is_dhard(result+2); break; case t_modff: /* return float * float */ declined |= lib_fo && is_shard(result+2); /* fall through */ case t_frexpf: /* return float * int */ declined |= lib_fo && is_shard(result+0); break; case args1c: case args2c: declined |= lib_fo && is_dhard(result+0); declined |= lib_fo && is_dhard(result+4); break; case args1fc: case args2fc: declined |= lib_fo && is_shard(result+0); declined |= lib_fo && is_shard(result+4); break; } /* Expect basic arithmetic tests to be declined if the command * line said that would happen */ declined |= (lib_no_arith && (fn->func == (funcptr)mpc_add || fn->func == (funcptr)mpc_sub || fn->func == (funcptr)mpc_mul || fn->func == (funcptr)mpc_div)); if (!declined) { if (got_errno_in) ntests++; else ntests += 3; } } void docase(Testable *fn, uint32 *args) { uint32 result[8]; /* real part in first 4, imaginary part in last 4 */ 
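/*
 * docase() prints operands and results most significant word first,
 * as %08x.%08x for doubles; which array index holds the high word
 * is established at run time (cf. the `double' endianness
 * autodetection in mathtest.c's main() earlier in this diff). A
 * self-contained sketch of the convention, assuming 32-bit unsigned
 * as the harness does; print_bits is a hypothetical helper:
 *
 *   #include <stdio.h>
 *
 *   static void print_bits(double d)
 *   {
 *       union { double f; unsigned i[2]; } u, one;
 *       int msd;
 *       one.f = 1.0;                  // 0x3ff00000.00000000
 *       msd = (one.i[0] == 0);        // index of the high word
 *       u.f = d;
 *       printf("%08x.%08x\n", u.i[msd], u.i[!msd]);
 *   }
 *
 *   // print_bits(16.0) prints 40300000.00000000
 */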
char *errstr = NULL; mpfr_t a, b, r; mpc_t ac, bc, rc; int rejected, printextra; wrapperctx ctx; mpfr_init2(a, MPFR_PREC); mpfr_init2(b, MPFR_PREC); mpfr_init2(r, MPFR_PREC); mpc_init2(ac, MPFR_PREC); mpc_init2(bc, MPFR_PREC); mpc_init2(rc, MPFR_PREC); printf("func=%s", fn->name); rejected = 0; /* FIXME */ switch (fn->type) { case args1: case rred: case semi1: case t_frexp: case t_modf: case classify: printf(" op1=%08x.%08x", args[0], args[1]); break; case args1f: case rredf: case semi1f: case t_frexpf: case t_modff: case classifyf: printf(" op1=%08x", args[0]); break; case args2: case semi2: case compare: printf(" op1=%08x.%08x", args[0], args[1]); printf(" op2=%08x.%08x", args[2], args[3]); break; case args2f: case semi2f: case t_ldexpf: case comparef: printf(" op1=%08x", args[0]); printf(" op2=%08x", args[2]); break; case t_ldexp: printf(" op1=%08x.%08x", args[0], args[1]); printf(" op2=%08x", args[2]); break; case args1c: case args1cr: printf(" op1r=%08x.%08x", args[0], args[1]); printf(" op1i=%08x.%08x", args[2], args[3]); break; case args2c: printf(" op1r=%08x.%08x", args[0], args[1]); printf(" op1i=%08x.%08x", args[2], args[3]); printf(" op2r=%08x.%08x", args[4], args[5]); printf(" op2i=%08x.%08x", args[6], args[7]); break; case args1fc: case args1fcr: printf(" op1r=%08x", args[0]); printf(" op1i=%08x", args[2]); break; case args2fc: printf(" op1r=%08x", args[0]); printf(" op1i=%08x", args[2]); printf(" op2r=%08x", args[4]); printf(" op2i=%08x", args[6]); break; default: fprintf(stderr, "internal inconsistency?!\n"); abort(); } if (rejected == 2) { printf(" - test case rejected\n"); goto cleanup; } wrapper_init(&ctx); if (rejected == 0) { switch (fn->type) { case args1: set_mpfr_d(a, args[0], args[1]); wrapper_op_real(&ctx, a, 2, args); ((testfunc1)(fn->func))(r, a, GMP_RNDN); get_mpfr_d(r, &result[0], &result[1], &result[2]); wrapper_result_real(&ctx, r, 2, result); if (wrapper_run(&ctx, fn->wrappers)) get_mpfr_d(r, &result[0], &result[1], &result[2]); break; case args1cr: set_mpc_d(ac, args[0], args[1], args[2], args[3]); wrapper_op_complex(&ctx, ac, 2, args); ((testfunc1cr)(fn->func))(r, ac, GMP_RNDN); get_mpfr_d(r, &result[0], &result[1], &result[2]); wrapper_result_real(&ctx, r, 2, result); if (wrapper_run(&ctx, fn->wrappers)) get_mpfr_d(r, &result[0], &result[1], &result[2]); break; case args1f: set_mpfr_f(a, args[0]); wrapper_op_real(&ctx, a, 1, args); ((testfunc1)(fn->func))(r, a, GMP_RNDN); get_mpfr_f(r, &result[0], &result[1]); wrapper_result_real(&ctx, r, 1, result); if (wrapper_run(&ctx, fn->wrappers)) get_mpfr_f(r, &result[0], &result[1]); break; case args1fcr: set_mpc_f(ac, args[0], args[2]); wrapper_op_complex(&ctx, ac, 1, args); ((testfunc1cr)(fn->func))(r, ac, GMP_RNDN); get_mpfr_f(r, &result[0], &result[1]); wrapper_result_real(&ctx, r, 1, result); if (wrapper_run(&ctx, fn->wrappers)) get_mpfr_f(r, &result[0], &result[1]); break; case args2: set_mpfr_d(a, args[0], args[1]); wrapper_op_real(&ctx, a, 2, args); set_mpfr_d(b, args[2], args[3]); wrapper_op_real(&ctx, b, 2, args+2); ((testfunc2)(fn->func))(r, a, b, GMP_RNDN); get_mpfr_d(r, &result[0], &result[1], &result[2]); wrapper_result_real(&ctx, r, 2, result); if (wrapper_run(&ctx, fn->wrappers)) get_mpfr_d(r, &result[0], &result[1], &result[2]); break; case args2f: set_mpfr_f(a, args[0]); wrapper_op_real(&ctx, a, 1, args); set_mpfr_f(b, args[2]); wrapper_op_real(&ctx, b, 1, args+2); ((testfunc2)(fn->func))(r, a, b, GMP_RNDN); get_mpfr_f(r, &result[0], &result[1]); wrapper_result_real(&ctx, r, 1, result); if 
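/*
 * The wrapper_run() calls in this dispatch apply, among others, the
 * SNaN-to-QNaN rule of universal_wrapper() earlier in the file. A
 * standalone sketch of the single-precision signalling-NaN test it
 * relies on, mirroring the is_ssnan() checks above; is_snan_float
 * is a hypothetical name:
 *
 *   #include <stdint.h>
 *
 *   static int is_snan_float(uint32_t w)
 *   {
 *       if ((w & 0x7F800000) != 0x7F800000) return 0; // finite
 *       if ((w << 9) == 0)                  return 0; // infinity
 *       if (w & 0x00400000)                 return 0; // quiet NaN
 *       return 1;                                     // signalling
 *   }
 *
 *   // is_snan_float(0x7FA00000) == 1, is_snan_float(0x7FC00000) == 0
 */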
(wrapper_run(&ctx, fn->wrappers)) get_mpfr_f(r, &result[0], &result[1]); break; case rred: set_mpfr_d(a, args[0], args[1]); wrapper_op_real(&ctx, a, 2, args); ((testrred)(fn->func))(r, a, (int *)&result[3]); get_mpfr_d(r, &result[0], &result[1], &result[2]); wrapper_result_real(&ctx, r, 2, result); /* We never need to mess about with the integer auxiliary * output. */ if (wrapper_run(&ctx, fn->wrappers)) get_mpfr_d(r, &result[0], &result[1], &result[2]); break; case rredf: set_mpfr_f(a, args[0]); wrapper_op_real(&ctx, a, 1, args); ((testrred)(fn->func))(r, a, (int *)&result[3]); get_mpfr_f(r, &result[0], &result[1]); wrapper_result_real(&ctx, r, 1, result); /* We never need to mess about with the integer auxiliary * output. */ if (wrapper_run(&ctx, fn->wrappers)) get_mpfr_f(r, &result[0], &result[1]); break; case semi1: case semi1f: errstr = ((testsemi1)(fn->func))(args, result); break; case semi2: case compare: errstr = ((testsemi2)(fn->func))(args, args+2, result); break; case semi2f: case comparef: case t_ldexpf: errstr = ((testsemi2f)(fn->func))(args, args+2, result); break; case t_ldexp: errstr = ((testldexp)(fn->func))(args, args+2, result); break; case t_frexp: errstr = ((testfrexp)(fn->func))(args, result, result+2); break; case t_frexpf: errstr = ((testfrexp)(fn->func))(args, result, result+2); break; case t_modf: errstr = ((testmodf)(fn->func))(args, result, result+2); break; case t_modff: errstr = ((testmodf)(fn->func))(args, result, result+2); break; case classify: errstr = ((testclassify)(fn->func))(args, &result[0]); break; case classifyf: errstr = ((testclassifyf)(fn->func))(args, &result[0]); break; case args1c: set_mpc_d(ac, args[0], args[1], args[2], args[3]); wrapper_op_complex(&ctx, ac, 2, args); ((testfunc1c)(fn->func))(rc, ac, MPC_RNDNN); get_mpc_d(rc, &result[0], &result[1], &result[2], &result[4], &result[5], &result[6]); wrapper_result_complex(&ctx, rc, 2, result); if (wrapper_run(&ctx, fn->wrappers)) get_mpc_d(rc, &result[0], &result[1], &result[2], &result[4], &result[5], &result[6]); break; case args2c: set_mpc_d(ac, args[0], args[1], args[2], args[3]); wrapper_op_complex(&ctx, ac, 2, args); set_mpc_d(bc, args[4], args[5], args[6], args[7]); wrapper_op_complex(&ctx, bc, 2, args+4); ((testfunc2c)(fn->func))(rc, ac, bc, MPC_RNDNN); get_mpc_d(rc, &result[0], &result[1], &result[2], &result[4], &result[5], &result[6]); wrapper_result_complex(&ctx, rc, 2, result); if (wrapper_run(&ctx, fn->wrappers)) get_mpc_d(rc, &result[0], &result[1], &result[2], &result[4], &result[5], &result[6]); break; case args1fc: set_mpc_f(ac, args[0], args[2]); wrapper_op_complex(&ctx, ac, 1, args); ((testfunc1c)(fn->func))(rc, ac, MPC_RNDNN); get_mpc_f(rc, &result[0], &result[1], &result[4], &result[5]); wrapper_result_complex(&ctx, rc, 1, result); if (wrapper_run(&ctx, fn->wrappers)) get_mpc_f(rc, &result[0], &result[1], &result[4], &result[5]); break; case args2fc: set_mpc_f(ac, args[0], args[2]); wrapper_op_complex(&ctx, ac, 1, args); set_mpc_f(bc, args[4], args[6]); wrapper_op_complex(&ctx, bc, 1, args+4); ((testfunc2c)(fn->func))(rc, ac, bc, MPC_RNDNN); get_mpc_f(rc, &result[0], &result[1], &result[4], &result[5]); wrapper_result_complex(&ctx, rc, 1, result); if (wrapper_run(&ctx, fn->wrappers)) get_mpc_f(rc, &result[0], &result[1], &result[4], &result[5]); break; default: fprintf(stderr, "internal inconsistency?!\n"); abort(); } } switch (fn->type) { case args1: /* return an extra-precise result */ case args2: case args1cr: case rred: printextra = 1; if (rejected == 0) { errstr = 
NULL; if (!mpfr_zero_p(a)) { if ((result[0] & 0x7FFFFFFF) == 0 && result[1] == 0) { /* * If the output is +0 or -0 apart from the extra * precision in result[2], then there's a tricky * judgment call about what we require in the * output. If we output the extra bits and set * errstr="?underflow" then mathtest will tolerate * the function under test rounding down to zero * _or_ up to the minimum denormal; whereas if we * suppress the extra bits and set * errstr="underflow", then mathtest will enforce * that the function really does underflow to zero. * * But where to draw the line? It seems clear to * me that numbers along the lines of * 00000000.00000000.7ff should be treated * similarly to 00000000.00000000.801, but on the * other hand, we must surely be prepared to * enforce a genuine underflow-to-zero in _some_ * case where the true mathematical output is * nonzero but absurdly tiny. * * I think a reasonable place to draw the * distinction is at 00000000.00000000.400, i.e. * one quarter of the minimum positive denormal. * If a value less than that rounds up to the * minimum denormal, that must mean the function * under test has managed to make an error of an * entire factor of two, and that's something we * should fix. Above that, you can misround within * the limits of your accuracy bound if you have * to. */ if (result[2] < 0x40000000) { /* Total underflow (ERANGE + UFL) is required, * and we suppress the extra bits to make * mathtest enforce that the output is really * zero. */ errstr = "underflow"; printextra = 0; } else { /* Total underflow is not required, but if the * function rounds down to zero anyway, then * we should be prepared to tolerate it. */ errstr = "?underflow"; } } else if (!(result[0] & 0x7ff00000)) { /* * If the output is denormal, we usually expect a * UFL exception, warning the user of partial * underflow. The exception is if the denormal * being returned is just one of the input values, * unchanged even in principle. I bodgily handle * this by just special-casing the functions in * question below. */ if (!strcmp(fn->name, "fmax") || !strcmp(fn->name, "fmin") || !strcmp(fn->name, "creal") || !strcmp(fn->name, "cimag")) { /* no error expected */ } else { errstr = "u"; } } else if ((result[0] & 0x7FFFFFFF) > 0x7FEFFFFF) { /* * Infinite results are usually due to overflow, * but one exception is lgamma of a negative * integer. */ if (!strcmp(fn->name, "lgamma") && (args[0] & 0x80000000) != 0 && /* negative */ is_dinteger(args)) { errstr = "ERANGE status=z"; } else { errstr = "overflow"; } printextra = 0; } } else { /* lgamma(0) is also a pole. */ if (!strcmp(fn->name, "lgamma")) { errstr = "ERANGE status=z"; printextra = 0; } } } if (!printextra || (rejected && !(rejected==1 && result[2]!=0))) { printf(" result=%08x.%08x", result[0], result[1]); } else { printf(" result=%08x.%08x.%03x", result[0], result[1], (result[2] >> 20) & 0xFFF); } if (fn->type == rred) { printf(" res2=%08x", result[3]); } break; case args1f: case args2f: case args1fcr: case rredf: printextra = 1; if (rejected == 0) { errstr = NULL; if (!mpfr_zero_p(a)) { if ((result[0] & 0x7FFFFFFF) == 0) { /* * Decide whether to print the extra bits based on * just how close to zero the number is. See the * big comment in the double-precision case for * discussion. */ if (result[1] < 0x40000000) { errstr = "underflow"; printextra = 0; } else { errstr = "?underflow"; } } else if (!(result[0] & 0x7f800000)) { /* * Functions which do not report partial underflow * are listed here as special cases.
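* Such functions simply return one of their inputs unchanged, so producing a denormal involves no actual computation and raises no underflow exception.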
(See the * corresponding double case above for a fuller * comment.) */ if (!strcmp(fn->name, "fmaxf") || !strcmp(fn->name, "fminf") || !strcmp(fn->name, "crealf") || !strcmp(fn->name, "cimagf")) { /* no error expected */ } else { errstr = "u"; } } else if ((result[0] & 0x7FFFFFFF) > 0x7F7FFFFF) { /* * Infinite results are usually due to overflow, * but one exception is lgamma of a negative * integer. */ if (!strcmp(fn->name, "lgammaf") && (args[0] & 0x80000000) != 0 && /* negative */ is_sinteger(args)) { errstr = "ERANGE status=z"; } else { errstr = "overflow"; } printextra = 0; } } else { /* lgamma(0) is also a pole. */ if (!strcmp(fn->name, "lgammaf")) { errstr = "ERANGE status=z"; printextra = 0; } } } if (!printextra || (rejected && !(rejected==1 && result[1]!=0))) { printf(" result=%08x", result[0]); } else { printf(" result=%08x.%03x", result[0], (result[1] >> 20) & 0xFFF); } if (fn->type == rredf) { printf(" res2=%08x", result[3]); } break; case semi1: /* return a double result */ case semi2: case t_ldexp: printf(" result=%08x.%08x", result[0], result[1]); break; case semi1f: case semi2f: case t_ldexpf: printf(" result=%08x", result[0]); break; case t_frexp: /* return double * int */ printf(" result=%08x.%08x res2=%08x", result[0], result[1], result[2]); break; case t_modf: /* return double * double */ printf(" result=%08x.%08x res2=%08x.%08x", result[0], result[1], result[2], result[3]); break; case t_modff: /* return float * float */ /* fall through */ case t_frexpf: /* return float * int */ printf(" result=%08x res2=%08x", result[0], result[2]); break; case classify: case classifyf: case compare: case comparef: printf(" result=%x", result[0]); break; case args1c: case args2c: if (0/* errstr */) { printf(" resultr=%08x.%08x", result[0], result[1]); printf(" resulti=%08x.%08x", result[4], result[5]); } else { printf(" resultr=%08x.%08x.%03x", result[0], result[1], (result[2] >> 20) & 0xFFF); printf(" resulti=%08x.%08x.%03x", result[4], result[5], (result[6] >> 20) & 0xFFF); } /* Underflow behaviour doesn't seem to be specified for complex arithmetic */ errstr = "?underflow"; break; case args1fc: case args2fc: if (0/* errstr */) { printf(" resultr=%08x", result[0]); printf(" resulti=%08x", result[4]); } else { printf(" resultr=%08x.%03x", result[0], (result[1] >> 20) & 0xFFF); printf(" resulti=%08x.%03x", result[4], (result[5] >> 20) & 0xFFF); } /* Underflow behaviour doesn't seem to be specified for complex arithmetic */ errstr = "?underflow"; break; } if (errstr && *(errstr+1) == '\0') { printf(" errno=0 status=%c",*errstr); } else if (errstr && *errstr == '?') { printf(" maybeerror=%s", errstr+1); } else if (errstr && errstr[0] == 'E') { printf(" errno=%s", errstr); } else { printf(" error=%s", errstr && *errstr ? 
errstr : "0"); } printf("\n"); vet_for_decline(fn, args, result, 0); cleanup: mpfr_clear(a); mpfr_clear(b); mpfr_clear(r); mpc_clear(ac); mpc_clear(bc); mpc_clear(rc); } void gencases(Testable *fn, int number) { int i; uint32 args[8]; float32_case(NULL); float64_case(NULL); printf("random=on\n"); /* signal to runtests.pl that the following tests are randomly generated */ for (i = 0; i < number; i++) { /* generate test point */ fn->cases(args, fn->caseparam1, fn->caseparam2); docase(fn, args); } printf("random=off\n"); } static uint32 doubletop(int x, int scale) { int e = 0x412 + scale; while (!(x & 0x100000)) x <<= 1, e--; return (e << 20) + x; } static uint32 floatval(int x, int scale) { int e = 0x95 + scale; while (!(x & 0x800000)) x <<= 1, e--; return (e << 23) + x; } diff --git a/contrib/arm-optimized-routines/math/test/runulp.sh b/contrib/arm-optimized-routines/math/test/runulp.sh index e2e03e3ae761..672908f355c4 100755 --- a/contrib/arm-optimized-routines/math/test/runulp.sh +++ b/contrib/arm-optimized-routines/math/test/runulp.sh @@ -1,282 +1,105 @@ #!/bin/bash # ULP error check script. # -# Copyright (c) 2019-2023, Arm Limited. +# Copyright (c) 2019-2024, Arm Limited. # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception #set -x set -eu # cd to bin directory. cd "${0%/*}" rmodes='n u d z' #rmodes=n flags="${ULPFLAGS:--q}" emu="$@" FAIL=0 PASS=0 t() { - [ $r = "n" ] && Lt=$L || Lt=$Ldir - $emu ./ulp -r $r -e $Lt $flags "$@" && PASS=$((PASS+1)) || FAIL=$((FAIL+1)) + # First argument: routine name + routine=$1; shift + # Second and third arguments: lo and hi bounds + # Extra processing needed for bivariate routines + IFS=',' read -ra LO <<< "$1"; shift + IFS=',' read -ra HI <<< "$1"; shift + ITV="${LO[0]} ${HI[0]}" + for i in "${!LO[@]}"; do + [[ "$i" -eq "0" ]] || ITV="$ITV x ${LO[$i]} ${HI[$i]}" + done + # Fourth argument: number of test points + n=$1; shift + # Any remaining arguments are forwarded directly to the ulp tool + extra_flags="$@" + + # Read ULP limits, fenv expectation and control values from autogenerated files + limits_file=$LIMITS + [ $r == "n" ] || limits_file=${limits_file}_nn + L=$(grep "^$routine " $limits_file | awk '{print $2}') + [ -n "$L" ] || { echo ERROR: Could not determine ULP limit for $routine in $limits_file && false; } + cvals=($(grep "^$routine " $CVALS | awk '{print $2}')) + + if grep -q "^$routine$" $DISABLE_FENV; then extra_flags="$extra_flags -f"; fi + # Emulate a do-while loop to loop over cvals, but still execute once if it is empty + while : ; do + # Empty string if we are at the end of cvals array + c_arg="" + [ -z "${cvals[0]:-}" ] || c_arg="-c ${cvals[0]}" + $emu ./ulp -e $L $flags $extra_flags -r $r $c_arg $routine $ITV $n && PASS=$((PASS+1)) || FAIL=$((FAIL+1)) + # Shift cvals by 1, and break if it is now empty + cvals=("${cvals[@]:1}") + [ -n "${cvals[0]:-}" ] || break + done + } check() { - $emu ./ulp -f -q "$@" >/dev/null + $emu ./ulp -f -q "$@" } -Ldir=0.5 +if [[ $WANT_EXPERIMENTAL_MATH -eq 1 ]] && [[ $WANT_SVE_TESTS -eq 1 ]] && [[ $USE_MPFR -eq 0 ]]; then + # No guarantees about powi accuracy, so regression-test for exactness + # w.r.t.
the custom reference impl in ulp_wrappers.h + if [ -z "$FUNC" ] || [ "$FUNC" == "_ZGVsMxvv_powi" ]; then + check -q -f -e 0 _ZGVsMxvv_powi 0 inf x 0 1000 100000 + check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x 0 1000 100000 + check -q -f -e 0 _ZGVsMxvv_powi 0 inf x -0 -1000 100000 + check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x -0 -1000 100000 + fi + if [ -z "$FUNC" ] || [ "$FUNC" == "_ZGVsMxvv_powk" ]; then + check -q -f -e 0 _ZGVsMxvv_powk 0 inf x 0 1000 100000 + check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x 0 1000 100000 + check -q -f -e 0 _ZGVsMxvv_powk 0 inf x -0 -1000 100000 + check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x -0 -1000 100000 + fi +fi + +# Test generic routines in all rounding modes for r in $rmodes do -L=0.01 -t exp 0 0xffff000000000000 10000 -t exp 0x1p-6 0x1p6 40000 -t exp -0x1p-6 -0x1p6 40000 -t exp 633.3 733.3 10000 -t exp -633.3 -777.3 10000 - -L=0.01 -t exp2 0 0xffff000000000000 10000 -t exp2 0x1p-6 0x1p6 40000 -t exp2 -0x1p-6 -0x1p6 40000 -t exp2 633.3 733.3 10000 -t exp2 -633.3 -777.3 10000 - -L=0.02 -t log 0 0xffff000000000000 10000 -t log 0x1p-4 0x1p4 40000 -t log 0 inf 40000 - -L=0.05 -t log2 0 0xffff000000000000 10000 -t log2 0x1p-4 0x1p4 40000 -t log2 0 inf 40000 - -L=0.05 -t pow 0.5 2.0 x 0 inf 20000 -t pow -0.5 -2.0 x 0 inf 20000 -t pow 0.5 2.0 x -0 -inf 20000 -t pow -0.5 -2.0 x -0 -inf 20000 -t pow 0.5 2.0 x 0x1p-10 0x1p10 40000 -t pow 0.5 2.0 x -0x1p-10 -0x1p10 40000 -t pow 0 inf x 0.5 2.0 80000 -t pow 0 inf x -0.5 -2.0 80000 -t pow 0x1.fp-1 0x1.08p0 x 0x1p8 0x1p17 80000 -t pow 0x1.fp-1 0x1.08p0 x -0x1p8 -0x1p17 80000 -t pow 0 0x1p-1000 x 0 1.0 50000 -t pow 0x1p1000 inf x 0 1.0 50000 -t pow 0x1.ffffffffffff0p-1 0x1.0000000000008p0 x 0x1p60 0x1p68 50000 -t pow 0x1.ffffffffff000p-1 0x1p0 x 0x1p50 0x1p52 50000 -t pow -0x1.ffffffffff000p-1 -0x1p0 x 0x1p50 0x1p52 50000 - -L=0.02 -t exp10 0 0x1p-47 5000 -t exp10 -0 -0x1p-47 5000 -t exp10 0x1p-47 1 50000 -t exp10 -0x1p-47 -1 50000 -t exp10 1 0x1.34413509f79ffp8 50000 -t exp10 -1 -0x1.434e6420f4374p8 50000 -t exp10 0x1.34413509f79ffp8 inf 5000 -t exp10 -0x1.434e6420f4374p8 -inf 5000 - -L=1.0 -Ldir=0.9 -t erf 0 0xffff000000000000 10000 -t erf 0x1p-1022 0x1p-26 40000 -t erf -0x1p-1022 -0x1p-26 40000 -t erf 0x1p-26 0x1p3 40000 -t erf -0x1p-26 -0x1p3 40000 -t erf 0 inf 40000 -Ldir=0.5 - -L=0.01 -t expf 0 0xffff0000 10000 -t expf 0x1p-14 0x1p8 50000 -t expf -0x1p-14 -0x1p8 50000 - -L=0.01 -t exp2f 0 0xffff0000 10000 -t exp2f 0x1p-14 0x1p8 50000 -t exp2f -0x1p-14 -0x1p8 50000 - -L=0.32 -t logf 0 0xffff0000 10000 -t logf 0x1p-4 0x1p4 50000 -t logf 0 inf 50000 - -L=0.26 -t log2f 0 0xffff0000 10000 -t log2f 0x1p-4 0x1p4 50000 -t log2f 0 inf 50000 - -L=0.06 -t sinf 0 0xffff0000 10000 -t sinf 0x1p-14 0x1p54 50000 -t sinf -0x1p-14 -0x1p54 50000 - -L=0.06 -t cosf 0 0xffff0000 10000 -t cosf 0x1p-14 0x1p54 50000 -t cosf -0x1p-14 -0x1p54 50000 - -L=0.06 -t sincosf_sinf 0 0xffff0000 10000 -t sincosf_sinf 0x1p-14 0x1p54 50000 -t sincosf_sinf -0x1p-14 -0x1p54 50000 - -L=0.06 -t sincosf_cosf 0 0xffff0000 10000 -t sincosf_cosf 0x1p-14 0x1p54 50000 -t sincosf_cosf -0x1p-14 -0x1p54 50000 - -L=0.4 -t powf 0x1p-1 0x1p1 x 0x1p-7 0x1p7 50000 -t powf 0x1p-1 0x1p1 x -0x1p-7 -0x1p7 50000 -t powf 0x1p-70 0x1p70 x 0x1p-1 0x1p1 50000 -t powf 0x1p-70 0x1p70 x -0x1p-1 -0x1p1 50000 -t powf 0x1.ep-1 0x1.1p0 x 0x1p8 0x1p14 50000 -t powf 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p14 50000 - -L=0.6 -Ldir=0.9 -t erff 0 0xffff0000 10000 -t erff 0x1p-127 0x1p-26 40000 -t erff -0x1p-127 -0x1p-26 40000 -t erff 0x1p-26 0x1p3 40000 -t erff -0x1p-26 -0x1p3 40000 -t erff 0 inf 40000 
-Ldir=0.5 - + while read F LO HI N + do + [[ -z $F ]] || t $F $LO $HI $N + done << EOF +$(grep "\b$FUNC\b" $GEN_ITVS) +EOF done -# vector functions - -Ldir=0.5 -r='n' -flags="${ULPFLAGS:--q}" - -range_exp=' - 0 0xffff000000000000 10000 - 0x1p-6 0x1p6 400000 - -0x1p-6 -0x1p6 400000 - 633.3 733.3 10000 - -633.3 -777.3 10000 -' - -range_log=' - 0 0xffff000000000000 10000 - 0x1p-4 0x1p4 400000 - 0 inf 400000 -' - -range_pow=' - 0x1p-1 0x1p1 x 0x1p-10 0x1p10 50000 - 0x1p-1 0x1p1 x -0x1p-10 -0x1p10 50000 - 0x1p-500 0x1p500 x 0x1p-1 0x1p1 50000 - 0x1p-500 0x1p500 x -0x1p-1 -0x1p1 50000 - 0x1.ep-1 0x1.1p0 x 0x1p8 0x1p16 50000 - 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p16 50000 -' - -range_sin=' - 0 0x1p23 500000 - -0 -0x1p23 500000 - 0x1p23 inf 10000 - -0x1p23 -inf 10000 -' -range_cos="$range_sin" - -range_expf=' - 0 0xffff0000 10000 - 0x1p-14 0x1p8 500000 - -0x1p-14 -0x1p8 500000 -' - -range_expf_1u="$range_expf" -range_exp2f="$range_expf" -range_exp2f_1u="$range_expf" - -range_logf=' - 0 0xffff0000 10000 - 0x1p-4 0x1p4 500000 -' - -range_sinf=' - 0 0x1p20 500000 - -0 -0x1p20 500000 - 0x1p20 inf 10000 - -0x1p20 -inf 10000 -' -range_cosf="$range_sinf" - -range_powf=' - 0x1p-1 0x1p1 x 0x1p-7 0x1p7 50000 - 0x1p-1 0x1p1 x -0x1p-7 -0x1p7 50000 - 0x1p-70 0x1p70 x 0x1p-1 0x1p1 50000 - 0x1p-70 0x1p70 x -0x1p-1 -0x1p1 50000 - 0x1.ep-1 0x1.1p0 x 0x1p8 0x1p14 50000 - 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p14 50000 -' - -# error limits -L_exp=1.9 -L_log=1.2 -L_pow=0.05 -L_sin=3.0 -L_cos=3.0 -L_expf=1.49 -L_expf_1u=0.4 -L_exp2f=1.49 -L_exp2f_1u=0.4 -L_logf=2.9 -L_sinf=1.4 -L_cosf=1.4 -L_powf=2.1 - -while read G F D +# Only test arch-specific routines in round-to-nearest, with sign of zero ignored (-z flag) +r=n +while read F LO HI N do - case "$G" in \#*) continue ;; esac - eval range="\${range_$G}" - eval L="\${L_$G}" - while read X - do - [ -n "$X" ] || continue - case "$X" in \#*) continue ;; esac - disable_fenv="" - if [ -z "$WANT_SIMD_EXCEPT" ] || [ $WANT_SIMD_EXCEPT -eq 0 ]; then - # If library was built with SIMD exceptions - # disabled, disable fenv checking in ulp - # tool. Otherwise, fenv checking may still be - # disabled by adding -f to the end of the run - # line. - disable_fenv="-f" - fi - t $D $disable_fenv $F $X - done << EOF -$range - -EOF + [[ -z $F ]] || t $F $LO $HI $N -z done << EOF -# group symbol run -exp _ZGVnN2v_exp -log _ZGVnN2v_log -pow _ZGVnN2vv_pow -f -sin _ZGVnN2v_sin -z -cos _ZGVnN2v_cos -expf _ZGVnN4v_expf -expf_1u _ZGVnN4v_expf_1u -f -exp2f _ZGVnN4v_exp2f -exp2f_1u _ZGVnN4v_exp2f_1u -f -logf _ZGVnN4v_logf -sinf _ZGVnN4v_sinf -z -cosf _ZGVnN4v_cosf -powf _ZGVnN4vv_powf -f +$(grep "\b$FUNC\b" $ARCH_ITVS) EOF [ 0 -eq $FAIL ] || { echo "FAILED $FAIL PASSED $PASS" exit 1 } diff --git a/contrib/arm-optimized-routines/math/test/test_defs.h b/contrib/arm-optimized-routines/math/test/test_defs.h new file mode 100644 index 000000000000..d0656c9e1d84 --- /dev/null +++ b/contrib/arm-optimized-routines/math/test/test_defs.h @@ -0,0 +1,31 @@ +/* + * Helper macros for emitting various details about routines for consumption by + * runulp.sh. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception. + */ + +#define TEST_ULP(f, l) TEST_ULP f l +#define TEST_ULP_NONNEAREST(f, l) TEST_ULP_NONNEAREST f l + +/* Emit routine name if e == 0 and f is expected to correctly trigger fenv + exceptions. e allows declaration to be emitted conditionally on + WANT_SIMD_EXCEPT - defer expansion by one pass to allow those flags to be + expanded properly. 
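Worked example (names illustrative): with WANT_SIMD_EXCEPT defined to 0, TEST_DISABLE_FENV_IF_NOT (foo, WANT_SIMD_EXCEPT) expands to TEST_DISABLE_FENV_IF_NOT_ (foo, 0), token-pastes to TEST_DISABLE_FENV_IF_NOT_0 (foo) and so emits TEST_DISABLE_FENV (foo); with the flag defined to 1, nothing is emitted.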
*/ +#define TEST_DISABLE_FENV(f) TEST_DISABLE_FENV f +#define TEST_DISABLE_FENV_IF_NOT(f, e) TEST_DISABLE_FENV_IF_NOT_ (f, e) +#define TEST_DISABLE_FENV_IF_NOT_(f, e) TEST_DISABLE_FENV_IF_NOT_##e (f) +#define TEST_DISABLE_FENV_IF_NOT_0(f) TEST_DISABLE_FENV (f) +#define TEST_DISABLE_FENV_IF_NOT_1(f) + +#define TEST_INTERVAL(f, lo, hi, n) TEST_INTERVAL f lo hi n +#define TEST_SYM_INTERVAL(f, lo, hi, n) \ + TEST_INTERVAL (f, lo, hi, n) \ + TEST_INTERVAL (f, -lo, -hi, n) +// clang-format off +#define TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n) \ + TEST_INTERVAL f xlo,ylo xhi,yhi n +// clang-format on + +#define TEST_CONTROL_VALUE(f, c) TEST_CONTROL_VALUE f c diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acos.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/acos.tst similarity index 95% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/acos.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/acos.tst index a73dcd25965b..7889e62f4459 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acos.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/acos.tst @@ -1,17 +1,17 @@ ; acos.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=acos op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=acos op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=acos op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=acos op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=acos op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i func=acos op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i func=acos op1=00000000.00000000 result=3ff921fb.54442d18.469 errno=0 func=acos op1=80000000.00000000 result=3ff921fb.54442d18.469 errno=0 func=acos op1=3ff00000.00000000 result=00000000.00000000 errno=0 func=acos op1=bff00000.00000000 result=400921fb.54442d18.469 errno=0 func=acos op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i func=acos op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/acosf.tst similarity index 95% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/acosf.tst index 9e453e3bff5e..0c2165967abb 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/acosf.tst @@ -1,21 +1,21 @@ ; acosf.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. 
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=acosf op1=7fc00001 result=7fc00001 errno=0 func=acosf op1=ffc00001 result=7fc00001 errno=0 func=acosf op1=7f800001 result=7fc00001 errno=0 status=i func=acosf op1=ff800001 result=7fc00001 errno=0 status=i func=acosf op1=7f800000 result=7fc00001 errno=EDOM status=i func=acosf op1=ff800000 result=7fc00001 errno=EDOM status=i func=acosf op1=00000000 result=3fc90fda.a22 errno=0 func=acosf op1=80000000 result=3fc90fda.a22 errno=0 func=acosf op1=3f800000 result=00000000 errno=0 func=acosf op1=bf800000 result=40490fda.a22 errno=0 func=acosf op1=3f800001 result=7fc00001 errno=EDOM status=i func=acosf op1=bf800001 result=7fc00001 errno=EDOM status=i func=acosf op1=33000000 result=3fc90fda.622 error=0 func=acosf op1=30000000 result=3fc90fda.a12 error=0 func=acosf op1=2d000000 result=3fc90fda.a21 error=0 func=acosf op1=2a000000 result=3fc90fda.a22 error=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosh.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/acosh.tst similarity index 96% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosh.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/acosh.tst index dd962bd391da..b78d64bb8ea7 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosh.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/acosh.tst @@ -1,19 +1,19 @@ ; acosh.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=acosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=acosh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=acosh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=acosh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=acosh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 func=acosh op1=3ff00000.00000000 result=00000000.00000000 errno=0 func=acosh op1=3fefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i func=acosh op1=00000000.00000000 result=7ff80000.00000001 errno=EDOM status=i func=acosh op1=80000000.00000000 result=7ff80000.00000001 errno=EDOM status=i func=acosh op1=bfefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i func=acosh op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i func=acosh op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i func=acosh op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i func=acosh op1=7fe01ac0.7f03a83e result=40862e50.541778f1.8cc error=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acoshf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/acoshf.tst similarity index 95% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/acoshf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/acoshf.tst index 606c615f9b74..9eec2caf014d 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acoshf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/acoshf.tst @@ -1,19 +1,19 @@ ; acoshf.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. 
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=acoshf op1=7fc00001 result=7fc00001 errno=0 func=acoshf op1=ffc00001 result=7fc00001 errno=0 func=acoshf op1=7f800001 result=7fc00001 errno=0 status=i func=acoshf op1=ff800001 result=7fc00001 errno=0 status=i func=acoshf op1=7f800000 result=7f800000 errno=0 func=acoshf op1=3f800000 result=00000000 errno=0 func=acoshf op1=3f7fffff result=7fc00001 errno=EDOM status=i func=acoshf op1=00000000 result=7fc00001 errno=EDOM status=i func=acoshf op1=80000000 result=7fc00001 errno=EDOM status=i func=acoshf op1=bf7fffff result=7fc00001 errno=EDOM status=i func=acoshf op1=bf800000 result=7fc00001 errno=EDOM status=i func=acoshf op1=bf800001 result=7fc00001 errno=EDOM status=i func=acoshf op1=ff800000 result=7fc00001 errno=EDOM status=i func=acoshf op1=7f767efe result=42b2c19d.83e error=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asin.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/asin.tst similarity index 97% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/asin.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/asin.tst index 6180d7849d90..7b916f3624c0 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asin.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/asin.tst @@ -1,24 +1,24 @@ ; asin.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=asin op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=asin op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=asin op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=asin op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=asin op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i func=asin op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i func=asin op1=00000000.00000000 result=00000000.00000000 errno=0 func=asin op1=80000000.00000000 result=80000000.00000000 errno=0 ; Inconsistent behavior was detected for the following 2 cases. ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. func=asin op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux func=asin op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux func=asin op1=3ff00000.00000000 result=3ff921fb.54442d18.469 errno=0 func=asin op1=bff00000.00000000 result=bff921fb.54442d18.469 errno=0 func=asin op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i func=asin op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/asinf.tst similarity index 96% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/asinf.tst index a85b2593768d..d5830b99b620 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/asinf.tst @@ -1,24 +1,24 @@ ; asinf.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. 
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=asinf op1=7fc00001 result=7fc00001 errno=0 func=asinf op1=ffc00001 result=7fc00001 errno=0 func=asinf op1=7f800001 result=7fc00001 errno=0 status=i func=asinf op1=ff800001 result=7fc00001 errno=0 status=i func=asinf op1=7f800000 result=7fc00001 errno=EDOM status=i func=asinf op1=ff800000 result=7fc00001 errno=EDOM status=i func=asinf op1=00000000 result=00000000 errno=0 func=asinf op1=80000000 result=80000000 errno=0 ; Inconsistent behavior was detected for the following 2 cases. ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. func=asinf op1=00000001 result=00000001 errno=0 maybestatus=ux func=asinf op1=80000001 result=80000001 errno=0 maybestatus=ux func=asinf op1=3f800000 result=3fc90fda.a22 errno=0 func=asinf op1=bf800000 result=bfc90fda.a22 errno=0 func=asinf op1=3f800001 result=7fc00001 errno=EDOM status=i func=asinf op1=bf800001 result=7fc00001 errno=EDOM status=i diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinh.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/asinh.tst similarity index 95% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinh.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/asinh.tst index 1485dfeffecf..9b250a14f50c 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinh.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/asinh.tst @@ -1,18 +1,18 @@ ; asinh.tst ; -; Copyright (c) 2022-2023, Arm Limited. +; Copyright (c) 2022-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=asinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=asinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=asinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=asinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=asinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 func=asinh op1=fff00000.00000000 result=fff00000.00000000 errno=0 func=asinh op1=00000000.00000000 result=00000000.00000000 errno=0 func=asinh op1=80000000.00000000 result=80000000.00000000 errno=0 ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. func=asinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux func=asinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinhf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/asinhf.tst similarity index 95% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinhf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/asinhf.tst index eb76a5892a70..f2410e09b03e 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinhf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/asinhf.tst @@ -1,18 +1,18 @@ ; asinhf.tst ; -; Copyright (c) 2007-2023, Arm Limited. +; Copyright (c) 2007-2024, Arm Limited. 
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=asinhf op1=7fc00001 result=7fc00001 errno=0 func=asinhf op1=ffc00001 result=7fc00001 errno=0 func=asinhf op1=7f800001 result=7fc00001 errno=0 status=i func=asinhf op1=ff800001 result=7fc00001 errno=0 status=i func=asinhf op1=7f800000 result=7f800000 errno=0 func=asinhf op1=ff800000 result=ff800000 errno=0 func=asinhf op1=00000000 result=00000000 errno=0 func=asinhf op1=80000000 result=80000000 errno=0 ; No exception is raised on certain machines (different version of glibc) ; Same issue encountered with other function similar to x close to 0 ; Could be due to function so boring no flop is involved in some implementations func=asinhf op1=00000001 result=00000001 errno=0 maybestatus=ux func=asinhf op1=80000001 result=80000001 errno=0 maybestatus=ux diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/atan.tst similarity index 96% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/atan.tst index 4c670553d58f..d29b13245cd5 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/atan.tst @@ -1,22 +1,22 @@ ; atan.tst ; -; Copyright (c) 1999-2023, Arm Limited. +; Copyright (c) 1999-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=atan op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=atan op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=atan op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan op1=7ff00000.00000000 result=3ff921fb.54442d18.469 errno=0 func=atan op1=fff00000.00000000 result=bff921fb.54442d18.469 errno=0 func=atan op1=00000000.00000000 result=00000000.00000000 errno=0 func=atan op1=80000000.00000000 result=80000000.00000000 errno=0 ; Inconsistent behavior was detected for the following 2 cases. ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. func=atan op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux func=atan op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux func=atan op1=3ff00000.00000000 result=3fe921fb.54442d18.469 errno=0 func=atan op1=bff00000.00000000 result=bfe921fb.54442d18.469 errno=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/atan2.tst similarity index 99% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/atan2.tst index 647b3764072c..3e34e7641f28 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/atan2.tst @@ -1,110 +1,110 @@ ; atan2.tst ; -; Copyright (c) 1999-2023, Arm Limited. +; Copyright (c) 1999-2024, Arm Limited. 
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=atan2 op1=7ff00000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff00000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff00000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff00000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff00000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff00000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff00000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff00000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff00000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff00000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff00000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff00000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff00000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff00000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff00000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff00000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff80000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff80000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff80000.00000001 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=7ff80000.00000001 op2=fff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=7ff80000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=7ff80000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=7ff80000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=7ff80000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=7ff80000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=7ff80000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=fff80000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff80000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff80000.00000001 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=fff80000.00000001 op2=fff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=fff80000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=fff80000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=fff80000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=fff80000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=fff80000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=fff80000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0 func=atan2 op1=7ff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff00000.00000000 
op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=7ff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=7ff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=7ff00000.00000000 op2=7ff00000.00000000 result=3fe921fb.54442d18.469 errno=0 func=atan2 op1=7ff00000.00000000 op2=fff00000.00000000 result=4002d97c.7f3321d2.34f errno=0 func=atan2 op1=7ff00000.00000000 op2=00000000.00000000 result=3ff921fb.54442d18.469 errno=0 func=atan2 op1=7ff00000.00000000 op2=80000000.00000000 result=3ff921fb.54442d18.469 errno=0 func=atan2 op1=7ff00000.00000000 op2=3ff00000.00000000 result=3ff921fb.54442d18.469 errno=0 func=atan2 op1=7ff00000.00000000 op2=bff00000.00000000 result=3ff921fb.54442d18.469 errno=0 func=atan2 op1=fff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff00000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=fff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=fff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=fff00000.00000000 op2=7ff00000.00000000 result=bfe921fb.54442d18.469 errno=0 func=atan2 op1=fff00000.00000000 op2=fff00000.00000000 result=c002d97c.7f3321d2.34f errno=0 func=atan2 op1=fff00000.00000000 op2=00000000.00000000 result=bff921fb.54442d18.469 errno=0 func=atan2 op1=fff00000.00000000 op2=80000000.00000000 result=bff921fb.54442d18.469 errno=0 func=atan2 op1=fff00000.00000000 op2=3ff00000.00000000 result=bff921fb.54442d18.469 errno=0 func=atan2 op1=fff00000.00000000 op2=bff00000.00000000 result=bff921fb.54442d18.469 errno=0 func=atan2 op1=00000000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=00000000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=00000000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=00000000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=00000000.00000000 op2=7ff00000.00000000 result=00000000.00000000 errno=0 func=atan2 op1=00000000.00000000 op2=fff00000.00000000 result=400921fb.54442d18.469 errno=0 func=atan2 op1=00000000.00000000 op2=00000000.00000000 result=00000000.00000000 errno=0 func=atan2 op1=00000000.00000000 op2=80000000.00000000 result=400921fb.54442d18.469 errno=0 func=atan2 op1=00000000.00000000 op2=3ff00000.00000000 result=00000000.00000000 errno=0 func=atan2 op1=00000000.00000000 op2=bff00000.00000000 result=400921fb.54442d18.469 errno=0 ; No exception is raised on certain machines (different version of glibc) ; Same issue encountered with other function similar to x close to 0 ; Could be due to function so boring no flop is involved in some implementations func=atan2 op1=00000000.00000001 op2=3ff00000.00000000 result=00000000.00000001 errno=0 maybestatus=ux func=atan2 op1=80000000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=80000000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=80000000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=80000000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=80000000.00000000 op2=7ff00000.00000000 result=80000000.00000000 errno=0 func=atan2 op1=80000000.00000000 op2=fff00000.00000000 result=c00921fb.54442d18.469 errno=0 func=atan2 op1=80000000.00000000 
op2=00000000.00000000 result=80000000.00000000 errno=0 func=atan2 op1=80000000.00000000 op2=80000000.00000000 result=c00921fb.54442d18.469 errno=0 func=atan2 op1=80000000.00000000 op2=3ff00000.00000000 result=80000000.00000000 errno=0 func=atan2 op1=80000000.00000000 op2=bff00000.00000000 result=c00921fb.54442d18.469 errno=0 ; No exception is raised on certain machines (different version of glibc) ; Same issue encountered with other function similar to x close to 0 ; Could be due to function so boring no flop is involved in some implementations func=atan2 op1=80000000.00000001 op2=3ff00000.00000000 result=80000000.00000001 errno=0 maybestatus=ux func=atan2 op1=3ff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=3ff00000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=3ff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=3ff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=3ff00000.00000000 op2=7ff00000.00000000 result=00000000.00000000 errno=0 func=atan2 op1=3ff00000.00000000 op2=fff00000.00000000 result=400921fb.54442d18.469 errno=0 func=atan2 op1=3ff00000.00000000 op2=00000000.00000000 result=3ff921fb.54442d18.469 errno=0 func=atan2 op1=3ff00000.00000000 op2=80000000.00000000 result=3ff921fb.54442d18.469 errno=0 func=atan2 op1=3ff00000.00000000 op2=3ff00000.00000000 result=3fe921fb.54442d18.469 errno=0 func=atan2 op1=3ff00000.00000000 op2=bff00000.00000000 result=4002d97c.7f3321d2.34f errno=0 func=atan2 op1=bff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=bff00000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atan2 op1=bff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=bff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0 func=atan2 op1=bff00000.00000000 op2=7ff00000.00000000 result=80000000.00000000 errno=0 func=atan2 op1=bff00000.00000000 op2=fff00000.00000000 result=c00921fb.54442d18.469 errno=0 func=atan2 op1=bff00000.00000000 op2=00000000.00000000 result=bff921fb.54442d18.469 errno=0 func=atan2 op1=bff00000.00000000 op2=80000000.00000000 result=bff921fb.54442d18.469 errno=0 func=atan2 op1=bff00000.00000000 op2=3ff00000.00000000 result=bfe921fb.54442d18.469 errno=0 func=atan2 op1=bff00000.00000000 op2=bff00000.00000000 result=c002d97c.7f3321d2.34f errno=0 func=atan2 op1=3ff00000.00000000 op2=3ff00000.00000000 result=3fe921fb.54442d18 errno=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2f.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/atan2f.tst similarity index 99% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2f.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/atan2f.tst index 85c5c5d47e10..e637fe0eba24 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2f.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/atan2f.tst @@ -1,121 +1,121 @@ ; atan2f.tst ; -; Copyright (c) 1999-2023, Arm Limited. +; Copyright (c) 1999-2024, Arm Limited. 
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=atan2f op1=7f800001 op2=7f800001 result=7fc00001 errno=0 status=i func=atan2f op1=7f800001 op2=ff800001 result=7fc00001 errno=0 status=i func=atan2f op1=7f800001 op2=7fc00001 result=7fc00001 errno=0 status=i func=atan2f op1=7f800001 op2=ffc00001 result=7fc00001 errno=0 status=i func=atan2f op1=7f800001 op2=7f800000 result=7fc00001 errno=0 status=i func=atan2f op1=7f800001 op2=ff800000 result=7fc00001 errno=0 status=i func=atan2f op1=7f800001 op2=00000000 result=7fc00001 errno=0 status=i func=atan2f op1=7f800001 op2=80000000 result=7fc00001 errno=0 status=i func=atan2f op1=7f800001 op2=3f800000 result=7fc00001 errno=0 status=i func=atan2f op1=7f800001 op2=bf800000 result=7fc00001 errno=0 status=i func=atan2f op1=ff800001 op2=7f800001 result=7fc00001 errno=0 status=i func=atan2f op1=ff800001 op2=ff800001 result=7fc00001 errno=0 status=i func=atan2f op1=ff800001 op2=7fc00001 result=7fc00001 errno=0 status=i func=atan2f op1=ff800001 op2=ffc00001 result=7fc00001 errno=0 status=i func=atan2f op1=ff800001 op2=7f800000 result=7fc00001 errno=0 status=i func=atan2f op1=ff800001 op2=ff800000 result=7fc00001 errno=0 status=i func=atan2f op1=ff800001 op2=00000000 result=7fc00001 errno=0 status=i func=atan2f op1=ff800001 op2=80000000 result=7fc00001 errno=0 status=i func=atan2f op1=ff800001 op2=3f800000 result=7fc00001 errno=0 status=i func=atan2f op1=ff800001 op2=bf800000 result=7fc00001 errno=0 status=i func=atan2f op1=7fc00001 op2=7f800001 result=7fc00001 errno=0 status=i func=atan2f op1=7fc00001 op2=ff800001 result=7fc00001 errno=0 status=i func=atan2f op1=7fc00001 op2=7fc00001 result=7fc00001 errno=0 func=atan2f op1=7fc00001 op2=ffc00001 result=7fc00001 errno=0 func=atan2f op1=7fc00001 op2=7f800000 result=7fc00001 errno=0 func=atan2f op1=7fc00001 op2=ff800000 result=7fc00001 errno=0 func=atan2f op1=7fc00001 op2=00000000 result=7fc00001 errno=0 func=atan2f op1=7fc00001 op2=80000000 result=7fc00001 errno=0 func=atan2f op1=7fc00001 op2=3f800000 result=7fc00001 errno=0 func=atan2f op1=7fc00001 op2=bf800000 result=7fc00001 errno=0 func=atan2f op1=ffc00001 op2=7f800001 result=7fc00001 errno=0 status=i func=atan2f op1=ffc00001 op2=ff800001 result=7fc00001 errno=0 status=i func=atan2f op1=ffc00001 op2=7fc00001 result=ffc00001 errno=0 func=atan2f op1=ffc00001 op2=ffc00001 result=ffc00001 errno=0 func=atan2f op1=ffc00001 op2=7f800000 result=ffc00001 errno=0 func=atan2f op1=ffc00001 op2=ff800000 result=ffc00001 errno=0 func=atan2f op1=ffc00001 op2=00000000 result=ffc00001 errno=0 func=atan2f op1=ffc00001 op2=80000000 result=ffc00001 errno=0 func=atan2f op1=ffc00001 op2=3f800000 result=ffc00001 errno=0 func=atan2f op1=ffc00001 op2=bf800000 result=ffc00001 errno=0 func=atan2f op1=7f800000 op2=7f800001 result=7fc00001 errno=0 status=i func=atan2f op1=7f800000 op2=ff800001 result=7fc00001 errno=0 status=i func=atan2f op1=7f800000 op2=7fc00001 result=7fc00001 errno=0 func=atan2f op1=7f800000 op2=ffc00001 result=7fc00001 errno=0 func=atan2f op1=7f800000 op2=7f800000 result=3f490fda.a22 errno=0 func=atan2f op1=7f800000 op2=ff800000 result=4016cbe3.f99 errno=0 func=atan2f op1=7f800000 op2=00000000 result=3fc90fda.a22 errno=0 func=atan2f op1=7f800000 op2=80000000 result=3fc90fda.a22 errno=0 func=atan2f op1=7f800000 op2=3f800000 result=3fc90fda.a22 errno=0 func=atan2f op1=7f800000 op2=bf800000 result=3fc90fda.a22 errno=0 func=atan2f op1=ff800000 op2=7f800001 result=7fc00001 errno=0 status=i func=atan2f op1=ff800000 op2=ff800001 result=7fc00001 
errno=0 status=i func=atan2f op1=ff800000 op2=7fc00001 result=7fc00001 errno=0 func=atan2f op1=ff800000 op2=ffc00001 result=ffc00001 errno=0 func=atan2f op1=ff800000 op2=7f800000 result=bf490fda.a22 errno=0 func=atan2f op1=ff800000 op2=ff800000 result=c016cbe3.f99 errno=0 func=atan2f op1=ff800000 op2=00000000 result=bfc90fda.a22 errno=0 func=atan2f op1=ff800000 op2=80000000 result=bfc90fda.a22 errno=0 func=atan2f op1=ff800000 op2=3f800000 result=bfc90fda.a22 errno=0 func=atan2f op1=ff800000 op2=bf800000 result=bfc90fda.a22 errno=0 func=atan2f op1=00000000 op2=7f800001 result=7fc00001 errno=0 status=i func=atan2f op1=00000000 op2=ff800001 result=7fc00001 errno=0 status=i func=atan2f op1=00000000 op2=7fc00001 result=7fc00001 errno=0 func=atan2f op1=00000000 op2=ffc00001 result=ffc00001 errno=0 func=atan2f op1=00000000 op2=7f800000 result=00000000 errno=0 func=atan2f op1=00000000 op2=ff800000 result=40490fda.a22 errno=0 func=atan2f op1=00000000 op2=00000000 result=00000000 errno=0 func=atan2f op1=00000000 op2=80000000 result=40490fda.a22 errno=0 func=atan2f op1=00000000 op2=3f800000 result=00000000 errno=0 func=atan2f op1=00000000 op2=bf800000 result=40490fda.a22 errno=0 ; No exception is raised on certain machines (different version of glibc) ; Same issue encountered with other function similar to x close to 0 ; Could be due to function so boring no flop is involved in some implementations func=atan2f op1=00000001 op2=3f800000 result=00000001 errno=0 maybestatus=ux func=atan2f op1=80000000 op2=7f800001 result=7fc00001 errno=0 status=i func=atan2f op1=80000000 op2=ff800001 result=7fc00001 errno=0 status=i func=atan2f op1=80000000 op2=7fc00001 result=7fc00001 errno=0 func=atan2f op1=80000000 op2=ffc00001 result=ffc00001 errno=0 func=atan2f op1=80000000 op2=7f800000 result=80000000 errno=0 func=atan2f op1=80000000 op2=ff800000 result=c0490fda.a22 errno=0 func=atan2f op1=80000000 op2=00000000 result=80000000 errno=0 func=atan2f op1=80000000 op2=80000000 result=c0490fda.a22 errno=0 func=atan2f op1=80000000 op2=3f800000 result=80000000 errno=0 func=atan2f op1=80000000 op2=bf800000 result=c0490fda.a22 errno=0 ; No exception is raised on certain machines (different version of glibc) ; Same issue encountered with other function similar to x close to 0 ; Could be due to function so boring no flop is involved in some implementations func=atan2f op1=80000001 op2=3f800000 result=80000001 errno=0 maybestatus=ux func=atan2f op1=3f800000 op2=7f800001 result=7fc00001 errno=0 status=i func=atan2f op1=3f800000 op2=ff800001 result=7fc00001 errno=0 status=i func=atan2f op1=3f800000 op2=7fc00001 result=7fc00001 errno=0 func=atan2f op1=3f800000 op2=ffc00001 result=ffc00001 errno=0 func=atan2f op1=3f800000 op2=7f800000 result=00000000 errno=0 func=atan2f op1=3f800000 op2=ff800000 result=40490fda.a22 errno=0 func=atan2f op1=3f800000 op2=00000000 result=3fc90fda.a22 errno=0 func=atan2f op1=3f800000 op2=80000000 result=3fc90fda.a22 errno=0 func=atan2f op1=3f800000 op2=3f800000 result=3f490fda.a22 errno=0 func=atan2f op1=3f800000 op2=bf800000 result=4016cbe3.f99 errno=0 func=atan2f op1=bf800000 op2=7f800001 result=7fc00001 errno=0 status=i func=atan2f op1=bf800000 op2=ff800001 result=7fc00001 errno=0 status=i func=atan2f op1=bf800000 op2=7fc00001 result=7fc00001 errno=0 func=atan2f op1=bf800000 op2=ffc00001 result=ffc00001 errno=0 func=atan2f op1=bf800000 op2=7f800000 result=80000000 errno=0 func=atan2f op1=bf800000 op2=ff800000 result=c0490fda.a22 errno=0 func=atan2f op1=bf800000 op2=00000000 result=bfc90fda.a22 
errno=0 func=atan2f op1=bf800000 op2=80000000 result=bfc90fda.a22 errno=0 func=atan2f op1=bf800000 op2=3f800000 result=bf490fda.a22 errno=0 func=atan2f op1=bf800000 op2=bf800000 result=c016cbe3.f99 errno=0 func=atan2f op1=8005f16d op2=002bb601 result=be0a60a5.d88 error=0 func=atan2f op1=80818ec8 op2=80ba5db9 result=c0222eda.f42 error=0 func=atan2f op1=ff7fffff op2=ff7fffff result=c016cbe3.f99 errno=0 func=atan2f op1=bfc00001 op2=7f7fffff result=80300000.700 errno=0 status=u func=atan2f op1=80800001 op2=40000000 result=80400000.800 errno=0 status=u diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/atanf.tst similarity index 95% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/atanf.tst index 0a0bfc24c605..8739ea89c3a2 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/atanf.tst @@ -1,22 +1,22 @@ ; atanf.tst ; -; Copyright (c) 2007-2023, Arm Limited. +; Copyright (c) 2007-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=atanf op1=7fc00001 result=7fc00001 errno=0 func=atanf op1=ffc00001 result=7fc00001 errno=0 func=atanf op1=7f800001 result=7fc00001 errno=0 status=i func=atanf op1=ff800001 result=7fc00001 errno=0 status=i func=atanf op1=7f800000 result=3fc90fda.a22 errno=0 func=atanf op1=ff800000 result=bfc90fda.a22 errno=0 func=atanf op1=00000000 result=00000000 errno=0 func=atanf op1=80000000 result=80000000 errno=0 ; Inconsistent behavior was detected for the following 2 cases. ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. func=atanf op1=00000001 result=00000001 errno=0 maybestatus=ux func=atanf op1=80000001 result=80000001 errno=0 maybestatus=ux func=atanf op1=3f800000 result=3f490fda.a22 errno=0 func=atanf op1=bf800000 result=bf490fda.a22 errno=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanh.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/atanh.tst similarity index 97% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanh.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/atanh.tst index d96ff327fcd9..7ba297e5046c 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanh.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/atanh.tst @@ -1,22 +1,22 @@ ; atanh.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. 
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=atanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=atanh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=atanh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atanh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=atanh op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i func=atanh op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i func=atanh op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i func=atanh op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i func=atanh op1=3ff00000.00000000 result=7ff00000.00000000 errno=ERANGE status=z func=atanh op1=bff00000.00000000 result=fff00000.00000000 errno=ERANGE status=z func=atanh op1=00000000.00000000 result=00000000.00000000 errno=0 func=atanh op1=80000000.00000000 result=80000000.00000000 errno=0 ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. func=atanh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux func=atanh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanhf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/atanhf.tst similarity index 96% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanhf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/atanhf.tst index 21a68a661a11..010012831b3c 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanhf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/atanhf.tst @@ -1,23 +1,23 @@ ; atanhf.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=atanhf op1=7fc00001 result=7fc00001 errno=0 func=atanhf op1=ffc00001 result=7fc00001 errno=0 func=atanhf op1=7f800001 result=7fc00001 errno=0 status=i func=atanhf op1=ff800001 result=7fc00001 errno=0 status=i func=atanhf op1=7f800000 result=7fc00001 errno=EDOM status=i func=atanhf op1=ff800000 result=7fc00001 errno=EDOM status=i func=atanhf op1=3f800001 result=7fc00001 errno=EDOM status=i func=atanhf op1=bf800001 result=7fc00001 errno=EDOM status=i func=atanhf op1=3f800000 result=7f800000 errno=ERANGE status=z func=atanhf op1=bf800000 result=ff800000 errno=ERANGE status=z func=atanhf op1=00000000 result=00000000 errno=0 func=atanhf op1=80000000 result=80000000 errno=0 ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. 
func=atanhf op1=00000001 result=00000001 errno=0 maybestatus=ux func=atanhf op1=80000001 result=80000001 errno=0 maybestatus=ux diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/cbrtf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/cbrtf.tst similarity index 97% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/cbrtf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/cbrtf.tst index 0dd8d09f1d4f..98942580c7a7 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/cbrtf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/cbrtf.tst @@ -1,29 +1,29 @@ ; cbrtf.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=cbrtf op1=7f800000 result=7f800000 errno=0 func=cbrtf op1=ff800000 result=ff800000 errno=0 func=cbrtf op1=7f800001 result=7fc00001 errno=0 status=i func=cbrtf op1=7fc00001 result=7fc00001 errno=0 func=cbrtf op1=00000000 result=00000000 errno=0 func=cbrtf op1=00000001 result=26a14517.cc7 errno=0 func=cbrtf op1=00000002 result=26cb2ff5.29f errno=0 func=cbrtf op1=00000003 result=26e89768.579 errno=0 func=cbrtf op1=00000004 result=27000000.000 errno=0 func=cbrtf op1=00400000 result=2a4b2ff5.29f errno=0 func=cbrtf op1=00800000 result=2a800000.000 errno=0 func=cbrtf op1=3f800000 result=3f800000.000 errno=0 func=cbrtf op1=40000000 result=3fa14517.cc7 errno=0 func=cbrtf op1=7f7fffff result=54cb2ff4.e63 errno=0 func=cbrtf op1=80000000 result=80000000 errno=0 func=cbrtf op1=80000001 result=a6a14517.cc7 errno=0 func=cbrtf op1=80000002 result=a6cb2ff5.29f errno=0 func=cbrtf op1=80000003 result=a6e89768.579 errno=0 func=cbrtf op1=80000004 result=a7000000.000 errno=0 func=cbrtf op1=80400000 result=aa4b2ff5.29f errno=0 func=cbrtf op1=80800000 result=aa800000.000 errno=0 func=cbrtf op1=bf800000 result=bf800000.000 errno=0 func=cbrtf op1=c0000000 result=bfa14517.cc7 errno=0 func=cbrtf op1=ff7fffff result=d4cb2ff4.e63 errno=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/cosh.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/cosh.tst similarity index 95% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/cosh.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/cosh.tst index c4efacb7272d..4dc6fe4846dc 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/cosh.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/cosh.tst @@ -1,15 +1,15 @@ ; cosh.tst ; -; Copyright (c) 1999-2023, Arm Limited. +; Copyright (c) 1999-2024, Arm Limited. 
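; Conventions used throughout these directed-test files, as interpreted
; from the test harness: op1/op2 and result are IEEE-754 bit patterns in
; hex, and an optional '.xxx' suffix on result supplies further hex digits
; of the exact answer, i.e. the expected sub-ULP tail. The status letters
; are i=invalid, z=divide-by-zero, o=overflow, u=underflow, x=inexact, and
; maybestatus lists flags that are permitted but not required. For example
;   func=cosh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
; requires cosh(DBL_MAX) to return +inf, set errno to ERANGE, and raise
; the overflow and inexact flags.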
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=cosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=cosh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=cosh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=cosh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=cosh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 func=cosh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox func=cosh op1=fff00000.00000000 result=7ff00000.00000000 errno=0 func=cosh op1=ffefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox func=cosh op1=00000000.00000000 result=3ff00000.00000000 errno=0 func=cosh op1=80000000.00000000 result=3ff00000.00000000 errno=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/coshf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/coshf.tst similarity index 93% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/coshf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/coshf.tst index 2b967e78f4b4..d224baf486a5 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/coshf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/coshf.tst @@ -1,15 +1,15 @@ ; coshf.tst ; -; Copyright (c) 2007-2023, Arm Limited. +; Copyright (c) 2007-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=coshf op1=7fc00001 result=7fc00001 errno=0 func=coshf op1=ffc00001 result=7fc00001 errno=0 func=coshf op1=7f800001 result=7fc00001 errno=0 status=i func=coshf op1=ff800001 result=7fc00001 errno=0 status=i func=coshf op1=7f800000 result=7f800000 errno=0 func=coshf op1=7f7fffff result=7f800000 errno=ERANGE status=ox func=coshf op1=ff800000 result=7f800000 errno=0 func=coshf op1=ff7fffff result=7f800000 errno=ERANGE status=ox func=coshf op1=00000000 result=3f800000 errno=0 func=coshf op1=80000000 result=3f800000 errno=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfc.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/erfc.tst similarity index 96% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfc.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/erfc.tst index c03fc591da47..249e7343eac2 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfc.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/erfc.tst @@ -1,23 +1,23 @@ ; erfc.tst - Directed test cases for erfc ; -; Copyright (c) 2022-2023, Arm Limited. +; Copyright (c) 2022-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=erfc op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=erfc op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=erfc op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=erfc op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=erfc op1=7ff00000.00000000 result=00000000.00000000 errno=0 func=erfc op1=7fefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux ; We deliberately turned off errno setting in erf, as the standard simply ; states that errno `may` be set to ERANGE in case of underflow. ; As a result the following condition on errno cannot be satisfied.
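; (C99 and POSIX leave errno on underflow optional, so a conforming
; implementation may set ERANGE or leave errno untouched. For reference,
; 403b44af.48b01531 is roughly 27.27 and 403bffff.ffffffff roughly 28.0,
; around where double-precision erfc underflows to zero.)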
; ; func=erfc op1=403b44af.48b01531 result=00000000.00000000 errno=ERANGE status=ux ; func=erfc op1=c03b44af.48b01531 result=40000000.00000000 errno=0 func=erfc op1=403bffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux func=erfc op1=c03bffff.ffffffff result=40000000.00000000 errno=0 func=erfc op1=fff00000.00000000 result=40000000.00000000 errno=0 func=erfc op1=00000000.00000000 result=3ff00000.00000000 errno=0 func=erfc op1=80000000.00000000 result=3ff00000.00000000 errno=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfcf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/erfcf.tst similarity index 93% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfcf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/erfcf.tst index 719baccb2e45..22a1a8f236d8 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfcf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/erfcf.tst @@ -1,14 +1,14 @@ ; erfcf.tst - Directed test cases for erfcf ; -; Copyright (c) 2007-2023, Arm Limited. +; Copyright (c) 2007-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=erfcf op1=7fc00001 result=7fc00001 errno=0 func=erfcf op1=ffc00001 result=7fc00001 errno=0 func=erfcf op1=7f800001 result=7fc00001 errno=0 status=i func=erfcf op1=ff800001 result=7fc00001 errno=0 status=i func=erfcf op1=7f800000 result=00000000 errno=0 func=erfcf op1=7f7fffff result=00000000 errno=ERANGE status=ux func=erfcf op1=ff800000 result=40000000 errno=0 func=erfcf op1=00000000 result=3f800000 errno=0 func=erfcf op1=80000000 result=3f800000 errno=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/expm1.tst similarity index 96% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/expm1.tst index 609d6f479721..3d58c6b3f161 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/expm1.tst @@ -1,21 +1,21 @@ ; expm1.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=expm1 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=expm1 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=expm1 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=expm1 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=expm1 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 func=expm1 op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox func=expm1 op1=fff00000.00000000 result=bff00000.00000000 errno=0 func=expm1 op1=ffefffff.ffffffff result=bff00000.00000000 errno=0 func=expm1 op1=00000000.00000000 result=00000000.00000000 errno=0 func=expm1 op1=80000000.00000000 result=80000000.00000000 errno=0 ; Inconsistent behavior was detected for the following 2 cases. ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. 
func=expm1 op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux func=expm1 op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1f.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/expm1f.tst similarity index 98% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1f.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/expm1f.tst index 44c38420a617..44a15d679870 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1f.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/expm1f.tst @@ -1,57 +1,57 @@ ; expm1f.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=expm1f op1=7fc00001 result=7fc00001 errno=0 func=expm1f op1=ffc00001 result=7fc00001 errno=0 func=expm1f op1=7f800001 result=7fc00001 errno=0 status=i func=expm1f op1=ff800001 result=7fc00001 errno=0 status=i func=expm1f op1=7f800000 result=7f800000 errno=0 func=expm1f op1=7f7fffff result=7f800000 errno=ERANGE status=ox func=expm1f op1=ff800000 result=bf800000 errno=0 func=expm1f op1=ff7fffff result=bf800000 errno=0 func=expm1f op1=00000000 result=00000000 errno=0 func=expm1f op1=80000000 result=80000000 errno=0 ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. func=expm1f op1=00000001 result=00000001 errno=0 maybestatus=ux func=expm1f op1=80000001 result=80000001 errno=0 maybestatus=ux func=expm1f op1=42b145c0 result=7f6ac2dd.9b8 errno=0 ; Check both sides of the over/underflow thresholds in the code. func=expm1f op1=c2000000 result=bf7fffff.fff error=0 func=expm1f op1=c2000001 result=bf7fffff.fff error=0 func=expm1f op1=43000000 result=7f800000 error=overflow func=expm1f op1=43000001 result=7f800000 error=overflow func=expm1f op1=c2a80000 result=bf800000.000 error=0 func=expm1f op1=c2a80001 result=bf800000.000 error=0 ; Check values for which exp goes denormal. expm1f should not report ; spurious overflow. 
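; (For reference: expf(x) goes subnormal once x < ln(0x1p-126), roughly
; -87.34; the three inputs below are approximately -88.03, -103.28 and
; -110.21, so expm1f must still round to -1.0f without reporting overflow.)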
func=expm1f op1=c2b00f34 result=bf800000.000 error=0 func=expm1f op1=c2ce8ed0 result=bf800000.000 error=0 func=expm1f op1=c2dc6bba result=bf800000.000 error=0 ; Regression tests for significance loss when the two components of ; the result have opposite sign but similar magnitude func=expm1f op1=be8516c1 result=be6a652b.0dc error=0 func=expm1f op1=be851714 result=be6a65ab.0e5 error=0 func=expm1f op1=be851cc7 result=be6a6e75.111 error=0 func=expm1f op1=be851d1a result=be6a6ef5.102 error=0 func=expm1f op1=be851d6d result=be6a6f75.0f2 error=0 func=expm1f op1=be852065 result=be6a7409.0e4 error=0 func=expm1f op1=be8520b8 result=be6a7489.0c7 error=0 func=expm1f op1=be85210b result=be6a7509.0a8 error=0 func=expm1f op1=be855401 result=be6ac39b.0d5 error=0 func=expm1f op1=be933307 result=be7fdbf0.d8d error=0 func=expm1f op1=be92ed6b result=be7f737a.d81 error=0 func=expm1f op1=be933b90 result=be7fe8be.d76 error=0 func=expm1f op1=3eb11364 result=3ed38deb.0c0 error=0 func=expm1f op1=3f28e830 result=3f6f344b.0da error=0 func=expm1f op1=3eb1578f result=3ed3ee47.13b error=0 func=expm1f op1=3f50176a result=3fa08e36.fea error=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/log10.tst similarity index 95% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/log10.tst index 34831436234a..3ff252013498 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/log10.tst @@ -1,16 +1,16 @@ ; log10.tst ; -; Copyright (c) 2007-2023, Arm Limited. +; Copyright (c) 2007-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=log10 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=log10 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=log10 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=log10 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=log10 op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i func=log10 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 func=log10 op1=3ff00000.00000000 result=00000000.00000000 errno=0 func=log10 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i func=log10 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z func=log10 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z func=log10 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10f.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/log10f.tst similarity index 98% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10f.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/log10f.tst index d5744a66f092..5c83e3f5e9b4 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10f.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/log10f.tst @@ -1,69 +1,69 @@ ; log10f.tst ; -; Copyright (c) 2007-2023, Arm Limited. +; Copyright (c) 2007-2024, Arm Limited. 
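; Note on the near-1 block below: for inputs just above or below 1.0
; (bit patterns 3f8xxxxx / 3f7xxxxx), log10(1+e) ~= e/ln(10) ~= 0.4343*e,
; so the result is tiny and naive evaluation would lose most of its bits
; to cancellation; the '.xxx' tails pin down the correctly rounded result
; on this special-case path.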
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=log10f op1=7fc00001 result=7fc00001 errno=0 func=log10f op1=ffc00001 result=7fc00001 errno=0 func=log10f op1=7f800001 result=7fc00001 errno=0 status=i func=log10f op1=ff800001 result=7fc00001 errno=0 status=i func=log10f op1=ff810000 result=7fc00001 errno=0 status=i func=log10f op1=7f800000 result=7f800000 errno=0 func=log10f op1=3f800000 result=00000000 errno=0 func=log10f op1=ff800000 result=7fc00001 errno=EDOM status=i func=log10f op1=00000000 result=ff800000 errno=ERANGE status=z func=log10f op1=80000000 result=ff800000 errno=ERANGE status=z func=log10f op1=80000001 result=7fc00001 errno=EDOM status=i ; Directed tests for the special-case handling of log10 of things ; very near 1 func=log10f op1=3f81a618 result=3bb62472.b92 error=0 func=log10f op1=3f876783 result=3cc811f4.26c error=0 func=log10f op1=3f816af8 result=3b9cc4c7.057 error=0 func=log10f op1=3f7bed7d result=bbe432cb.e23 error=0 func=log10f op1=3f803ece result=3a59ff3a.a84 error=0 func=log10f op1=3f80089f result=38ef9728.aa6 error=0 func=log10f op1=3f86ab72 result=3cb4b711.457 error=0 func=log10f op1=3f780854 result=bc60f953.904 error=0 func=log10f op1=3f7c6d76 result=bbc7fd01.01c error=0 func=log10f op1=3f85dff6 result=3c9fa76f.81f error=0 func=log10f op1=3f7b87f4 result=bbfa9edc.be4 error=0 func=log10f op1=3f81c710 result=3bc4457b.745 error=0 func=log10f op1=3f80946d result=3b00a140.c06 error=0 func=log10f op1=3f7e87ea result=bb23cd70.828 error=0 func=log10f op1=3f811437 result=3b6ee960.b40 error=0 func=log10f op1=3f858dcf result=3c971d9b.2ea error=0 func=log10f op1=3f7f61a3 result=ba89b814.4e0 error=0 func=log10f op1=3f82d642 result=3c1bfb8d.517 error=0 func=log10f op1=3f80f3bc result=3b52ebe8.c75 error=0 func=log10f op1=3f85eff9 result=3ca150d9.7e8 error=0 func=log10f op1=3f843eb8 result=3c68263f.771 error=0 func=log10f op1=3f78e691 result=bc481cf4.50a error=0 func=log10f op1=3f87c56f result=3cd1b268.5e6 error=0 func=log10f op1=3f83b711 result=3c4b94c5.918 error=0 func=log10f op1=3f823b2b result=3bf5eb02.e2a error=0 func=log10f op1=3f7f2c4e result=bab82c80.519 error=0 func=log10f op1=3f83fc92 result=3c5a3ba1.543 error=0 func=log10f op1=3f793956 result=bc3ee04e.03c error=0 func=log10f op1=3f839ba5 result=3c45caca.92a error=0 func=log10f op1=3f862f30 result=3ca7de76.16f error=0 func=log10f op1=3f832a20 result=3c2dc6e9.afd error=0 func=log10f op1=3f810296 result=3b5fb92a.429 error=0 func=log10f op1=3f7e58c9 result=bb38655a.0a4 error=0 func=log10f op1=3f8362e7 result=3c39cc65.d15 error=0 func=log10f op1=3f7fdb85 result=b97d9016.40b error=0 func=log10f op1=3f84484e result=3c6a29f2.f74 error=0 func=log10f op1=3f861862 result=3ca5819e.f2d error=0 func=log10f op1=3f7c027b result=bbdf912d.440 error=0 func=log10f op1=3f867803 result=3caf6744.34d error=0 func=log10f op1=3f789a89 result=bc509bce.458 error=0 func=log10f op1=3f8361d9 result=3c399347.379 error=0 func=log10f op1=3f7d3ac3 result=bb9ad93a.93d error=0 func=log10f op1=3f7ee241 result=baf8bd12.a62 error=0 func=log10f op1=3f83a1fd result=3c4721bd.0a4 error=0 func=log10f op1=3f840da3 result=3c5dd375.675 error=0 func=log10f op1=3f79c2fe result=bc2f8a60.8c5 error=0 func=log10f op1=3f854a93 result=3c901cc9.add error=0 func=log10f op1=3f87a50a result=3cce6125.cd6 error=0 func=log10f op1=3f818bf5 result=3baaee68.a55 error=0 func=log10f op1=3f830a44 result=3c2705c4.d87 error=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1p.tst 
b/contrib/arm-optimized-routines/math/test/testcases/directed/log1p.tst similarity index 96% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1p.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/log1p.tst index 9ee8c62fc9c0..109413a79e96 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1p.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/log1p.tst @@ -1,22 +1,22 @@ ; log1p.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=log1p op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=log1p op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=log1p op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=log1p op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=log1p op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i func=log1p op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 ; Cases 6, 9 , 10, 11, 12 fail with certain versions of GLIBC and not others. ; The main reason seems to be the handling of errno and exceptions. func=log1p op1=00000000.00000000 result=00000000.00000000 errno=0 func=log1p op1=80000000.00000000 result=80000000.00000000 errno=0 ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. func=log1p op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux func=log1p op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1pf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/log1pf.tst similarity index 99% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1pf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/log1pf.tst index aaa01d67c2b3..9655b9473612 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1pf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/log1pf.tst @@ -1,130 +1,130 @@ ; log1pf.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=log1pf op1=7fc00001 result=7fc00001 errno=0 func=log1pf op1=ffc00001 result=7fc00001 errno=0 func=log1pf op1=7f800001 result=7fc00001 errno=0 status=i func=log1pf op1=ff800001 result=7fc00001 errno=0 status=i func=log1pf op1=ff810000 result=7fc00001 errno=0 status=i func=log1pf op1=7f800000 result=7f800000 errno=0 ; Cases 6, 9 , 10, 11, 12 fail with certain versions of GLIBC and not others. ; The main reason seems to be the handling of errno and exceptions. func=log1pf op1=00000000 result=00000000 errno=0 func=log1pf op1=80000000 result=80000000 errno=0 ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. 
func=log1pf op1=00000001 result=00000001 errno=0 maybestatus=ux func=log1pf op1=80000001 result=80000001 errno=0 maybestatus=ux func=log1pf op1=3f1e91ee result=3ef6d127.fdb errno=0 func=log1pf op1=3f201046 result=3ef8a881.fba errno=0 func=log1pf op1=3f21b916 result=3efab23b.f9f errno=0 func=log1pf op1=3f21bde6 result=3efab821.fee errno=0 func=log1pf op1=3f22a5ee result=3efbd435.ff2 errno=0 func=log1pf op1=3f231b56 result=3efc63b7.e26 errno=0 func=log1pf op1=3f23ce96 result=3efd3e83.fc8 errno=0 func=log1pf op1=3eee18c6 result=3ec38576.02e errno=0 func=log1pf op1=3eee2f41 result=3ec394ce.057 errno=0 func=log1pf op1=3eee770d result=3ec3c5cc.00c errno=0 func=log1pf op1=3eee7fed result=3ec3cbda.065 errno=0 func=log1pf op1=3eee8fb2 result=3ec3d69c.008 errno=0 func=log1pf op1=3eeeb8eb result=3ec3f2ba.061 errno=0 func=log1pf op1=3eeeccfd result=3ec4006a.01d errno=0 func=log1pf op1=3eeef5f0 result=3ec41c56.020 errno=0 func=log1pf op1=3eeeff12 result=3ec42290.00c errno=0 func=log1pf op1=3eef05cf result=3ec42728.052 errno=0 func=log1pf op1=3eef13d3 result=3ec430b6.00e errno=0 func=log1pf op1=3eef2e70 result=3ec442da.04a errno=0 func=log1pf op1=3eef3fbf result=3ec44ea6.055 errno=0 func=log1pf op1=3eef3feb result=3ec44ec4.021 errno=0 func=log1pf op1=3eef4399 result=3ec45146.011 errno=0 func=log1pf op1=3eef452e result=3ec4525a.049 errno=0 func=log1pf op1=3eef4ea9 result=3ec458d0.020 errno=0 func=log1pf op1=3eef7365 result=3ec471d8.05e errno=0 func=log1pf op1=3eefa38f result=3ec492a8.003 errno=0 func=log1pf op1=3eefb1f1 result=3ec49c74.015 errno=0 func=log1pf op1=3eefb334 result=3ec49d50.023 errno=0 func=log1pf op1=3eefb3c1 result=3ec49db0.0bf errno=0 func=log1pf op1=3eefb591 result=3ec49eec.15d errno=0 func=log1pf op1=3eefd736 result=3ec4b5d6.02d errno=0 func=log1pf op1=3eefd797 result=3ec4b618.114 errno=0 func=log1pf op1=3eefee5d result=3ec4c59a.071 errno=0 func=log1pf op1=3eeffff4 result=3ec4d194.0a7 errno=0 func=log1pf op1=3ef00cd1 result=3ec4da56.025 errno=0 func=log1pf op1=3ef0163a result=3ec4e0be.07a errno=0 func=log1pf op1=3ef01e89 result=3ec4e666.007 errno=0 func=log1pf op1=3ef02004 result=3ec4e768.00a errno=0 func=log1pf op1=3ef02c40 result=3ec4efbc.017 errno=0 func=log1pf op1=3ef05b50 result=3ec50fc4.031 errno=0 func=log1pf op1=3ef05bb1 result=3ec51006.05f errno=0 func=log1pf op1=3ef0651b result=3ec5166e.0d9 errno=0 func=log1pf op1=3ef06609 result=3ec51710.02a errno=0 func=log1pf op1=3ef0666a result=3ec51752.049 errno=0 func=log1pf op1=3ef0791e result=3ec5240c.0a8 errno=0 func=log1pf op1=3ef07d46 result=3ec526e0.00e errno=0 func=log1pf op1=3ef091fd result=3ec534f8.03c errno=0 func=log1pf op1=3ef09602 result=3ec537b4.128 errno=0 func=log1pf op1=3ef09848 result=3ec53940.044 errno=0 func=log1pf op1=3ef0a04f result=3ec53eb6.07d errno=0 func=log1pf op1=3ef0ab6a result=3ec54644.062 errno=0 func=log1pf op1=3ef0ae49 result=3ec54838.002 errno=0 func=log1pf op1=3ef0c1b8 result=3ec55570.000 errno=0 func=log1pf op1=3ef0ca06 result=3ec55b16.00d errno=0 func=log1pf op1=3ef0cc29 result=3ec55c8a.095 errno=0 func=log1pf op1=3ef0d228 result=3ec5609e.04f errno=0 func=log1pf op1=3ef0d8c0 result=3ec5651a.05e errno=0 func=log1pf op1=3ef0dc0c result=3ec56758.029 errno=0 func=log1pf op1=3ef0e0e8 result=3ec56aa6.02e errno=0 func=log1pf op1=3ef0e502 result=3ec56d70.102 errno=0 func=log1pf op1=3ef0e754 result=3ec56f04.017 errno=0 func=log1pf op1=3ef0efe9 result=3ec574da.01c errno=0 func=log1pf op1=3ef0f309 result=3ec576fa.016 errno=0 func=log1pf op1=3ef0f499 result=3ec5780a.005 errno=0 func=log1pf op1=3ef0f6c2 
result=3ec57982.083 errno=0 func=log1pf op1=3ef0f852 result=3ec57a92.05d errno=0 func=log1pf op1=3ef0f9e2 result=3ec57ba2.02e errno=0 func=log1pf op1=3ef119ee result=3ec5916c.024 errno=0 func=log1pf op1=3ef11edf result=3ec594c8.03d errno=0 func=log1pf op1=3ef128c4 result=3ec59b82.001 errno=0 func=log1pf op1=3ef12ac1 result=3ec59cdc.04b errno=0 func=log1pf op1=3ef12fea result=3ec5a05e.045 errno=0 func=log1pf op1=3ef131e7 result=3ec5a1b8.05a errno=0 func=log1pf op1=3ef134e1 result=3ec5a3be.00e errno=0 func=log1pf op1=3ef1397a result=3ec5a6de.127 errno=0 func=log1pf op1=3ef13ade result=3ec5a7d0.0f6 errno=0 func=log1pf op1=3ef13c0d result=3ec5a89e.054 errno=0 func=log1pf op1=3ef13d71 result=3ec5a990.016 errno=0 func=log1pf op1=3ef14074 result=3ec5ab9c.12c errno=0 func=log1pf op1=3ef146a0 result=3ec5afce.035 errno=0 func=log1pf op1=3ef14a39 result=3ec5b240.024 errno=0 func=log1pf op1=3ef14d39 result=3ec5b44a.00c errno=0 func=log1pf op1=3ef152a3 result=3ec5b7f8.04d errno=0 func=log1pf op1=3ef170a1 result=3ec5cc5a.021 errno=0 func=log1pf op1=3ef17855 result=3ec5d196.0dc errno=0 func=log1pf op1=3ef17ece result=3ec5d5fc.010 errno=0 func=log1pf op1=3ef1810c result=3ec5d782.08e errno=0 func=log1pf op1=3ef18da9 result=3ec5e014.0ae errno=0 func=log1pf op1=3ef19054 result=3ec5e1e4.1a2 errno=0 func=log1pf op1=3ef190ea result=3ec5e24a.048 errno=0 func=log1pf op1=3ef1a739 result=3ec5f172.0d8 errno=0 func=log1pf op1=3ef1a83c result=3ec5f222.018 errno=0 func=log1pf op1=3ef1bbcc result=3ec5ff6c.09d errno=0 func=log1pf op1=3ef1bd3c result=3ec60066.03a errno=0 func=log1pf op1=3ef1d6ee result=3ec611da.056 errno=0 func=log1pf op1=3ef1de36 result=3ec616cc.01b errno=0 func=log1pf op1=3ef1e623 result=3ec61c2e.008 errno=0 func=log1pf op1=3ef1e9b1 result=3ec61e98.029 errno=0 func=log1pf op1=3ef1ee19 result=3ec62196.0d8 errno=0 func=log1pf op1=3ef1f13a result=3ec623b6.039 errno=0 func=log1pf op1=3ef1f1a7 result=3ec62400.091 errno=0 func=log1pf op1=3ef1f214 result=3ec6244a.0e8 errno=0 func=log1pf op1=3ef206e1 result=3ec6326a.09b errno=0 func=log1pf op1=3ef21245 result=3ec63a26.012 errno=0 func=log1pf op1=3ef217fd result=3ec63e08.048 errno=0 func=log1pf op1=3ef2186a result=3ec63e52.063 errno=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinh.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/sinh.tst similarity index 96% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinh.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/sinh.tst index d6a3da896693..ab0d84b84d9e 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinh.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/sinh.tst @@ -1,21 +1,21 @@ ; sinh.tst ; -; Copyright (c) 1999-2023, Arm Limited. +; Copyright (c) 1999-2024, Arm Limited. 
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=sinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=sinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=sinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=sinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=sinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 func=sinh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox func=sinh op1=fff00000.00000000 result=fff00000.00000000 errno=0 func=sinh op1=ffefffff.ffffffff result=fff00000.00000000 errno=ERANGE status=ox func=sinh op1=00000000.00000000 result=00000000.00000000 errno=0 func=sinh op1=80000000.00000000 result=80000000.00000000 errno=0 ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. func=sinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux func=sinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinhf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/sinhf.tst similarity index 95% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinhf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/sinhf.tst index 5f7bd1b04137..d9269c0fa405 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinhf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/sinhf.tst @@ -1,21 +1,21 @@ ; sinhf.tst ; -; Copyright (c) 2009-2023, Arm Limited. +; Copyright (c) 2009-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=sinhf op1=7fc00001 result=7fc00001 errno=0 func=sinhf op1=ffc00001 result=7fc00001 errno=0 func=sinhf op1=7f800001 result=7fc00001 errno=0 status=i func=sinhf op1=ff800001 result=7fc00001 errno=0 status=i func=sinhf op1=7f800000 result=7f800000 errno=0 func=sinhf op1=7f7fffff result=7f800000 errno=ERANGE status=ox func=sinhf op1=ff800000 result=ff800000 errno=0 func=sinhf op1=ff7fffff result=ff800000 errno=ERANGE status=ox func=sinhf op1=00000000 result=00000000 errno=0 func=sinhf op1=80000000 result=80000000 errno=0 ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. func=sinhf op1=00000001 result=00000001 errno=0 maybestatus=ux func=sinhf op1=80000001 result=80000001 errno=0 maybestatus=ux diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/tanf.tst similarity index 96% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/tanf.tst index 3161f70f4361..e38142df6e3c 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/tanf.tst @@ -1,25 +1,25 @@ ; tanf.tst ; -; Copyright (c) 2022-2023, Arm Limited. +; Copyright (c) 2022-2024, Arm Limited. 
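; Note on the SDCOMP-26094 block below: the 46xxxxxx inputs are around
; 12.4k and the 45xxxxxx ones in the 6-7k range, values for which the
; reduced argument lands furthest past pi/4; accordingly, the expected
; results all sit close to +/-1.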
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=tanf op1=7fc00001 result=7fc00001 errno=0 func=tanf op1=ffc00001 result=7fc00001 errno=0 func=tanf op1=7f800001 result=7fc00001 errno=0 status=i func=tanf op1=ff800001 result=7fc00001 errno=0 status=i func=tanf op1=7f800000 result=7fc00001 errno=EDOM status=i func=tanf op1=ff800000 result=7fc00001 errno=EDOM status=i func=tanf op1=00000000 result=00000000 errno=0 func=tanf op1=80000000 result=80000000 errno=0 ; SDCOMP-26094: check tanf in the cases for which the range reducer ; returns values furthest beyond its nominal upper bound of pi/4. func=tanf op1=46427f1b result=3f80396d.599 error=0 func=tanf op1=4647e568 result=3f8039a6.c9f error=0 func=tanf op1=46428bac result=3f803a03.148 error=0 func=tanf op1=4647f1f9 result=3f803a3c.852 error=0 func=tanf op1=4647fe8a result=3f803ad2.410 error=0 func=tanf op1=45d8d7f1 result=bf800669.901 error=0 func=tanf op1=45d371a4 result=bf800686.3cd error=0 func=tanf op1=45ce0b57 result=bf8006a2.e9a error=0 func=tanf op1=45d35882 result=bf80071b.bc4 error=0 func=tanf op1=45cdf235 result=bf800738.693 error=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanh.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/tanh.tst similarity index 95% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanh.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/tanh.tst index 78776e6f3924..e842063c0ef7 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanh.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/tanh.tst @@ -1,18 +1,18 @@ ; tanh.tst ; -; Copyright (c) 1999-2023, Arm Limited. +; Copyright (c) 1999-2024, Arm Limited. ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=tanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=tanh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 func=tanh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=tanh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i func=tanh op1=7ff00000.00000000 result=3ff00000.00000000 errno=0 func=tanh op1=fff00000.00000000 result=bff00000.00000000 errno=0 func=tanh op1=00000000.00000000 result=00000000.00000000 errno=0 func=tanh op1=80000000.00000000 result=80000000.00000000 errno=0 ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. func=tanh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux func=tanh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanhf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/tanhf.tst similarity index 95% rename from contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanhf.tst rename to contrib/arm-optimized-routines/math/test/testcases/directed/tanhf.tst index 603e3107e44f..412aa12b3621 100644 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanhf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/tanhf.tst @@ -1,20 +1,20 @@ ; tanhf.tst ; -; Copyright (c) 2007-2023, Arm Limited. +; Copyright (c) 2007-2024, Arm Limited. 
; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=tanhf op1=7fc00001 result=7fc00001 errno=0 func=tanhf op1=ffc00001 result=7fc00001 errno=0 func=tanhf op1=7f800001 result=7fc00001 errno=0 status=i func=tanhf op1=ff800001 result=7fc00001 errno=0 status=i func=tanhf op1=7f800000 result=3f800000 errno=0 func=tanhf op1=ff800000 result=bf800000 errno=0 func=tanhf op1=00000000 result=00000000 errno=0 func=tanhf op1=80000000 result=80000000 errno=0 ; No exception is raised with certain versions of glibc. Functions ; approximated by x near zero may not generate/implement flops and ; thus may not raise exceptions. ; func=tanhf op1=00000001 result=00000001 errno=0 maybestatus=ux ; func=tanhf op1=80000001 result=80000001 errno=0 maybestatus=ux func=tanhf op1=00000001 result=00000001 errno=0 maybestatus=ux func=tanhf op1=80000001 result=80000001 errno=0 maybestatus=ux diff --git a/contrib/arm-optimized-routines/math/test/trigpi_references.h b/contrib/arm-optimized-routines/math/test/trigpi_references.h new file mode 100644 index 000000000000..3dc5a3173436 --- /dev/null +++ b/contrib/arm-optimized-routines/math/test/trigpi_references.h @@ -0,0 +1,106 @@ +/* + * Extended precision scalar reference functions for trigpi. + * + * Copyright (c) 2023-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +#ifndef M_PIl +# define M_PIl 3.141592653589793238462643383279502884l +#endif + +long double +arm_math_sinpil (long double x) +{ + /* sin(inf) should return nan, as defined by C23. */ + if (isinf (x)) + return __math_invalid (x); + + long double ax = fabsl (x); + + /* Return 0 for all values above 2^64 to prevent + overflow when casting to uint64_t. */ + if (ax >= 0x1p64) + return x < 0 ? -0.0l : 0.0l; + + /* All integer cases should return 0, with unchanged sign for zero. */ + if (x == 0.0l) + return x; + if (ax == (uint64_t) ax) + return x < 0 ? -0.0l : 0.0l; + + return sinl (x * M_PIl); +} + +long double +arm_math_cospil (long double x) +{ + /* cos(inf) should return nan, as defined by C23. */ + if (isinf (x)) + return __math_invalid (x); + + long double ax = fabsl (x); + + if (ax >= 0x1p64) + return 1; + + uint64_t m = (uint64_t) ax; + + /* Integer values of cospi(x) should return +/-1. + The sign depends on if x is odd or even. */ + if (m == ax) + return (m & 1) ? -1 : 1; + + /* Values of Integer + 0.5 should always return 0. */ + if (ax - 0.5 == m || ax + 0.5 == m) + return 0; + + return cosl (ax * M_PIl); +} + +long double +arm_math_tanpil (long double x) +{ + /* inf and x = n + 0.5 for any integral n should return nan. */ + if (fabsl (x) >= 0x1p54l) + { + if (isinf (x)) + return __math_invalid (x); + return x < 0 ? -0.0l : 0.0l; + } + + long double i = roundl (x); + long double f = x - i; + int64_t m = (int64_t) i; + + if (x == 0) + { + return x; + } + else if (x == i) + { + if (x < 0) + { + return m & 1 ? 0.0l : -0.0l; + } + else + { + return m & 1 ? -0.0l : 0.0l; + } + } + else if (fabsl (f) == 0.5l) + { + if (x < 0) + { + return m & 1 ? -1.0l / 0.0l : 1.0l / 0.0l; + } + else + { + return m & 1 ? 1.0l / 0.0l : -1.0l / 0.0l; + } + } + + return tanl (f * M_PIl); +} diff --git a/contrib/arm-optimized-routines/math/test/ulp.c b/contrib/arm-optimized-routines/math/test/ulp.c index 5ff29972e50e..0a75fe264630 100644 --- a/contrib/arm-optimized-routines/math/test/ulp.c +++ b/contrib/arm-optimized-routines/math/test/ulp.c @@ -1,828 +1,884 @@ /* * ULP error checking tool for math functions. 
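* Drives each routine over generated bit patterns, compares the results
* against a higher-precision reference (long double, or MPFR with -m),
* and reports the worst-case error in ULPs, optionally checking errno
* and fenv exception flags as well.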
* - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +#if WANT_SVE_TESTS +# if __aarch64__ && __linux__ +# ifdef __clang__ +# pragma clang attribute push(__attribute__((target("sve"))), \ + apply_to = any(function)) +# else +# pragma GCC target("+sve") +# endif +# else +# error "SVE not supported - please disable WANT_SVE_TESTS" +# endif +#endif + #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include "mathlib.h" +#include "trigpi_references.h" + /* Don't depend on mpfr by default. */ #ifndef USE_MPFR # define USE_MPFR 0 #endif #if USE_MPFR # include #endif -static inline uint64_t -asuint64 (double f) -{ - union - { - double f; - uint64_t i; - } u = {f}; - return u.i; -} - -static inline double -asdouble (uint64_t i) -{ - union - { - uint64_t i; - double f; - } u = {i}; - return u.f; -} - -static inline uint32_t -asuint (float f) -{ - union - { - float f; - uint32_t i; - } u = {f}; - return u.i; -} - -static inline float -asfloat (uint32_t i) -{ - union - { - uint32_t i; - float f; - } u = {i}; - return u.f; -} - static uint64_t seed = 0x0123456789abcdef; static uint64_t rand64 (void) { seed = 6364136223846793005ull * seed + 1; return seed ^ (seed >> 32); } /* Uniform random in [0,n]. */ static uint64_t randn (uint64_t n) { uint64_t r, m; if (n == 0) return 0; n++; if (n == 0) return rand64 (); for (;;) { r = rand64 (); m = r % n; if (r - m <= -n) return m; } } struct gen { uint64_t start; uint64_t len; uint64_t start2; uint64_t len2; uint64_t off; uint64_t step; uint64_t cnt; }; struct args_f1 { float x; }; struct args_f2 { float x; float x2; }; struct args_d1 { double x; }; struct args_d2 { double x; double x2; }; /* result = y + tail*2^ulpexp. */ struct ret_f { float y; double tail; int ulpexp; int ex; int ex_may; }; struct ret_d { double y; double tail; int ulpexp; int ex; int ex_may; }; static inline uint64_t next1 (struct gen *g) { /* For single argument use randomized incremental steps, that produce dense sampling without collisions and allow testing all inputs in a range. */ uint64_t r = g->start + g->off; g->off += g->step + randn (g->step / 2); if (g->off > g->len) g->off -= g->len; /* hack. */ return r; } static inline uint64_t next2 (uint64_t *x2, struct gen *g) { /* For two arguments use uniform random sampling. */ uint64_t r = g->start + randn (g->len); *x2 = g->start2 + randn (g->len2); return r; } static struct args_f1 next_f1 (void *g) { return (struct args_f1){asfloat (next1 (g))}; } static struct args_f2 next_f2 (void *g) { uint64_t x2; uint64_t x = next2 (&x2, g); return (struct args_f2){asfloat (x), asfloat (x2)}; } static struct args_d1 next_d1 (void *g) { return (struct args_d1){asdouble (next1 (g))}; } static struct args_d2 next_d2 (void *g) { uint64_t x2; uint64_t x = next2 (&x2, g); return (struct args_d2){asdouble (x), asdouble (x2)}; } -struct conf -{ - int r; - int rc; - int quiet; - int mpfr; - int fenv; - unsigned long long n; - double softlim; - double errlim; - int ignore_zero_sign; -}; - /* A bit of a hack: call vector functions twice with the same input in lane 0 but a different value in other lanes: once with an in-range value and then with a special case value. */ static int secondcall; /* Wrappers for vector functions. */ -#ifdef __vpcs -typedef __f32x4_t v_float; -typedef __f64x2_t v_double; +#if __aarch64__ && __linux__ /* First element of fv and dv may be changed by -c argument. 
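The wrappers argf()/argd() below place the value under test in the low
lane(s) and fv/dv[secondcall] in the remaining lane(s); each wrapped call
is made twice, the second time with -INFINITY in those lanes, so a
special-case value in one lane must not disturb the result in lane 0.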
*/ static float fv[2] = {1.0f, -INFINITY}; static double dv[2] = {1.0, -INFINITY}; -static inline v_float argf(float x) { return (v_float){x,x,x,fv[secondcall]}; } -static inline v_double argd(double x) { return (v_double){x,dv[secondcall]}; } -#if WANT_SVE_MATH +static inline float32x4_t +argf (float x) +{ + return (float32x4_t){ x, x, x, fv[secondcall] }; +} +static inline float64x2_t +argd (double x) +{ + return (float64x2_t){ x, dv[secondcall] }; +} +#if WANT_SVE_TESTS #include -typedef __SVFloat32_t sv_float; -typedef __SVFloat64_t sv_double; - -static inline sv_float svargf(float x) { - int n = svcntw(); - float base[n]; - for (int i=0; i> i) & 1; + return svcmpne (svptrue_b32 (), svld1 (svptrue_b32 (), tmp), 0); + } + else + { + uint64_t tmp[svcntd ()]; + for (unsigned i = 0; i < svcntd (); i++) + tmp[i] = (p >> i) & 1; + return svcmpne (svptrue_b64 (), svld1 (svptrue_b64 (), tmp), 0); + } +} +# endif #endif + +struct conf +{ + int r; + int rc; + int quiet; + int mpfr; + int fenv; + unsigned long long n; + double softlim; + double errlim; + int ignore_zero_sign; +#if WANT_SVE_TESTS + svbool_t *pg; #endif +}; #include "test/ulp_wrappers.h" struct fun { const char *name; int arity; int singleprec; int twice; + int is_predicated; union { float (*f1) (float); float (*f2) (float, float); double (*d1) (double); double (*d2) (double, double); +#if WANT_SVE_TESTS + float (*f1_pred) (svbool_t, float); + float (*f2_pred) (svbool_t, float, float); + double (*d1_pred) (svbool_t, double); + double (*d2_pred) (svbool_t, double, double); +#endif } fun; union { double (*f1) (double); double (*f2) (double, double); long double (*d1) (long double); long double (*d2) (long double, long double); } fun_long; #if USE_MPFR union { int (*f1) (mpfr_t, const mpfr_t, mpfr_rnd_t); int (*f2) (mpfr_t, const mpfr_t, const mpfr_t, mpfr_rnd_t); int (*d1) (mpfr_t, const mpfr_t, mpfr_rnd_t); int (*d2) (mpfr_t, const mpfr_t, const mpfr_t, mpfr_rnd_t); } fun_mpfr; #endif }; +// clang-format off static const struct fun fun[] = { #if USE_MPFR -# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \ - {#x, a, s, twice, {.t = x_wrap}, {.t = x_long}, {.t = x_mpfr}}, +# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \ + { #x, a, s, twice, 0, { .t = x_wrap }, { .t = x_long }, { .t = x_mpfr } }, +# define SVF(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \ + { #x, a, s, twice, 1, { .t##_pred = x_wrap }, { .t = x_long }, { .t = x_mpfr } }, #else -# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \ - {#x, a, s, twice, {.t = x_wrap}, {.t = x_long}}, +# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \ + { #x, a, s, twice, 0, { .t = x_wrap }, { .t = x_long } }, +# define SVF(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \ + { #x, a, s, twice, 1, { .t##_pred = x_wrap }, { .t = x_long } }, #endif #define F1(x) F (x##f, x##f, x, mpfr_##x, 1, 1, f1, 0) #define F2(x) F (x##f, x##f, x, mpfr_##x, 2, 1, f2, 0) #define D1(x) F (x, x, x##l, mpfr_##x, 1, 0, d1, 0) #define D2(x) F (x, x, x##l, mpfr_##x, 2, 0, d2, 0) /* Neon routines. 
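These names follow the AArch64 vector function ABI mangling,
_ZGV<isa><mask><lanes><args>_<base>: 'n' = AdvSIMD, 's' = SVE; 'N' =
unmasked, 'M' = masked; a digit or 'x' (scalable) = lane count; one 'v'
per vector argument. So _ZGVnN4v_sinf is the unmasked 4-lane AdvSIMD
sinf, and _ZGVsMxvv_pow is the masked scalable-width SVE pow taking two
vector arguments.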
*/ -#define VF1(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 1, 1, f1, 0) -#define VF2(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 2, 1, f2, 0) -#define VD1(x) F (__v_##x, v_##x, x##l, mpfr_##x, 1, 0, d1, 0) -#define VD2(x) F (__v_##x, v_##x, x##l, mpfr_##x, 2, 0, d2, 0) -#define VNF1(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 1, 1, f1, 0) -#define VNF2(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 2, 1, f2, 0) -#define VND1(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 1, 0, d1, 0) -#define VND2(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 2, 0, d2, 0) -#define ZVF1(x) F (_ZGVnN4v_##x##f, Z_##x##f, x, mpfr_##x, 1, 1, f1, 0) -#define ZVF2(x) F (_ZGVnN4vv_##x##f, Z_##x##f, x, mpfr_##x, 2, 1, f2, 0) -#define ZVD1(x) F (_ZGVnN2v_##x, Z_##x, x##l, mpfr_##x, 1, 0, d1, 0) -#define ZVD2(x) F (_ZGVnN2vv_##x, Z_##x, x##l, mpfr_##x, 2, 0, d2, 0) -#define ZVNF1(x) VNF1 (x) ZVF1 (x) -#define ZVNF2(x) VNF2 (x) ZVF2 (x) -#define ZVND1(x) VND1 (x) ZVD1 (x) -#define ZVND2(x) VND2 (x) ZVD2 (x) +#define ZVNF1(x) F (_ZGVnN4v_##x##f, Z_##x##f, x, mpfr_##x, 1, 1, f1, 0) +#define ZVNF2(x) F (_ZGVnN4vv_##x##f, Z_##x##f, x, mpfr_##x, 2, 1, f2, 0) +#define ZVND1(x) F (_ZGVnN2v_##x, Z_##x, x##l, mpfr_##x, 1, 0, d1, 0) +#define ZVND2(x) F (_ZGVnN2vv_##x, Z_##x, x##l, mpfr_##x, 2, 0, d2, 0) /* SVE routines. */ -#define SVF1(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 1, 1, f1, 0) -#define SVF2(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 2, 1, f2, 0) -#define SVD1(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 1, 0, d1, 0) -#define SVD2(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 2, 0, d2, 0) -#define ZSVF1(x) F (_ZGVsMxv_##x##f, Z_sv_##x##f, x, mpfr_##x, 1, 1, f1, 0) -#define ZSVF2(x) F (_ZGVsMxvv_##x##f, Z_sv_##x##f, x, mpfr_##x, 2, 1, f2, 0) -#define ZSVD1(x) F (_ZGVsMxv_##x, Z_sv_##x, x##l, mpfr_##x, 1, 0, d1, 0) -#define ZSVD2(x) F (_ZGVsMxvv_##x, Z_sv_##x, x##l, mpfr_##x, 2, 0, d2, 0) +#define ZSVF1(x) SVF (_ZGVsMxv_##x##f, Z_sv_##x##f, x, mpfr_##x, 1, 1, f1, 0) +#define ZSVF2(x) SVF (_ZGVsMxvv_##x##f, Z_sv_##x##f, x, mpfr_##x, 2, 1, f2, 0) +#define ZSVD1(x) SVF (_ZGVsMxv_##x, Z_sv_##x, x##l, mpfr_##x, 1, 0, d1, 0) +#define ZSVD2(x) SVF (_ZGVsMxvv_##x, Z_sv_##x, x##l, mpfr_##x, 2, 0, d2, 0) #include "test/ulp_funcs.h" #undef F #undef F1 #undef F2 #undef D1 #undef D2 -#undef SVF1 -#undef SVF2 -#undef SVD1 -#undef SVD2 - {0}}; +#undef ZSVF1 +#undef ZSVF2 +#undef ZSVD1 +#undef ZSVD2 + { 0 } +}; +// clang-format on /* Boilerplate for generic calls. 
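ulpscale_f/ulpscale_d return the exponent e such that one ULP at x is
2^e (subnormals are treated as having the minimum normal exponent), so
an absolute error d converts to ULPs as

   double err_in_ulps = scalbn (d, -ulpscale_f (want));

which is how the ulperr routines in ulp.h consume it. For example,
want = 1.0f has biased exponent 0x7f, giving e = 0x7f - 0x7f - 23 = -23:
one ULP of 1.0f is 2^-23.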
*/ static inline int ulpscale_f (float x) { int e = asuint (x) >> 23 & 0xff; if (!e) e++; return e - 0x7f - 23; } static inline int ulpscale_d (double x) { int e = asuint64 (x) >> 52 & 0x7ff; if (!e) e++; return e - 0x3ff - 52; } static inline float -call_f1 (const struct fun *f, struct args_f1 a) +call_f1 (const struct fun *f, struct args_f1 a, const struct conf *conf) { +#if WANT_SVE_TESTS + if (f->is_predicated) + return f->fun.f1_pred (*conf->pg, a.x); +#endif return f->fun.f1 (a.x); } static inline float -call_f2 (const struct fun *f, struct args_f2 a) +call_f2 (const struct fun *f, struct args_f2 a, const struct conf *conf) { +#if WANT_SVE_TESTS + if (f->is_predicated) + return f->fun.f2_pred (*conf->pg, a.x, a.x2); +#endif return f->fun.f2 (a.x, a.x2); } static inline double -call_d1 (const struct fun *f, struct args_d1 a) +call_d1 (const struct fun *f, struct args_d1 a, const struct conf *conf) { +#if WANT_SVE_TESTS + if (f->is_predicated) + return f->fun.d1_pred (*conf->pg, a.x); +#endif return f->fun.d1 (a.x); } static inline double -call_d2 (const struct fun *f, struct args_d2 a) +call_d2 (const struct fun *f, struct args_d2 a, const struct conf *conf) { +#if WANT_SVE_TESTS + if (f->is_predicated) + return f->fun.d2_pred (*conf->pg, a.x, a.x2); +#endif return f->fun.d2 (a.x, a.x2); } static inline double call_long_f1 (const struct fun *f, struct args_f1 a) { return f->fun_long.f1 (a.x); } static inline double call_long_f2 (const struct fun *f, struct args_f2 a) { return f->fun_long.f2 (a.x, a.x2); } static inline long double call_long_d1 (const struct fun *f, struct args_d1 a) { return f->fun_long.d1 (a.x); } static inline long double call_long_d2 (const struct fun *f, struct args_d2 a) { return f->fun_long.d2 (a.x, a.x2); } static inline void printcall_f1 (const struct fun *f, struct args_f1 a) { printf ("%s(%a)", f->name, a.x); } static inline void printcall_f2 (const struct fun *f, struct args_f2 a) { printf ("%s(%a, %a)", f->name, a.x, a.x2); } static inline void printcall_d1 (const struct fun *f, struct args_d1 a) { printf ("%s(%a)", f->name, a.x); } static inline void printcall_d2 (const struct fun *f, struct args_d2 a) { printf ("%s(%a, %a)", f->name, a.x, a.x2); } static inline void printgen_f1 (const struct fun *f, struct gen *gen) { printf ("%s in [%a;%a]", f->name, asfloat (gen->start), asfloat (gen->start + gen->len)); } static inline void printgen_f2 (const struct fun *f, struct gen *gen) { printf ("%s in [%a;%a] x [%a;%a]", f->name, asfloat (gen->start), asfloat (gen->start + gen->len), asfloat (gen->start2), asfloat (gen->start2 + gen->len2)); } static inline void printgen_d1 (const struct fun *f, struct gen *gen) { printf ("%s in [%a;%a]", f->name, asdouble (gen->start), asdouble (gen->start + gen->len)); } static inline void printgen_d2 (const struct fun *f, struct gen *gen) { printf ("%s in [%a;%a] x [%a;%a]", f->name, asdouble (gen->start), asdouble (gen->start + gen->len), asdouble (gen->start2), asdouble (gen->start2 + gen->len2)); } #define reduce_f1(a, f, op) (f (a.x)) #define reduce_f2(a, f, op) (f (a.x) op f (a.x2)) #define reduce_d1(a, f, op) (f (a.x)) #define reduce_d2(a, f, op) (f (a.x) op f (a.x2)) #ifndef IEEE_754_2008_SNAN # define IEEE_754_2008_SNAN 1 #endif static inline int issignaling_f (float x) { uint32_t ix = asuint (x); if (!IEEE_754_2008_SNAN) return (ix & 0x7fc00000) == 0x7fc00000; return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000; } static inline int issignaling_d (double x) { uint64_t ix = asuint64 (x); if (!IEEE_754_2008_SNAN) return (ix 
& 0x7ff8000000000000) == 0x7ff8000000000000; return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL; } #if USE_MPFR static mpfr_rnd_t rmap (int r) { switch (r) { case FE_TONEAREST: return MPFR_RNDN; case FE_TOWARDZERO: return MPFR_RNDZ; case FE_UPWARD: return MPFR_RNDU; case FE_DOWNWARD: return MPFR_RNDD; } return -1; } #define prec_mpfr_f 50 #define prec_mpfr_d 80 #define prec_f 24 #define prec_d 53 #define emin_f -148 #define emin_d -1073 #define emax_f 128 #define emax_d 1024 static inline int call_mpfr_f1 (mpfr_t y, const struct fun *f, struct args_f1 a, mpfr_rnd_t r) { MPFR_DECL_INIT (x, prec_f); mpfr_set_flt (x, a.x, MPFR_RNDN); return f->fun_mpfr.f1 (y, x, r); } static inline int call_mpfr_f2 (mpfr_t y, const struct fun *f, struct args_f2 a, mpfr_rnd_t r) { MPFR_DECL_INIT (x, prec_f); MPFR_DECL_INIT (x2, prec_f); mpfr_set_flt (x, a.x, MPFR_RNDN); mpfr_set_flt (x2, a.x2, MPFR_RNDN); return f->fun_mpfr.f2 (y, x, x2, r); } static inline int call_mpfr_d1 (mpfr_t y, const struct fun *f, struct args_d1 a, mpfr_rnd_t r) { MPFR_DECL_INIT (x, prec_d); mpfr_set_d (x, a.x, MPFR_RNDN); return f->fun_mpfr.d1 (y, x, r); } static inline int call_mpfr_d2 (mpfr_t y, const struct fun *f, struct args_d2 a, mpfr_rnd_t r) { MPFR_DECL_INIT (x, prec_d); MPFR_DECL_INIT (x2, prec_d); mpfr_set_d (x, a.x, MPFR_RNDN); mpfr_set_d (x2, a.x2, MPFR_RNDN); return f->fun_mpfr.d2 (y, x, x2, r); } #endif #define float_f float #define double_f double #define copysign_f copysignf #define nextafter_f nextafterf #define fabs_f fabsf #define asuint_f asuint #define asfloat_f asfloat #define scalbn_f scalbnf #define lscalbn_f scalbn #define halfinf_f 0x1p127f #define min_normal_f 0x1p-126f #define float_d double #define double_d long double #define copysign_d copysign #define nextafter_d nextafter #define fabs_d fabs #define asuint_d asuint64 #define asfloat_d asdouble #define scalbn_d scalbn #define lscalbn_d scalbnl #define halfinf_d 0x1p1023 #define min_normal_d 0x1p-1022 #define NEW_RT #define RT(x) x##_f #define T(x) x##_f1 #include "ulp.h" #undef T #define T(x) x##_f2 #include "ulp.h" #undef T #undef RT #define NEW_RT #define RT(x) x##_d #define T(x) x##_d1 #include "ulp.h" #undef T #define T(x) x##_d2 #include "ulp.h" #undef T #undef RT static void usage (void) { puts ("./ulp [-q] [-m] [-f] [-r {n|u|d|z}] [-l soft-ulplimit] [-e ulplimit] func " "lo [hi [x lo2 hi2] [count]]"); puts ("Compares func against a higher precision implementation in [lo; hi]."); puts ("-q: quiet."); puts ("-m: use mpfr even if faster method is available."); puts ("-f: disable fenv exceptions testing."); #ifdef ___vpcs puts ("-c: neutral 'control value' to test behaviour when one lane can affect another. \n" " This should be different from tested input in other lanes, and non-special \n" " (i.e. should not trigger fenv exceptions). Default is 1."); +#endif +#if WANT_SVE_TESTS + puts ("-p: integer input for controlling predicate passed to SVE function. " + "If bit N is set, lane N is activated (bits past the vector length " + "are ignored). 
Default is UINT64_MAX (ptrue)."); #endif puts ("-z: ignore sign of 0."); puts ("Supported func:"); for (const struct fun *f = fun; f->name; f++) printf ("\t%s\n", f->name); exit (1); } static int cmp (const struct fun *f, struct gen *gen, const struct conf *conf) { int r = 1; if (f->arity == 1 && f->singleprec) r = cmp_f1 (f, gen, conf); else if (f->arity == 2 && f->singleprec) r = cmp_f2 (f, gen, conf); else if (f->arity == 1 && !f->singleprec) r = cmp_d1 (f, gen, conf); else if (f->arity == 2 && !f->singleprec) r = cmp_d2 (f, gen, conf); else usage (); return r; } static uint64_t getnum (const char *s, int singleprec) { // int i; uint64_t sign = 0; // char buf[12]; if (s[0] == '+') s++; else if (s[0] == '-') { sign = singleprec ? 1ULL << 31 : 1ULL << 63; s++; } + + /* Sentinel value for failed parse. */ + char *should_not_be_s = NULL; + /* 0xXXXX is treated as bit representation, '-' flips the sign bit. */ if (s[0] == '0' && tolower (s[1]) == 'x' && strchr (s, 'p') == 0) - return sign ^ strtoull (s, 0, 0); + { + uint64_t out = sign ^ strtoull (s, &should_not_be_s, 0); + if (should_not_be_s == s) + { + printf ("ERROR: Could not parse '%s'\n", s); + exit (1); + } + return out; + } // /* SNaN, QNaN, NaN, Inf. */ // for (i=0; s[i] && i < sizeof buf; i++) // buf[i] = tolower(s[i]); // buf[i] = 0; // if (strcmp(buf, "snan") == 0) // return sign | (singleprec ? 0x7fa00000 : 0x7ff4000000000000); // if (strcmp(buf, "qnan") == 0 || strcmp(buf, "nan") == 0) // return sign | (singleprec ? 0x7fc00000 : 0x7ff8000000000000); // if (strcmp(buf, "inf") == 0 || strcmp(buf, "infinity") == 0) // return sign | (singleprec ? 0x7f800000 : 0x7ff0000000000000); /* Otherwise assume it's a floating-point literal. */ - return sign - | (singleprec ? asuint (strtof (s, 0)) : asuint64 (strtod (s, 0))); + uint64_t out = sign + | (singleprec ? asuint (strtof (s, &should_not_be_s)) + : asuint64 (strtod (s, &should_not_be_s))); + if (should_not_be_s == s) + { + printf ("ERROR: Could not parse '%s'\n", s); + exit (1); + } + + return out; } static void parsegen (struct gen *g, int argc, char *argv[], const struct fun *f) { int singleprec = f->singleprec; int arity = f->arity; uint64_t a, b, a2, b2, n; if (argc < 1) usage (); b = a = getnum (argv[0], singleprec); n = 0; if (argc > 1 && strcmp (argv[1], "x") == 0) { argc -= 2; argv += 2; } else if (argc > 1) { b = getnum (argv[1], singleprec); if (argc > 2 && strcmp (argv[2], "x") == 0) { argc -= 3; argv += 3; } } b2 = a2 = getnum (argv[0], singleprec); if (argc > 1) b2 = getnum (argv[1], singleprec); if (argc > 2) n = strtoull (argv[2], 0, 0); if (argc > 3) usage (); //printf("ab %lx %lx ab2 %lx %lx n %lu\n", a, b, a2, b2, n); if (arity == 1) { g->start = a; g->len = b - a; if (n - 1 > b - a) n = b - a + 1; g->off = 0; g->step = n ? 
(g->len + 1) / n : 1; g->start2 = g->len2 = 0; g->cnt = n; } else if (arity == 2) { g->start = a; g->len = b - a; g->off = g->step = 0; g->start2 = a2; g->len2 = b2 - a2; g->cnt = n; } else usage (); } int main (int argc, char *argv[]) { const struct fun *f; struct gen gen; struct conf conf; conf.rc = 'n'; conf.quiet = 0; conf.mpfr = 0; conf.fenv = 1; conf.softlim = 0; conf.errlim = INFINITY; conf.ignore_zero_sign = 0; +#if WANT_SVE_TESTS + uint64_t pg_int = UINT64_MAX; +#endif for (;;) { argc--; argv++; if (argc < 1) usage (); if (argv[0][0] != '-') break; switch (argv[0][1]) { case 'e': argc--; argv++; if (argc < 1) usage (); conf.errlim = strtod (argv[0], 0); break; case 'f': conf.fenv = 0; break; case 'l': argc--; argv++; if (argc < 1) usage (); conf.softlim = strtod (argv[0], 0); break; case 'm': conf.mpfr = 1; break; case 'q': conf.quiet = 1; break; case 'r': conf.rc = argv[0][2]; if (!conf.rc) { argc--; argv++; if (argc < 1 || argv[0][1] != '\0') usage (); conf.rc = argv[0][0]; } break; case 'z': conf.ignore_zero_sign = 1; break; -#ifdef __vpcs +#if __aarch64__ && __linux__ case 'c': argc--; argv++; fv[0] = strtof(argv[0], 0); dv[0] = strtod(argv[0], 0); break; +#endif +#if WANT_SVE_TESTS + case 'p': + argc--; + argv++; + pg_int = strtoull (argv[0], 0, 0); + break; #endif default: usage (); } } switch (conf.rc) { case 'n': conf.r = FE_TONEAREST; break; case 'u': conf.r = FE_UPWARD; break; case 'd': conf.r = FE_DOWNWARD; break; case 'z': conf.r = FE_TOWARDZERO; break; default: usage (); } for (f = fun; f->name; f++) if (strcmp (argv[0], f->name) == 0) break; if (!f->name) { #ifndef __vpcs /* Ignore vector math functions if vector math is not supported. */ if (strncmp (argv[0], "_ZGVnN", 6) == 0) exit (0); #endif -#if !WANT_SVE_MATH +#if !WANT_SVE_TESTS if (strncmp (argv[0], "_ZGVsMxv", 8) == 0) exit (0); #endif printf ("math function %s not supported\n", argv[0]); exit (1); } if (!f->singleprec && LDBL_MANT_DIG == DBL_MANT_DIG) conf.mpfr = 1; /* Use mpfr if long double has no extra precision. */ if (!USE_MPFR && conf.mpfr) { puts ("mpfr is not available."); return 0; } argc--; argv++; parsegen (&gen, argc, argv, f); conf.n = gen.cnt; +#if WANT_SVE_TESTS + svbool_t pg = parse_pg (pg_int, f->singleprec); + conf.pg = &pg; +#endif return cmp (f, &gen, &conf); } + +#if __aarch64__ && __linux__ && WANT_SVE_TESTS && defined(__clang__) +# pragma clang attribute pop +#endif diff --git a/contrib/arm-optimized-routines/math/test/ulp.h b/contrib/arm-optimized-routines/math/test/ulp.h index b0bc59aeef8d..de122257d3b1 100644 --- a/contrib/arm-optimized-routines/math/test/ulp.h +++ b/contrib/arm-optimized-routines/math/test/ulp.h @@ -1,379 +1,386 @@ /* * Generic functions for ULP error estimation. * - * Copyright (c) 2019-2023, Arm Limited. + * Copyright (c) 2019-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* For each different math function type, T(x) should add a different suffix to x. - RT(x) should add a return type specific suffix to x. */ + RT(x) should add a return type specific suffix to x. */ #ifdef NEW_RT #undef NEW_RT # if USE_MPFR static int RT(ulpscale_mpfr) (mpfr_t x, int t) { /* TODO: pow of 2 cases. */ if (mpfr_regular_p (x)) { mpfr_exp_t e = mpfr_get_exp (x) - RT(prec); if (e < RT(emin)) e = RT(emin) - 1; if (e > RT(emax) - RT(prec)) e = RT(emax) - RT(prec); return e; } if (mpfr_zero_p (x)) return RT(emin) - 1; if (mpfr_inf_p (x)) return RT(emax) - RT(prec); /* NaN. 
*/ return 0; } # endif /* Difference between exact result and closest real number that gets rounded to got, i.e. error before rounding, for a correctly rounded result the difference is 0. */ static double RT (ulperr) (RT (float) got, const struct RT (ret) * p, int r, int ignore_zero_sign) { RT(float) want = p->y; RT(float) d; double e; if (RT(asuint) (got) == RT(asuint) (want)) return 0.0; if (isnan (got) && isnan (want)) - /* Ignore sign of NaN. */ + /* Ignore sign of NaN, and signalling-ness for MPFR. */ +# if USE_MPFR + return 0; +# else return RT (issignaling) (got) == RT (issignaling) (want) ? 0 : INFINITY; +# endif if (signbit (got) != signbit (want)) { /* Fall through to ULP calculation if ignoring sign of zero and at exactly one of want and got is non-zero. */ if (ignore_zero_sign && want == got) return 0.0; if (!ignore_zero_sign || (want != 0 && got != 0)) return INFINITY; } if (!isfinite (want) || !isfinite (got)) { if (isnan (got) != isnan (want)) return INFINITY; if (isnan (want)) return 0; if (isinf (got)) { got = RT(copysign) (RT(halfinf), got); want *= 0.5f; } if (isinf (want)) { want = RT(copysign) (RT(halfinf), want); got *= 0.5f; } } if (r == FE_TONEAREST) { // TODO: incorrect when got vs want cross a powof2 boundary /* error = got > want ? got - want - tail ulp - 0.5 ulp - : got - want - tail ulp + 0.5 ulp; */ + : got - want - tail ulp + 0.5 ulp. */ d = got - want; e = d > 0 ? -p->tail - 0.5 : -p->tail + 0.5; } else { if ((r == FE_DOWNWARD && got < want) || (r == FE_UPWARD && got > want) || (r == FE_TOWARDZERO && fabs (got) < fabs (want))) got = RT(nextafter) (got, want); d = got - want; e = -p->tail; } return RT(scalbn) (d, -p->ulpexp) + e; } static int RT(isok) (RT(float) ygot, int exgot, RT(float) ywant, int exwant, int exmay) { return RT(asuint) (ygot) == RT(asuint) (ywant) && ((exgot ^ exwant) & ~exmay) == 0; } static int RT(isok_nofenv) (RT(float) ygot, RT(float) ywant) { return RT(asuint) (ygot) == RT(asuint) (ywant); } #endif -static inline void T(call_fenv) (const struct fun *f, struct T(args) a, int r, - RT(float) * y, int *ex) +static inline void T (call_fenv) (const struct fun *f, struct T (args) a, + int r, RT (float) * y, int *ex, + const struct conf *conf) { if (r != FE_TONEAREST) fesetround (r); feclearexcept (FE_ALL_EXCEPT); - *y = T(call) (f, a); + *y = T (call) (f, a, conf); *ex = fetestexcept (FE_ALL_EXCEPT); if (r != FE_TONEAREST) fesetround (FE_TONEAREST); } -static inline void T(call_nofenv) (const struct fun *f, struct T(args) a, - int r, RT(float) * y, int *ex) +static inline void T (call_nofenv) (const struct fun *f, struct T (args) a, + int r, RT (float) * y, int *ex, + const struct conf *conf) { if (r != FE_TONEAREST) fesetround (r); - *y = T(call) (f, a); + *y = T (call) (f, a, conf); *ex = 0; if (r != FE_TONEAREST) fesetround (FE_TONEAREST); } -static inline int T(call_long_fenv) (const struct fun *f, struct T(args) a, - int r, struct RT(ret) * p, - RT(float) ygot, int exgot) +static inline int T (call_long_fenv) (const struct fun *f, struct T (args) a, + int r, struct RT (ret) * p, + RT (float) ygot, int exgot) { if (r != FE_TONEAREST) fesetround (r); feclearexcept (FE_ALL_EXCEPT); volatile struct T(args) va = a; // TODO: barrier a = va; RT(double) yl = T(call_long) (f, a); p->y = (RT(float)) yl; volatile RT(float) vy = p->y; // TODO: barrier (void) vy; p->ex = fetestexcept (FE_ALL_EXCEPT); if (r != FE_TONEAREST) fesetround (FE_TONEAREST); p->ex_may = FE_INEXACT; if (RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may)) return 1; p->ulpexp = 
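/* Worked example for RT(ulperr) above (illustrative): take want = 1.0
   with tail = 0 and got = nextafter (1.0, 2.0) under FE_TONEAREST.
   Then d = got - want = 0x1p-52 > 0, so e = -tail - 0.5 = -0.5, and
   with ulpexp = -52 for results in [1,2) the reported error is
   scalbn (0x1p-52, 52) - 0.5 = 0.5 ulp: the closest real number that
   still rounds to got lies half an ulp from the exact result.  */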
RT(ulpscale) (p->y); if (isinf (p->y)) p->tail = RT(lscalbn) (yl - (RT(double)) 2 * RT(halfinf), -p->ulpexp); else p->tail = RT(lscalbn) (yl - p->y, -p->ulpexp); if (RT(fabs) (p->y) < RT(min_normal)) { /* TODO: subnormal result is treated as undeflow even if it's exact since call_long may not raise inexact correctly. */ if (p->y != 0 || (p->ex & FE_INEXACT)) p->ex |= FE_UNDERFLOW | FE_INEXACT; } return 0; } static inline int T(call_long_nofenv) (const struct fun *f, struct T(args) a, int r, struct RT(ret) * p, RT(float) ygot, int exgot) { if (r != FE_TONEAREST) fesetround (r); RT(double) yl = T(call_long) (f, a); p->y = (RT(float)) yl; if (r != FE_TONEAREST) fesetround (FE_TONEAREST); if (RT(isok_nofenv) (ygot, p->y)) return 1; p->ulpexp = RT(ulpscale) (p->y); if (isinf (p->y)) p->tail = RT(lscalbn) (yl - (RT(double)) 2 * RT(halfinf), -p->ulpexp); else p->tail = RT(lscalbn) (yl - p->y, -p->ulpexp); return 0; } /* There are nan input args and all quiet. */ static inline int T(qnanpropagation) (struct T(args) a) { return T(reduce) (a, isnan, ||) && !T(reduce) (a, RT(issignaling), ||); } static inline RT(float) T(sum) (struct T(args) a) { return T(reduce) (a, , +); } /* returns 1 if the got result is ok. */ static inline int T(call_mpfr_fix) (const struct fun *f, struct T(args) a, int r_fenv, struct RT(ret) * p, RT(float) ygot, int exgot) { #if USE_MPFR int t, t2; mpfr_rnd_t r = rmap (r_fenv); MPFR_DECL_INIT(my, RT(prec_mpfr)); MPFR_DECL_INIT(mr, RT(prec)); MPFR_DECL_INIT(me, RT(prec_mpfr)); mpfr_clear_flags (); t = T(call_mpfr) (my, f, a, r); /* Double rounding. */ t2 = mpfr_set (mr, my, r); if (t2) t = t2; mpfr_set_emin (RT(emin)); mpfr_set_emax (RT(emax)); t = mpfr_check_range (mr, t, r); t = mpfr_subnormalize (mr, t, r); mpfr_set_emax (MPFR_EMAX_DEFAULT); mpfr_set_emin (MPFR_EMIN_DEFAULT); p->y = mpfr_get_d (mr, r); p->ex = t ? FE_INEXACT : 0; p->ex_may = FE_INEXACT; if (mpfr_underflow_p () && (p->ex & FE_INEXACT)) /* TODO: handle before and after rounding uflow cases. */ p->ex |= FE_UNDERFLOW; if (mpfr_overflow_p ()) p->ex |= FE_OVERFLOW | FE_INEXACT; if (mpfr_divby0_p ()) p->ex |= FE_DIVBYZERO; //if (mpfr_erangeflag_p ()) // p->ex |= FE_INVALID; if (!mpfr_nanflag_p () && RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may)) return 1; if (mpfr_nanflag_p () && !T(qnanpropagation) (a)) p->ex |= FE_INVALID; p->ulpexp = RT(ulpscale_mpfr) (my, t); if (!isfinite (p->y)) { p->tail = 0; if (isnan (p->y)) { /* If an input was nan keep its sign. */ p->y = T(sum) (a); if (!isnan (p->y)) p->y = (p->y - p->y) / (p->y - p->y); return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may); } mpfr_set_si_2exp (mr, signbit (p->y) ? 
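/* Outline of the MPFR reference flow in this function (a sketch; the
   code above handles exception flags and range emulation in full):
     t  = T(call_mpfr) (my, f, a, r);     high-precision result + ternary
     t2 = mpfr_set (mr, my, r);           second rounding, to target precision
     t  = mpfr_subnormalize (mr, t, r);   emulate target subnormal range
     p->y = mpfr_get_d (mr, r);
   A nonzero ternary value records that some rounding step was inexact,
   which is how FE_INEXACT is reconstructed without a hardware fenv.  */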
-1 : 1, 1024, MPFR_RNDN); if (mpfr_cmpabs (my, mr) >= 0) return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may); } mpfr_sub (me, my, mr, MPFR_RNDN); mpfr_mul_2si (me, me, -p->ulpexp, MPFR_RNDN); p->tail = mpfr_get_d (me, MPFR_RNDN); return 0; #else abort (); #endif } static int T(cmp) (const struct fun *f, struct gen *gen, const struct conf *conf) { double maxerr = 0; uint64_t cnt = 0; uint64_t cnt1 = 0; uint64_t cnt2 = 0; uint64_t cntfail = 0; int r = conf->r; int use_mpfr = conf->mpfr; int fenv = conf->fenv; + for (;;) { struct RT(ret) want; struct T(args) a = T(next) (gen); int exgot; int exgot2; RT(float) ygot; RT(float) ygot2; int fail = 0; if (fenv) - T(call_fenv) (f, a, r, &ygot, &exgot); + T (call_fenv) (f, a, r, &ygot, &exgot, conf); else - T(call_nofenv) (f, a, r, &ygot, &exgot); + T (call_nofenv) (f, a, r, &ygot, &exgot, conf); if (f->twice) { secondcall = 1; if (fenv) - T(call_fenv) (f, a, r, &ygot2, &exgot2); + T (call_fenv) (f, a, r, &ygot2, &exgot2, conf); else - T(call_nofenv) (f, a, r, &ygot2, &exgot2); + T (call_nofenv) (f, a, r, &ygot2, &exgot2, conf); secondcall = 0; if (RT(asuint) (ygot) != RT(asuint) (ygot2)) { fail = 1; cntfail++; T(printcall) (f, a); printf (" got %a then %a for same input\n", ygot, ygot2); } } cnt++; int ok = use_mpfr ? T(call_mpfr_fix) (f, a, r, &want, ygot, exgot) : (fenv ? T(call_long_fenv) (f, a, r, &want, ygot, exgot) : T(call_long_nofenv) (f, a, r, &want, ygot, exgot)); if (!ok) { int print = 0; double err = RT (ulperr) (ygot, &want, r, conf->ignore_zero_sign); double abserr = fabs (err); // TODO: count errors below accuracy limit. if (abserr > 0) cnt1++; if (abserr > 1) cnt2++; if (abserr > conf->errlim) { print = 1; if (!fail) { fail = 1; cntfail++; } } if (abserr > maxerr) { maxerr = abserr; if (!conf->quiet && abserr > conf->softlim) print = 1; } if (print) { T(printcall) (f, a); // TODO: inf ulp handling printf (" got %a want %a %+g ulp err %g\n", ygot, want.y, want.tail, err); } int diff = fenv ? exgot ^ want.ex : 0; if (fenv && (diff & ~want.ex_may)) { if (!fail) { fail = 1; cntfail++; } T(printcall) (f, a); printf (" is %a %+g ulp, got except 0x%0x", want.y, want.tail, exgot); if (diff & exgot) printf (" wrongly set: 0x%x", diff & exgot); if (diff & ~exgot) printf (" wrongly clear: 0x%x", diff & ~exgot); putchar ('\n'); } } if (cnt >= conf->n) break; if (!conf->quiet && cnt % 0x100000 == 0) printf ("progress: %6.3f%% cnt %llu cnt1 %llu cnt2 %llu cntfail %llu " "maxerr %g\n", 100.0 * cnt / conf->n, (unsigned long long) cnt, (unsigned long long) cnt1, (unsigned long long) cnt2, (unsigned long long) cntfail, maxerr); } double cc = cnt; if (cntfail) printf ("FAIL "); else printf ("PASS "); T(printgen) (f, gen); printf (" round %c errlim %g maxerr %g %s cnt %llu cnt1 %llu %g%% cnt2 %llu " "%g%% cntfail %llu %g%%\n", conf->rc, conf->errlim, maxerr, conf->r == FE_TONEAREST ? "+0.5" : "+1.0", (unsigned long long) cnt, (unsigned long long) cnt1, 100.0 * cnt1 / cc, (unsigned long long) cnt2, 100.0 * cnt2 / cc, (unsigned long long) cntfail, 100.0 * cntfail / cc); return !!cntfail; } diff --git a/contrib/arm-optimized-routines/math/test/ulp_funcs.h b/contrib/arm-optimized-routines/math/test/ulp_funcs.h index 84f7927d3935..b58a68ff275b 100644 --- a/contrib/arm-optimized-routines/math/test/ulp_funcs.h +++ b/contrib/arm-optimized-routines/math/test/ulp_funcs.h @@ -1,40 +1,109 @@ /* * Function entries for ulp. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* clang-format off */ - F1 (sin) - F1 (cos) F (sincosf_sinf, sincosf_sinf, sincos_sin, sincos_mpfr_sin, 1, 1, f1, 0) F (sincosf_cosf, sincosf_cosf, sincos_cos, sincos_mpfr_cos, 1, 1, f1, 0) - F1 (exp) - F1 (exp2) - F1 (log) - F1 (log2) F2 (pow) - F1 (erf) - D1 (exp) - D1 (exp10) - D1 (exp2) - D1 (log) - D1 (log2) D2 (pow) - D1 (erf) -#ifdef __vpcs - F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1) - F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1) +#if __aarch64__ && __linux__ F (_ZGVnN4v_expf_1u, Z_expf_1u, exp, mpfr_exp, 1, 1, f1, 1) - F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1) F (_ZGVnN4v_exp2f_1u, Z_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1) - F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) - F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1) F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1) - F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1) - F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1) - F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1) - F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1) F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1) + F (_ZGVnN4v_sincosf_sin, v_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0) + F (_ZGVnN4v_sincosf_cos, v_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0) + F (_ZGVnN4v_cexpif_sin, v_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0) + F (_ZGVnN4v_cexpif_cos, v_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0) + F (_ZGVnN4vl4_modff_frac, v_modff_frac, modf_frac, modf_mpfr_frac, 1, 1, f1, 0) + F (_ZGVnN4vl4_modff_int, v_modff_int, modf_int, modf_mpfr_int, 1, 1, f1, 0) + F (_ZGVnN2v_sincos_sin, v_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0) + F (_ZGVnN2v_sincos_cos, v_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0) + F (_ZGVnN2v_cexpi_sin, v_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0) + F (_ZGVnN2v_cexpi_cos, v_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0) + F (_ZGVnN2vl8_modf_frac, v_modf_frac, modfl_frac, modf_mpfr_frac, 1, 0, d1, 0) + F (_ZGVnN2vl8_modf_int, v_modf_int, modfl_int, modf_mpfr_int, 1, 0, d1, 0) #endif -/* clang-format on */ + +#if WANT_SVE_TESTS +SVF (_ZGVsMxv_sincosf_sin, sv_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0) +SVF (_ZGVsMxv_sincosf_cos, sv_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0) +SVF (_ZGVsMxv_cexpif_sin, sv_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0) +SVF (_ZGVsMxv_cexpif_cos, sv_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0) +SVF (_ZGVsMxvl4_modff_frac, sv_modff_frac, modf_frac, modf_mpfr_frac, 1, 1, f1, 0) +SVF (_ZGVsMxvl4_modff_int, sv_modff_int, modf_int, modf_mpfr_int, 1, 1, f1, 0) +SVF (_ZGVsMxv_sincos_sin, sv_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0) +SVF (_ZGVsMxv_sincos_cos, sv_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0) +SVF (_ZGVsMxv_cexpi_sin, sv_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0) +SVF (_ZGVsMxv_cexpi_cos, sv_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0) +SVF (_ZGVsMxvl8_modf_frac, sv_modf_frac, modfl_frac, modf_mpfr_frac, 1, 0, d1, 0) +SVF (_ZGVsMxvl8_modf_int, sv_modf_int, modfl_int, modf_mpfr_int, 1, 0, d1, 0) +#endif + +#if WANT_EXPERIMENTAL_MATH + F (arm_math_erff, arm_math_erff, erf, mpfr_erf, 1, 1, f1, 0) + F (arm_math_erf, arm_math_erf, erfl, mpfr_erf, 1, 0, d1, 0) +#endif + +#if WANT_TRIGPI_TESTS + F (arm_math_cospif, arm_math_cospif, arm_math_cospi, mpfr_cospi, 1, 1, f1, 0) + F (arm_math_cospi, arm_math_cospi, arm_math_cospil, mpfr_cospi, 1, 0, d1, 0) + F (arm_math_sinpif, arm_math_sinpif, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0) + F (arm_math_sinpi, arm_math_sinpi, arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0) + F (arm_math_tanpif, arm_math_tanpif, 
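/* Reading of the entry fields in this table (derived from how ulp.c
   consumes them, not normative documentation):
     F (name, wrapper, higher-precision reference, mpfr reference,
        arity, singleprec, dispatch tag f1/f2/d1/d2, twice)
   e.g. F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1) checks
   the AdvSIMD powf wrapper against double pow (or mpfr_pow under -m),
   taking two float arguments, and the twice flag requests a second
   call on the same input to catch result divergence.  */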
arm_math_tanpi, mpfr_tanpi, 1, 1, f1, 0) + F (arm_math_tanpi, arm_math_tanpi, arm_math_tanpil, mpfr_tanpi, 1, 0, d1, 0) + F (arm_math_sincospif_sin, arm_math_sincospif_sin, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0) + F (arm_math_sincospif_cos, arm_math_sincospif_cos, arm_math_cospi, mpfr_cospi, 1, 1, f1, 0) + F (arm_math_sincospi_sin, arm_math_sincospi_sin, arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0) + F (arm_math_sincospi_cos, arm_math_sincospi_cos, arm_math_cospil, mpfr_cospi, 1, 0, d1, 0) +# if __aarch64__ && __linux__ + F (_ZGVnN4v_cospif, Z_cospif, arm_math_cospi, mpfr_cospi, 1, 1, f1, 0) + F (_ZGVnN2v_cospi, Z_cospi, arm_math_cospil, mpfr_cospi, 1, 0, d1, 0) + F (_ZGVnN4v_sinpif, Z_sinpif, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0) + F (_ZGVnN2v_sinpi, Z_sinpi, arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0) + F (_ZGVnN4v_tanpif, Z_tanpif, arm_math_tanpi, mpfr_tanpi, 1, 1, f1, 0) + F (_ZGVnN2v_tanpi, Z_tanpi, arm_math_tanpil, mpfr_tanpi, 1, 0, d1, 0) + F (_ZGVnN4v_sincospif_sin, v_sincospif_sin, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0) + F (_ZGVnN4v_sincospif_cos, v_sincospif_cos, arm_math_cospi, mpfr_cospi, 1, 1, f1, 0) + F (_ZGVnN2v_sincospi_sin, v_sincospi_sin, arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0) + F (_ZGVnN2v_sincospi_cos, v_sincospi_cos, arm_math_cospil, mpfr_cospi, 1, 0, d1, 0) +# endif +# if WANT_SVE_TESTS + SVF (_ZGVsMxv_cospif, Z_sv_cospif, arm_math_cospi, mpfr_cospi, 1, 1, f1, 0) + SVF (_ZGVsMxv_cospi, Z_sv_cospi, arm_math_cospil, mpfr_cospi, 1, 0, d1, 0) + SVF (_ZGVsMxv_sinpif, Z_sv_sinpif, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0) + SVF (_ZGVsMxv_sinpi, Z_sv_sinpi, arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0) + SVF (_ZGVsMxv_tanpif, Z_sv_tanpif, arm_math_tanpi, mpfr_tanpi, 1, 1, f1, 0) + SVF (_ZGVsMxv_tanpi, Z_sv_tanpi, arm_math_tanpil, mpfr_tanpi, 1, 0, d1, 0) + SVF (_ZGVsMxvl4l4_sincospif_sin, sv_sincospif_sin, arm_math_sinpi, mpfr_sinpi, 1, 1, f1, 0) + SVF (_ZGVsMxvl4l4_sincospif_cos, sv_sincospif_cos, arm_math_cospi, mpfr_cospi, 1, 1, f1, 0) + SVF (_ZGVsMxvl8l8_sincospi_sin, sv_sincospi_sin, arm_math_sinpil, mpfr_sinpi, 1, 0, d1, 0) + SVF (_ZGVsMxvl8l8_sincospi_cos, sv_sincospi_cos, arm_math_cospil, mpfr_cospi, 1, 0, d1, 0) +# if WANT_EXPERIMENTAL_MATH +SVF (_ZGVsMxvv_powk, Z_sv_powk, ref_powi, mpfr_powi, 2, 0, d2, 0) +SVF (_ZGVsMxvv_powi, Z_sv_powi, ref_powif, mpfr_powi, 2, 1, f2, 0) +# endif +# endif +#endif + + /* clang-format on */ + +#define _ZSF1(f) F1 (f) +#define _ZSF2(f) F2 (f) +#define _ZSD1(f) D1 (f) +#define _ZSD2(f) D2 (f) + +#define _ZVF1(f) ZVNF1 (f) +#define _ZVD1(f) ZVND1 (f) +#define _ZVF2(f) ZVNF2 (f) +#define _ZVD2(f) ZVND2 (f) + +#define _ZSVF1(f) ZSVF1 (f) +#define _ZSVF2(f) ZSVF2 (f) +#define _ZSVD1(f) ZSVD1 (f) +#define _ZSVD2(f) ZSVD2 (f) + +#include "test/ulp_funcs_gen.h" diff --git a/contrib/arm-optimized-routines/math/test/ulp_wrappers.h b/contrib/arm-optimized-routines/math/test/ulp_wrappers.h index 60dc3d6dd652..33e1e75f23ab 100644 --- a/contrib/arm-optimized-routines/math/test/ulp_wrappers.h +++ b/contrib/arm-optimized-routines/math/test/ulp_wrappers.h @@ -1,37 +1,429 @@ /* * Function wrappers for ulp. * - * Copyright (c) 2022-2023, Arm Limited. + * Copyright (c) 2022-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* clang-format off */ +#if __aarch64__ && __linux__ +#include +#endif + +#include + /* Wrappers for sincos. 
*/ static float sincosf_sinf(float x) {(void)cosf(x); return sinf(x);} static float sincosf_cosf(float x) {(void)sinf(x); return cosf(x);} static double sincos_sin(double x) {(void)cos(x); return sin(x);} static double sincos_cos(double x) {(void)sin(x); return cos(x);} #if USE_MPFR static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_cos(y,x,r); return mpfr_sin(y,x,r); } static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_sin(y,x,r); return mpfr_cos(y,x,r); } +static int modf_mpfr_frac(mpfr_t f, const mpfr_t x, mpfr_rnd_t r) { MPFR_DECL_INIT(i, 80); return mpfr_modf(i,f,x,r); } +static int modf_mpfr_int(mpfr_t i, const mpfr_t x, mpfr_rnd_t r) { MPFR_DECL_INIT(f, 80); return mpfr_modf(i,f,x,r); } +# if MPFR_VERSION < MPFR_VERSION_NUM(4, 2, 0) +static int mpfr_tanpi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd) { + MPFR_DECL_INIT (frd, 1080); + mpfr_const_pi (frd, GMP_RNDN); + mpfr_mul (frd, frd, arg, GMP_RNDN); + return mpfr_tan (ret, frd, GMP_RNDN); +} +static int mpfr_sinpi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd) { + MPFR_DECL_INIT (frd, 1080); + mpfr_const_pi (frd, GMP_RNDN); + mpfr_mul (frd, frd, arg, GMP_RNDN); + return mpfr_sin (ret, frd, GMP_RNDN); +} + +static int mpfr_cospi (mpfr_t ret, const mpfr_t arg, mpfr_rnd_t rnd) { + MPFR_DECL_INIT (frd, 1080); + mpfr_const_pi (frd, GMP_RNDN); + mpfr_mul (frd, frd, arg, GMP_RNDN); + return mpfr_cos (ret, frd, GMP_RNDN); +} +# endif +# if WANT_EXPERIMENTAL_MATH +static int wrap_mpfr_powi(mpfr_t ret, const mpfr_t x, const mpfr_t y, mpfr_rnd_t rnd) { + mpfr_t y2; + mpfr_init(y2); + mpfr_trunc(y2, y); + return mpfr_pow(ret, x, y2, rnd); +} +# endif #endif +float modff_frac(float x) { float i; return modff(x, &i); } +float modff_int(float x) { float i; modff(x, &i); return i; } +double modf_frac(double x) { double i; return modf(x, &i); } +double modf_int(double x) { double i; modf(x, &i); return i; } +long double modfl_frac(long double x) { long double i; return modfl(x, &i); } +long double modfl_int(long double x) { long double i; modfl(x, &i); return i; } + /* Wrappers for vector functions. */ -#ifdef __vpcs -static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; } -static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; } +#if __aarch64__ && __linux__ static float Z_expf_1u(float x) { return _ZGVnN4v_expf_1u(argf(x))[0]; } -static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; } static float Z_exp2f_1u(float x) { return _ZGVnN4v_exp2f_1u(argf(x))[0]; } -static float Z_exp2f(float x) { return _ZGVnN4v_exp2f(argf(x))[0]; } -static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; } -static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; } -static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; } -static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; } -static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; } -static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; } -static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; } #endif /* clang-format on */ + +/* No wrappers for scalar routines, but TEST_SIG will emit them. 
*/ +#define ZSNF1_WRAP(func) +#define ZSNF2_WRAP(func) +#define ZSND1_WRAP(func) +#define ZSND2_WRAP(func) + +#define ZVNF1_WRAP(func) \ + static float Z_##func##f (float x) \ + { \ + return _ZGVnN4v_##func##f (argf (x))[0]; \ + } +#define ZVNF2_WRAP(func) \ + static float Z_##func##f (float x, float y) \ + { \ + return _ZGVnN4vv_##func##f (argf (x), argf (y))[0]; \ + } +#define ZVND1_WRAP(func) \ + static double Z_##func (double x) { return _ZGVnN2v_##func (argd (x))[0]; } +#define ZVND2_WRAP(func) \ + static double Z_##func (double x, double y) \ + { \ + return _ZGVnN2vv_##func (argd (x), argd (y))[0]; \ + } + +#if WANT_TRIGPI_TESTS +float +arm_math_sincospif_sin (float x) +{ + float s, c; + arm_math_sincospif (x, &s, &c); + return s; +} +float +arm_math_sincospif_cos (float x) +{ + float s, c; + arm_math_sincospif (x, &s, &c); + return c; +} +double +arm_math_sincospi_sin (double x) +{ + double s, c; + arm_math_sincospi (x, &s, &c); + return s; +} +double +arm_math_sincospi_cos (double x) +{ + double s, c; + arm_math_sincospi (x, &s, &c); + return c; +} +#endif + +#if __aarch64__ && __linux__ + +# if WANT_TRIGPI_TESTS +ZVNF1_WRAP (cospi) +ZVND1_WRAP (cospi) +ZVNF1_WRAP (sinpi) +ZVND1_WRAP (sinpi) +ZVNF1_WRAP (tanpi) +ZVND1_WRAP (tanpi) + +double +v_sincospi_sin (double x) +{ + double s[2], c[2]; + _ZGVnN2vl8l8_sincospi (vdupq_n_f64 (x), s, c); + return s[0]; +} +double +v_sincospi_cos (double x) +{ + double s[2], c[2]; + _ZGVnN2vl8l8_sincospi (vdupq_n_f64 (x), s, c); + return c[0]; +} +float +v_sincospif_sin (float x) +{ + float s[4], c[4]; + _ZGVnN4vl4l4_sincospif (vdupq_n_f32 (x), s, c); + return s[0]; +} +float +v_sincospif_cos (float x) +{ + float s[4], c[4]; + _ZGVnN4vl4l4_sincospif (vdupq_n_f32 (x), s, c); + return c[0]; +} +# endif // WANT_TRIGPI_TESTS + +float +v_sincosf_sin (float x) +{ + float s[4], c[4]; + _ZGVnN4vl4l4_sincosf (vdupq_n_f32 (x), s, c); + return s[0]; +} +float +v_sincosf_cos (float x) +{ + float s[4], c[4]; + _ZGVnN4vl4l4_sincosf (vdupq_n_f32 (x), s, c); + return c[0]; +} +float +v_cexpif_sin (float x) +{ + return _ZGVnN4v_cexpif (vdupq_n_f32 (x)).val[0][0]; +} +float +v_cexpif_cos (float x) +{ + return _ZGVnN4v_cexpif (vdupq_n_f32 (x)).val[1][0]; +} +float +v_modff_frac (float x) +{ + float y[4]; + return _ZGVnN4vl4_modff (vdupq_n_f32 (x), y)[0]; +} +float +v_modff_int (float x) +{ + float y[4]; + _ZGVnN4vl4_modff (vdupq_n_f32 (x), y); + return y[0]; +} +double +v_sincos_sin (double x) +{ + double s[2], c[2]; + _ZGVnN2vl8l8_sincos (vdupq_n_f64 (x), s, c); + return s[0]; +} +double +v_sincos_cos (double x) +{ + double s[2], c[2]; + _ZGVnN2vl8l8_sincos (vdupq_n_f64 (x), s, c); + return c[0]; +} +double +v_cexpi_sin (double x) +{ + return _ZGVnN2v_cexpi (vdupq_n_f64 (x)).val[0][0]; +} +double +v_cexpi_cos (double x) +{ + return _ZGVnN2v_cexpi (vdupq_n_f64 (x)).val[1][0]; +} +double +v_modf_frac (double x) +{ + double y[2]; + return _ZGVnN2vl8_modf (vdupq_n_f64 (x), y)[0]; +} +double +v_modf_int (double x) +{ + double y[2]; + _ZGVnN2vl8_modf (vdupq_n_f64 (x), y); + return y[0]; +} +#endif // __aarch64__ && __linux__ + +#if WANT_SVE_TESTS +# define ZSVNF1_WRAP(func) \ + static float Z_sv_##func##f (svbool_t pg, float x) \ + { \ + return svretf (_ZGVsMxv_##func##f (svargf (x), pg), pg); \ + } +# define ZSVNF2_WRAP(func) \ + static float Z_sv_##func##f (svbool_t pg, float x, float y) \ + { \ + return svretf (_ZGVsMxvv_##func##f (svargf (x), svargf (y), pg), pg); \ + } +# define ZSVND1_WRAP(func) \ + static double Z_sv_##func (svbool_t pg, double x) \ + { \ + return 
svretd (_ZGVsMxv_##func (svargd (x), pg), pg); \ + } +# define ZSVND2_WRAP(func) \ + static double Z_sv_##func (svbool_t pg, double x, double y) \ + { \ + return svretd (_ZGVsMxvv_##func (svargd (x), svargd (y), pg), pg); \ + } + +# if WANT_TRIGPI_TESTS +ZSVNF1_WRAP (cospi) +ZSVND1_WRAP (cospi) +ZSVNF1_WRAP (sinpi) +ZSVND1_WRAP (sinpi) +ZSVNF1_WRAP (tanpi) +ZSVND1_WRAP (tanpi) +double +sv_sincospi_sin (svbool_t pg, double x) +{ + double s[svcntd ()], c[svcntd ()]; + _ZGVsMxvl8l8_sincospi (svdup_f64 (x), s, c, pg); + return svretd (svld1 (pg, s), pg); +} +double +sv_sincospi_cos (svbool_t pg, double x) +{ + double s[svcntd ()], c[svcntd ()]; + _ZGVsMxvl8l8_sincospi (svdup_f64 (x), s, c, pg); + return svretd (svld1 (pg, c), pg); +} +float +sv_sincospif_sin (svbool_t pg, float x) +{ + float s[svcntw ()], c[svcntw ()]; + _ZGVsMxvl4l4_sincospif (svdup_f32 (x), s, c, pg); + return svretf (svld1 (pg, s), pg); +} +float +sv_sincospif_cos (svbool_t pg, float x) +{ + float s[svcntw ()], c[svcntw ()]; + _ZGVsMxvl4l4_sincospif (svdup_f32 (x), s, c, pg); + return svretf (svld1 (pg, c), pg); +} +# endif // WANT_TRIGPI_TESTS + +float +sv_sincosf_sin (svbool_t pg, float x) +{ + float s[svcntw ()], c[svcntw ()]; + _ZGVsMxvl4l4_sincosf (svdup_f32 (x), s, c, pg); + return svretf (svld1 (pg, s), pg); +} +float +sv_sincosf_cos (svbool_t pg, float x) +{ + float s[svcntw ()], c[svcntw ()]; + _ZGVsMxvl4l4_sincosf (svdup_f32 (x), s, c, pg); + return svretf (svld1 (pg, c), pg); +} +float +sv_cexpif_sin (svbool_t pg, float x) +{ + return svretf (svget2 (_ZGVsMxv_cexpif (svdup_f32 (x), pg), 0), pg); +} +float +sv_cexpif_cos (svbool_t pg, float x) +{ + return svretf (svget2 (_ZGVsMxv_cexpif (svdup_f32 (x), pg), 1), pg); +} +float +sv_modff_frac (svbool_t pg, float x) +{ + float i[svcntw ()]; + return svretf (_ZGVsMxvl4_modff (svdup_f32 (x), i, pg), pg); +} +float +sv_modff_int (svbool_t pg, float x) +{ + float i[svcntw ()]; + _ZGVsMxvl4_modff (svdup_f32 (x), i, pg); + return svretf (svld1 (pg, i), pg); +} +double +sv_sincos_sin (svbool_t pg, double x) +{ + double s[svcntd ()], c[svcntd ()]; + _ZGVsMxvl8l8_sincos (svdup_f64 (x), s, c, pg); + return svretd (svld1 (pg, s), pg); +} +double +sv_sincos_cos (svbool_t pg, double x) +{ + double s[svcntd ()], c[svcntd ()]; + _ZGVsMxvl8l8_sincos (svdup_f64 (x), s, c, pg); + return svretd (svld1 (pg, c), pg); +} +double +sv_cexpi_sin (svbool_t pg, double x) +{ + return svretd (svget2 (_ZGVsMxv_cexpi (svdup_f64 (x), pg), 0), pg); +} +double +sv_cexpi_cos (svbool_t pg, double x) +{ + return svretd (svget2 (_ZGVsMxv_cexpi (svdup_f64 (x), pg), 1), pg); +} +double +sv_modf_frac (svbool_t pg, double x) +{ + double i[svcntd ()]; + return svretd (_ZGVsMxvl8_modf (svdup_f64 (x), i, pg), pg); +} +double +sv_modf_int (svbool_t pg, double x) +{ + double i[svcntd ()]; + _ZGVsMxvl8_modf (svdup_f64 (x), i, pg); + return svretd (svld1 (pg, i), pg); +} + +# if WANT_EXPERIMENTAL_MATH + +/* Our implementations of powi/powk are too imprecise to verify + against any established pow implementation. Instead we have the + following simple implementation, against which it is enough to + maintain bitwise reproducibility. Note the test framework expects + the reference impl to be of higher precision than the function + under test. For instance this means that the reference for + double-precision powi will be passed a long double, so to check + bitwise reproducibility we have to cast it back down to + double. This is fine since a round-trip to higher precision and + back down is correctly rounded. 
*/ +# define DECL_POW_INT_REF(NAME, DBL_T, FLT_T, INT_T) \ + static DBL_T __attribute__ ((unused)) NAME (DBL_T in_val, DBL_T y) \ + { \ + INT_T n = (INT_T) round (y); \ + FLT_T acc = 1.0; \ + bool want_recip = n < 0; \ + n = n < 0 ? -n : n; \ + \ + for (FLT_T c = in_val; n; c *= c, n >>= 1) \ + { \ + if (n & 0x1) \ + { \ + acc *= c; \ + } \ + } \ + if (want_recip) \ + { \ + acc = 1.0 / acc; \ + } \ + return acc; \ + } + +DECL_POW_INT_REF (ref_powif, double, float, int) +DECL_POW_INT_REF (ref_powi, long double, double, int) +static float +Z_sv_powi (svbool_t pg, float x, float y) +{ + return svretf (_ZGVsMxvv_powi (svargf (x), svdup_s32 ((int) round (y)), pg), + pg); +} +static double +Z_sv_powk (svbool_t pg, double x, double y) +{ + return svretd (_ZGVsMxvv_powk (svargd (x), svdup_s64 ((long) round (y)), pg), + pg); +} + +# endif // WANT_EXPERIMENTAL_MATH +#endif // WANT_SVE_TESTS + +#include "test/ulp_wrappers_gen.h" diff --git a/contrib/arm-optimized-routines/math/tgamma128.c b/contrib/arm-optimized-routines/math/tgamma128.c index 65deacc49d99..d6049207b91f 100644 --- a/contrib/arm-optimized-routines/math/tgamma128.c +++ b/contrib/arm-optimized-routines/math/tgamma128.c @@ -1,356 +1,358 @@ /* * Implementation of the true gamma function (as opposed to lgamma) * for 128-bit long double. * * Copyright (c) 2006-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* * This module implements the float128 gamma function under the name * tgamma128. It's expected to be suitable for integration into system * maths libraries under the standard name tgammal, if long double is * 128-bit. Such a library will probably want to check the error * handling and optimize the initial process of extracting the * exponent, which is done here by simple and portable (but * potentially slower) methods. */ #include #include #include #include /* Only binary128 format is supported. */ #if LDBL_MANT_DIG == 113 #include "tgamma128.h" #define lenof(x) (sizeof(x)/sizeof(*(x))) /* * Helper routine to evaluate a polynomial via Horner's rule */ static long double poly(const long double *coeffs, size_t n, long double x) { long double result = coeffs[--n]; while (n > 0) result = (result * x) + coeffs[--n]; return result; } /* * Compute sin(pi*x) / pi, for use in the reflection formula that * relates gamma(-x) and gamma(x). */ static long double sin_pi_x_over_pi(long double x) { int quo; long double fracpart = remquol(x, 0.5L, &quo); long double sign = 1.0L; if (quo & 2) sign = -sign; quo &= 1; if (quo == 0 && fabsl(fracpart) < 0x1.p-58L) { /* For numbers this size, sin(pi*x) is so close to pi*x that * sin(pi*x)/pi is indistinguishable from x in float128 */ return sign * fracpart; } if (quo == 0) { return sign * sinl(pi*fracpart) / pi; } else { return sign * cosl(pi*fracpart) / pi; } } /* Return tgamma(x) on the assumption that x >= 8. */ static long double tgamma_large(long double x, bool negative, long double negadjust) { /* * In this range we compute gamma(x) as x^(x-1/2) * e^-x * K, * where K is a correction factor computed as a polynomial in 1/x. * * (Vaguely inspired by the form of the Lanczos approximation, but * I tried the Lanczos approximation itself and it suffers badly * from big cancellation leading to loss of significance.) 
*/ long double t = 1/x; long double p = poly(coeffs_large, lenof(coeffs_large), t); /* * To avoid overflow in cases where x^(x-0.5) does overflow * but gamma(x) does not, we split x^(x-0.5) in half and * multiply back up _after_ multiplying the shrinking factor * of exp(-(x-0.5)). * * Note that computing x-0.5 and (x-0.5)/2 is exact for the * relevant range of x, so the only sources of error are pow * and exp themselves, plus the multiplications. */ long double powhalf = powl(x, (x-0.5L)/2.0L); long double expret = expl(-(x-0.5L)); if (!negative) { return (expret * powhalf) * powhalf * p; } else { /* * Apply the reflection formula as commented below, but * carefully: negadjust has magnitude less than 1, so it can * turn a case where gamma(+x) would overflow into a case * where gamma(-x) doesn't underflow. Not only that, but the * FP format has greater range in the tiny domain due to * denormals. For both reasons, it's not good enough to * compute the positive result and then adjust it. */ long double ret = 1 / ((expret * powhalf) * (x * negadjust) * p); return ret / powhalf; } } /* Return tgamma(x) on the assumption that 0 <= x < 1/32. */ static long double tgamma_tiny(long double x, bool negative, long double negadjust) { /* * For x near zero, we use a polynomial approximation to * g = 1/(x*gamma(x)), and then return 1/(g*x). */ long double g = poly(coeffs_tiny, lenof(coeffs_tiny), x); if (!negative) return 1.0L / (g*x); else return g / negadjust; } /* Return tgamma(x) on the assumption that 0 <= x < 2^-113. */ static long double tgamma_ultratiny(long double x, bool negative, long double negadjust) { /* On this interval, gamma can't even be distinguished from 1/x, * so we skip the polynomial evaluation in tgamma_tiny, partly to * save time and partly to avoid the tiny intermediate values * setting the underflow exception flag. */ if (!negative) return 1.0L / x; else return 1.0L / negadjust; } /* Return tgamma(x) on the assumption that 1 <= x <= 2. */ static long double tgamma_central(long double x) { /* * In this central interval, our strategy is to finding the * difference between x and the point where gamma has a minimum, * and approximate based on that. */ /* The difference between the input x and the minimum x. The first * subtraction is expected to be exact, since x and min_hi have * the same exponent (unless x=2, in which case it will still be * exact). */ long double t = (x - min_x_hi) - min_x_lo; /* * Now use two different polynomials for the intervals [1,m] and * [m,2]. */ long double p; if (t < 0) p = poly(coeffs_central_neg, lenof(coeffs_central_neg), -t); else p = poly(coeffs_central_pos, lenof(coeffs_central_pos), t); return (min_y_lo + p * (t*t)) + min_y_hi; } long double tgamma128(long double x) { /* * Start by extracting the number's sign and exponent, and ruling * out cases of non-normalized numbers. * * For an implementation integrated into a system libm, it would * almost certainly be quicker to do this by direct bitwise access * to the input float128 value, using whatever is the local idiom * for knowing its endianness. * * Integration into a system libc may also need to worry about * setting errno, if that's the locally preferred way to report * math.h errors. 
*/ int sign = signbit(x); int exponent; switch (fpclassify(x)) { case FP_NAN: return x+x; /* propagate QNaN, make SNaN throw an exception */ case FP_ZERO: return 1/x; /* divide by zero on purpose to indicate a pole */ case FP_INFINITE: if (sign) { return x-x; /* gamma(-inf) has indeterminate sign, so provoke an * IEEE invalid operation exception to indicate that */ } return x; /* but gamma(+inf) is just +inf with no error */ case FP_SUBNORMAL: exponent = -16384; break; default: frexpl(x, &exponent); exponent--; break; } bool negative = false; long double negadjust = 0.0L; if (sign) { /* * Euler's reflection formula is * * gamma(1-x) gamma(x) = pi/sin(pi*x) * * pi * => gamma(x) = -------------------- * gamma(1-x) sin(pi*x) * * But computing 1-x is going to lose a lot of accuracy when x * is very small, so instead we transform using the recurrence * gamma(t+1)=t gamma(t). Setting t=-x, this gives us * gamma(1-x) = -x gamma(-x), so we now have * * pi * gamma(x) = ---------------------- * -x gamma(-x) sin(pi*x) * * which relates gamma(x) to gamma(-x), which is much nicer, * since x can be turned into -x without rounding. */ negadjust = sin_pi_x_over_pi(x); negative = true; x = -x; /* * Now the ultimate answer we want is * * 1 / (gamma(x) * x * negadjust) * * where x is the positive value we've just turned it into. * * For some of the cases below, we'll compute gamma(x) * normally and then compute this adjusted value afterwards. * But for others, we can implement the reciprocal operation * in this formula by _avoiding_ an inversion that the * sub-case was going to do anyway. */ if (negadjust == 0) { /* * Special case for negative integers. Applying the * reflection formula would cause division by zero, but * standards would prefer we treat this error case as an * invalid operation and return NaN instead. (Possibly * because otherwise you'd have to decide which sign of * infinity to return, and unlike the x=0 case, there's no * sign of zero available to disambiguate.) */ return negadjust / negadjust; } } /* * Split the positive domain into various cases. For cases where * we do the negative-number adjustment the usual way, we'll leave * the answer in 'g' and drop out of the if statement. */ long double g; if (exponent >= 11) { /* * gamma of any positive value this large overflows, and gamma * of any negative value underflows. */ if (!negative) { long double huge = 0x1p+12288L; return huge * huge; /* provoke an overflow */ } else { long double tiny = 0x1p-12288L; return tiny * tiny * negadjust; /* underflow, of the right sign */ } } else if (exponent >= 3) { /* Negative-number adjustment happens inside here */ return tgamma_large(x, negative, negadjust); } else if (exponent < -113) { /* Negative-number adjustment happens inside here */ return tgamma_ultratiny(x, negative, negadjust); } else if (exponent < -5) { /* Negative-number adjustment happens inside here */ return tgamma_tiny(x, negative, negadjust); } else if (exponent == 0) { g = tgamma_central(x); } else if (exponent < 0) { /* * For x in [1/32,1) we range-reduce upwards to the interval * [1,2), using the inverse of the normal recurrence formula: * gamma(x) = gamma(x+1)/x. */ g = tgamma_central(1+x) / x; } else { /* * For x in [2,8) we range-reduce downwards to the interval * [1,2) by repeated application of the recurrence formula. * * Actually multiplying (x-1) by (x-2) by (x-3) and so on * would introduce multiple ULPs of rounding error. 
We can get * better accuracy by writing x = (k+1/2) + t, where k is an * integer and |t|<1/2, and expanding out the obvious factor * (x-1)(x-2)...(x-k+1) as a polynomial in t. */ long double mult; int i = x; if (i == 2) { /* x in [2,3) */ mult = (x-1); } else { long double t = x - (i + 0.5L); switch (i) { /* E.g. for x=3.5+t, we want * (x-1)(x-2) = (2.5+t)(1.5+t) = 3.75 + 4t + t^2 */ case 3: mult = 3.75L+t*(4.0L+t); break; case 4: mult = 13.125L+t*(17.75L+t*(7.5L+t)); break; case 5: mult = 59.0625L+t*(93.0L+t*(51.50L+t*(12.0L+t))); break; case 6: mult = 324.84375L+t*(570.5625L+t*(376.250L+t*( 117.5L+t*(17.5L+t)))); break; case 7: mult = 2111.484375L+t*(4033.5L+t*(3016.1875L+t*( 1140.0L+t*(231.25L+t*(24.0L+t))))); break; + default: + __builtin_unreachable(); } } g = tgamma_central(x - (i-1)) * mult; } if (!negative) { /* Positive domain: return g unmodified */ return g; } else { /* Negative domain: apply the reflection formula as commented above */ return 1.0L / (g * x * negadjust); } } #endif diff --git a/contrib/arm-optimized-routines/pl/math/tools/asin.sollya b/contrib/arm-optimized-routines/math/tools/asin.sollya similarity index 93% rename from contrib/arm-optimized-routines/pl/math/tools/asin.sollya rename to contrib/arm-optimized-routines/math/tools/asin.sollya index 8ef861d0898b..02c4a93356c3 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/asin.sollya +++ b/contrib/arm-optimized-routines/math/tools/asin.sollya @@ -1,29 +1,29 @@ // polynomial for approximating asin(x) // -// Copyright (c) 2023, Arm Limited. +// Copyright (c) 2023-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception f = asin(x); dtype = double; prec=256; a = 0x1p-106; b = 0.25; deg = 11; backward = proc(poly, d) { return d + d ^ 3 * poly(d * d); }; forward = proc(f, d) { return (f(sqrt(d))-sqrt(d))/(d*sqrt(d)); }; poly = fpminimax(forward(f, x), [|0,...,deg|], [|dtype ...|], [a;b], relative, floating); display = hexadecimal!; print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b])); print("in [", a, b, "]"); for i from 0 to deg do print(coeff(poly, i)); diff --git a/contrib/arm-optimized-routines/pl/math/tools/asinf.sollya b/contrib/arm-optimized-routines/math/tools/asinf.sollya similarity index 94% rename from contrib/arm-optimized-routines/pl/math/tools/asinf.sollya rename to contrib/arm-optimized-routines/math/tools/asinf.sollya index 5b627e546c73..69d1803875d1 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/asinf.sollya +++ b/contrib/arm-optimized-routines/math/tools/asinf.sollya @@ -1,36 +1,36 @@ // polynomial for approximating asinf(x) // -// Copyright (c) 2023, Arm Limited. +// Copyright (c) 2023-2024, Arm Limited. 
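// Checking the tgamma128 range-reduction polynomials above is simple
// arithmetic (illustrative): for x = 4.5 + t the factor is
//   (x-1)(x-2)(x-3) = (3.5+t)(2.5+t)(1.5+t)
//                   = 13.125 + 17.75 t + 7.5 t^2 + t^3
// since 3.5*2.5*1.5 = 13.125, 3.5*2.5 + 3.5*1.5 + 2.5*1.5 = 17.75 and
// 3.5 + 2.5 + 1.5 = 7.5, matching the case 4 entry in the switch.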
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception f = asin(x); dtype = single; a = 0x1p-24; b = 0.25; deg = 4; backward = proc(poly, d) { return d + d ^ 3 * poly(d * d); }; forward = proc(f, d) { return (f(sqrt(d))-sqrt(d))/(d*sqrt(d)); }; approx = proc(poly, d) { return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16); }; poly = 0; for i from 0 to deg do { i; p = roundcoefficients(approx(poly,i), [|dtype ...|]); poly = poly + x^i*coeff(p,0); }; display = hexadecimal!; print("rel error:", accurateinfnorm(1-backward(poly, x)/f(x), [a;b], 30)); print("in [", a, b, "]"); for i from 0 to deg do print(coeff(poly, i)); diff --git a/contrib/arm-optimized-routines/pl/math/tools/asinh.sollya b/contrib/arm-optimized-routines/math/tools/asinh.sollya similarity index 94% rename from contrib/arm-optimized-routines/pl/math/tools/asinh.sollya rename to contrib/arm-optimized-routines/math/tools/asinh.sollya index 663ee92f3f34..eea9b8081168 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/asinh.sollya +++ b/contrib/arm-optimized-routines/math/tools/asinh.sollya @@ -1,28 +1,28 @@ // polynomial for approximating asinh(x) // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception // Polynomial is used in [2^-26, 1]. However it is least accurate close to 1, so // we use 2^-6 as the lower bound for coeff generation, which yields sufficiently // accurate results in [2^-26, 2^-6]. a = 0x1p-6; b = 1.0; f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2); approx = proc(poly, d) { return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); }; poly = 0; for i from 0 to deg do { i; p = roundcoefficients(approx(poly,i), [|D ...|]); poly = poly + x^i*coeff(p,0); }; display = hexadecimal; print("coeffs:"); for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/pl/math/tools/asinhf.sollya b/contrib/arm-optimized-routines/math/tools/asinhf.sollya similarity index 93% rename from contrib/arm-optimized-routines/pl/math/tools/asinhf.sollya rename to contrib/arm-optimized-routines/math/tools/asinhf.sollya index ab115b53b8dc..5f1580fce883 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/asinhf.sollya +++ b/contrib/arm-optimized-routines/math/tools/asinhf.sollya @@ -1,29 +1,29 @@ // polynomial for approximating asinh(x) // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 9; a = 0x1.0p-12; b = 1.0; f = proc(y) { return asinh(x); }; approx = proc(poly, d) { return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); }; poly = x; for i from 2 to deg do { p = roundcoefficients(approx(poly,i), [|SG ...|]); poly = poly + x^i*coeff(p,0); }; display = hexadecimal; print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 2 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/pl/math/tools/atan.sollya b/contrib/arm-optimized-routines/math/tools/atan.sollya similarity index 93% rename from contrib/arm-optimized-routines/pl/math/tools/atan.sollya rename to contrib/arm-optimized-routines/math/tools/atan.sollya index ad4f33b8516a..048017d8d269 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/atan.sollya +++ b/contrib/arm-optimized-routines/math/tools/atan.sollya @@ -1,23 +1,23 @@ // polynomial for approximating atan(x) and atan2(y, x) // -// Copyright (c) 2022-2023, Arm Limited. 
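// Note on the iterative fits above (asinf/asinh/asinhf): coefficients
// are fixed one degree at a time.  Each pass runs remez on the residual
// 1 - poly(x)/f(x) weighted by x^d/f(x), rounds the new degree-d
// coefficient to the working precision with roundcoefficients, and
// folds it into poly, so later passes can compensate for the rounding
// error committed by the coefficients already fixed.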
+// Copyright (c) 2022-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception // atan is odd, so approximate with an odd polynomial: // x + ax^3 + bx^5 + cx^7 + ... // We generate a, b, c, ... such that we can approximate atan(x) by: // x + x^3 * (a + bx^2 + cx^4 + ...) // Assemble monomials deg = 20; mons = [|1,...,deg|]; for i from 0 to deg-1 do mons[i] = mons[i] * 2 + 1; a = 0x1.0p-1022; b = 1; poly = fpminimax(atan(x)-x, mons, [|double ...|], [a;b]); display = hexadecimal; print("coeffs:"); for i from 0 to deg-1 do coeff(poly,mons[i]); diff --git a/contrib/arm-optimized-routines/pl/math/tools/atanf.sollya b/contrib/arm-optimized-routines/math/tools/atanf.sollya similarity index 92% rename from contrib/arm-optimized-routines/pl/math/tools/atanf.sollya rename to contrib/arm-optimized-routines/math/tools/atanf.sollya index ed88d0ba90f9..21c3ba2bfa1d 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/atanf.sollya +++ b/contrib/arm-optimized-routines/math/tools/atanf.sollya @@ -1,20 +1,20 @@ // polynomial for approximating atanf(x) // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception // Generate list of monomials: // Taylor series of atan is of the form x + ax^3 + bx^5 + cx^7 + ... // So generate a, b, c, ... such that we can approximate atan(x) by: // x + x^3 * (a + bx^2 + cx^4 + ...) deg = 7; a = 1.1754943508222875e-38; b = 1; poly = fpminimax((atan(sqrt(x))-sqrt(x))/x^(3/2), deg, [|single ...|], [a;b]); display = hexadecimal; print("coeffs:"); for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/pl/math/tools/cbrt.sollya b/contrib/arm-optimized-routines/math/tools/cbrt.sollya similarity index 90% rename from contrib/arm-optimized-routines/pl/math/tools/cbrt.sollya rename to contrib/arm-optimized-routines/math/tools/cbrt.sollya index 1d43dc73d8cd..2490a69ac029 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/cbrt.sollya +++ b/contrib/arm-optimized-routines/math/tools/cbrt.sollya @@ -1,20 +1,20 @@ // polynomial for approximating cbrt(x) in double precision // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 3; a = 0.5; b = 1; f = x^(1/3); poly = fpminimax(f, deg, [|double ...|], [a;b]); display = hexadecimal; print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do round(coeff(poly,i), D, RN); diff --git a/contrib/arm-optimized-routines/pl/math/tools/cbrtf.sollya b/contrib/arm-optimized-routines/math/tools/cbrtf.sollya similarity index 90% rename from contrib/arm-optimized-routines/pl/math/tools/cbrtf.sollya rename to contrib/arm-optimized-routines/math/tools/cbrtf.sollya index 4e0cc69b46a5..1debf930e722 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/cbrtf.sollya +++ b/contrib/arm-optimized-routines/math/tools/cbrtf.sollya @@ -1,20 +1,20 @@ // polynomial for approximating cbrt(x) in single precision // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. 
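// How the odd atan fit above is consumed (an assumed C sketch, not
// necessarily this repo's exact evaluator):
//   double z = x * x;
//   double p = a + z * (b + z * (c + ...));   // even polynomial in z
//   return x + (x * z) * p;                   // x + x^3*(a + b x^2 + ...)
// Fitting atan(x) - x on the odd monomials 3, 5, ..., 41 keeps the
// leading x term exact, so only the correction is approximated.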
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 3; a = 0.5; b = 1; f = x^(1/3); poly = fpminimax(f, deg, [|single ...|], [a;b]); display = hexadecimal; print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do round(coeff(poly,i), SG, RN); diff --git a/contrib/arm-optimized-routines/pl/math/tools/erf.sollya b/contrib/arm-optimized-routines/math/tools/erf.sollya similarity index 92% rename from contrib/arm-optimized-routines/pl/math/tools/erf.sollya rename to contrib/arm-optimized-routines/math/tools/erf.sollya index b2fc559b511e..060e1686c835 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/erf.sollya +++ b/contrib/arm-optimized-routines/math/tools/erf.sollya @@ -1,25 +1,25 @@ // tables and constants for approximating erf(x). // -// Copyright (c) 2023, Arm Limited. +// Copyright (c) 2023-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception display = hexadecimal; prec=128; // Tables print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}"); for i from 0 to 768 do { r = i / 128; t0 = double(erf(r)); t1 = double(2/sqrt(pi) * exp(-r * r)); print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },"); }; // Constants double(1/3); double(1/10); double(2/15); double(2/9); double(2/45); double(2/sqrt(pi)); diff --git a/contrib/arm-optimized-routines/pl/math/tools/erfc.sollya b/contrib/arm-optimized-routines/math/tools/erfc.sollya similarity index 95% rename from contrib/arm-optimized-routines/pl/math/tools/erfc.sollya rename to contrib/arm-optimized-routines/math/tools/erfc.sollya index 1e2791291ebb..1b4b00066093 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/erfc.sollya +++ b/contrib/arm-optimized-routines/math/tools/erfc.sollya @@ -1,51 +1,51 @@ // tables and constants for approximating erfc(x). // -// Copyright (c) 2023, Arm Limited. +// Copyright (c) 2023-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception display = hexadecimal; prec=128; // Tables print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }"); for i from 0 to 3787 do { r = 0.0 + i / 128; t0 = double(erfc(r) * 2^128); t1 = double(2/sqrt(pi) * exp(-r * r) * 2^128); print("{ " @ t0 @ ",\t" @ t1 @ " },"); }; // Constants print("> 2/sqrt(pi)"); double(2/sqrt(pi)); print("> 1/3"); double(1/3); print("> P5"); double(2/15); double(1/10); double(2/9); double(2/45); print("> P6"); double(1/42); double(1/7); double(2/21); double(4/315); print("> Q"); double( 5.0 / 4.0); double( 6.0 / 5.0); double( 7.0 / 6.0); double( 8.0 / 7.0); double( 9.0 / 8.0); double(10.0 / 9.0); print("> R"); double(-2.0 * 4.0 / (5.0 * 6.0)); double(-2.0 * 5.0 / (6.0 * 7.0)); double(-2.0 * 6.0 / (7.0 * 8.0)); double(-2.0 * 7.0 / (8.0 * 9.0)); double(-2.0 * 8.0 / (9.0 * 10.0)); double(-2.0 * 9.0 / (10.0 * 11.0)); diff --git a/contrib/arm-optimized-routines/pl/math/tools/erfcf.sollya b/contrib/arm-optimized-routines/math/tools/erfcf.sollya similarity index 91% rename from contrib/arm-optimized-routines/pl/math/tools/erfcf.sollya rename to contrib/arm-optimized-routines/math/tools/erfcf.sollya index 1d7fc264d99d..a8e0409f5db5 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/erfcf.sollya +++ b/contrib/arm-optimized-routines/math/tools/erfcf.sollya @@ -1,22 +1,22 @@ // tables and constants for approximating erfcf(x). // -// Copyright (c) 2023, Arm Limited. +// Copyright (c) 2023-2024, Arm Limited. 
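// A note on the 2^128 scaling in the erfc table above and the 2^47
// scaling in the erfcf table that follows (rationale inferred, not
// stated in the scripts): erfc underflows the target format well
// before the last table index - erfc(29) is far below DBL_MIN and
// erfc(10) below FLT_MIN - so entries are stored pre-scaled to keep
// full precision, and the consumer is expected to multiply the final
// result back down by 2^-128 (or 2^-47).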
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception display = hexadecimal; prec=128; // Tables print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }"); for i from 0 to 644 do { r = 0.0 + i / 64; t0 = single(erfc(r) * 2^47); t1 = single(2/sqrt(pi) * exp(-r * r) * 2^47); print("{ " @ t0 @ ",\t" @ t1 @ " },"); }; // Constants single(1/3); single(2/15); single(1/10); single(2/sqrt(pi)); diff --git a/contrib/arm-optimized-routines/pl/math/tools/erff.sollya b/contrib/arm-optimized-routines/math/tools/erff.sollya similarity index 91% rename from contrib/arm-optimized-routines/pl/math/tools/erff.sollya rename to contrib/arm-optimized-routines/math/tools/erff.sollya index 59b23ef021f0..c0178a2b24ad 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/erff.sollya +++ b/contrib/arm-optimized-routines/math/tools/erff.sollya @@ -1,20 +1,20 @@ // tables and constants for approximating erff(x). // -// Copyright (c) 2023, Arm Limited. +// Copyright (c) 2023-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception display = hexadecimal; prec=128; // Tables print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}"); for i from 0 to 512 do { r = i / 128; t0 = single(erf(r)); t1 = single(2/sqrt(pi) * exp(-r * r)); print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },"); }; // Constants single(1/3); single(2/sqrt(pi)); diff --git a/contrib/arm-optimized-routines/pl/math/tools/exp10.sollya b/contrib/arm-optimized-routines/math/tools/exp10.sollya similarity index 97% rename from contrib/arm-optimized-routines/pl/math/tools/exp10.sollya rename to contrib/arm-optimized-routines/math/tools/exp10.sollya index 9f30b4018209..91f92595b96d 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/exp10.sollya +++ b/contrib/arm-optimized-routines/math/tools/exp10.sollya @@ -1,55 +1,55 @@ // polynomial for approximating 10^x // -// Copyright (c) 2023, Arm Limited. +// Copyright (c) 2023-2024, Arm Limited. 
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception // exp10f parameters deg = 5; // poly degree N = 1; // Neon 1, SVE 64 b = log(2)/(2 * N * log(10)); // interval a = -b; wp = single; // exp10 parameters //deg = 4; // poly degree - bump to 5 for ~1 ULP //N = 128; // table size //b = log(2)/(2 * N * log(10)); // interval //a = -b; //wp = D; // find polynomial with minimal relative error f = 10^x; // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| approx = proc(poly,d) { return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); }; // return p that minimizes |f(x) - poly(x) - x^d*p(x)| approx_abs = proc(poly,d) { return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10); }; // first coeff is fixed, iteratively find optimal double prec coeffs poly = 1; for i from 1 to deg do { p = roundcoefficients(approx(poly,i), [|wp ...|]); // p = roundcoefficients(approx_abs(poly,i), [|wp ...|]); poly = poly + x^i*coeff(p,0); }; display = hexadecimal; print("rel error:", accurateinfnorm(1-poly(x)/10^x, [a;b], 30)); print("abs error:", accurateinfnorm(10^x-poly(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do coeff(poly,i); log10_2 = round(N * log(10) / log(2), wp, RN); log2_10 = log(2) / (N * log(10)); log2_10_hi = round(log2_10, wp, RN); log2_10_lo = round(log2_10 - log2_10_hi, wp, RN); print(log10_2); print(log2_10_hi); print(log2_10_lo); diff --git a/contrib/arm-optimized-routines/pl/math/tools/expm1.sollya b/contrib/arm-optimized-routines/math/tools/expm1.sollya similarity index 91% rename from contrib/arm-optimized-routines/pl/math/tools/expm1.sollya rename to contrib/arm-optimized-routines/math/tools/expm1.sollya index 7b6f324eb247..d87466a066af 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/expm1.sollya +++ b/contrib/arm-optimized-routines/math/tools/expm1.sollya @@ -1,21 +1,21 @@ // polynomial for approximating exp(x)-1 in double precision // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 12; a = -log(2)/2; b = log(2)/2; f = proc(y) { return exp(y)-1; }; poly = fpminimax(f(x), deg, [|double ...|], [a;b]); display = hexadecimal; print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 2 to deg do round(coeff(poly,i), D, RN); diff --git a/contrib/arm-optimized-routines/pl/math/tools/expm1f.sollya b/contrib/arm-optimized-routines/math/tools/expm1f.sollya similarity index 91% rename from contrib/arm-optimized-routines/pl/math/tools/expm1f.sollya rename to contrib/arm-optimized-routines/math/tools/expm1f.sollya index efdf1bd301e0..bb9496f3f2c4 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/expm1f.sollya +++ b/contrib/arm-optimized-routines/math/tools/expm1f.sollya @@ -1,21 +1,21 @@ // polynomial for approximating exp(x)-1 in single precision // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. 
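// How the exp10 constants above are typically consumed (an assumed C
// sketch for the N = 1 single-precision case, not this repo's exact
// code):
//   float k = roundf (x * log10_2);         // log10_2 = N*log2(10)
//   float r = fmaf (-k, log2_10_hi, x);
//   r       = fmaf (-k, log2_10_lo, r);     // |r| <= log10(2)/(2N)
//   // 10^x = 2^(k/N) * 10^r, with 10^r from the fitted polynomial
// The hi/lo split keeps the reduction x - k*log2_10 accurate well
// beyond working precision.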
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 5; a = -log(2)/2; b = log(2)/2; f = proc(y) { return exp(y)-1; }; poly = fpminimax(f(x), deg, [|single ...|], [a;b]); display = hexadecimal; print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 2 to deg do round(coeff(poly,i), SG, RN); diff --git a/contrib/arm-optimized-routines/pl/math/tools/log10.sollya b/contrib/arm-optimized-routines/math/tools/log10.sollya similarity index 96% rename from contrib/arm-optimized-routines/pl/math/tools/log10.sollya rename to contrib/arm-optimized-routines/math/tools/log10.sollya index 85d1d15c1698..78f956b14b95 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/log10.sollya +++ b/contrib/arm-optimized-routines/math/tools/log10.sollya @@ -1,44 +1,44 @@ // polynomial for approximating log10(1+x) // -// Copyright (c) 2019-2023, Arm Limited. +// Copyright (c) 2019-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 6; // poly degree // |log10(1+x)| > 0x1p-5 outside the interval a = -0x1.p-5; b = 0x1.p-5; ln10 = evaluate(log(10),0); invln10hi = double(1/ln10 + 0x1p21) - 0x1p21; // round away last 21 bits invln10lo = double(1/ln10 - invln10hi); // find log10(1+x)/x polynomial with minimal relative error // (minimal relative error polynomial for log10(1+x) is the same * x) deg = deg-1; // because of /x // f = log(1+x)/x; using taylor series f = 0; for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; f = f/ln10; // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| approx = proc(poly,d) { return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); }; // first coeff is fixed, iteratively find optimal double prec coeffs poly = invln10hi + invln10lo; for i from 1 to deg do { p = roundcoefficients(approx(poly,i), [|D ...|]); poly = poly + x^i*coeff(p,0); }; display = hexadecimal; print("invln10hi:", invln10hi); print("invln10lo:", invln10lo); print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do coeff(poly,i); display = decimal; print("in [",a,b,"]"); diff --git a/contrib/arm-optimized-routines/pl/math/tools/log10f.sollya b/contrib/arm-optimized-routines/math/tools/log10f.sollya similarity index 96% rename from contrib/arm-optimized-routines/pl/math/tools/log10f.sollya rename to contrib/arm-optimized-routines/math/tools/log10f.sollya index 94bf32f2c449..c64a30aa8e18 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/log10f.sollya +++ b/contrib/arm-optimized-routines/math/tools/log10f.sollya @@ -1,37 +1,37 @@ // polynomial for approximating log10f(1+x) // -// Copyright (c) 2019-2023, Arm Limited. +// Copyright (c) 2019-2024, Arm Limited. 
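// The invln10hi/invln10lo pair above is a standard two-term constant
// split (illustrative): adding and then subtracting 0x1p21 forces
// 1/ln10 to be rounded with its low mantissa bits cleared (the
// script's "round away last 21 bits"), so invln10hi has a short
// mantissa and multiplications by it incur less rounding, while
// invln10lo carries the discarded remainder; the polynomial's constant
// term is then seeded with invln10hi + invln10lo.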
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception // Computation of log10f(1+x) will be carried out in double precision deg = 4; // poly degree // [OFF; 2*OFF] is divided in 2^4 intervals with OFF~0.7 a = -0.04375; b = 0.04375; // find log(1+x)/x polynomial with minimal relative error // (minimal relative error polynomial for log(1+x) is the same * x) deg = deg-1; // because of /x // f = log(1+x)/x; using taylor series f = 0; for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| approx = proc(poly,d) { return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); }; // first coeff is fixed, iteratively find optimal double prec coeffs poly = 1; for i from 1 to deg do { p = roundcoefficients(approx(poly,i), [|D ...|]); poly = poly + x^i*coeff(p,0); }; display = hexadecimal; print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do double(coeff(poly,i)); diff --git a/contrib/arm-optimized-routines/pl/math/tools/log1p.sollya b/contrib/arm-optimized-routines/math/tools/log1p.sollya similarity index 93% rename from contrib/arm-optimized-routines/pl/math/tools/log1p.sollya rename to contrib/arm-optimized-routines/math/tools/log1p.sollya index 598a36af0339..0cf72081fabb 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/log1p.sollya +++ b/contrib/arm-optimized-routines/math/tools/log1p.sollya @@ -1,30 +1,30 @@ // polynomial for approximating log(1+x) in double precision // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 20; a = sqrt(2)/2-1; b = sqrt(2)-1; f = proc(y) { return log(1+y); }; approx = proc(poly, d) { return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); }; poly = x; for i from 2 to deg do { p = roundcoefficients(approx(poly,i), [|D ...|]); poly = poly + x^i*coeff(p,0); }; print("coeffs:"); display = hexadecimal; for i from 2 to deg do coeff(poly,i); print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); diff --git a/contrib/arm-optimized-routines/pl/math/tools/log1pf.sollya b/contrib/arm-optimized-routines/math/tools/log1pf.sollya similarity index 91% rename from contrib/arm-optimized-routines/pl/math/tools/log1pf.sollya rename to contrib/arm-optimized-routines/math/tools/log1pf.sollya index cc1db10e4c0c..fc542c937111 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/log1pf.sollya +++ b/contrib/arm-optimized-routines/math/tools/log1pf.sollya @@ -1,21 +1,21 @@ // polynomial for approximating log(1+x) in single precision // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. 
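Both log10 scripts fit p(x) ~= log10(1+x)/x (hence "deg = deg-1; // because of /x") so that the final multiplication by x preserves the relative error of the fit. A sketch of the consuming shape, with placeholder coefficient array and degree:

    /* Sketch only: log10(1+x) ~= x * p(x), where p approximates
       log10(1+x)/x as fitted above.  */
    static double
    log10_1p (double x, const double c[], int deg)
    {
      double p = c[deg];
      for (int i = deg - 1; i >= 0; i--)
        p = p * x + c[i];
      return x * p;
    }
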
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 10; a = -0.25; b = 0.5; f = proc(y) { return log(1+y); }; poly = fpminimax(f(x), deg, [|single ...|], [a;b]); display = hexadecimal; print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 2 to deg do round(coeff(poly,i), SG, RN); diff --git a/contrib/arm-optimized-routines/pl/math/tools/sincos.sollya b/contrib/arm-optimized-routines/math/tools/sincos.sollya similarity index 92% rename from contrib/arm-optimized-routines/pl/math/tools/sincos.sollya rename to contrib/arm-optimized-routines/math/tools/sincos.sollya index 7d36266b446b..600368507f4e 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/sincos.sollya +++ b/contrib/arm-optimized-routines/math/tools/sincos.sollya @@ -1,33 +1,33 @@ // polynomial for approximating cos(x) // -// Copyright (c) 2023, Arm Limited. +// Copyright (c) 2023-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception -// This script only finds the coeffs for cos - see math/aarch64/v_sin.c for sin coeffs +// This script only finds the coeffs for cos - see math/aarch64/advsimd/sin.c for sin coeffs deg = 14; // polynomial degree a = -pi/4; // interval b = pi/4; // find even polynomial with minimal abs error compared to cos(x) f = cos(x); // return p that minimizes |f(x) - poly(x) - x^d*p(x)| approx = proc(poly,d) { return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); }; // first coeff is fixed, iteratively find optimal double prec coeffs poly = 1; for i from 1 to deg/2 do { p = roundcoefficients(approx(poly,2*i), [|double ...|]); poly = poly + x^(2*i)*coeff(p,0); }; display = hexadecimal; //print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); //print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/pl/math/tools/sincosf.sollya b/contrib/arm-optimized-routines/math/tools/sincosf.sollya similarity index 95% rename from contrib/arm-optimized-routines/pl/math/tools/sincosf.sollya rename to contrib/arm-optimized-routines/math/tools/sincosf.sollya index 178ee83ac196..add874e87a9a 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/sincosf.sollya +++ b/contrib/arm-optimized-routines/math/tools/sincosf.sollya @@ -1,33 +1,33 @@ // polynomial for approximating cos(x) // -// Copyright (c) 2023, Arm Limited. +// Copyright (c) 2023-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception // This script only finds the coeffs for cos - see math/tools/sin.sollya for sin coeffs. 
deg = 8; // polynomial degree a = -pi/4; // interval b = pi/4; // find even polynomial with minimal abs error compared to cos(x) f = cos(x); // return p that minimizes |f(x) - poly(x) - x^d*p(x)| approx = proc(poly,d) { return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); }; // first coeff is fixed, iteratively find optimal double prec coeffs poly = 1; for i from 1 to deg/2 do { p = roundcoefficients(approx(poly,2*i), [|single ...|]); poly = poly + x^(2*i)*coeff(p,0); }; display = hexadecimal; //print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); //print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/pl/math/tools/sinpi.sollya b/contrib/arm-optimized-routines/math/tools/sinpi.sollya similarity index 95% rename from contrib/arm-optimized-routines/pl/math/tools/sinpi.sollya rename to contrib/arm-optimized-routines/math/tools/sinpi.sollya index 62cc87e7697d..9bc5b1c7fc2a 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/sinpi.sollya +++ b/contrib/arm-optimized-routines/math/tools/sinpi.sollya @@ -1,33 +1,33 @@ // polynomial for approximating sinpi(x) // -// Copyright (c) 2023, Arm Limited. +// Copyright (c) 2023-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 19; // polynomial degree a = -1/2; // interval b = 1/2; // find odd polynomial with minimal abs error compared to sinpi(x) // f = sin(pi* x); f = pi*x; c = 1; for i from 1 to 80 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*(pi*x)^(2*i+1)/c; }; // return p that minimizes |f(x) - poly(x) - x^d*p(x)| approx = proc(poly,d) { return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); }; // first coeff is predefined, iteratively find optimal double prec coeffs poly = pi*x; for i from 0 to (deg-1)/2 do { p = roundcoefficients(approx(poly,2*i+1), [|D ...|]); poly = poly + x^(2*i+1)*coeff(p,0); }; display = hexadecimal; print("abs error:", accurateinfnorm(sin(pi*x)-poly(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/pl/math/tools/tan.sollya b/contrib/arm-optimized-routines/math/tools/tan.sollya similarity index 91% rename from contrib/arm-optimized-routines/pl/math/tools/tan.sollya rename to contrib/arm-optimized-routines/math/tools/tan.sollya index bb0bb28270e3..ca8a170bedaa 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/tan.sollya +++ b/contrib/arm-optimized-routines/math/tools/tan.sollya @@ -1,20 +1,20 @@ // polynomial for approximating double precision tan(x) // -// Copyright (c) 2023, Arm Limited. +// Copyright (c) 2023-2024, Arm Limited.
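The cos scripts above fit only even powers and the sin/sinpi scripts only odd ones, so the kernels can evaluate in x^2 and, in the odd case, multiply by x once at the end. A sketch of the even-power case, assuming the 8 even coefficients of the degree-14 sincos fit:

    /* Sketch: c[i] holds the coefficient of x^(2*i) from sincos.sollya.  */
    static double
    cos_poly (double x, const double c[8])
    {
      double x2 = x * x;
      double p = c[7];
      for (int i = 6; i >= 0; i--)
        p = p * x2 + c[i];
      return p; /* ~ cos(x) on [-pi/4, pi/4] */
    }
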
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 8; // interval bounds a = 0x1.0p-126; b = pi / 8; display = hexadecimal; f = (tan(sqrt(x))-sqrt(x))/x^(3/2); poly = fpminimax(f, deg, [|double ...|], [a*a;b*b]); //print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/pl/math/tools/tanf.sollya b/contrib/arm-optimized-routines/math/tools/tanf.sollya similarity index 98% rename from contrib/arm-optimized-routines/pl/math/tools/tanf.sollya rename to contrib/arm-optimized-routines/math/tools/tanf.sollya index f4b49b40ae64..054d3db44046 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/tanf.sollya +++ b/contrib/arm-optimized-routines/math/tools/tanf.sollya @@ -1,78 +1,78 @@ // polynomial for approximating single precision tan(x) // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception dtype = single; mthd = 0; // approximate tan deg = 5; // poly degree // // Uncomment for cotan // mthd = 1; // approximate cotan // deg = 3; // poly degree // interval bounds a = 0x1.0p-126; b = pi / 4; print("Print some useful constants"); display = hexadecimal!; if (dtype==double) then { prec = 53!; } else if (dtype==single) then { prec = 23!; }; print("pi/4"); pi/4; // Setup precisions (display and computation) display = decimal!; prec=128!; save_prec=prec; // // Select function to approximate with Sollya // if(mthd==0) then { s = "x + x^3 * P(x^2)"; g = tan(x); F = proc(P) { return x + x^3 * P(x^2); }; f = (g(sqrt(x))-sqrt(x))/(x*sqrt(x)); init_poly = 0; // Display info print("Approximate g(x) =", g, "as F(x)=", s, "."); poly = fpminimax(f, deg, [|dtype ...|], [a*a;b*b]); } else if (mthd==1) then { s = "1/x + x * P(x^2)"; g = 1 / tan(x); F = proc(P) { return 1/x + x * P(x^2); }; f = (g(sqrt(x))-1/sqrt(x))/(sqrt(x)); init_poly = 0; deg_init_poly = -1; // a value such that we actually start by building constant coefficient // Display info print("Approximate g(x) =", g, "as F(x)=", s, "."); // Fpminimax used to minimise absolute error approx_fpminimax = proc(func, poly, d) { return fpminimax(func - poly / x^-(deg-d), 0, [|dtype|], [a;b], absolute, floating); }; // Optimise all coefficients at once poly = fpminimax(f, [|0,...,deg|], [|dtype ...|], [a;b], absolute, floating); }; // // Display coefficients in Sollya // display = hexadecimal!; if (dtype==double) then { prec = 53!; } else if (dtype==single) then { prec = 23!; }; print("_coeffs :_ hex"); for i from 0 to deg do coeff(poly, i); // Compute errors display = hexadecimal!; d_rel_err = dirtyinfnorm(1-F(poly)/g(x), [a;b]); d_abs_err = dirtyinfnorm(g(x)-F(poly), [a;b]); print("dirty rel error:", d_rel_err); print("dirty abs error:", d_abs_err); print("in [",a,b,"]"); diff --git a/contrib/arm-optimized-routines/math/tools/tanpi.sollya b/contrib/arm-optimized-routines/math/tools/tanpi.sollya new file mode 100644 index 000000000000..8edbc359ab8e --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/tanpi.sollya @@ -0,0 +1,48 @@ +// polynomial for approximating tanpi/f(x) +// +// Copyright (c) 2024, Arm Limited. 
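tanf.sollya with mthd = 0 fits P so that tan(x) ~= x + x^3 * P(x^2); only P's coefficients are stored, and the explicit leading x keeps the result exact to first order near zero. A sketch of the reconstruction, assuming the degree-5 fit:

    /* Sketch: tan(x) ~= x + x^3 * P(x^2) on the fitted interval.  */
    static float
    tanf_eval (float x, const float p[6])
    {
      float x2 = x * x;
      float q = p[5];
      for (int i = 4; i >= 0; i--)
        q = q * x2 + p[i];
      return x + x * x2 * q;
    }
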
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +// 0 for tanpi/f [0,0.25], 1 for tanpi/f [0.25,1] +method = 0; +dtype = double; + +if (dtype == single) then { + if (method == 0) then { deg = 5; } + else if (method == 1) then { deg = 3; }; +} else if (dtype == double) then { + if (method == 0) then { deg = 13; } + else if (method == 1) then { deg = 8; }; +}; + +a = 0x1.0p-126; +b = 1/4; + +if (method == 0) then { + g = tan(pi * x); + F = proc(P) { return pi * x + x^3 * P(x^2); }; + f = (g(sqrt(x)) - pi * sqrt(x))/(x^(3/2)); +} else if (method == 1) then { + g = 1/tan(pi * x); + F = proc(P) { return 1/(pi * x) + x * P(x^2); }; + f = (g(sqrt(x)) / sqrt(x)) - 1/(pi * x); +}; + +poly = fpminimax(f, deg, [|dtype ...|], [a*a;b*b]); + +// +// Display coefficients in Sollya +// +display = hexadecimal!; +if (dtype==double) then { prec = 53!; } +else if (dtype==single) then { prec = 23!; }; +print("_coeffs :_ hex"); +for i from 0 to deg do coeff(poly, i); + +// Compute errors +//display = hexadecimal!; +d_rel_err = dirtyinfnorm(1-F(poly)/g(x), [a;b]); +d_abs_err = dirtyinfnorm(g(x)-F(poly), [a;b]); +print("dirty rel error:", d_rel_err); +print("dirty abs error:", d_abs_err); +print("in [",a,b,"]"); diff --git a/contrib/arm-optimized-routines/pl/math/tools/v_erf.sollya b/contrib/arm-optimized-routines/math/tools/v_erf.sollya similarity index 84% rename from contrib/arm-optimized-routines/pl/math/tools/v_erf.sollya rename to contrib/arm-optimized-routines/math/tools/v_erf.sollya index 394ba377df12..5d7795842bcd 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/v_erf.sollya +++ b/contrib/arm-optimized-routines/math/tools/v_erf.sollya @@ -1,20 +1,20 @@ // polynomial for approximating erf(x). // To generate coefficients for interval i (0 to 47) do: // $ sollya v_erf.sollya $i // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception scale = 1/8; deg = 9; itv = parse(__argv[0]); if (itv == 0) then { a = 0x1p-1022; } else { a = itv * scale; }; prec=256; poly = fpminimax(erf(scale*x+a), deg, [|D ...|], [0; 1]); display = hexadecimal; for i from 0 to deg do coeff(poly, i); \ No newline at end of file diff --git a/contrib/arm-optimized-routines/pl/math/tools/v_erfc.sollya b/contrib/arm-optimized-routines/math/tools/v_erfc.sollya similarity index 96% rename from contrib/arm-optimized-routines/pl/math/tools/v_erfc.sollya rename to contrib/arm-optimized-routines/math/tools/v_erfc.sollya index 3b03ba07863d..764b333d6d25 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/v_erfc.sollya +++ b/contrib/arm-optimized-routines/math/tools/v_erfc.sollya @@ -1,46 +1,46 @@ // polynomial for approximating erfc(x)*exp(x*x) // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. 
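v_erf.sollya is run once per interval i, fitting erf(i/8 + t/8) for t in [0;1]; the 48 fits are pasted into a table that the routine indexes at run time. A sketch of that lookup (ignoring the special first interval, and assuming a hypothetical layout of 48 rows of 10 coefficients):

    /* Sketch: interval i covers [i/8, (i+1)/8); t = 8*x - i is in [0,1).  */
    static double
    erf_lookup (double x, const double tab[48][10])
    {
      int i = (int) (x * 8.0);
      double t = x * 8.0 - (double) i;
      const double *c = tab[i];
      double p = c[9];
      for (int j = 8; j >= 0; j--)
        p = p * t + c[j];
      return p;
    }
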
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 12; // poly degree itv = parse(__argv[0]); bounds = [|3.725290298461914e-9, 0.18920711500272103, 0.41421356237309515, 0.681792830507429, 1, 1.378414230005442, 1.8284271247461903, 2.363585661014858, 3, 3.756828460010884, 4.656854249492381, 5.727171322029716, 7, 8.513656920021768, 10.313708498984761, 12.454342644059432, 15, 18.027313840043536, 21.627416997969522, 25.908685288118864, 31|]; a = bounds[itv]; b = bounds[itv + 1]; f = proc(y) { t = y + a; return erfc(t) * exp(t*t); }; poly = fpminimax(f(x), deg, [|double ...|], [0;b-a]); display = hexadecimal; print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do coeff(poly, i); diff --git a/contrib/arm-optimized-routines/pl/math/tools/v_log10.sollya b/contrib/arm-optimized-routines/math/tools/v_log10.sollya similarity index 96% rename from contrib/arm-optimized-routines/pl/math/tools/v_log10.sollya rename to contrib/arm-optimized-routines/math/tools/v_log10.sollya index e2df4364ada0..5181074f6762 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/v_log10.sollya +++ b/contrib/arm-optimized-routines/math/tools/v_log10.sollya @@ -1,38 +1,38 @@ // polynomial used for __v_log10(x) // -// Copyright (c) 2019-2023, Arm Limited. +// Copyright (c) 2019-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 6; // poly degree a = -0x1.fc1p-9; b = 0x1.009p-8; // find log(1+x)/x polynomial with minimal relative error // (minimal relative error polynomial for log(1+x) is the same * x) deg = deg-1; // because of /x // f = log(1+x)/x; using taylor series f = 0; for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| approx = proc(poly,d) { return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); }; // first coeff is fixed, iteratively find optimal double prec coeffs poly = 1; for i from 1 to deg do { p = roundcoefficients(approx(poly,i), [|D ...|]); poly = poly + x^i*coeff(p,0); }; // scale coefficients by 1/ln(10) ln10 = evaluate(log(10),0); poly = poly/ln10; display = hexadecimal; print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do double(coeff(poly,i)); diff --git a/contrib/arm-optimized-routines/pl/math/tools/v_log10f.sollya b/contrib/arm-optimized-routines/math/tools/v_log10f.sollya similarity index 96% rename from contrib/arm-optimized-routines/pl/math/tools/v_log10f.sollya rename to contrib/arm-optimized-routines/math/tools/v_log10f.sollya index 396d5a92302b..4906cb1d2137 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/v_log10f.sollya +++ b/contrib/arm-optimized-routines/math/tools/v_log10f.sollya @@ -1,45 +1,45 @@ // polynomial for approximating v_log10f(1+x) // -// Copyright (c) 2019-2023, Arm Limited. +// Copyright (c) 2019-2024, Arm Limited. 
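Because v_erfc.sollya fits g(x) = erfc(x) * exp(x*x) on each [a, b], the consuming routine recovers erfc with one multiplication by exp(-x*x), which keeps the polynomial well scaled across the whole range. A sketch, assuming the degree-12 fit and its interval base a:

    #include <math.h>

    /* Sketch: erfc(x) = poly(x - a) * exp(-x*x), poly fitted on [0, b-a].  */
    static double
    erfc_eval (double x, double a, const double c[13])
    {
      double t = x - a;
      double p = c[12];
      for (int i = 11; i >= 0; i--)
        p = p * t + c[i];
      return p * exp (-x * x);
    }
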
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 9; // poly degree // |log10(1+x)| > 0x1p-4 outside the interval a = -1/3; b = 1/3; display = hexadecimal; print("log10(2) = ", single(log10(2))); ln10 = evaluate(log(10),0); invln10 = single(1/ln10); // find log10(1+x)/x polynomial with minimal relative error // (minimal relative error polynomial for log10(1+x) is the same * x) deg = deg-1; // because of /x // f = log(1+x)/x; using taylor series f = 0; for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; f = f/ln10; // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| approx = proc(poly,d) { return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); }; // first coeff is fixed, iteratively find optimal double prec coeffs poly = invln10; for i from 1 to deg do { p = roundcoefficients(approx(poly,i), [|SG ...|]); poly = poly + x^i*coeff(p,0); }; display = hexadecimal; print("invln10:", invln10); print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do single(coeff(poly,i)); display = decimal; print("in [",a,b,"]"); diff --git a/contrib/arm-optimized-routines/pl/math/tools/v_log2f.sollya b/contrib/arm-optimized-routines/math/tools/v_log2f.sollya similarity index 96% rename from contrib/arm-optimized-routines/pl/math/tools/v_log2f.sollya rename to contrib/arm-optimized-routines/math/tools/v_log2f.sollya index 99e050c91b03..337d4830a2ae 100644 --- a/contrib/arm-optimized-routines/pl/math/tools/v_log2f.sollya +++ b/contrib/arm-optimized-routines/math/tools/v_log2f.sollya @@ -1,38 +1,38 @@ // polynomial used for __v_log2f(x) // -// Copyright (c) 2022-2023, Arm Limited. +// Copyright (c) 2022-2024, Arm Limited. // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 9; // poly degree a = -1/3; b = 1/3; ln2 = evaluate(log(2),0); invln2 = single(1/ln2); // find log2(1+x)/x polynomial with minimal relative error // (minimal relative error polynomial for log2(1+x) is the same * x) deg = deg-1; // because of /x // f = log2(1+x)/x; using taylor series f = 0; for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; f = f * invln2; // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| approx = proc(poly,d) { return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); }; // first coeff is fixed, iteratively find optimal double prec coeffs poly = invln2; for i from 1 to deg do { p = roundcoefficients(approx(poly,i), [|SG ...|]); poly = poly + x^i*coeff(p,0); }; display = hexadecimal; print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); print("in [",a,b,"]"); print("coeffs:"); for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/networking/Dir.mk b/contrib/arm-optimized-routines/networking/Dir.mk index 2589e0a1f91c..b3ca2ff335e4 100644 --- a/contrib/arm-optimized-routines/networking/Dir.mk +++ b/contrib/arm-optimized-routines/networking/Dir.mk @@ -1,76 +1,76 @@ # Makefile fragment - requires GNU make # -# Copyright (c) 2019-2020, Arm Limited. +# Copyright (c) 2019-2025, Arm Limited. # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception S := $(srcdir)/networking B := build/networking ifeq ($(ARCH),) all-networking check-networking install-networking clean-networking: @echo "*** Please set ARCH in config.mk. 
***" @exit 1 else networking-lib-srcs := $(wildcard $(S)/*.[cS]) $(wildcard $(S)/$(ARCH)/*.[cS]) networking-test-srcs := $(wildcard $(S)/test/*.c) networking-includes := $(patsubst $(S)/%,build/%,$(wildcard $(S)/include/*.h)) networking-libs := \ build/lib/libnetworking.so \ build/lib/libnetworking.a \ networking-tools := \ build/bin/test/chksum networking-lib-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(networking-lib-srcs))) networking-test-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(networking-test-srcs))) networking-objs := \ $(networking-lib-objs) \ $(networking-lib-objs:%.o=%.os) \ $(networking-test-objs) \ networking-files := \ $(networking-objs) \ $(networking-libs) \ $(networking-tools) \ $(networking-includes) \ all-networking: $(networking-libs) $(networking-tools) $(networking-includes) $(networking-objs): $(networking-includes) $(networking-objs): CFLAGS_ALL += $(networking-cflags) build/lib/libnetworking.so: $(networking-lib-objs:%.o=%.os) $(CC) $(CFLAGS_ALL) $(LDFLAGS) -shared -o $@ $^ -build/lib/libnetworkinglib.a: $(networking-lib-objs) +build/lib/libnetworking.a: $(networking-lib-objs) rm -f $@ $(AR) rc $@ $^ $(RANLIB) $@ -build/bin/test/%: $(B)/test/%.o build/lib/libnetworkinglib.a +build/bin/test/%: $(B)/test/%.o build/lib/libnetworking.a $(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS) build/include/%.h: $(S)/include/%.h cp $< $@ build/bin/%.sh: $(S)/test/%.sh cp $< $@ check-networking: $(networking-tools) $(EMULATOR) build/bin/test/chksum -i simple $(EMULATOR) build/bin/test/chksum -i scalar $(EMULATOR) build/bin/test/chksum -i simd || true # simd is not always available install-networking: \ $(networking-libs:build/lib/%=$(DESTDIR)$(libdir)/%) \ $(networking-includes:build/include/%=$(DESTDIR)$(includedir)/%) clean-networking: rm -f $(networking-files) endif .PHONY: all-networking check-networking install-networking clean-networking diff --git a/contrib/arm-optimized-routines/pl/Dir.mk b/contrib/arm-optimized-routines/pl/Dir.mk deleted file mode 100644 index 2d007790d241..000000000000 --- a/contrib/arm-optimized-routines/pl/Dir.mk +++ /dev/null @@ -1,21 +0,0 @@ -# Makefile fragment - requires GNU make -# -# Copyright (c) 2022, Arm Limited. -# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - -# These targets are defined if we prescribe pl in SUBS. -# It requires PLSUBS to be set. - -$(foreach sub,$(PLSUBS),$(eval include $(srcdir)/pl/$(sub)/Dir.mk)) - -pl-files := $($(PLSUBS:%=pl/%-files)) - -all-pl: $(PLSUBS:%=all-pl/%) - -check-pl: $(PLSUBS:%=check-pl/%) - -install-pl: $(PLSUBS:%=install-pl/%) - -clean-pl: $(PLSUBS:%=clean-pl/%) - -.PHONY: all-pl check-pl install-pl clean-pl diff --git a/contrib/arm-optimized-routines/pl/math/Dir.mk b/contrib/arm-optimized-routines/pl/math/Dir.mk deleted file mode 100644 index 94b26cf3309c..000000000000 --- a/contrib/arm-optimized-routines/pl/math/Dir.mk +++ /dev/null @@ -1,216 +0,0 @@ -# Makefile fragment - requires GNU make -# -# Copyright (c) 2019-2024, Arm Limited. 
-# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - -PLM := $(srcdir)/pl/math -AOR := $(srcdir)/math -B := build/pl/math - -pl-lib-srcs := $(wildcard $(PLM)/*.[cS]) - -ifeq ($(WANT_SVE_MATH), 0) -pl-lib-srcs := $(filter-out $(PLM)/sv_%, $(pl-lib-srcs)) -endif - -math-test-srcs := \ - $(AOR)/test/mathtest.c \ - $(AOR)/test/mathbench.c \ - $(AOR)/test/ulp.c \ - -math-test-host-srcs := $(wildcard $(AOR)/test/rtest/*.[cS]) - -pl-includes := $(patsubst $(PLM)/%,build/pl/%,$(wildcard $(PLM)/include/*.h)) -pl-test-includes := $(patsubst $(PLM)/%,build/pl/include/%,$(wildcard $(PLM)/test/*.h)) - -pl-libs := \ - build/pl/lib/libmathlib.so \ - build/pl/lib/libmathlib.a \ - -math-tools := \ - build/pl/bin/mathtest \ - build/pl/bin/mathbench \ - build/pl/bin/mathbench_libc \ - build/pl/bin/runulp.sh \ - build/pl/bin/ulp \ - -math-host-tools := \ - build/pl/bin/rtest \ - -pl-lib-objs := $(patsubst $(PLM)/%,$(B)/%.o,$(basename $(pl-lib-srcs))) -math-test-objs := $(patsubst $(AOR)/%,$(B)/%.o,$(basename $(math-test-srcs))) -math-host-objs := $(patsubst $(AOR)/%,$(B)/%.o,$(basename $(math-test-host-srcs))) -pl-target-objs := $(pl-lib-objs) $(math-test-objs) -pl-objs := $(pl-target-objs) $(pl-target-objs:%.o=%.os) $(math-host-objs) - -pl/math-files := \ - $(pl-objs) \ - $(pl-libs) \ - $(math-tools) \ - $(math-host-tools) \ - $(pl-includes) \ - $(pl-test-includes) \ - -all-pl/math: $(pl-libs) $(math-tools) $(pl-includes) $(pl-test-includes) - -$(pl-objs): $(pl-includes) $(pl-test-includes) -$(pl-objs): CFLAGS_PL += $(math-cflags) -$(B)/test/mathtest.o: CFLAGS_PL += -fmath-errno -$(math-host-objs): CC = $(HOST_CC) -$(math-host-objs): CFLAGS_PL = $(HOST_CFLAGS) - -$(B)/sv_%: CFLAGS_PL += $(math-sve-cflags) - -build/pl/include/test/ulp_funcs_gen.h: $(pl-lib-srcs) - # Replace PL_SIG - cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=_Z##v##t##a(f)" -P > $@ - -build/pl/include/test/mathbench_funcs_gen.h: $(pl-lib-srcs) - # Replace PL_SIG macros with mathbench func entries - cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=_Z##v##t##a(f, ##__VA_ARGS__)" -P > $@ - -build/pl/include/test/ulp_wrappers_gen.h: $(pl-lib-srcs) - # Replace PL_SIG macros with ULP wrapper declarations - cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=Z##v##N##t##a##_WRAP(f)" -P > $@ - -$(B)/test/ulp.o: $(AOR)/test/ulp.h build/pl/include/test/ulp_funcs_gen.h build/pl/include/test/ulp_wrappers_gen.h -$(B)/test/ulp.o: CFLAGS_PL += -I build/pl/include/test - -$(B)/test/mathbench.o: build/pl/include/test/mathbench_funcs_gen.h -$(B)/test/mathbench.o: CFLAGS_PL += -I build/pl/include/test - -build/pl/lib/libmathlib.so: $(pl-lib-objs:%.o=%.os) - $(CC) $(CFLAGS_PL) $(LDFLAGS) -shared -o $@ $^ - -build/pl/lib/libmathlib.a: $(pl-lib-objs) - rm -f $@ - $(AR) rc $@ $^ - $(RANLIB) $@ - -$(math-host-tools): HOST_LDLIBS += -lm -lmpfr -lmpc -$(math-tools): LDLIBS += $(math-ldlibs) -lm -# math-sve-cflags should be empty if WANT_SVE_MATH is not enabled -$(math-tools): CFLAGS_PL += $(math-sve-cflags) - -# Some targets to build pl/math/test from math/test sources -build/pl/math/test/%.o: $(srcdir)/math/test/%.S - $(CC) $(CFLAGS_PL) -c -o $@ $< - -build/pl/math/test/%.o: $(srcdir)/math/test/%.c - $(CC) $(CFLAGS_PL) -c -o $@ $< - -build/pl/math/test/%.os: $(srcdir)/math/test/%.S - $(CC) $(CFLAGS_PL) -c -o $@ $< - -build/pl/math/test/%.os: $(srcdir)/math/test/%.c - $(CC) $(CFLAGS_PL) -c -o $@ $< - -# Some targets to build pl/ sources using appropriate flags -build/pl/%.o: 
$(srcdir)/pl/%.S - $(CC) $(CFLAGS_PL) -c -o $@ $< - -build/pl/%.o: $(srcdir)/pl/%.c - $(CC) $(CFLAGS_PL) -c -o $@ $< - -build/pl/%.os: $(srcdir)/pl/%.S - $(CC) $(CFLAGS_PL) -c -o $@ $< - -build/pl/%.os: $(srcdir)/pl/%.c - $(CC) $(CFLAGS_PL) -c -o $@ $< - -build/pl/bin/rtest: $(math-host-objs) - $(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LDLIBS) - -build/pl/bin/mathtest: $(B)/test/mathtest.o build/pl/lib/libmathlib.a - $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS) - -build/pl/bin/mathbench: $(B)/test/mathbench.o build/pl/lib/libmathlib.a - $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS) - -# This is not ideal, but allows custom symbols in mathbench to get resolved. -build/pl/bin/mathbench_libc: $(B)/test/mathbench.o build/pl/lib/libmathlib.a - $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $< $(LDLIBS) -lc build/pl/lib/libmathlib.a -lm - -build/pl/bin/ulp: $(B)/test/ulp.o build/pl/lib/libmathlib.a - $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS) - -build/pl/include/%.h: $(PLM)/include/%.h - cp $< $@ - -build/pl/include/test/%.h: $(PLM)/test/%.h - cp $< $@ - -build/pl/bin/%.sh: $(PLM)/test/%.sh - cp $< $@ - -pl-math-tests := $(wildcard $(PLM)/test/testcases/directed/*.tst) -pl-math-rtests := $(wildcard $(PLM)/test/testcases/random/*.tst) - -check-pl/math-test: $(math-tools) - cat $(pl-math-tests) | $(EMULATOR) build/pl/bin/mathtest $(math-testflags) - -check-pl/math-rtest: $(math-host-tools) $(math-tools) - cat $(pl-math-rtests) | build/pl/bin/rtest | $(EMULATOR) build/pl/bin/mathtest $(math-testflags) - -ulp-input-dir=$(B)/test/inputs - -math-lib-lims = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.ulp,$(basename $(pl-lib-srcs))) -math-lib-fenvs = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.fenv,$(basename $(pl-lib-srcs))) -math-lib-itvs = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.itv,$(basename $(pl-lib-srcs))) - -ulp-inputs = $(math-lib-lims) $(math-lib-fenvs) $(math-lib-itvs) - -$(ulp-inputs): CFLAGS_PL += -I$(PLM) -I$(PLM)/include $(math-cflags) - -$(ulp-input-dir)/%.ulp: $(PLM)/%.c - mkdir -p $(@D) - $(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep -o "PL_TEST_ULP [^ ]* [^ ]*" || true; } > $@ - -$(ulp-input-dir)/%.fenv: $(PLM)/%.c - mkdir -p $(@D) - $(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep -o "PL_TEST_EXPECT_FENV_ENABLED [^ ]*" || true; } > $@ - -$(ulp-input-dir)/%.itv: $(PLM)/%.c - mkdir -p $(dir $@) - $(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep "PL_TEST_INTERVAL " || true; } | sed "s/ PL_TEST_INTERVAL/\nPL_TEST_INTERVAL/g" > $@ - -ulp-lims := $(ulp-input-dir)/limits -$(ulp-lims): $(math-lib-lims) - cat $^ | sed "s/PL_TEST_ULP //g;s/^ *//g" > $@ - -fenv-exps := $(ulp-input-dir)/fenv -$(fenv-exps): $(math-lib-fenvs) - cat $^ | sed "s/PL_TEST_EXPECT_FENV_ENABLED //g;s/^ *//g" > $@ - -ulp-itvs := $(ulp-input-dir)/intervals -$(ulp-itvs): $(math-lib-itvs) - cat $^ | sort -u | sed "s/PL_TEST_INTERVAL //g" > $@ - -check-pl/math-ulp: $(math-tools) $(ulp-lims) $(fenv-exps) $(ulp-itvs) - WANT_SVE_MATH=$(WANT_SVE_MATH) \ - ULPFLAGS="$(math-ulpflags)" \ - LIMITS=../../../$(ulp-lims) \ - INTERVALS=../../../$(ulp-itvs) \ - FENV=../../../$(fenv-exps) \ - FUNC=$(func) \ - build/pl/bin/runulp.sh $(EMULATOR) - -check-pl/math: check-pl/math-test check-pl/math-rtest check-pl/math-ulp - -$(DESTDIR)$(libdir)/pl/%.so: build/pl/lib/%.so - $(INSTALL) -D $< $@ - -$(DESTDIR)$(libdir)/pl/%: build/pl/lib/% - $(INSTALL) -m 644 -D $< $@ - -$(DESTDIR)$(includedir)/pl/%: build/pl/include/% - $(INSTALL) -m 644 -D $< $@ - -install-pl/math: \ - 
$(pl-libs:build/pl/lib/%=$(DESTDIR)$(libdir)/pl/%) \ - $(pl-includes:build/pl/include/%=$(DESTDIR)$(includedir)/pl/%) - -clean-pl/math: - rm -f $(pl/math-files) - -.PHONY: all-pl/math check-pl/math-test check-pl/math-rtest check-pl/math-ulp check-pl/math install-pl/math clean-pl/math diff --git a/contrib/arm-optimized-routines/pl/math/asinhf_data.c b/contrib/arm-optimized-routines/pl/math/asinhf_data.c deleted file mode 100644 index cd1ef16b3b6a..000000000000 --- a/contrib/arm-optimized-routines/pl/math/asinhf_data.c +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Coefficients for single-precision asinh(x) function. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -/* Approximate asinhf(x) directly in [2^-12, 1]. See for tools/asinhf.sollya for - these coeffs were generated. */ -const struct asinhf_data __asinhf_data - = {.coeffs - = {-0x1.9b16fap-19f, -0x1.552baap-3f, -0x1.4e572ap-11f, 0x1.3a81dcp-4f, - 0x1.65bbaap-10f, -0x1.057f1p-4f, 0x1.6c1d46p-5f, -0x1.4cafe8p-7f}}; diff --git a/contrib/arm-optimized-routines/pl/math/atan_data.c b/contrib/arm-optimized-routines/pl/math/atan_data.c deleted file mode 100644 index 91d0f61d2eaf..000000000000 --- a/contrib/arm-optimized-routines/pl/math/atan_data.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Double-precision polynomial coefficients for vector atan(x) and atan2(y,x). - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -const struct atan_poly_data __atan_poly_data = { - .poly = {/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on - [2**-1022, 1.0]. See atan.sollya for details of how these were - generated. */ - -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3, - 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, - -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5, - 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, - -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6, - 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, - -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16}}; diff --git a/contrib/arm-optimized-routines/pl/math/atanf_data.c b/contrib/arm-optimized-routines/pl/math/atanf_data.c deleted file mode 100644 index c4cba2378cea..000000000000 --- a/contrib/arm-optimized-routines/pl/math/atanf_data.c +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Single-precision polynomial coefficients for vector atan(x) and atan2(y,x). - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. - */ -const struct atanf_poly_data __atanf_poly_data = { - .poly = {/* See atanf.sollya for details of how these were generated. */ - -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, - -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f}}; diff --git a/contrib/arm-optimized-routines/pl/math/exp_data.c b/contrib/arm-optimized-routines/pl/math/exp_data.c deleted file mode 100644 index 2354be76cfab..000000000000 --- a/contrib/arm-optimized-routines/pl/math/exp_data.c +++ /dev/null @@ -1,1120 +0,0 @@ -/* - * Shared data between exp, exp2 and pow. - * - * Copyright (c) 2018-2023, Arm Limited. 
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -#define N (1 << EXP_TABLE_BITS) - -const struct exp_data __exp_data = { -// N/ln2 -.invln2N = 0x1.71547652b82fep0 * N, -// -ln2/N -#if N == 64 -.negln2hiN = -0x1.62e42fefa0000p-7, -.negln2loN = -0x1.cf79abc9e3b3ap-46, -#elif N == 128 -.negln2hiN = -0x1.62e42fefa0000p-8, -.negln2loN = -0x1.cf79abc9e3b3ap-47, -#elif N == 256 -.negln2hiN = -0x1.62e42fefc0000p-9, -.negln2loN = 0x1.c610ca86c3899p-45, -#elif N == 512 -.negln2hiN = -0x1.62e42fef80000p-10, -.negln2loN = -0x1.1cf79abc9e3b4p-45, -#endif -// Used for rounding when !TOINT_INTRINSICS -#if EXP_USE_TOINT_NARROW -.shift = 0x1800000000.8p0, -#else -.shift = 0x1.8p52, -#endif -// exp polynomial coefficients. -.poly = { -#if N == 64 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE -// abs error: 1.5543*2^-60 -// ulp error: 0.529 (0.533 without fma) -// if |x| < ln2/128+eps -// abs error if |x| < ln2/64: 1.7157*2^-50 -0x1.fffffffffdbcdp-2, -0x1.555555555444cp-3, -0x1.555573c6a9f7dp-5, -0x1.1111266d28935p-7, -#elif N == 64 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE -// abs error: 1.6735*2^-64 -// ulp error: 0.518 (0.522 without fma) -// if |x| < ln2/64 -0x1.5555555548f9ap-3, -0x1.555555554bf5dp-5, -0x1.11115b75f0f4dp-7, -0x1.6c171a6b6303ep-10, -#elif N == 128 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE -// abs error: 1.555*2^-66 -// ulp error: 0.509 (0.511 without fma) -// if |x| < ln2/256+eps -// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65 -// abs error if |x| < ln2/128: 1.7145*2^-56 -0x1.ffffffffffdbdp-2, -0x1.555555555543cp-3, -0x1.55555cf172b91p-5, -0x1.1111167a4d017p-7, -#elif N == 128 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE -// abs error: 1.5542*2^-60 -// ulp error: 0.521 (0.523 without fma) -// if |x| < ln2/128 -0x1.fffffffffdbcep-2, -0x1.55555555543c2p-3, -0x1.555573c64f2e3p-5, -0x1.111126b4eff73p-7, -#elif N == 128 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE -// abs error: 1.6861*2^-71 -// ulp error: 0.509 (0.511 without fma) -// if |x| < ln2/128 -0x1.55555555548fdp-3, -0x1.555555555658fp-5, -0x1.111123a859bb6p-7, -0x1.6c16ba6920cabp-10, -#elif N == 256 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE -// abs error: 1.43*2^-58 -// ulp error: 0.549 (0.550 without fma) -// if |x| < ln2/512 -0x1p0, // unused -0x1.fffffffffffd4p-2, -0x1.5555571d6ef9p-3, -0x1.5555576a5adcep-5, -#elif N == 256 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE -// abs error: 1.5547*2^-66 -// ulp error: 0.505 (0.506 without fma) -// if |x| < ln2/256 -0x1.ffffffffffdbdp-2, -0x1.555555555543cp-3, -0x1.55555cf16e1edp-5, -0x1.1111167a4b553p-7, -#elif N == 512 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE -// abs error: 1.4300*2^-63 -// ulp error: 0.504 -// if |x| < ln2/1024 -// abs error if |x| < ln2/512: 1.0689*2^-55 -0x1p0, // unused -0x1.ffffffffffffdp-2, -0x1.555555c75bb6p-3, -0x1.555555dec04a8p-5, -#endif -}, -.exp2_shift = 0x1.8p52 / N, -// exp2 polynomial coefficients. 
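The constants above drive the usual table-based reduction: with k = round(x*N/ln2) and r = x - k*ln2/N (computed in two steps via the hi/lo -ln2/N pair), exp(x) = 2^(k/N) * exp(r), where 2^(k/N) comes from the tab[] entries below and exp(r) from the short polynomial. A sketch of the reduction step only, with hypothetical parameter names:

    #include <math.h>

    /* Sketch: returns k and writes r = x - k*ln2/N; negln2hiN/negln2loN
       are the two-part -ln2/N constants from the structure above.  */
    static double
    exp_reduce (double x, double invln2N, double negln2hiN,
                double negln2loN, double *r)
    {
      double kd = round (x * invln2N);
      *r = x + kd * negln2hiN + kd * negln2loN;
      return kd;
    }

The deleted file's .shift constant performs this rounding with the classic add-and-subtract trick rather than a call to round(); the sketch uses round() only for clarity.
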
-.exp2_poly = { -#if N == 64 && EXP2_POLY_ORDER == 6 && EXP2_POLY_WIDE -// abs error: 1.3054*2^-63 -// ulp error: 0.515 -// if |x| < 1/64 -0x1.62e42fefa39efp-1, -0x1.ebfbdff82c58fp-3, -0x1.c6b08d7045cf1p-5, -0x1.3b2ab6fb8fd0ep-7, -0x1.5d884afec48d7p-10, -0x1.43097dc684ae1p-13, -#elif N == 128 && EXP2_POLY_ORDER == 5 && !EXP2_POLY_WIDE -// abs error: 1.2195*2^-65 -// ulp error: 0.507 (0.511 without fma) -// if |x| < 1/256 -// abs error if |x| < 1/128: 1.9941*2^-56 -0x1.62e42fefa39efp-1, -0x1.ebfbdff82c424p-3, -0x1.c6b08d70cf4b5p-5, -0x1.3b2abd24650ccp-7, -0x1.5d7e09b4e3a84p-10, -#elif N == 256 && EXP2_POLY_ORDER == 5 && EXP2_POLY_WIDE -// abs error: 1.2195*2^-65 -// ulp error: 0.504 (0.508 without fma) -// if |x| < 1/256 -0x1.62e42fefa39efp-1, -0x1.ebfbdff82c424p-3, -0x1.c6b08d70cf4b5p-5, -0x1.3b2abd24650ccp-7, -0x1.5d7e09b4e3a84p-10, -#elif N == 512 && EXP2_POLY_ORDER == 4 && !EXP2_POLY_WIDE -// abs error: 1.4411*2^-64 -// ulp error: 0.5024 (0.5063 without fma) -// if |x| < 1/1024 -// abs error if |x| < 1/512: 1.9430*2^-56 -0x1.62e42fefa39ecp-1, -0x1.ebfbdff82c58bp-3, -0x1.c6b08e46de41fp-5, -0x1.3b2ab786ee1dap-7, -#endif -}, -// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N) -// tab[2*k] = asuint64(T[k]) -// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N -.tab = { -#if N == 64 -0x0, 0x3ff0000000000000, -0xbc7160139cd8dc5d, 0x3fefec9a3e778061, -0x3c8cd2523567f613, 0x3fefd9b0d3158574, -0x3c60f74e61e6c861, 0x3fefc74518759bc8, -0x3c979aa65d837b6d, 0x3fefb5586cf9890f, -0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2, -0xbc9556522a2fbd0e, 0x3fef9301d0125b51, -0xbc91c923b9d5f416, 0x3fef829aaea92de0, -0xbc801b15eaa59348, 0x3fef72b83c7d517b, -0x3c8b898c3f1353bf, 0x3fef635beb6fcb75, -0x3c9aecf73e3a2f60, 0x3fef54873168b9aa, -0x3c8a6f4144a6c38d, 0x3fef463b88628cd6, -0x3c968efde3a8a894, 0x3fef387a6e756238, -0x3c80472b981fe7f2, 0x3fef2b4565e27cdd, -0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1, -0x3c8b3782720c0ab4, 0x3fef1285a6e4030b, -0x3c834d754db0abb6, 0x3fef06fe0a31b715, -0x3c8fdd395dd3f84a, 0x3feefc08b26416ff, -0xbc924aedcc4b5068, 0x3feef1a7373aa9cb, -0xbc71d1e83e9436d2, 0x3feee7db34e59ff7, -0x3c859f48a72a4c6d, 0x3feedea64c123422, -0xbc58a78f4817895b, 0x3feed60a21f72e2a, -0x3c4363ed60c2ac11, 0x3feece086061892d, -0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0, -0x3c7690cebb7aafb0, 0x3feebfdad5362a27, -0xbc8f94340071a38e, 0x3feeb9b2769d2ca7, -0xbc78dec6bd0f385f, 0x3feeb42b569d4f82, -0x3c93350518fdd78e, 0x3feeaf4736b527da, -0x3c9063e1e21c5409, 0x3feeab07dd485429, -0x3c9432e62b64c035, 0x3feea76f15ad2148, -0xbc8c33c53bef4da8, 0x3feea47eb03a5585, -0xbc93cedd78565858, 0x3feea23882552225, -0xbc93b3efbf5e2228, 0x3feea09e667f3bcd, -0xbc6367efb86da9ee, 0x3fee9fb23c651a2f, -0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74, -0xbc8619321e55e68a, 0x3fee9feb564267c9, -0xbc7b32dcb94da51d, 0x3feea11473eb0187, -0x3c65ebe1abd66c55, 0x3feea2f336cf4e62, -0xbc9369b6f13b3734, 0x3feea589994cce13, -0xbc94d450d872576e, 0x3feea8d99b4492ed, -0x3c8db72fc1f0eab4, 0x3feeace5422aa0db, -0x3c7bf68359f35f44, 0x3feeb1ae99157736, -0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5, -0xbc92434322f4f9aa, 0x3feebd829fde4e50, -0x3c71affc2b91ce27, 0x3feec49182a3f090, -0xbc87c50422622263, 0x3feecc667b5de565, -0xbc91bbd1d3bcbb15, 0x3feed503b23e255d, -0x3c8469846e735ab3, 0x3feede6b5579fdbf, -0x3c8c1a7792cb3387, 0x3feee89f995ad3ad, -0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb, -0xbc68d6f438ad9334, 0x3feeff76f2fb5e47, -0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2, -0x3c736eae30af0cb3, 0x3fef199bdd85529c, -0x3c84e08fd10959ac, 0x3fef27f12e57d14b, -0x3c676b2c6c921968, 0x3fef3720dcef9069, -0xbc8fad5d3ffffa6f, 
0x3fef472d4a07897c, -0x3c74a385a63d07a7, 0x3fef5818dcfba487, -0x3c8e5a50d5c192ac, 0x3fef69e603db3285, -0xbc82d52107b43e1f, 0x3fef7c97337b9b5f, -0x3c74b604603a88d3, 0x3fef902ee78b3ff6, -0xbc8ff7128fd391f0, 0x3fefa4afa2a490da, -0x3c8ec3bc41aa2008, 0x3fefba1bee615a27, -0x3c8a64a931d185ee, 0x3fefd0765b6e4540, -0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8, -#elif N == 128 -0x0, 0x3ff0000000000000, -0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335, -0xbc7160139cd8dc5d, 0x3fefec9a3e778061, -0xbc905e7a108766d1, 0x3fefe315e86e7f85, -0x3c8cd2523567f613, 0x3fefd9b0d3158574, -0xbc8bce8023f98efa, 0x3fefd06b29ddf6de, -0x3c60f74e61e6c861, 0x3fefc74518759bc8, -0x3c90a3e45b33d399, 0x3fefbe3ecac6f383, -0x3c979aa65d837b6d, 0x3fefb5586cf9890f, -0x3c8eb51a92fdeffc, 0x3fefac922b7247f7, -0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2, -0xbc6a033489906e0b, 0x3fef9b66affed31b, -0xbc9556522a2fbd0e, 0x3fef9301d0125b51, -0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc, -0xbc91c923b9d5f416, 0x3fef829aaea92de0, -0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51, -0xbc801b15eaa59348, 0x3fef72b83c7d517b, -0xbc8f1ff055de323d, 0x3fef6af9388c8dea, -0x3c8b898c3f1353bf, 0x3fef635beb6fcb75, -0xbc96d99c7611eb26, 0x3fef5be084045cd4, -0x3c9aecf73e3a2f60, 0x3fef54873168b9aa, -0xbc8fe782cb86389d, 0x3fef4d5022fcd91d, -0x3c8a6f4144a6c38d, 0x3fef463b88628cd6, -0x3c807a05b0e4047d, 0x3fef3f49917ddc96, -0x3c968efde3a8a894, 0x3fef387a6e756238, -0x3c875e18f274487d, 0x3fef31ce4fb2a63f, -0x3c80472b981fe7f2, 0x3fef2b4565e27cdd, -0xbc96b87b3f71085e, 0x3fef24dfe1f56381, -0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1, -0xbc3d219b1a6fbffa, 0x3fef187fd0dad990, -0x3c8b3782720c0ab4, 0x3fef1285a6e4030b, -0x3c6e149289cecb8f, 0x3fef0cafa93e2f56, -0x3c834d754db0abb6, 0x3fef06fe0a31b715, -0x3c864201e2ac744c, 0x3fef0170fc4cd831, -0x3c8fdd395dd3f84a, 0x3feefc08b26416ff, -0xbc86a3803b8e5b04, 0x3feef6c55f929ff1, -0xbc924aedcc4b5068, 0x3feef1a7373aa9cb, -0xbc9907f81b512d8e, 0x3feeecae6d05d866, -0xbc71d1e83e9436d2, 0x3feee7db34e59ff7, -0xbc991919b3ce1b15, 0x3feee32dc313a8e5, -0x3c859f48a72a4c6d, 0x3feedea64c123422, -0xbc9312607a28698a, 0x3feeda4504ac801c, -0xbc58a78f4817895b, 0x3feed60a21f72e2a, -0xbc7c2c9b67499a1b, 0x3feed1f5d950a897, -0x3c4363ed60c2ac11, 0x3feece086061892d, -0x3c9666093b0664ef, 0x3feeca41ed1d0057, -0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0, -0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de, -0x3c7690cebb7aafb0, 0x3feebfdad5362a27, -0x3c931dbdeb54e077, 0x3feebcb299fddd0d, -0xbc8f94340071a38e, 0x3feeb9b2769d2ca7, -0xbc87deccdc93a349, 0x3feeb6daa2cf6642, -0xbc78dec6bd0f385f, 0x3feeb42b569d4f82, -0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f, -0x3c93350518fdd78e, 0x3feeaf4736b527da, -0x3c7b98b72f8a9b05, 0x3feead12d497c7fd, -0x3c9063e1e21c5409, 0x3feeab07dd485429, -0x3c34c7855019c6ea, 0x3feea9268a5946b7, -0x3c9432e62b64c035, 0x3feea76f15ad2148, -0xbc8ce44a6199769f, 0x3feea5e1b976dc09, -0xbc8c33c53bef4da8, 0x3feea47eb03a5585, -0xbc845378892be9ae, 0x3feea34634ccc320, -0xbc93cedd78565858, 0x3feea23882552225, -0x3c5710aa807e1964, 0x3feea155d44ca973, -0xbc93b3efbf5e2228, 0x3feea09e667f3bcd, -0xbc6a12ad8734b982, 0x3feea012750bdabf, -0xbc6367efb86da9ee, 0x3fee9fb23c651a2f, -0xbc80dc3d54e08851, 0x3fee9f7df9519484, -0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74, -0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174, -0xbc8619321e55e68a, 0x3fee9feb564267c9, -0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f, -0xbc7b32dcb94da51d, 0x3feea11473eb0187, -0x3c94ecfd5467c06b, 0x3feea1ed0130c132, -0x3c65ebe1abd66c55, 0x3feea2f336cf4e62, -0xbc88a1c52fb3cf42, 0x3feea427543e1a12, -0xbc9369b6f13b3734, 0x3feea589994cce13, -0xbc805e843a19ff1e, 0x3feea71a4623c7ad, 
-0xbc94d450d872576e, 0x3feea8d99b4492ed, -0x3c90ad675b0e8a00, 0x3feeaac7d98a6699, -0x3c8db72fc1f0eab4, 0x3feeace5422aa0db, -0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c, -0x3c7bf68359f35f44, 0x3feeb1ae99157736, -0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6, -0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5, -0xbc6c23f97c90b959, 0x3feeba44cbc8520f, -0xbc92434322f4f9aa, 0x3feebd829fde4e50, -0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba, -0x3c71affc2b91ce27, 0x3feec49182a3f090, -0x3c6dd235e10a73bb, 0x3feec86319e32323, -0xbc87c50422622263, 0x3feecc667b5de565, -0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33, -0xbc91bbd1d3bcbb15, 0x3feed503b23e255d, -0x3c90cc319cee31d2, 0x3feed99e1330b358, -0x3c8469846e735ab3, 0x3feede6b5579fdbf, -0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a, -0x3c8c1a7792cb3387, 0x3feee89f995ad3ad, -0xbc907b8f4ad1d9fa, 0x3feeee07298db666, -0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb, -0xbc90a40e3da6f640, 0x3feef9728de5593a, -0xbc68d6f438ad9334, 0x3feeff76f2fb5e47, -0xbc91eee26b588a35, 0x3fef05b030a1064a, -0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2, -0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09, -0x3c736eae30af0cb3, 0x3fef199bdd85529c, -0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a, -0x3c84e08fd10959ac, 0x3fef27f12e57d14b, -0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5, -0x3c676b2c6c921968, 0x3fef3720dcef9069, -0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa, -0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c, -0xbc900dae3875a949, 0x3fef4f87080d89f2, -0x3c74a385a63d07a7, 0x3fef5818dcfba487, -0xbc82919e2040220f, 0x3fef60e316c98398, -0x3c8e5a50d5c192ac, 0x3fef69e603db3285, -0x3c843a59ac016b4b, 0x3fef7321f301b460, -0xbc82d52107b43e1f, 0x3fef7c97337b9b5f, -0xbc892ab93b470dc9, 0x3fef864614f5a129, -0x3c74b604603a88d3, 0x3fef902ee78b3ff6, -0x3c83c5ec519d7271, 0x3fef9a51fbc74c83, -0xbc8ff7128fd391f0, 0x3fefa4afa2a490da, -0xbc8dae98e223747d, 0x3fefaf482d8e67f1, -0x3c8ec3bc41aa2008, 0x3fefba1bee615a27, -0x3c842b94c3a9eb32, 0x3fefc52b376bba97, -0x3c8a64a931d185ee, 0x3fefd0765b6e4540, -0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14, -0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8, -0x3c5305c14160cc89, 0x3feff3c22b8f71f1, -#elif N == 256 -0x0, 0x3ff0000000000000, -0xbc84e82fc61851ac, 0x3feffb1afa5abcbf, -0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335, -0xbc82985dd8521d32, 0x3feff168143b0281, -0xbc7160139cd8dc5d, 0x3fefec9a3e778061, -0x3c651e617061bfbd, 0x3fefe7d42e11bbcc, -0xbc905e7a108766d1, 0x3fefe315e86e7f85, -0x3c845fad437fa426, 0x3fefde5f72f654b1, -0x3c8cd2523567f613, 0x3fefd9b0d3158574, -0xbc954529642b232f, 0x3fefd50a0e3c1f89, -0xbc8bce8023f98efa, 0x3fefd06b29ddf6de, -0x3c8293708ef5c32e, 0x3fefcbd42b72a836, -0x3c60f74e61e6c861, 0x3fefc74518759bc8, -0xbc95b9280905b2a4, 0x3fefc2bdf66607e0, -0x3c90a3e45b33d399, 0x3fefbe3ecac6f383, -0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919, -0x3c979aa65d837b6d, 0x3fefb5586cf9890f, -0x3c9407fb30d06420, 0x3fefb0f145e46c85, -0x3c8eb51a92fdeffc, 0x3fefac922b7247f7, -0xbc9a5d04b3b9911b, 0x3fefa83b23395dec, -0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2, -0xbc937a01f0739546, 0x3fef9fa55fdfa9c5, -0xbc6a033489906e0b, 0x3fef9b66affed31b, -0x3c8b8268b04ef0a5, 0x3fef973028d7233e, -0xbc9556522a2fbd0e, 0x3fef9301d0125b51, -0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6, -0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc, -0xbc65704e90c9f860, 0x3fef86a814f204ab, -0xbc91c923b9d5f416, 0x3fef829aaea92de0, -0xbc897cea57e46280, 0x3fef7e95934f312e, -0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51, -0x3c56f01429e2b9d2, 0x3fef76a45471c3c2, -0xbc801b15eaa59348, 0x3fef72b83c7d517b, -0x3c6e653b2459034b, 0x3fef6ed48695bbc0, -0xbc8f1ff055de323d, 0x3fef6af9388c8dea, -0x3c92cc7ea345b7dc, 0x3fef672658375d2f, -0x3c8b898c3f1353bf, 
0x3fef635beb6fcb75, -0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c, -0xbc96d99c7611eb26, 0x3fef5be084045cd4, -0x3c8cdc1873af2155, 0x3fef582f95281c6b, -0x3c9aecf73e3a2f60, 0x3fef54873168b9aa, -0xbc9493684653a131, 0x3fef50e75eb44027, -0xbc8fe782cb86389d, 0x3fef4d5022fcd91d, -0xbc98e2899077520a, 0x3fef49c18438ce4d, -0x3c8a6f4144a6c38d, 0x3fef463b88628cd6, -0x3c9120fcd4f59273, 0x3fef42be3578a819, -0x3c807a05b0e4047d, 0x3fef3f49917ddc96, -0x3c89b788c188c9b8, 0x3fef3bdda27912d1, -0x3c968efde3a8a894, 0x3fef387a6e756238, -0x3c877afbca90ef84, 0x3fef351ffb82140a, -0x3c875e18f274487d, 0x3fef31ce4fb2a63f, -0x3c91512f082876ee, 0x3fef2e85711ece75, -0x3c80472b981fe7f2, 0x3fef2b4565e27cdd, -0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29, -0xbc96b87b3f71085e, 0x3fef24dfe1f56381, -0xbc803297e78260bf, 0x3fef21ba7591bb70, -0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1, -0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13, -0xbc3d219b1a6fbffa, 0x3fef187fd0dad990, -0xbc91e75c40b4251e, 0x3fef157e39771b2f, -0x3c8b3782720c0ab4, 0x3fef1285a6e4030b, -0x3c98a911f1f7785a, 0x3fef0f961f641589, -0x3c6e149289cecb8f, 0x3fef0cafa93e2f56, -0xbc61e7c998db7dbb, 0x3fef09d24abd886b, -0x3c834d754db0abb6, 0x3fef06fe0a31b715, -0x3c85425c11faadf4, 0x3fef0432edeeb2fd, -0x3c864201e2ac744c, 0x3fef0170fc4cd831, -0xbc979517a03e2847, 0x3feefeb83ba8ea32, -0x3c8fdd395dd3f84a, 0x3feefc08b26416ff, -0xbc800e2a46da4bee, 0x3feef96266e3fa2d, -0xbc86a3803b8e5b04, 0x3feef6c55f929ff1, -0xbc87430803972b34, 0x3feef431a2de883b, -0xbc924aedcc4b5068, 0x3feef1a7373aa9cb, -0xbc954de30ae02d94, 0x3feeef26231e754a, -0xbc9907f81b512d8e, 0x3feeecae6d05d866, -0xbc94f2487e1c03ec, 0x3feeea401b7140ef, -0xbc71d1e83e9436d2, 0x3feee7db34e59ff7, -0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4, -0xbc991919b3ce1b15, 0x3feee32dc313a8e5, -0x3c79c3bba5562a2f, 0x3feee0e544ede173, -0x3c859f48a72a4c6d, 0x3feedea64c123422, -0xbc85a71612e21658, 0x3feedc70df1c5175, -0xbc9312607a28698a, 0x3feeda4504ac801c, -0x3c86421f6f1d24d6, 0x3feed822c367a024, -0xbc58a78f4817895b, 0x3feed60a21f72e2a, -0xbc9348a6815fce65, 0x3feed3fb2709468a, -0xbc7c2c9b67499a1b, 0x3feed1f5d950a897, -0x3c835c43984d9871, 0x3feecffa3f84b9d4, -0x3c4363ed60c2ac11, 0x3feece086061892d, -0xbc632afc8d9473a0, 0x3feecc2042a7d232, -0x3c9666093b0664ef, 0x3feeca41ed1d0057, -0xbc95fc5e44de020e, 0x3feec86d668b3237, -0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0, -0xbc7ea0148327c42f, 0x3feec4e1e192aed2, -0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de, -0xbc7a843ad1a88022, 0x3feec17dea6db7d7, -0x3c7690cebb7aafb0, 0x3feebfdad5362a27, -0x3c892ca3bf144e63, 0x3feebe41b817c114, -0x3c931dbdeb54e077, 0x3feebcb299fddd0d, -0xbc902c99b04aa8b0, 0x3feebb2d81d8abff, -0xbc8f94340071a38e, 0x3feeb9b2769d2ca7, -0x3c73e34f67e67118, 0x3feeb8417f4531ee, -0xbc87deccdc93a349, 0x3feeb6daa2cf6642, -0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef, -0xbc78dec6bd0f385f, 0x3feeb42b569d4f82, -0x3c81bd2888075068, 0x3feeb2e2f4f6ad27, -0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f, -0xbc896be8ae89ef8f, 0x3feeb070dde910d2, -0x3c93350518fdd78e, 0x3feeaf4736b527da, -0xbc88e6ac90348602, 0x3feeae27dbe2c4cf, -0x3c7b98b72f8a9b05, 0x3feead12d497c7fd, -0xbc91af7f1365c3ac, 0x3feeac0827ff07cc, -0x3c9063e1e21c5409, 0x3feeab07dd485429, -0xbc943a3540d1898a, 0x3feeaa11fba87a03, -0x3c34c7855019c6ea, 0x3feea9268a5946b7, -0xbc951f58ddaa8090, 0x3feea84590998b93, -0x3c9432e62b64c035, 0x3feea76f15ad2148, -0xbc82e1648e50a17c, 0x3feea6a320dceb71, -0xbc8ce44a6199769f, 0x3feea5e1b976dc09, -0x3c95f30eda98a575, 0x3feea52ae6cdf6f4, -0xbc8c33c53bef4da8, 0x3feea47eb03a5585, -0x3c917ecda8a72159, 0x3feea3dd1d1929fd, -0xbc845378892be9ae, 0x3feea34634ccc320, 
-0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7, -0xbc93cedd78565858, 0x3feea23882552225, -0xbc85c33fdf910406, 0x3feea1c1c70833f6, -0x3c5710aa807e1964, 0x3feea155d44ca973, -0x3c81079ab5789604, 0x3feea0f4b19e9538, -0xbc93b3efbf5e2228, 0x3feea09e667f3bcd, -0x3c727df161cd7778, 0x3feea052fa75173e, -0xbc6a12ad8734b982, 0x3feea012750bdabf, -0x3c93f9924a05b767, 0x3fee9fdcddd47645, -0xbc6367efb86da9ee, 0x3fee9fb23c651a2f, -0xbc87557939a8b5ef, 0x3fee9f9298593ae5, -0xbc80dc3d54e08851, 0x3fee9f7df9519484, -0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87, -0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74, -0xbc88e67a9006c909, 0x3fee9f8286ead08a, -0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174, -0x3c86597566977ac8, 0x3fee9fbd35d7cbfd, -0xbc8619321e55e68a, 0x3fee9feb564267c9, -0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09, -0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f, -0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6, -0xbc7b32dcb94da51d, 0x3feea11473eb0187, -0xbc92dad3519d7b5b, 0x3feea17b0976cfdb, -0x3c94ecfd5467c06b, 0x3feea1ed0130c132, -0x3c87d51410fd15c2, 0x3feea26a62ff86f0, -0x3c65ebe1abd66c55, 0x3feea2f336cf4e62, -0xbc760a3629969871, 0x3feea3878491c491, -0xbc88a1c52fb3cf42, 0x3feea427543e1a12, -0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9, -0xbc9369b6f13b3734, 0x3feea589994cce13, -0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7, -0xbc805e843a19ff1e, 0x3feea71a4623c7ad, -0xbc522cea4f3afa1e, 0x3feea7f4179f5b21, -0xbc94d450d872576e, 0x3feea8d99b4492ed, -0x3c7c88549b958471, 0x3feea9cad931a436, -0x3c90ad675b0e8a00, 0x3feeaac7d98a6699, -0x3c931143962f7877, 0x3feeabd0a478580f, -0x3c8db72fc1f0eab4, 0x3feeace5422aa0db, -0x3c93e9e96f112479, 0x3feeae05bad61778, -0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c, -0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9, -0x3c7bf68359f35f44, 0x3feeb1ae99157736, -0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a, -0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6, -0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2, -0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5, -0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5, -0xbc6c23f97c90b959, 0x3feeba44cbc8520f, -0xbc51669428996971, 0x3feebbdd9a7670b3, -0xbc92434322f4f9aa, 0x3feebd829fde4e50, -0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2, -0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba, -0xbc9294f304f166b6, 0x3feec2bb4d53fe0d, -0x3c71affc2b91ce27, 0x3feec49182a3f090, -0xbc8a1e58414c07d3, 0x3feec674194bb8d5, -0x3c6dd235e10a73bb, 0x3feec86319e32323, -0xbc79740b58a20091, 0x3feeca5e8d07f29e, -0xbc87c50422622263, 0x3feecc667b5de565, -0x3c9165830a2b96c2, 0x3feece7aed8eb8bb, -0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33, -0xbc903d5cbe27874b, 0x3feed2c980460ad8, -0xbc91bbd1d3bcbb15, 0x3feed503b23e255d, -0x3c5986178980fce0, 0x3feed74a8af46052, -0x3c90cc319cee31d2, 0x3feed99e1330b358, -0xbc89472975b1f2a5, 0x3feedbfe53c12e59, -0x3c8469846e735ab3, 0x3feede6b5579fdbf, -0x3c7d8157a34b7e7f, 0x3feee0e521356eba, -0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a, -0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774, -0x3c8c1a7792cb3387, 0x3feee89f995ad3ad, -0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff, -0xbc907b8f4ad1d9fa, 0x3feeee07298db666, -0x3c889c2ea41433c7, 0x3feef0ce6c9a8952, -0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb, -0xbc7274aedac8ff80, 0x3feef68415b749b1, -0xbc90a40e3da6f640, 0x3feef9728de5593a, -0x3c85c620ce76df06, 0x3feefc6e29f1c52a, -0xbc68d6f438ad9334, 0x3feeff76f2fb5e47, -0xbc8fda52e1b51e41, 0x3fef028cf22749e4, -0xbc91eee26b588a35, 0x3fef05b030a1064a, -0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f, -0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2, -0xbc302899507554e5, 0x3fef0f69c3f3a207, -0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09, -0xbc80dda2d4c0010c, 0x3fef16286141b33d, -0x3c736eae30af0cb3, 0x3fef199bdd85529c, -0xbc8a007daadf8d68, 
0x3fef1d1cd9fa652c, -0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a, -0x3c836909391181d3, 0x3fef244778fafb22, -0x3c84e08fd10959ac, 0x3fef27f12e57d14b, -0xbc811cd7dbdf9547, 0x3fef2ba88988c933, -0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5, -0xbc7ac28b7bef6621, 0x3fef33405751c4db, -0x3c676b2c6c921968, 0x3fef3720dcef9069, -0xbc7030587207b9e1, 0x3fef3b0f2e6d1675, -0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa, -0xbc8cc734592af7fc, 0x3fef43155b5bab74, -0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c, -0x3c87752a44f587e8, 0x3fef4b532b08c968, -0xbc900dae3875a949, 0x3fef4f87080d89f2, -0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6, -0x3c74a385a63d07a7, 0x3fef5818dcfba487, -0x3c5159d9d908a96e, 0x3fef5c76e862e6d3, -0xbc82919e2040220f, 0x3fef60e316c98398, -0x3c8c254d16117a68, 0x3fef655d71ff6075, -0x3c8e5a50d5c192ac, 0x3fef69e603db3285, -0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315, -0x3c843a59ac016b4b, 0x3fef7321f301b460, -0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658, -0xbc82d52107b43e1f, 0x3fef7c97337b9b5f, -0xbc63e8e3eab2cbb4, 0x3fef81676b197d17, -0xbc892ab93b470dc9, 0x3fef864614f5a129, -0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12, -0x3c74b604603a88d3, 0x3fef902ee78b3ff6, -0xbc776caa4c2ff1cf, 0x3fef953924676d76, -0x3c83c5ec519d7271, 0x3fef9a51fbc74c83, -0xbc81d5fc525d9940, 0x3fef9f7977cdb740, -0xbc8ff7128fd391f0, 0x3fefa4afa2a490da, -0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e, -0xbc8dae98e223747d, 0x3fefaf482d8e67f1, -0x3c8269947c2bed4a, 0x3fefb4aaa2188510, -0x3c8ec3bc41aa2008, 0x3fefba1bee615a27, -0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a, -0x3c842b94c3a9eb32, 0x3fefc52b376bba97, -0xbc69fa74878ba7c7, 0x3fefcac948dd7274, -0x3c8a64a931d185ee, 0x3fefd0765b6e4540, -0x3c901f3a75ee0efe, 0x3fefd632798844f8, -0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14, -0xbc516a9ce6ed84fa, 0x3fefe1d802243c89, -0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8, -0xbc699c7db2effc76, 0x3fefedba3692d514, -0x3c5305c14160cc89, 0x3feff3c22b8f71f1, -0x3c64b458677f9840, 0x3feff9d96b2a23d9, -#elif N == 512 -0x0, 0x3ff0000000000000, -0xbc75d87ade1f60d5, 0x3feffd8c86da1c0a, -0xbc84e82fc61851ac, 0x3feffb1afa5abcbf, -0x3c9bffdaa7ac4bac, 0x3feff8ab5b2cbd11, -0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335, -0x3c75c18e5ae0563a, 0x3feff3d1e77170b4, -0xbc82985dd8521d32, 0x3feff168143b0281, -0xbc705b1125cf49a5, 0x3fefef003103b10e, -0xbc7160139cd8dc5d, 0x3fefec9a3e778061, -0x3c9f879abbff3f87, 0x3fefea363d42b027, -0x3c651e617061bfbd, 0x3fefe7d42e11bbcc, -0x3c9b14003824712a, 0x3fefe57411915a8a, -0xbc905e7a108766d1, 0x3fefe315e86e7f85, -0x3c61cbf0f38af658, 0x3fefe0b9b35659d8, -0x3c845fad437fa426, 0x3fefde5f72f654b1, -0xbc9a3316383dcbc5, 0x3fefdc0727fc1762, -0x3c8cd2523567f613, 0x3fefd9b0d3158574, -0x3c9901c9e0e797fd, 0x3fefd75c74f0bec2, -0xbc954529642b232f, 0x3fefd50a0e3c1f89, -0xbc89b3236d111646, 0x3fefd2b99fa6407c, -0xbc8bce8023f98efa, 0x3fefd06b29ddf6de, -0xbc8cb191be99b1b0, 0x3fefce1ead925493, -0x3c8293708ef5c32e, 0x3fefcbd42b72a836, -0xbc9acb71e83765b7, 0x3fefc98ba42e7d30, -0x3c60f74e61e6c861, 0x3fefc74518759bc8, -0x3c5cd3e58b03697e, 0x3fefc50088f8093f, -0xbc95b9280905b2a4, 0x3fefc2bdf66607e0, -0xbc8bfb07d4755452, 0x3fefc07d61701716, -0x3c90a3e45b33d399, 0x3fefbe3ecac6f383, -0x3c8aedeb3e7b14cd, 0x3fefbc02331b9715, -0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919, -0x3c9a8eb1f3d914b4, 0x3fefb78f03834e52, -0x3c979aa65d837b6d, 0x3fefb5586cf9890f, -0xbc85b9eb0402507b, 0x3fefb323d833d93f, -0x3c9407fb30d06420, 0x3fefb0f145e46c85, -0xbc93f0f225bbf3ee, 0x3fefaec0b6bdae53, -0x3c8eb51a92fdeffc, 0x3fefac922b7247f7, -0xbc9c3fe7282d1784, 0x3fefaa65a4b520ba, -0xbc9a5d04b3b9911b, 0x3fefa83b23395dec, -0x3c9c8be44bf4cde8, 0x3fefa612a7b26300, 
-0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2, -0x3c820c5444c93c44, 0x3fefa1c7c55189c6, -0xbc937a01f0739546, 0x3fef9fa55fdfa9c5, -0xbc84c6baeb580d7a, 0x3fef9d8503328e6d, -0xbc6a033489906e0b, 0x3fef9b66affed31b, -0x3c8657aa1b0d9f83, 0x3fef994a66f951ce, -0x3c8b8268b04ef0a5, 0x3fef973028d7233e, -0x3c62f2c7fd6ee145, 0x3fef9517f64d9ef1, -0xbc9556522a2fbd0e, 0x3fef9301d0125b51, -0xbc6b0b2789925e90, 0x3fef90edb6db2dc1, -0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6, -0xbc93aad17d197fae, 0x3fef8ccbae51a5c8, -0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc, -0xbc989c464a07ad70, 0x3fef88b1e264a0e9, -0xbc65704e90c9f860, 0x3fef86a814f204ab, -0xbc72c338fce197f4, 0x3fef84a058cbae1e, -0xbc91c923b9d5f416, 0x3fef829aaea92de0, -0xbc6dca724cea0eb6, 0x3fef809717425438, -0xbc897cea57e46280, 0x3fef7e95934f312e, -0x3c464770b955d34d, 0x3fef7c962388149e, -0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51, -0xbc962811c114424f, 0x3fef789d83606e12, -0x3c56f01429e2b9d2, 0x3fef76a45471c3c2, -0x3c8ec58e74904dd4, 0x3fef74ad3c92df73, -0xbc801b15eaa59348, 0x3fef72b83c7d517b, -0x3c8d63b0ab2d5bbf, 0x3fef70c554eaea89, -0x3c6e653b2459034b, 0x3fef6ed48695bbc0, -0xbc9ca9effbeeac92, 0x3fef6ce5d23816c9, -0xbc8f1ff055de323d, 0x3fef6af9388c8dea, -0x3c8bda920de0f6e2, 0x3fef690eba4df41f, -0x3c92cc7ea345b7dc, 0x3fef672658375d2f, -0xbc9a597f9a5ff71c, 0x3fef654013041dc2, -0x3c8b898c3f1353bf, 0x3fef635beb6fcb75, -0x3c50835b125aa573, 0x3fef6179e2363cf8, -0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c, -0x3c8aaa13d61aec1f, 0x3fef5dbc2dc40bf0, -0xbc96d99c7611eb26, 0x3fef5be084045cd4, -0x3c8a4f81aa7110bd, 0x3fef5a06fb91588f, -0x3c8cdc1873af2155, 0x3fef582f95281c6b, -0xbc6817fd6a313e3e, 0x3fef565a51860746, -0x3c9aecf73e3a2f60, 0x3fef54873168b9aa, -0xbc96236af85fd26a, 0x3fef52b6358e15e8, -0xbc9493684653a131, 0x3fef50e75eb44027, -0x3c7795eb4523abe7, 0x3fef4f1aad999e82, -0xbc8fe782cb86389d, 0x3fef4d5022fcd91d, -0x3c8fe58b91b40095, 0x3fef4b87bf9cda38, -0xbc98e2899077520a, 0x3fef49c18438ce4d, -0x3c91ecaa860c614a, 0x3fef47fd7190241e, -0x3c8a6f4144a6c38d, 0x3fef463b88628cd6, -0xbc3e45c83ba0bbcb, 0x3fef447bc96ffc18, -0x3c9120fcd4f59273, 0x3fef42be3578a819, -0xbc29fd3bea07b4ee, 0x3fef4102cd3d09b9, -0x3c807a05b0e4047d, 0x3fef3f49917ddc96, -0x3c87f1c7350e256d, 0x3fef3d9282fc1f27, -0x3c89b788c188c9b8, 0x3fef3bdda27912d1, -0x3c420dac6c124f4f, 0x3fef3a2af0b63bff, -0x3c968efde3a8a894, 0x3fef387a6e756238, -0xbc99501d09bc09fd, 0x3fef36cc1c78903a, -0x3c877afbca90ef84, 0x3fef351ffb82140a, -0x3c73baf864dc8675, 0x3fef33760c547f15, -0x3c875e18f274487d, 0x3fef31ce4fb2a63f, -0x3c91b0575c1eaf54, 0x3fef3028c65fa1ff, -0x3c91512f082876ee, 0x3fef2e85711ece75, -0xbc90364bc9ce33ab, 0x3fef2ce450b3cb82, -0x3c80472b981fe7f2, 0x3fef2b4565e27cdd, -0xbc7548165d85ed32, 0x3fef29a8b16f0a30, -0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29, -0x3c7c3b977a68e32c, 0x3fef2675eeb3ab98, -0xbc96b87b3f71085e, 0x3fef24dfe1f56381, -0xbc93a255f697ecfe, 0x3fef234c0ea83f36, -0xbc803297e78260bf, 0x3fef21ba7591bb70, -0x3c8d2d19edc1e550, 0x3fef202b17779965, -0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1, -0xbc76b2173113dd8c, 0x3fef1d130f50d65c, -0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13, -0x3c811aa5f853590b, 0x3fef1a03fc675d1f, -0xbc3d219b1a6fbffa, 0x3fef187fd0dad990, -0x3c61d61a34c8aa02, 0x3fef16fde4f2e280, -0xbc91e75c40b4251e, 0x3fef157e39771b2f, -0xbc91f892bf6b286d, 0x3fef1400cf2f6c18, -0x3c8b3782720c0ab4, 0x3fef1285a6e4030b, -0x3c7590c65c20e680, 0x3fef110cc15d5346, -0x3c98a911f1f7785a, 0x3fef0f961f641589, -0x3c86fe320b5c1e9d, 0x3fef0e21c1c14833, -0x3c6e149289cecb8f, 0x3fef0cafa93e2f56, -0xbc903cd8b2f25790, 0x3fef0b3fd6a454d2, -0xbc61e7c998db7dbb, 
0x3fef09d24abd886b, -0x3c7b3bf786a54a87, 0x3fef08670653dfe4, -0x3c834d754db0abb6, 0x3fef06fe0a31b715, -0x3c74bb6c41732885, 0x3fef05975721b004, -0x3c85425c11faadf4, 0x3fef0432edeeb2fd, -0xbc99d7399abb9a8b, 0x3fef02d0cf63eeac, -0x3c864201e2ac744c, 0x3fef0170fc4cd831, -0xbc5451d60c6ac9eb, 0x3fef001375752b40, -0xbc979517a03e2847, 0x3feefeb83ba8ea32, -0x3c8787a210ceafd9, 0x3feefd5f4fb45e20, -0x3c8fdd395dd3f84a, 0x3feefc08b26416ff, -0xbc888d1e4629943d, 0x3feefab46484ebb4, -0xbc800e2a46da4bee, 0x3feef96266e3fa2d, -0xbc93369c544088b6, 0x3feef812ba4ea77d, -0xbc86a3803b8e5b04, 0x3feef6c55f929ff1, -0x3c85373ce4eb6dfb, 0x3feef57a577dd72b, -0xbc87430803972b34, 0x3feef431a2de883b, -0x3c83adec8265a67f, 0x3feef2eb428335b4, -0xbc924aedcc4b5068, 0x3feef1a7373aa9cb, -0xbc835388bcac6bc5, 0x3feef06581d3f669, -0xbc954de30ae02d94, 0x3feeef26231e754a, -0x3c727cdb4e4b6640, 0x3feeede91be9c811, -0xbc9907f81b512d8e, 0x3feeecae6d05d866, -0x3c86c2696a26af35, 0x3feeeb761742d808, -0xbc94f2487e1c03ec, 0x3feeea401b7140ef, -0x3c888f6ff06b979a, 0x3feee90c7a61d55b, -0xbc71d1e83e9436d2, 0x3feee7db34e59ff7, -0xbc89d5efaabc2030, 0x3feee6ac4bcdf3ea, -0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4, -0xbc76b8867f91c9d6, 0x3feee4559212ef89, -0xbc991919b3ce1b15, 0x3feee32dc313a8e5, -0x3c94c9c0b5157fe6, 0x3feee20853c10f28, -0x3c79c3bba5562a2f, 0x3feee0e544ede173, -0xbc62455345b51c8e, 0x3feedfc4976d27fa, -0x3c859f48a72a4c6d, 0x3feedea64c123422, -0xbc93331de45477d0, 0x3feedd8a63b0a09b, -0xbc85a71612e21658, 0x3feedc70df1c5175, -0xbc95f84d39b39b16, 0x3feedb59bf29743f, -0xbc9312607a28698a, 0x3feeda4504ac801c, -0xbc72ba4dc7c4d562, 0x3feed932b07a35df, -0x3c86421f6f1d24d6, 0x3feed822c367a024, -0xbc844f25dc02691f, 0x3feed7153e4a136a, -0xbc58a78f4817895b, 0x3feed60a21f72e2a, -0xbc888d328eb9b501, 0x3feed5016f44d8f5, -0xbc9348a6815fce65, 0x3feed3fb2709468a, -0x3c7f0bec42ddb15a, 0x3feed2f74a1af3f1, -0xbc7c2c9b67499a1b, 0x3feed1f5d950a897, -0xbc615f0a2b9cd452, 0x3feed0f6d5817663, -0x3c835c43984d9871, 0x3feecffa3f84b9d4, -0xbc8c2e465a919e1d, 0x3feecf0018321a1a, -0x3c4363ed60c2ac11, 0x3feece086061892d, -0xbc865dfd02bd08f1, 0x3feecd1318eb43ec, -0xbc632afc8d9473a0, 0x3feecc2042a7d232, -0xbc8e68cec89b1762, 0x3feecb2fde7006f4, -0x3c9666093b0664ef, 0x3feeca41ed1d0057, -0xbc48ae858eb682ca, 0x3feec9566f8827d0, -0xbc95fc5e44de020e, 0x3feec86d668b3237, -0x3c5dd71277c0915f, 0x3feec786d3001fe5, -0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0, -0x3c92001325ecd7fb, 0x3feec5c10fa920a1, -0xbc7ea0148327c42f, 0x3feec4e1e192aed2, -0x3c65ace6e2870332, 0x3feec4052c5916c4, -0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de, -0xbc9595c55690ffaf, 0x3feec2532feaada6, -0xbc7a843ad1a88022, 0x3feec17dea6db7d7, -0xbc8b401ba9fb5199, 0x3feec0ab213d5283, -0x3c7690cebb7aafb0, 0x3feebfdad5362a27, -0x3c6df82bf324cc57, 0x3feebf0d073537ca, -0x3c892ca3bf144e63, 0x3feebe41b817c114, -0x3c97cae38641c7bb, 0x3feebd78e8bb586b, -0x3c931dbdeb54e077, 0x3feebcb299fddd0d, -0x3c62d80c5c4a2b67, 0x3feebbeeccbd7b2a, -0xbc902c99b04aa8b0, 0x3feebb2d81d8abff, -0x3c8f39c10d12eaf0, 0x3feeba6eba2e35f0, -0xbc8f94340071a38e, 0x3feeb9b2769d2ca7, -0xbc80b582d74a55d9, 0x3feeb8f8b804f127, -0x3c73e34f67e67118, 0x3feeb8417f4531ee, -0xbc6b4e327ff434ca, 0x3feeb78ccd3deb0d, -0xbc87deccdc93a349, 0x3feeb6daa2cf6642, -0xbc592dca38593e20, 0x3feeb62b00da3b14, -0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef, -0xbc85daca9994833e, 0x3feeb4d359dfd53d, -0xbc78dec6bd0f385f, 0x3feeb42b569d4f82, -0xbc980b4321bc6dae, 0x3feeb385df598d78, -0x3c81bd2888075068, 0x3feeb2e2f4f6ad27, -0xbc8390afec5241c5, 0x3feeb24298571b06, -0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f, 
-0x3c8f15cdafe7d586, 0x3feeb1098bed1bdf, -0xbc896be8ae89ef8f, 0x3feeb070dde910d2, -0xbc910aa91ae9b67f, 0x3feeafdac1351819, -0x3c93350518fdd78e, 0x3feeaf4736b527da, -0x3c957e1b67462375, 0x3feeaeb63f4d854c, -0xbc88e6ac90348602, 0x3feeae27dbe2c4cf, -0x3c8124d5051552a7, 0x3feead9c0d59ca07, -0x3c7b98b72f8a9b05, 0x3feead12d497c7fd, -0xbc3ca103952ecf1f, 0x3feeac8c32824135, -0xbc91af7f1365c3ac, 0x3feeac0827ff07cc, -0x3c773345c02a4fd6, 0x3feeab86b5f43d92, -0x3c9063e1e21c5409, 0x3feeab07dd485429, -0xbc909d2a0fce20f2, 0x3feeaa8b9ee20d1e, -0xbc943a3540d1898a, 0x3feeaa11fba87a03, -0xbc924f2cb4f81746, 0x3feea99af482fc8f, -0x3c34c7855019c6ea, 0x3feea9268a5946b7, -0xbc943592a0a9846b, 0x3feea8b4be135acc, -0xbc951f58ddaa8090, 0x3feea84590998b93, -0xbc956bc85d444f4f, 0x3feea7d902d47c65, -0x3c9432e62b64c035, 0x3feea76f15ad2148, -0x3c914d1e4218319f, 0x3feea707ca0cbf0f, -0xbc82e1648e50a17c, 0x3feea6a320dceb71, -0x3c971c93709313f4, 0x3feea6411b078d26, -0xbc8ce44a6199769f, 0x3feea5e1b976dc09, -0x3c7f88303b60d222, 0x3feea584fd15612a, -0x3c95f30eda98a575, 0x3feea52ae6cdf6f4, -0x3c70125ca18d4b5b, 0x3feea4d3778bc944, -0xbc8c33c53bef4da8, 0x3feea47eb03a5585, -0x3c9592ea73798b11, 0x3feea42c91c56acd, -0x3c917ecda8a72159, 0x3feea3dd1d1929fd, -0xbc9371d6d7d75739, 0x3feea390532205d8, -0xbc845378892be9ae, 0x3feea34634ccc320, -0xbc8ac05fd996f807, 0x3feea2fec30678b7, -0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7, -0xbc91f5067d03653a, 0x3feea277e8dcc390, -0xbc93cedd78565858, 0x3feea23882552225, -0x3c917339c86ce3ad, 0x3feea1fbcc140be7, -0xbc85c33fdf910406, 0x3feea1c1c70833f6, -0xbc77e66065ba2500, 0x3feea18a7420a036, -0x3c5710aa807e1964, 0x3feea155d44ca973, -0x3c964c827ee6b49a, 0x3feea123e87bfb7a, -0x3c81079ab5789604, 0x3feea0f4b19e9538, -0xbc928311a3c73480, 0x3feea0c830a4c8d4, -0xbc93b3efbf5e2228, 0x3feea09e667f3bcd, -0x3c882c79e185e981, 0x3feea077541ee718, -0x3c727df161cd7778, 0x3feea052fa75173e, -0xbc8b48cea80b043b, 0x3feea0315a736c75, -0xbc6a12ad8734b982, 0x3feea012750bdabf, -0xbc4f4863bc8e5180, 0x3fee9ff64b30aa09, -0x3c93f9924a05b767, 0x3fee9fdcddd47645, -0x3c954835dd4b7548, 0x3fee9fc62dea2f8a, -0xbc6367efb86da9ee, 0x3fee9fb23c651a2f, -0xbc8bf41f59b59f8a, 0x3fee9fa10a38cee8, -0xbc87557939a8b5ef, 0x3fee9f9298593ae5, -0xbc8f652fde52775c, 0x3fee9f86e7ba9fef, -0xbc80dc3d54e08851, 0x3fee9f7df9519484, -0xbc7b0300defbcf98, 0x3fee9f77ce1303f6, -0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87, -0xbc89dab646035dc0, 0x3fee9f73c4eaa988, -0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74, -0xbc91f0c230588dde, 0x3fee9f7ad3ef9011, -0xbc88e67a9006c909, 0x3fee9f8286ead08a, -0x3c9106450507a28c, 0x3fee9f8d02d50b8f, -0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174, -0xbc9129729a10f3a0, 0x3fee9faa5953c849, -0x3c86597566977ac8, 0x3fee9fbd35d7cbfd, -0x3c781a70a5124f67, 0x3fee9fd2df29ce7c, -0xbc8619321e55e68a, 0x3fee9feb564267c9, -0x3c941626ea62646d, 0x3feea0069c1a861d, -0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09, -0xbc940b9f54365b7c, 0x3feea04597eeba8f, -0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f, -0x3c873455e0e826c1, 0x3feea08fda749e5d, -0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6, -0x3c94f006ad874e3e, 0x3feea0e56b7fcf03, -0xbc7b32dcb94da51d, 0x3feea11473eb0187, -0xbc8f6d693d0973bb, 0x3feea14652e958aa, -0xbc92dad3519d7b5b, 0x3feea17b0976cfdb, -0x3c58c5ee2b7e7848, 0x3feea1b2988fb9ec, -0x3c94ecfd5467c06b, 0x3feea1ed0130c132, -0xbc88b25e045d207b, 0x3feea22a4456e7a3, -0x3c87d51410fd15c2, 0x3feea26a62ff86f0, -0xbc69cb3314060ca7, 0x3feea2ad5e2850ac, -0x3c65ebe1abd66c55, 0x3feea2f336cf4e62, -0x3c87a0b15d19e0bb, 0x3feea33bedf2e1b9, -0xbc760a3629969871, 0x3feea3878491c491, -0x3c94aa7212bfa73c, 
0x3feea3d5fbab091f, -0xbc88a1c52fb3cf42, 0x3feea427543e1a12, -0xbc81e688272a8a12, 0x3feea47b8f4abaa9, -0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9, -0x3c4ab7b7112ec9d5, 0x3feea52cb0d1736a, -0xbc9369b6f13b3734, 0x3feea589994cce13, -0x3c8a1e274eed4476, 0x3feea5e968443d9a, -0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7, -0x3c94a533a59324da, 0x3feea6b1bdadb46d, -0xbc805e843a19ff1e, 0x3feea71a4623c7ad, -0x3c7a56d2760d087d, 0x3feea785b91e07f1, -0xbc522cea4f3afa1e, 0x3feea7f4179f5b21, -0x3c91682c1c6e8b05, 0x3feea86562ab00ec, -0xbc94d450d872576e, 0x3feea8d99b4492ed, -0x3c89ea99cf7a9591, 0x3feea950c27004c2, -0x3c7c88549b958471, 0x3feea9cad931a436, -0xbc59e57d8f92ff8e, 0x3feeaa47e08e1957, -0x3c90ad675b0e8a00, 0x3feeaac7d98a6699, -0x3c909b176e05a9cd, 0x3feeab4ac52be8f7, -0x3c931143962f7877, 0x3feeabd0a478580f, -0x3c711607f1952c95, 0x3feeac597875c644, -0x3c8db72fc1f0eab4, 0x3feeace5422aa0db, -0x3c869608f0f86431, 0x3feead74029db01e, -0x3c93e9e96f112479, 0x3feeae05bad61778, -0xbc7f1ced15c5c5c0, 0x3feeae9a6bdb5598, -0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c, -0x3c614b97be3f7b4e, 0x3feeafccbc6c19e6, -0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9, -0x3c81c1701c359530, 0x3feeb10afc931857, -0x3c7bf68359f35f44, 0x3feeb1ae99157736, -0xbc8edb1bf6809287, 0x3feeb2553499284b, -0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a, -0xbc8ba58ce7a736d3, 0x3feeb3ab6ccce12c, -0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6, -0xbc93fc025e1db9ce, 0x3feeb50dad829e70, -0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2, -0xbc8d737c7d71382e, 0x3feeb67bff148396, -0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5, -0x3c6ae88c43905293, 0x3feeb7f669e2802b, -0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5, -0xbc93d1f7661fe51b, 0x3feeb97cf65253d1, -0xbc6c23f97c90b959, 0x3feeba44cbc8520f, -0x3c651b68797ffc1c, 0x3feebb0faccf9243, -0xbc51669428996971, 0x3feebbdd9a7670b3, -0x3c54579c5ceed70b, 0x3feebcae95cba768, -0xbc92434322f4f9aa, 0x3feebd829fde4e50, -0x3c87298413381667, 0x3feebe59b9bddb5b, -0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2, -0xbc905000be64e965, 0x3feec01121235681, -0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba, -0xbc89fb12e3454b73, 0x3feec1d4d47f2598, -0xbc9294f304f166b6, 0x3feec2bb4d53fe0d, -0x3c7be2a03697693b, 0x3feec3a4dc5a3dd3, -0x3c71affc2b91ce27, 0x3feec49182a3f090, -0x3c90622b15810eea, 0x3feec581414380f2, -0xbc8a1e58414c07d3, 0x3feec674194bb8d5, -0x3be9a5ecc875d327, 0x3feec76a0bcfc15e, -0x3c6dd235e10a73bb, 0x3feec86319e32323, -0x3c88ea486a3350ef, 0x3feec95f4499c647, -0xbc79740b58a20091, 0x3feeca5e8d07f29e, -0xbc7a2ee551d4c40f, 0x3feecb60f4424fcb, -0xbc87c50422622263, 0x3feecc667b5de565, -0x3c89c31f7e38028b, 0x3feecd6f23701b15, -0x3c9165830a2b96c2, 0x3feece7aed8eb8bb, -0xbc5fac13f4e005a3, 0x3feecf89dacfe68c, -0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33, -0x3c7d8aced7162e89, 0x3feed1b1231475f7, -0xbc903d5cbe27874b, 0x3feed2c980460ad8, -0xbc848f50cea7269f, 0x3feed3e504f696b1, -0xbc91bbd1d3bcbb15, 0x3feed503b23e255d, -0x3c821eb9a08a0542, 0x3feed625893523d4, -0x3c5986178980fce0, 0x3feed74a8af46052, -0xbc6133a953131cfd, 0x3feed872b8950a73, -0x3c90cc319cee31d2, 0x3feed99e1330b358, -0x3c89e95e6f4a0ae4, 0x3feedacc9be14dca, -0xbc89472975b1f2a5, 0x3feedbfe53c12e59, -0xbc90260cf07cb311, 0x3feedd333beb0b7e, -0x3c8469846e735ab3, 0x3feede6b5579fdbf, -0x3c1bca400a7b939d, 0x3feedfa6a1897fd2, -0x3c7d8157a34b7e7f, 0x3feee0e521356eba, -0x3c9140bc34dfc19f, 0x3feee226d59a09ee, -0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a, -0xbc8c9b1da461ab87, 0x3feee4b3e100301e, -0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774, -0x3c8c115f23ebea8e, 0x3feee74dcca5a413, -0x3c8c1a7792cb3387, 0x3feee89f995ad3ad, -0xbc6dcab99f23f84e, 0x3feee9f4a17a4735, 
-0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff, -0x3c60a43e8b7e4bfe, 0x3feeeca868742ee4, -0xbc907b8f4ad1d9fa, 0x3feeee07298db666, -0x3c915b1397075f04, 0x3feeef692a8fa8cd, -0x3c889c2ea41433c7, 0x3feef0ce6c9a8952, -0xbc839f7a1f04d2b0, 0x3feef236f0cf3f3a, -0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb, -0xbc86a510f31e13e6, 0x3feef511c43bbd62, -0xbc7274aedac8ff80, 0x3feef68415b749b1, -0xbc92887ea88e7340, 0x3feef7f9ade433c6, -0xbc90a40e3da6f640, 0x3feef9728de5593a, -0xbc6e57ac604759ba, 0x3feefaeeb6ddfc87, -0x3c85c620ce76df06, 0x3feefc6e29f1c52a, -0x3c8e6c6db4f83226, 0x3feefdf0e844bfc6, -0xbc68d6f438ad9334, 0x3feeff76f2fb5e47, -0xbc8d1bf10460dba0, 0x3fef01004b3a7804, -0xbc8fda52e1b51e41, 0x3fef028cf22749e4, -0x3c8e5d80813dddfc, 0x3fef041ce8e77680, -0xbc91eee26b588a35, 0x3fef05b030a1064a, -0x3c8caff9640f2dcb, 0x3fef0746ca7a67a7, -0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f, -0x3c7a77557fd62db3, 0x3fef0a7df9285775, -0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2, -0xbc651ba6128db749, 0x3fef0dc27e2cb5e5, -0xbc302899507554e5, 0x3fef0f69c3f3a207, -0xbc7c0ffefdc5e251, 0x3fef111462c95b60, -0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09, -0xbc8b6cd058bfd6fa, 0x3fef1473b0468d30, -0xbc80dda2d4c0010c, 0x3fef16286141b33d, -0x3c923759b8aca76d, 0x3fef17e06ff301f4, -0x3c736eae30af0cb3, 0x3fef199bdd85529c, -0xbc895498a73dac7d, 0x3fef1b5aab23e61e, -0xbc8a007daadf8d68, 0x3fef1d1cd9fa652c, -0x3c851de924583108, 0x3fef1ee26b34e065, -0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a, -0xbc8c5fe4051ba06c, 0x3fef2277b9881650, -0x3c836909391181d3, 0x3fef244778fafb22, -0xbc6d1816c0a9ac07, 0x3fef261a9f8630ad, -0x3c84e08fd10959ac, 0x3fef27f12e57d14b, -0xbc7af5c67c4e8235, 0x3fef29cb269e601f, -0xbc811cd7dbdf9547, 0x3fef2ba88988c933, -0xbc8304ef0045d575, 0x3fef2d89584661a1, -0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5, -0x3c8725f94f910375, 0x3fef31553dfa8313, -0xbc7ac28b7bef6621, 0x3fef33405751c4db, -0x3c7b53e99f9191e8, 0x3fef352ee13da7cb, -0x3c676b2c6c921968, 0x3fef3720dcef9069, -0xbc810a79e6d7e2b8, 0x3fef39164b994d23, -0xbc7030587207b9e1, 0x3fef3b0f2e6d1675, -0x3c840635f6d2a9c0, 0x3fef3d0b869d8f0f, -0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa, -0x3c549eeef9ec910c, 0x3fef410e9be12cb9, -0xbc8cc734592af7fc, 0x3fef43155b5bab74, -0xbc8335827ffb9dce, 0x3fef451f95018d17, -0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c, -0x3c645563980ef762, 0x3fef493e7ba2c38c, -0x3c87752a44f587e8, 0x3fef4b532b08c968, -0xbc8cd0205eb2aab2, 0x3fef4d6b596f948c, -0xbc900dae3875a949, 0x3fef4f87080d89f2, -0xbc8aab80ceab2b4a, 0x3fef51a638197a3c, -0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6, -0xbc8f870f40a8ba1b, 0x3fef55ef2158a91f, -0x3c74a385a63d07a7, 0x3fef5818dcfba487, -0x3c83c119f18464c5, 0x3fef5a461eec14be, -0x3c5159d9d908a96e, 0x3fef5c76e862e6d3, -0xbc5a628c2be4e7c7, 0x3fef5eab3a99745b, -0xbc82919e2040220f, 0x3fef60e316c98398, -0xbc72550d76be719a, 0x3fef631e7e2d479d, -0x3c8c254d16117a68, 0x3fef655d71ff6075, -0xbc82090274667d12, 0x3fef679ff37adb4a, -0x3c8e5a50d5c192ac, 0x3fef69e603db3285, -0x3c75f7d28150cac4, 0x3fef6c2fa45c4dfd, -0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315, -0x3c890de9296f4cd1, 0x3fef70cd9ab294e4, -0x3c843a59ac016b4b, 0x3fef7321f301b460, -0x3c832ff9978b34bc, 0x3fef7579e065807d, -0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658, -0xbc7303b63dda1980, 0x3fef7a347f63c159, -0xbc82d52107b43e1f, 0x3fef7c97337b9b5f, -0xbc81f2ba385f2f95, 0x3fef7efd81a2ece1, -0xbc63e8e3eab2cbb4, 0x3fef81676b197d17, -0x3c768d9144ae12fc, 0x3fef83d4f11f8220, -0xbc892ab93b470dc9, 0x3fef864614f5a129, -0x3c853687f542403b, 0x3fef88bad7dcee90, -0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12, -0xbc736ed2de40b407, 0x3fef8daf3fe592e8, -0x3c74b604603a88d3, 
0x3fef902ee78b3ff6, -0xbc614ef56c770f3b, 0x3fef92b2334ac7ee, -0xbc776caa4c2ff1cf, 0x3fef953924676d76, -0x3c8df7d1353d8e88, 0x3fef97c3bc24e350, -0x3c83c5ec519d7271, 0x3fef9a51fbc74c83, -0xbc850bed64091b8a, 0x3fef9ce3e4933c7e, -0xbc81d5fc525d9940, 0x3fef9f7977cdb740, -0x3c89d852381c317f, 0x3fefa212b6bc3181, -0xbc8ff7128fd391f0, 0x3fefa4afa2a490da, -0x3c68a00e3cca04c4, 0x3fefa7503ccd2be5, -0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e, -0xbc5a1f25ce94cae7, 0x3fefac9c80faa594, -0xbc8dae98e223747d, 0x3fefaf482d8e67f1, -0xbc6fb5f3ee307976, 0x3fefb1f78d802dc2, -0x3c8269947c2bed4a, 0x3fefb4aaa2188510, -0x3c737e8ae802b851, 0x3fefb7616ca06dd6, -0x3c8ec3bc41aa2008, 0x3fefba1bee615a27, -0x3c875119560e34af, 0x3fefbcda28a52e59, -0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a, -0xbc7431c3840929c6, 0x3fefc261cbdf5be7, -0x3c842b94c3a9eb32, 0x3fefc52b376bba97, -0xbc8cb472d2e86b99, 0x3fefc7f860a70c22, -0xbc69fa74878ba7c7, 0x3fefcac948dd7274, -0x3c83f5df2fde16a8, 0x3fefcd9df15b82ac, -0x3c8a64a931d185ee, 0x3fefd0765b6e4540, -0x3c8eef18336b62e3, 0x3fefd35288633625, -0x3c901f3a75ee0efe, 0x3fefd632798844f8, -0x3c80d23f87b50a2a, 0x3fefd916302bd526, -0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14, -0x3c8302dee657c8e6, 0x3fefdee8f32a4b45, -0xbc516a9ce6ed84fa, 0x3fefe1d802243c89, -0xbc7b0caa080df170, 0x3fefe4cadbdac61d, -0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8, -0x3c7617a9f2fd24e5, 0x3fefeabbf4c0ba54, -0xbc699c7db2effc76, 0x3fefedba3692d514, -0x3c75f103b8fd5ca7, 0x3feff0bc4866e8ad, -0x3c5305c14160cc89, 0x3feff3c22b8f71f1, -0x3c8e70b094fa075a, 0x3feff6cbe15f6314, -0x3c64b458677f9840, 0x3feff9d96b2a23d9, -0xbc72ec9a3e5d680a, 0x3feffceaca4391b6, -#endif -}, -}; diff --git a/contrib/arm-optimized-routines/pl/math/expf.c b/contrib/arm-optimized-routines/pl/math/expf.c deleted file mode 100644 index cd3cfa925c64..000000000000 --- a/contrib/arm-optimized-routines/pl/math/expf.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Single-precision e^x function. - * - * Copyright (c) 2017-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include <math.h> -#include <stdint.h> -#include "math_config.h" - -/* -EXPF_TABLE_BITS = 5 -EXPF_POLY_ORDER = 3 - -ULP error: 0.502 (nearest rounding.) -Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.) -Wrong count: 170635 (all nearest rounding wrong results with fma.) -Non-nearest ULP error: 1 (rounded ULP error) -*/ - -#define N (1 << EXPF_TABLE_BITS) -#define InvLn2N __expf_data.invln2_scaled -#define T __expf_data.tab -#define C __expf_data.poly_scaled - -static inline uint32_t -top12 (float x) -{ - return asuint (x) >> 20; -} - -float -optr_aor_exp_f32 (float x) -{ - uint32_t abstop; - uint64_t ki, t; - /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ - double_t kd, xd, z, r, r2, y, s; - - xd = (double_t) x; - abstop = top12 (x) & 0x7ff; - if (unlikely (abstop >= top12 (88.0f))) - { - /* |x| >= 88 or x is nan. */ - if (asuint (x) == asuint (-INFINITY)) - return 0.0f; - if (abstop >= top12 (INFINITY)) - return x + x; - if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */ - return __math_oflowf (0); - if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */ - return __math_uflowf (0); - } - - /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */ - z = InvLn2N * xd; - - /* Round and convert z to int, the result is in [-150*N, 128*N] and - ideally nearest int is used, otherwise the magnitude of r can be - bigger which gives larger approximation error.
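To make the reduction above concrete, a minimal self-contained sketch of the same k + r split follows; the plain constants stand in for the pre-scaled __expf_data fields, an assumption made for illustration only (the real routine folds the 2^-EXPF_TABLE_BITS scaling into InvLn2N and the polynomial coefficients):

  #include <math.h>
  #include <stdio.h>

  /* Sketch of the k + r reduction used by optr_aor_exp_f32, with
     N = 32 (EXPF_TABLE_BITS = 5).  */
  int
  main (void)
  {
    double x = 1.0;
    double N = 32.0;
    double z = x * N / log (2.0); /* plays the role of InvLn2N * xd */
    double kd = round (z);        /* k: selects the 2^(k/N) table entry */
    double r = z - kd;            /* r in [-1/2, 1/2] */
    printf ("k = %g, r = %a\n", kd, r);
    return 0;
  }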
*/ - kd = round (z); - ki = lround (z); - r = z - kd; - - /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ - t = T[ki % N]; - t += ki << (52 - EXPF_TABLE_BITS); - s = asdouble (t); - z = C[0] * r + C[1]; - r2 = r * r; - y = C[2] * r + 1; - y = z * r2 + y; - y = y * s; - return eval_as_float (y); -} diff --git a/contrib/arm-optimized-routines/pl/math/expm1_data.c b/contrib/arm-optimized-routines/pl/math/expm1_data.c deleted file mode 100644 index ff7426b90135..000000000000 --- a/contrib/arm-optimized-routines/pl/math/expm1_data.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Coefficients for double-precision e^x - 1 function. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -/* Generated using fpminimax, see tools/expm1.sollya for details. */ -const double __expm1_poly[] = {0x1p-1, - 0x1.5555555555559p-3, - 0x1.555555555554bp-5, - 0x1.111111110f663p-7, - 0x1.6c16c16c1b5f3p-10, - 0x1.a01a01affa35dp-13, - 0x1.a01a018b4ecbbp-16, - 0x1.71ddf82db5bb4p-19, - 0x1.27e517fc0d54bp-22, - 0x1.af5eedae67435p-26, - 0x1.1f143d060a28ap-29}; diff --git a/contrib/arm-optimized-routines/pl/math/include/mathlib.h b/contrib/arm-optimized-routines/pl/math/include/mathlib.h deleted file mode 100644 index f886e7f8c07a..000000000000 --- a/contrib/arm-optimized-routines/pl/math/include/mathlib.h +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Public API. - * - * Copyright (c) 2015-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#ifndef _MATHLIB_H -#define _MATHLIB_H - -float acosf (float); -float acoshf (float); -float asinf (float); -float asinhf (float); -float atan2f (float, float); -float atanf (float); -float atanhf (float); -float cbrtf (float); -float coshf (float); -float cospif (float); -float erfcf (float); -float erff (float); -float erfinvf (float); -float exp10f (float); -float expm1f (float); -float log10f (float); -float log1pf (float); -float sinhf (float); -float sinpif (float); -float tanf (float); -float tanhf (float); - -double acos (double); -double acosh (double); -double asin (double); -double asinh (double); -double atan (double); -double atan2 (double, double); -double atanh (double); -double cbrt (double); -double cosh (double); -double cospi (double); -double erfc (double); -double erfinv (double); -double exp10 (double); -double expm1 (double); -double log10 (double); -double log1p (double); -double sinh (double); -double sinpi (double); -double tanh (double); - -long double cospil (long double); -long double erfinvl (long double); -long double exp10l (long double); -long double sinpil (long double); - -#if __aarch64__ -# if __GNUC__ >= 5 -typedef __Float32x4_t __f32x4_t; -typedef __Float64x2_t __f64x2_t; -# elif __clang_major__ * 100 + __clang_minor__ >= 305 -typedef __attribute__ ((__neon_vector_type__ (4))) float __f32x4_t; -typedef __attribute__ ((__neon_vector_type__ (2))) double __f64x2_t; -# else -# error Unsupported compiler -# endif - -# if __GNUC__ >= 9 || __clang_major__ >= 8 -# define __vpcs __attribute__ ((__aarch64_vector_pcs__)) - -typedef struct __f32x4x2_t -{ - __f32x4_t val[2]; -} __f32x4x2_t; - -typedef struct __f64x2x2_t -{ - __f64x2_t val[2]; -} __f64x2x2_t; - -/* Vector functions following the vector PCS using ABI names. 
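These ABI names follow the AArch64 vector function ABI: _ZGVnN4v_* is the unmasked 4-lane AdvSIMD single-precision form, _ZGVnN2v_* the 2-lane double-precision form. A minimal usage sketch, assuming an AArch64 toolchain where float32x4_t from arm_neon.h is the same type as the header's __f32x4_t (true for recent GCC and clang) and mathlib.h is on the include path:

  #include <arm_neon.h>
  #include <mathlib.h>

  /* Apply the 4-lane AdvSIMD expm1f declared below to a NEON vector. */
  float32x4_t
  expm1_lanes (float32x4_t x)
  {
    return _ZGVnN4v_expm1f (x);
  }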
*/ -__vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_acosf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_acos (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4vv_atan2f (__f32x4_t, __f32x4_t); -__vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_cbrtf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_cbrt (__f64x2_t); -__vpcs __f32x4x2_t _ZGVnN4v_cexpif (__f32x4_t); -__vpcs __f64x2x2_t _ZGVnN2v_cexpi (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_cospif (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_cospi (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_erfcf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_erfc (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_erfinvf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_erfinv (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_exp10f (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_exp10 (__f64x2_t); -__vpcs __f64x2_t _ZGVnN2v_exp2 (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_expm1f (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_expm1 (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4vv_hypotf (__f32x4_t, __f32x4_t); -__vpcs __f64x2_t _ZGVnN2vv_hypot (__f64x2_t, __f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t); -__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_sinpif (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_sinpi (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_tan (__f64x2_t); -__vpcs __f32x4_t _ZGVnN4v_tanhf (__f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_tanh (__f64x2_t); -__vpcs void _ZGVnN4vl4l4_sincosf (__f32x4_t, __f32x4_t *, __f32x4_t *); -__vpcs void _ZGVnN2vl8l8_sincos (__f64x2_t, __f64x2_t *, __f64x2_t *); - -# endif - -# if WANT_SVE_MATH -# include <arm_sve.h> -svfloat32_t _ZGVsMxv_acoshf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_acosh (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_acosf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_acos (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_asinhf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_asinh (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_asinf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_asin (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_atanhf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_atanh (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxvv_atan2f (svfloat32_t, svfloat32_t, svbool_t); -svfloat32_t _ZGVsMxv_atanf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_atan (svfloat64_t, svbool_t); -svfloat64_t _ZGVsMxvv_atan2 (svfloat64_t, svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_cbrtf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_cbrt (svfloat64_t, svbool_t); -svfloat32x2_t _ZGVsMxv_cexpif (svfloat32_t, svbool_t); -svfloat64x2_t _ZGVsMxv_cexpi (svfloat64_t, svbool_t); -svfloat32_t
_ZGVsMxv_coshf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_cosh (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_cosf (svfloat32_t, svbool_t); -svfloat32_t _ZGVsMxv_cospif (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_cos (svfloat64_t, svbool_t); -svfloat64_t _ZGVsMxv_cospi (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_erff (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_erf (svfloat64_t, svbool_t); -svfloat64_t _ZGVsMxv_erfc (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_erfcf (svfloat32_t, svbool_t); -svfloat32_t _ZGVsMxv_expf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_exp (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_exp10f (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_exp10 (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_exp2f (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_exp2 (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_expm1f (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_expm1 (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxvv_hypotf (svfloat32_t, svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxvv_hypot (svfloat64_t, svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_logf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_log (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_log10f (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_log10 (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_log1pf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_log1p (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_log2f (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_log2 (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxvv_powi (svfloat32_t, svint32_t, svbool_t); -svfloat64_t _ZGVsMxvv_powk (svfloat64_t, svint64_t, svbool_t); -svfloat32_t _ZGVsMxvv_powf (svfloat32_t, svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxvv_pow (svfloat64_t, svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_sinhf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_sinh (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_sinf (svfloat32_t, svbool_t); -svfloat32_t _ZGVsMxv_sinpif (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_sin (svfloat64_t, svbool_t); -svfloat64_t _ZGVsMxv_sinpi (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_tanhf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_tanh (svfloat64_t, svbool_t); -svfloat32_t _ZGVsMxv_tanf (svfloat32_t, svbool_t); -svfloat64_t _ZGVsMxv_tan (svfloat64_t, svbool_t); -void _ZGVsMxvl4l4_sincosf (svfloat32_t, float *, float *, svbool_t); -void _ZGVsMxvl8l8_sincos (svfloat64_t, double *, double *, svbool_t); -# endif - -#endif - -#endif diff --git a/contrib/arm-optimized-routines/pl/math/include/pl_test.h b/contrib/arm-optimized-routines/pl/math/include/pl_test.h deleted file mode 100644 index 3a3407e337b8..000000000000 --- a/contrib/arm-optimized-routines/pl/math/include/pl_test.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * PL macros to aid testing. This version of this file is used for building the - * routine, not the tests. Separate definitions are found in test/pl_test.h - * which emit test parameters. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception. - */ - -/* Emit max ULP threshold - silenced for building the routine. */ -#define PL_TEST_ULP(f, l) - -/* Emit routine name if e == 1 and f is expected to correctly trigger fenv - exceptions. e allows declaration to be emitted conditionally upon certain - build flags - defer expansion by one pass to allow those flags to be expanded - properly. 
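A hedged sketch of the two-sided macro scheme this header describes; the test-side expansion shown here is illustrative only (the real definitions live in test/pl_test.h), and the PL_BUILDING_ROUTINE guard is a hypothetical stand-in for however the build selects between the two headers:

  #ifdef PL_BUILDING_ROUTINE
  /* Routine build: the annotation compiles away entirely.  */
  # define PL_TEST_ULP(f, l)
  #else
  /* Test build (illustrative): record the routine name and its ULP
     bound where the harness can scan them out of the binary.  */
  # define PL_TEST_ULP(f, l) \
      const char __pl_test_ulp_##f[] = "TEST_ULP " #f " " #l;
  #endif

  /* A routine source then annotates itself next to its definition: */
  PL_TEST_ULP (expf, 0.502)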
*/ -#define PL_TEST_EXPECT_FENV(f, e) -#define PL_TEST_EXPECT_FENV_ALWAYS(f) - -#define PL_TEST_INTERVAL(f, lo, hi, n) -#define PL_TEST_SYM_INTERVAL(f, lo, hi, n) -#define PL_TEST_INTERVAL_C(f, lo, hi, n, c) -#define PL_TEST_SYM_INTERVAL_C(f, lo, hi, n, c) -#define PL_TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n) diff --git a/contrib/arm-optimized-routines/pl/math/log.c b/contrib/arm-optimized-routines/pl/math/log.c deleted file mode 100644 index 40b0441d981d..000000000000 --- a/contrib/arm-optimized-routines/pl/math/log.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Double-precision log(x) function. - * - * Copyright (c) 2018-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include <float.h> -#include <math.h> -#include <stdint.h> -#include "math_config.h" - -#define T __log_data.tab -#define T2 __log_data.tab2 -#define B __log_data.poly1 -#define A __log_data.poly -#define Ln2hi __log_data.ln2hi -#define Ln2lo __log_data.ln2lo -#define N (1 << LOG_TABLE_BITS) -#define OFF 0x3fe6000000000000 - -/* Top 16 bits of a double. */ -static inline uint32_t -top16 (double x) -{ - return asuint64 (x) >> 48; -} - -double -optr_aor_log_f64 (double x) -{ - /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ - double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo; - uint64_t ix, iz, tmp; - uint32_t top; - int k, i; - - ix = asuint64 (x); - top = top16 (x); - -#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11 -#define LO asuint64 (1.0 - 0x1p-5) -#define HI asuint64 (1.0 + 0x1.1p-5) -#elif LOG_POLY1_ORDER == 12 -#define LO asuint64 (1.0 - 0x1p-4) -#define HI asuint64 (1.0 + 0x1.09p-4) -#endif - if (unlikely (ix - LO < HI - LO)) - { - /* Handle close to 1.0 inputs separately. */ - /* Fix sign of zero with downward rounding when x==1. */ - if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0))) - return 0; - r = x - 1.0; - r2 = r * r; - r3 = r * r2; -#if LOG_POLY1_ORDER == 10 - /* Worst-case error is around 0.516 ULP. */ - y = r3 - * (B[1] + r * B[2] + r2 * B[3] - + r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8]))); - w = B[0] * r2; /* B[0] == -0.5. */ - hi = r + w; - y += r - hi + w; - y += hi; -#elif LOG_POLY1_ORDER == 11 - /* Worst-case error is around 0.516 ULP. */ - y = r3 - * (B[1] + r * B[2] - + r2 - * (B[3] + r * B[4] + r2 * B[5] - + r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9]))); - w = B[0] * r2; /* B[0] == -0.5. */ - hi = r + w; - y += r - hi + w; - y += hi; -#elif LOG_POLY1_ORDER == 12 - y = r3 - * (B[1] + r * B[2] + r2 * B[3] - + r3 - * (B[4] + r * B[5] + r2 * B[6] - + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); -#if N <= 64 - /* Worst-case error is around 0.532 ULP. */ - w = B[0] * r2; /* B[0] == -0.5. */ - hi = r + w; - y += r - hi + w; - y += hi; -#else - /* Worst-case error is around 0.507 ULP. */ - w = r * 0x1p27; - double_t rhi = r + w - w; - double_t rlo = r - rhi; - w = rhi * rhi * B[0]; /* B[0] == -0.5. */ - hi = r + w; - lo = r - hi + w; - lo += B[0] * rlo * (rhi + r); - y += lo; - y += hi; -#endif -#endif - return eval_as_double (y); - } - if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010)) - { - /* x < 0x1p-1022 or inf or nan. */ - if (ix * 2 == 0) - return __math_divzero (1); - if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */ - return x; - if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) - return __math_invalid (x); - /* x is subnormal, normalize it. */ - ix = asuint64 (x * 0x1p52); - ix -= 52ULL << 52; - } - - /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. - The range is split into N subintervals.
- The ith subinterval contains z and c is near its center. */ - tmp = ix - OFF; - i = (tmp >> (52 - LOG_TABLE_BITS)) % N; - k = (int64_t) tmp >> 52; /* arithmetic shift */ - iz = ix - (tmp & 0xfffULL << 52); - invc = T[i].invc; - logc = T[i].logc; - z = asdouble (iz); - - /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ - /* r ~= z/c - 1, |r| < 1/(2*N). */ -#if HAVE_FAST_FMA - /* rounding error: 0x1p-55/N. */ - r = fma (z, invc, -1.0); -#else - /* rounding error: 0x1p-55/N + 0x1p-66. */ - r = (z - T2[i].chi - T2[i].clo) * invc; -#endif - kd = (double_t) k; - - /* hi + lo = r + log(c) + k*Ln2. */ - w = kd * Ln2hi + logc; - hi = w + r; - lo = w - hi + r + kd * Ln2lo; - - /* log(x) = lo + (log1p(r) - r) + hi. */ - r2 = r * r; /* rounding error: 0x1p-54/N^2. */ - /* Worst case error if |y| > 0x1p-5: - 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma) - Worst case error if |y| > 0x1p-4: - 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */ -#if LOG_POLY_ORDER == 6 - y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi; -#elif LOG_POLY_ORDER == 7 - y = lo - + r2 - * (A[0] + r * A[1] + r2 * (A[2] + r * A[3]) - + r2 * r2 * (A[4] + r * A[5])) - + hi; -#endif - return eval_as_double (y); -} diff --git a/contrib/arm-optimized-routines/pl/math/log1p_data.c b/contrib/arm-optimized-routines/pl/math/log1p_data.c deleted file mode 100644 index 6168a0c9a214..000000000000 --- a/contrib/arm-optimized-routines/pl/math/log1p_data.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Data used in double-precision log(1+x) function. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -/* Polynomial coefficients generated using Remez algorithm, see - log1p.sollya for details. */ -const struct log1p_data __log1p_data = { - .coeffs = {-0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2, - 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3, - -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4, - 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4, - -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5, - 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4, - -0x1.cfa7385bdb37ep-6}}; diff --git a/contrib/arm-optimized-routines/pl/math/log_data.c b/contrib/arm-optimized-routines/pl/math/log_data.c deleted file mode 100644 index 34715e5036a3..000000000000 --- a/contrib/arm-optimized-routines/pl/math/log_data.c +++ /dev/null @@ -1,511 +0,0 @@ -/* - * Data for log. - * - * Copyright (c) 2018-2023, Arm Limited. 
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -#define N (1 << LOG_TABLE_BITS) - -const struct log_data __log_data = { -.ln2hi = 0x1.62e42fefa3800p-1, -.ln2lo = 0x1.ef35793c76730p-45, -.poly1 = { -#if LOG_POLY1_ORDER == 10 -// relative error: 0x1.32eccc6p-62 -// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval) --0x1p-1, -0x1.55555555554e5p-2, --0x1.0000000000af2p-2, -0x1.9999999bbe436p-3, --0x1.55555537f9cdep-3, -0x1.24922fc8127cfp-3, --0x1.0000b7d6bb612p-3, -0x1.c806ee1ddbcafp-4, --0x1.972335a9c2d6ep-4, -#elif LOG_POLY1_ORDER == 11 -// relative error: 0x1.52c8b708p-68 -// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval) --0x1p-1, -0x1.5555555555555p-2, --0x1.ffffffffffea9p-3, -0x1.999999999c4d4p-3, --0x1.55555557f5541p-3, -0x1.249248fbe33e4p-3, --0x1.ffffc9a3c825bp-4, -0x1.c71e1f204435dp-4, --0x1.9a7f26377d06ep-4, -0x1.71c30cf8f7364p-4, -#elif LOG_POLY1_ORDER == 12 -// relative error: 0x1.c04d76cp-63 -// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval) --0x1p-1, -0x1.5555555555577p-2, --0x1.ffffffffffdcbp-3, -0x1.999999995dd0cp-3, --0x1.55555556745a7p-3, -0x1.24924a344de3p-3, --0x1.fffffa4423d65p-4, -0x1.c7184282ad6cap-4, --0x1.999eb43b068ffp-4, -0x1.78182f7afd085p-4, --0x1.5521375d145cdp-4, -#endif -}, -.poly = { -#if N == 64 && LOG_POLY_ORDER == 7 -// relative error: 0x1.906eb8ap-58 -// abs error: 0x1.d2cad5a8p-67 -// in -0x1.fp-8 0x1.fp-8 --0x1.0000000000027p-1, -0x1.555555555556ap-2, --0x1.fffffff0440bap-3, -0x1.99999991906c3p-3, --0x1.555c8d7e8201ep-3, -0x1.24978c59151fap-3, -#elif N == 128 && LOG_POLY_ORDER == 6 -// relative error: 0x1.926199e8p-56 -// abs error: 0x1.882ff33p-65 -// in -0x1.fp-9 0x1.fp-9 --0x1.0000000000001p-1, -0x1.555555551305bp-2, --0x1.fffffffeb459p-3, -0x1.999b324f10111p-3, --0x1.55575e506c89fp-3, -#elif N == 128 && LOG_POLY_ORDER == 7 -// relative error: 0x1.649fc4bp-64 -// abs error: 0x1.c3b5769p-74 -// in -0x1.fp-9 0x1.fp-9 --0x1.0000000000001p-1, -0x1.5555555555556p-2, --0x1.fffffffea1a8p-3, -0x1.99999998e9139p-3, --0x1.555776801b968p-3, -0x1.2493c29331a5cp-3, -#endif -}, -/* Algorithm: - - x = 2^k z - log(x) = k ln2 + log(c) + log(z/c) - log(z/c) = poly(z/c - 1) - -where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls -into the ith one, then table entries are computed as - - tab[i].invc = 1/c - tab[i].logc = (double)log(c) - tab2[i].chi = (double)c - tab2[i].clo = (double)(c - (double)c) - -where c is near the center of the subinterval and is chosen by trying +-2^29 -floating point invc candidates around 1/center and selecting one for which - - 1) the rounding error in 0x1.8p9 + logc is 0, - 2) the rounding error in z - chi - clo is < 0x1p-66 and - 3) the rounding error in (double)log(c) is minimized (< 0x1p-66). - -Note: 1) ensures that k*ln2hi + logc can be computed without rounding error, -2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to -a single rounding error when there is no fast fma for z*invc - 1, 3) ensures -that logc + poly(z/c - 1) has small error, however near x == 1 when -|log(x)| < 0x1p-4, this is not enough so that is special cased. 
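A minimal sketch of the naive version of this recipe (not the actual generator, which performs the +-2^29-candidate search and the rounding-error checks 1)-3) described above in higher precision):

  #include <math.h>
  #include <stdio.h>

  /* Print naive {invc, logc} pairs: c near the center of each of the
     N subintervals of [0x1.6p-1, 0x1.6p0).  The selection criteria
     above are deliberately omitted.  */
  int
  main (void)
  {
    int N = 128;
    for (int i = 0; i < N; i++)
      {
        double c = 0x1.6p-1 * (1.0 + (i + 0.5) / N);
        printf ("{%a, %a},\n", 1.0 / c, log (c));
      }
    return 0;
  }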
*/ -.tab = { -#if N == 64 -{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2}, -{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2}, -{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2}, -{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2}, -{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2}, -{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2}, -{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2}, -{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2}, -{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2}, -{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2}, -{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2}, -{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2}, -{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3}, -{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3}, -{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3}, -{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3}, -{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3}, -{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3}, -{0x1.33ae463091760p+0, -0x1.7898db878d000p-3}, -{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3}, -{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3}, -{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3}, -{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3}, -{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3}, -{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3}, -{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4}, -{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4}, -{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4}, -{0x1.194538e960658p+0, -0x1.8197efba9a000p-4}, -{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4}, -{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4}, -{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4}, -{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5}, -{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5}, -{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5}, -{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5}, -{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6}, -{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6}, -{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7}, -{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8}, -{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8}, -{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6}, -{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5}, -{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5}, -{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4}, -{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4}, -{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4}, -{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4}, -{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4}, -{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3}, -{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3}, -{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3}, -{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3}, -{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3}, -{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3}, -{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3}, -{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3}, -{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3}, -{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2}, -{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2}, -{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2}, -{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2}, -{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2}, -{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2}, -#elif N == 128 -{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2}, -{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2}, -{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2}, -{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2}, -{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2}, -{0x1.69147332f0cbap+0, -0x1.602d076180000p-2}, -{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2}, -{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2}, -{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2}, -{0x1.614b36b9ddc14p+0, 
-0x1.49da7fda85000p-2}, -{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2}, -{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2}, -{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2}, -{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2}, -{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2}, -{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2}, -{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2}, -{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2}, -{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2}, -{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2}, -{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2}, -{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2}, -{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2}, -{0x1.4880524d48434p+0, -0x1.feb224586f000p-3}, -{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3}, -{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3}, -{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3}, -{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3}, -{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3}, -{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3}, -{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3}, -{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3}, -{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3}, -{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3}, -{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3}, -{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3}, -{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3}, -{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3}, -{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3}, -{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3}, -{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3}, -{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3}, -{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3}, -{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3}, -{0x1.293726014b530p+0, -0x1.31b996b490000p-3}, -{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3}, -{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3}, -{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3}, -{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3}, -{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3}, -{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4}, -{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4}, -{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4}, -{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4}, -{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4}, -{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4}, -{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4}, -{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4}, -{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4}, -{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4}, -{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4}, -{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4}, -{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4}, -{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4}, -{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5}, -{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5}, -{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5}, -{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5}, -{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5}, -{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5}, -{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5}, -{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5}, -{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6}, -{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6}, -{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6}, -{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6}, -{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7}, -{0x1.02865137932a9p+0, -0x1.419355daa0000p-7}, -{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8}, -{0x1.008040614b195p+0, -0x1.0040979240000p-9}, -{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9}, -{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7}, -{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6}, -{0x1.f25f63ceeadcdp-1, 
0x1.b9fc114890000p-6}, -{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5}, -{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5}, -{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5}, -{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5}, -{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4}, -{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4}, -{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4}, -{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4}, -{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4}, -{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4}, -{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4}, -{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4}, -{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4}, -{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3}, -{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3}, -{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3}, -{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3}, -{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3}, -{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3}, -{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3}, -{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3}, -{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3}, -{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3}, -{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3}, -{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3}, -{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3}, -{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3}, -{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3}, -{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3}, -{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3}, -{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3}, -{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3}, -{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2}, -{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2}, -{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2}, -{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2}, -{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2}, -{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2}, -{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2}, -{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2}, -{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2}, -{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2}, -{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2}, -{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}, -#endif -}, -#if !HAVE_FAST_FMA -.tab2 = { -#if N == 64 -{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56}, -{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55}, -{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55}, -{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56}, -{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55}, -{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57}, -{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56}, -{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56}, -{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57}, -{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56}, -{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57}, -{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55}, -{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55}, -{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55}, -{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56}, -{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58}, -{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55}, -{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55}, -{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57}, -{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56}, -{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57}, -{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57}, -{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56}, -{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55}, -{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58}, -{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55}, -{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58}, -{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59}, -{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59}, 
-{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58}, -{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55}, -{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55}, -{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55}, -{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60}, -{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55}, -{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55}, -{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56}, -{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57}, -{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58}, -{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58}, -{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54}, -{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55}, -{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54}, -{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54}, -{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55}, -{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57}, -{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54}, -{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55}, -{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54}, -{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55}, -{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56}, -{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54}, -{0x1.320000324c55bp+0, 0x1.f81983997354fp-54}, -{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54}, -{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54}, -{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56}, -{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54}, -{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55}, -{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55}, -{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56}, -{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54}, -{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55}, -{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55}, -{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54}, -#elif N == 128 -{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56}, -{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55}, -{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55}, -{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57}, -{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56}, -{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55}, -{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55}, -{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56}, -{0x1.710000e86978p-1, 0x1.bff6671097952p-56}, -{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55}, -{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57}, -{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57}, -{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55}, -{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56}, -{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55}, -{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55}, -{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55}, -{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55}, -{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55}, -{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55}, -{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55}, -{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56}, -{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55}, -{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55}, -{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55}, -{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56}, -{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55}, -{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56}, -{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55}, -{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55}, -{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60}, -{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55}, -{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56}, -{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55}, -{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55}, -{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55}, -{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55}, -{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57}, 
-{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55}, -{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57}, -{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58}, -{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56}, -{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56}, -{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55}, -{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56}, -{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57}, -{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57}, -{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55}, -{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55}, -{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57}, -{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55}, -{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55}, -{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56}, -{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57}, -{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55}, -{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55}, -{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56}, -{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55}, -{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58}, -{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56}, -{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56}, -{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55}, -{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55}, -{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57}, -{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56}, -{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56}, -{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56}, -{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58}, -{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55}, -{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56}, -{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58}, -{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55}, -{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59}, -{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55}, -{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55}, -{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57}, -{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56}, -{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57}, -{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56}, -{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57}, -{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55}, -{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54}, -{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54}, -{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55}, -{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57}, -{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54}, -{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55}, -{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56}, -{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55}, -{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54}, -{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54}, -{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55}, -{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54}, -{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54}, -{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57}, -{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54}, -{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54}, -{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54}, -{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56}, -{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56}, -{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56}, -{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54}, -{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55}, -{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55}, -{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55}, -{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54}, -{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54}, -{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55}, -{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54}, -{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55}, -{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56}, 
-{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54}, -{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57}, -{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55}, -{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55}, -{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54}, -{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54}, -{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54}, -{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54}, -{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54}, -{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57}, -{0x1.530001605277ap+0, -0x1.6bfcece233209p-54}, -{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55}, -{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54}, -{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55}, -{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54}, -{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54}, -{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}, -#endif -}, -#endif /* !HAVE_FAST_FMA */ -}; diff --git a/contrib/arm-optimized-routines/pl/math/logf.c b/contrib/arm-optimized-routines/pl/math/logf.c deleted file mode 100644 index 17a74ed6d28f..000000000000 --- a/contrib/arm-optimized-routines/pl/math/logf.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Single-precision log function. - * - * Copyright (c) 2017-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include <math.h> -#include <stdint.h> -#include "math_config.h" - -/* -LOGF_TABLE_BITS = 4 -LOGF_POLY_ORDER = 4 - -ULP error: 0.818 (nearest rounding.) -Relative error: 1.957 * 2^-26 (before rounding.) -*/ - -#define T __logf_data.tab -#define A __logf_data.poly -#define Ln2 __logf_data.ln2 -#define N (1 << LOGF_TABLE_BITS) -#define OFF 0x3f330000 - -float -optr_aor_log_f32 (float x) -{ - /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ - double_t z, r, r2, y, y0, invc, logc; - uint32_t ix, iz, tmp; - int k, i; - - ix = asuint (x); -#if WANT_ROUNDING - /* Fix sign of zero with downward rounding when x==1. */ - if (unlikely (ix == 0x3f800000)) - return 0; -#endif - if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000)) - { - /* x < 0x1p-126 or inf or nan. */ - if (ix * 2 == 0) - return __math_divzerof (1); - if (ix == 0x7f800000) /* log(inf) == inf. */ - return x; - if ((ix & 0x80000000) || ix * 2 >= 0xff000000) - return __math_invalidf (x); - /* x is subnormal, normalize it. */ - ix = asuint (x * 0x1p23f); - ix -= 23 << 23; - } - - /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - OFF; - i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; - k = (int32_t) tmp >> 23; /* arithmetic shift */ - iz = ix - (tmp & 0x1ff << 23); - invc = T[i].invc; - logc = T[i].logc; - z = (double_t) asfloat (iz); - - /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */ - r = z * invc - 1; - y0 = logc + (double_t) k * Ln2; - - /* Pipelined polynomial evaluation to approximate log1p(r). */ - r2 = r * r; - y = A[1] * r + A[2]; - y = A[0] * r2 + y; - y = y * r2 + (y0 + r); - return eval_as_float (y); -} diff --git a/contrib/arm-optimized-routines/pl/math/logf_data.c b/contrib/arm-optimized-routines/pl/math/logf_data.c deleted file mode 100644 index 97d9eb8d0097..000000000000 --- a/contrib/arm-optimized-routines/pl/math/logf_data.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Data definition for logf and log10f. - * - * Copyright (c) 2017-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -const struct logf_data __logf_data = { - .tab = - { - {0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2}, - {0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2}, - {0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2}, - {0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3}, - {0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3}, - {0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3}, - {0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4}, - {0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4}, - {0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5}, - {0x1p+0, 0x0p+0}, - {0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5}, - {0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4}, - {0x1.b2036576afce6p-1, 0x1.526e57720db08p-3}, - {0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3}, - {0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2}, - {0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2}, - }, - .ln2 = 0x1.62e42fefa39efp-1, - .invln10 = 0x1.bcb7b1526e50ep-2, - .poly = { - -0x1.00ea348b88334p-2, - 0x1.5575b0be00b6ap-2, - -0x1.ffffef20a4123p-2, - }}; diff --git a/contrib/arm-optimized-routines/pl/math/math_config.h b/contrib/arm-optimized-routines/pl/math/math_config.h deleted file mode 100644 index c3dd8f2db8c7..000000000000 --- a/contrib/arm-optimized-routines/pl/math/math_config.h +++ /dev/null @@ -1,624 +0,0 @@ -/* - * Configuration for math routines. - * - * Copyright (c) 2017-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#ifndef _MATH_CONFIG_H -#define _MATH_CONFIG_H - -#include <math.h> -#include <stdint.h> - -#ifndef WANT_ROUNDING -/* If defined to 1, return correct results for special cases in non-nearest - rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than - -0.0f). This may be set to 0 if there is no fenv support or if math - functions only get called in round to nearest mode. */ -# define WANT_ROUNDING 1 -#endif -#ifndef WANT_ERRNO -/* If defined to 1, set errno in math functions according to ISO C. Many math - libraries do not set errno, so this is 0 by default. It may need to be - set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */ -# define WANT_ERRNO 0 -#endif -#ifndef WANT_SIMD_EXCEPT -/* If defined to 1, trigger fp exceptions in vector routines, consistently with - behaviour expected from the corresponding scalar routine. */ -# define WANT_SIMD_EXCEPT 0 -#endif - -/* Compiler can inline round as a single instruction. */ -#ifndef HAVE_FAST_ROUND -# if __aarch64__ -# define HAVE_FAST_ROUND 1 -# else -# define HAVE_FAST_ROUND 0 -# endif -#endif - -/* Compiler can inline lround, but not (long)round(x). */ -#ifndef HAVE_FAST_LROUND -# if __aarch64__ && (100 * __GNUC__ + __GNUC_MINOR__) >= 408 \ - && __NO_MATH_ERRNO__ -# define HAVE_FAST_LROUND 1 -# else -# define HAVE_FAST_LROUND 0 -# endif -#endif - -/* Compiler can inline fma as a single instruction. */ -#ifndef HAVE_FAST_FMA -# if defined FP_FAST_FMA || __aarch64__ -# define HAVE_FAST_FMA 1 -# else -# define HAVE_FAST_FMA 0 -# endif -#endif - -/* Provide *_finite symbols and some of the glibc hidden symbols - so libmathlib can be used with binaries compiled against glibc - to interpose math functions with both static and dynamic linking. */ -#ifndef USE_GLIBC_ABI -# if __GNUC__ -# define USE_GLIBC_ABI 1 -# else -# define USE_GLIBC_ABI 0 -# endif -#endif - -/* Optionally used extensions.
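These rely on GNU C attributes and builtins; the #else branch below defines them away as no-ops for other compilers.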
*/ -#ifdef __GNUC__ -# define HIDDEN __attribute__ ((__visibility__ ("hidden"))) -# define NOINLINE __attribute__ ((noinline)) -# define UNUSED __attribute__ ((unused)) -# define likely(x) __builtin_expect (!!(x), 1) -# define unlikely(x) __builtin_expect (x, 0) -# if __GNUC__ >= 9 -# define attribute_copy(f) __attribute__ ((copy (f))) -# else -# define attribute_copy(f) -# endif -# define strong_alias(f, a) \ - extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f); -# define hidden_alias(f, a) \ - extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \ - attribute_copy (f); -#else -# define HIDDEN -# define NOINLINE -# define UNUSED -# define likely(x) (x) -# define unlikely(x) (x) -#endif - -/* Return ptr but hide its value from the compiler so accesses through it - cannot be optimized based on the contents. */ -#define ptr_barrier(ptr) \ - ({ \ - __typeof (ptr) __ptr = (ptr); \ - __asm("" : "+r"(__ptr)); \ - __ptr; \ - }) - -/* Symbol renames to avoid libc conflicts. */ -#define __math_oflowf arm_math_oflowf -#define __math_uflowf arm_math_uflowf -#define __math_may_uflowf arm_math_may_uflowf -#define __math_divzerof arm_math_divzerof -#define __math_oflow arm_math_oflow -#define __math_uflow arm_math_uflow -#define __math_may_uflow arm_math_may_uflow -#define __math_divzero arm_math_divzero -#define __math_invalidf arm_math_invalidf -#define __math_invalid arm_math_invalid -#define __math_check_oflow arm_math_check_oflow -#define __math_check_uflow arm_math_check_uflow -#define __math_check_oflowf arm_math_check_oflowf -#define __math_check_uflowf arm_math_check_uflowf - -#if HAVE_FAST_ROUND -/* When set, the roundtoint and converttoint functions are provided with - the semantics documented below. */ -# define TOINT_INTRINSICS 1 - -/* Round x to nearest int in all rounding modes, ties have to be rounded - consistently with converttoint so the results match. If the result - would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */ -static inline double_t -roundtoint (double_t x) -{ - return round (x); -} - -/* Convert x to nearest int in all rounding modes, ties have to be rounded - consistently with roundtoint. If the result is not representable in an - int32_t then the semantics is unspecified. */ -static inline int32_t -converttoint (double_t x) -{ -# if HAVE_FAST_LROUND - return lround (x); -# else - return (long) round (x); -# endif -} -#endif - -static inline uint32_t -asuint (float f) -{ - union - { - float f; - uint32_t i; - } u = { f }; - return u.i; -} - -static inline float -asfloat (uint32_t i) -{ - union - { - uint32_t i; - float f; - } u = { i }; - return u.f; -} - -static inline uint64_t -asuint64 (double f) -{ - union - { - double f; - uint64_t i; - } u = { f }; - return u.i; -} - -static inline double -asdouble (uint64_t i) -{ - union - { - uint64_t i; - double f; - } u = { i }; - return u.f; -} - -#ifndef IEEE_754_2008_SNAN -# define IEEE_754_2008_SNAN 1 -#endif -static inline int -issignalingf_inline (float x) -{ - uint32_t ix = asuint (x); - if (!IEEE_754_2008_SNAN) - return (ix & 0x7fc00000) == 0x7fc00000; - return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000; -} - -static inline int -issignaling_inline (double x) -{ - uint64_t ix = asuint64 (x); - if (!IEEE_754_2008_SNAN) - return (ix & 0x7ff8000000000000) == 0x7ff8000000000000; - return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL; -} - -#if __aarch64__ && __GNUC__ -/* Prevent the optimization of a floating-point expression.
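The empty asm with a "+w" operand pins the value to an AArch64 FP/SIMD register without emitting any instruction, so the compiler cannot constant-fold or reorder the expression across the barrier.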
*/ -static inline float -opt_barrier_float (float x) -{ - __asm__ __volatile__ ("" : "+w" (x)); - return x; -} -static inline double -opt_barrier_double (double x) -{ - __asm__ __volatile__ ("" : "+w" (x)); - return x; -} -/* Force the evaluation of a floating-point expression for its side-effect. */ -static inline void -force_eval_float (float x) -{ - __asm__ __volatile__ ("" : "+w" (x)); -} -static inline void -force_eval_double (double x) -{ - __asm__ __volatile__ ("" : "+w" (x)); -} -#else -static inline float -opt_barrier_float (float x) -{ - volatile float y = x; - return y; -} -static inline double -opt_barrier_double (double x) -{ - volatile double y = x; - return y; -} -static inline void -force_eval_float (float x) -{ - volatile float y UNUSED = x; -} -static inline void -force_eval_double (double x) -{ - volatile double y UNUSED = x; -} -#endif - -/* Evaluate an expression as the specified type, normally a type - cast should be enough, but compilers implement non-standard - excess-precision handling, so when FLT_EVAL_METHOD != 0 then - these functions may need to be customized. */ -static inline float -eval_as_float (float x) -{ - return x; -} -static inline double -eval_as_double (double x) -{ - return x; -} - -/* Error handling tail calls for special cases, with a sign argument. - The sign of the return value is set if the argument is non-zero. */ - -/* The result overflows. */ -HIDDEN float __math_oflowf (uint32_t); -/* The result underflows to 0 in nearest rounding mode. */ -HIDDEN float __math_uflowf (uint32_t); -/* The result underflows to 0 in some directed rounding mode only. */ -HIDDEN float __math_may_uflowf (uint32_t); -/* Division by zero. */ -HIDDEN float __math_divzerof (uint32_t); -/* The result overflows. */ -HIDDEN double __math_oflow (uint32_t); -/* The result underflows to 0 in nearest rounding mode. */ -HIDDEN double __math_uflow (uint32_t); -/* The result underflows to 0 in some directed rounding mode only. */ -HIDDEN double __math_may_uflow (uint32_t); -/* Division by zero. */ -HIDDEN double __math_divzero (uint32_t); - -/* Error handling using input checking. */ - -/* Invalid input unless it is a quiet NaN. */ -HIDDEN float __math_invalidf (float); -/* Invalid input unless it is a quiet NaN. */ -HIDDEN double __math_invalid (double); - -/* Error handling using output checking, only for errno setting. */ - -/* Check if the result overflowed to infinity. */ -HIDDEN double __math_check_oflow (double); -/* Check if the result underflowed to 0. */ -HIDDEN double __math_check_uflow (double); - -/* Check if the result overflowed to infinity. */ -static inline double -check_oflow (double x) -{ - return WANT_ERRNO ? __math_check_oflow (x) : x; -} - -/* Check if the result underflowed to 0. */ -static inline double -check_uflow (double x) -{ - return WANT_ERRNO ? __math_check_uflow (x) : x; -} - -/* Check if the result overflowed to infinity. */ -HIDDEN float __math_check_oflowf (float); -/* Check if the result underflowed to 0. */ -HIDDEN float __math_check_uflowf (float); - -/* Check if the result overflowed to infinity. */ -static inline float -check_oflowf (float x) -{ - return WANT_ERRNO ? __math_check_oflowf (x) : x; -} - -/* Check if the result underflowed to 0. */ -static inline float -check_uflowf (float x) -{ - return WANT_ERRNO ? 
__math_check_uflowf (x) : x; -} - -extern const struct erff_data -{ - struct - { - float erf, scale; - } tab[513]; -} __erff_data HIDDEN; - -extern const struct sv_erff_data -{ - float erf[513]; - float scale[513]; -} __sv_erff_data HIDDEN; - -extern const struct erfcf_data -{ - struct - { - float erfc, scale; - } tab[645]; -} __erfcf_data HIDDEN; - -/* Data for logf and log10f. */ -#define LOGF_TABLE_BITS 4 -#define LOGF_POLY_ORDER 4 -extern const struct logf_data -{ - struct - { - double invc, logc; - } tab[1 << LOGF_TABLE_BITS]; - double ln2; - double invln10; - double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */ -} __logf_data HIDDEN; - -/* Data for low accuracy log10 (with 1/ln(10) included in coefficients). */ -#define LOG10_TABLE_BITS 7 -#define LOG10_POLY_ORDER 6 -#define LOG10_POLY1_ORDER 12 -extern const struct log10_data -{ - double ln2hi; - double ln2lo; - double invln10; - double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10). */ - double poly1[LOG10_POLY1_ORDER - 1]; - struct - { - double invc, logc; - } tab[1 << LOG10_TABLE_BITS]; -#if !HAVE_FAST_FMA - struct - { - double chi, clo; - } tab2[1 << LOG10_TABLE_BITS]; -#endif -} __log10_data HIDDEN; - -#define EXP_TABLE_BITS 7 -#define EXP_POLY_ORDER 5 -/* Use polynomial that is optimized for a wider input range. This may be - needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */ -#define EXP_POLY_WIDE 0 -/* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be - needed for good precision in non-nearest rounding and !EXP_POLY_WIDE. */ -#define EXP_USE_TOINT_NARROW 0 -#define EXP2_POLY_ORDER 5 -#define EXP2_POLY_WIDE 0 -extern const struct exp_data -{ - double invln2N; - double shift; - double negln2hiN; - double negln2loN; - double poly[4]; /* Last four coefficients. */ - double exp2_shift; - double exp2_poly[EXP2_POLY_ORDER]; - uint64_t tab[2 * (1 << EXP_TABLE_BITS)]; -} __exp_data HIDDEN; - -/* Copied from math/v_exp.h for use in vector exp_tail. */ -#define V_EXP_TAIL_TABLE_BITS 8 -extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN; - -/* Copied from math/v_exp.h for use in vector exp2. */ -#define V_EXP_TABLE_BITS 7 -extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN; - -extern const struct erf_data -{ - struct - { - double erf, scale; - } tab[769]; -} __erf_data HIDDEN; - -extern const struct sv_erf_data -{ - double erf[769]; - double scale[769]; -} __sv_erf_data HIDDEN; - -extern const struct erfc_data -{ - struct - { - double erfc, scale; - } tab[3488]; -} __erfc_data HIDDEN; - -#define ATAN_POLY_NCOEFFS 20 -extern const struct atan_poly_data -{ - double poly[ATAN_POLY_NCOEFFS]; -} __atan_poly_data HIDDEN; - -#define ATANF_POLY_NCOEFFS 8 -extern const struct atanf_poly_data -{ - float poly[ATANF_POLY_NCOEFFS]; -} __atanf_poly_data HIDDEN; - -#define ASINHF_NCOEFFS 8 -extern const struct asinhf_data -{ - float coeffs[ASINHF_NCOEFFS]; -} __asinhf_data HIDDEN; - -#define LOG_TABLE_BITS 7 -#define LOG_POLY_ORDER 6 -#define LOG_POLY1_ORDER 12 -extern const struct log_data -{ - double ln2hi; - double ln2lo; - double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.
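It is implicit, so only the remaining coefficients are stored.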
*/ - double poly1[LOG_POLY1_ORDER - 1]; - struct - { - double invc, logc; - } tab[1 << LOG_TABLE_BITS]; -#if !HAVE_FAST_FMA - struct - { - double chi, clo; - } tab2[1 << LOG_TABLE_BITS]; -#endif -} __log_data HIDDEN; - -#define ASINH_NCOEFFS 18 -extern const struct asinh_data -{ - double poly[ASINH_NCOEFFS]; -} __asinh_data HIDDEN; - -#define LOG1P_NCOEFFS 19 -extern const struct log1p_data -{ - double coeffs[LOG1P_NCOEFFS]; -} __log1p_data HIDDEN; - -#define LOG1PF_2U5 -#define LOG1PF_NCOEFFS 9 -extern const struct log1pf_data -{ - float coeffs[LOG1PF_NCOEFFS]; -} __log1pf_data HIDDEN; - -#define TANF_P_POLY_NCOEFFS 6 -/* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps. */ -#define TANF_Q_POLY_NCOEFFS 4 -extern const struct tanf_poly_data -{ - float poly_tan[TANF_P_POLY_NCOEFFS]; - float poly_cotan[TANF_Q_POLY_NCOEFFS]; -} __tanf_poly_data HIDDEN; - -#define V_LOG2_TABLE_BITS 7 -extern const struct v_log2_data -{ - double poly[5]; - double invln2; - struct - { - double invc, log2c; - } table[1 << V_LOG2_TABLE_BITS]; -} __v_log2_data HIDDEN; - -#define V_LOG10_TABLE_BITS 7 -extern const struct v_log10_data -{ - double poly[5]; - double invln10, log10_2; - struct - { - double invc, log10c; - } table[1 << V_LOG10_TABLE_BITS]; -} __v_log10_data HIDDEN; - -/* Some data for SVE powf's internal exp and log. */ -#define V_POWF_EXP2_TABLE_BITS 5 -#define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS) -#define V_POWF_LOG2_TABLE_BITS 5 -#define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS) -extern const struct v_powf_data -{ - double invc[V_POWF_LOG2_N]; - double logc[V_POWF_LOG2_N]; - uint64_t scale[V_POWF_EXP2_N]; -} __v_powf_data HIDDEN; - -#define V_LOG_POLY_ORDER 6 -#define V_LOG_TABLE_BITS 7 -extern const struct v_log_data -{ - /* Shared data for vector log and log-derived routines (e.g. asinh). */ - double poly[V_LOG_POLY_ORDER - 1]; - double ln2; - struct - { - double invc, logc; - } table[1 << V_LOG_TABLE_BITS]; -} __v_log_data HIDDEN; - -#define EXPM1F_POLY_ORDER 5 -extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN; - -#define EXPF_TABLE_BITS 5 -#define EXPF_POLY_ORDER 3 -extern const struct expf_data -{ - uint64_t tab[1 << EXPF_TABLE_BITS]; - double invln2_scaled; - double poly_scaled[EXPF_POLY_ORDER]; -} __expf_data HIDDEN; - -#define EXPM1_POLY_ORDER 11 -extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN; - -extern const struct cbrtf_data -{ - float poly[4]; - float table[5]; -} __cbrtf_data HIDDEN; - -extern const struct cbrt_data -{ - double poly[4]; - double table[5]; -} __cbrt_data HIDDEN; - -#define ASINF_POLY_ORDER 4 -extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN; - -#define ASIN_POLY_ORDER 11 -extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN; - -/* Some data for AdvSIMD and SVE pow's internal exp and log. */ -#define V_POW_EXP_TABLE_BITS 8 -extern const struct v_pow_exp_data -{ - double poly[3]; - double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift; - uint64_t sbits[1 << V_POW_EXP_TABLE_BITS]; -} __v_pow_exp_data HIDDEN; - -#define V_POW_LOG_TABLE_BITS 7 -extern const struct v_pow_log_data -{ - double poly[7]; /* First coefficient is 1. 
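As in log_data above, the leading 1.0 is implicit and not stored.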
- double ln2_hi, ln2_lo; - double invc[1 << V_POW_LOG_TABLE_BITS]; - double logc[1 << V_POW_LOG_TABLE_BITS]; - double logctail[1 << V_POW_LOG_TABLE_BITS]; -} __v_pow_log_data HIDDEN; - -#endif diff --git a/contrib/arm-optimized-routines/pl/math/math_err.c b/contrib/arm-optimized-routines/pl/math/math_err.c deleted file mode 100644 index 74db54a5b2cd..000000000000 --- a/contrib/arm-optimized-routines/pl/math/math_err.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Double-precision math error handling. - * - * Copyright (c) 2018-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -#if WANT_ERRNO -# include <errno.h> -/* NOINLINE reduces code size and avoids making math functions non-leaf - when the error handling is inlined. */ -NOINLINE static double -with_errno (double y, int e) -{ - errno = e; - return y; -} -#else -# define with_errno(x, e) (x) -#endif - -/* NOINLINE reduces code size. */ -NOINLINE static double -xflow (uint32_t sign, double y) -{ - y = eval_as_double (opt_barrier_double (sign ? -y : y) * y); - return with_errno (y, ERANGE); -} - -HIDDEN double -__math_uflow (uint32_t sign) -{ - return xflow (sign, 0x1p-767); -} - -/* Underflows to zero in some non-nearest rounding mode, setting errno - is valid even if the result is non-zero, but in the subnormal range. */ -HIDDEN double -__math_may_uflow (uint32_t sign) -{ - return xflow (sign, 0x1.8p-538); -} - -HIDDEN double -__math_oflow (uint32_t sign) -{ - return xflow (sign, 0x1p769); -} - -HIDDEN double -__math_divzero (uint32_t sign) -{ - double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0; - return with_errno (y, ERANGE); -} - -HIDDEN double -__math_invalid (double x) -{ - double y = (x - x) / (x - x); - return isnan (x) ? y : with_errno (y, EDOM); -} - -/* Check result and set errno if necessary. */ - -HIDDEN double -__math_check_uflow (double y) -{ - return y == 0.0 ? with_errno (y, ERANGE) : y; -} - -HIDDEN double -__math_check_oflow (double y) -{ - return isinf (y) ? with_errno (y, ERANGE) : y; -} diff --git a/contrib/arm-optimized-routines/pl/math/math_errf.c b/contrib/arm-optimized-routines/pl/math/math_errf.c deleted file mode 100644 index 2b8c6bd25753..000000000000 --- a/contrib/arm-optimized-routines/pl/math/math_errf.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Single-precision math error handling. - * - * Copyright (c) 2017-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -#if WANT_ERRNO -# include <errno.h> -/* NOINLINE reduces code size and avoids making math functions non-leaf - when the error handling is inlined. */ -NOINLINE static float -with_errnof (float y, int e) -{ - errno = e; - return y; -} -#else -# define with_errnof(x, e) (x) -#endif - -/* NOINLINE reduces code size. */ -NOINLINE static float -xflowf (uint32_t sign, float y) -{ - y = eval_as_float (opt_barrier_float (sign ? -y : y) * y); - return with_errnof (y, ERANGE); -} - -HIDDEN float -__math_uflowf (uint32_t sign) -{ - return xflowf (sign, 0x1p-95f); -} - -/* Underflows to zero in some non-nearest rounding mode, setting errno - is valid even if the result is non-zero, but in the subnormal range. */ -HIDDEN float -__math_may_uflowf (uint32_t sign) -{ - return xflowf (sign, 0x1.4p-75f); -} - -HIDDEN float -__math_oflowf (uint32_t sign) -{ - return xflowf (sign, 0x1p97f); -} - -HIDDEN float -__math_divzerof (uint32_t sign) -{ - float y = opt_barrier_float (sign ?
-1.0f : 1.0f) / 0.0f; - return with_errnof (y, ERANGE); -} - -HIDDEN float -__math_invalidf (float x) -{ - float y = (x - x) / (x - x); - return isnan (x) ? y : with_errnof (y, EDOM); -} - -/* Check result and set errno if necessary. */ - -HIDDEN float -__math_check_uflowf (float y) -{ - return y == 0.0f ? with_errnof (y, ERANGE) : y; -} - -HIDDEN float -__math_check_oflowf (float y) -{ - return isinf (y) ? with_errnof (y, ERANGE) : y; -} diff --git a/contrib/arm-optimized-routines/pl/math/pl_sig.h b/contrib/arm-optimized-routines/pl/math/pl_sig.h deleted file mode 100644 index 52d988f0e1ce..000000000000 --- a/contrib/arm-optimized-routines/pl/math/pl_sig.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * PL macros for emitting various ulp/bench entries based on function signature - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception. - */ - -#define V_NAME_F1(fun) _ZGVnN4v_##fun##f -#define V_NAME_D1(fun) _ZGVnN2v_##fun -#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f -#define V_NAME_D2(fun) _ZGVnN2vv_##fun - -#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f -#define SV_NAME_D1(fun) _ZGVsMxv_##fun -#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f -#define SV_NAME_D2(fun) _ZGVsMxvv_##fun - -#define PL_DECL_SF1(fun) float fun##f (float); -#define PL_DECL_SF2(fun) float fun##f (float, float); -#define PL_DECL_SD1(fun) double fun (double); -#define PL_DECL_SD2(fun) double fun (double, double); - -#if WANT_VMATH -# define PL_DECL_VF1(fun) \ - VPCS_ATTR float32x4_t V_NAME_F1 (fun##f) (float32x4_t); -# define PL_DECL_VF2(fun) \ - VPCS_ATTR float32x4_t V_NAME_F2 (fun##f) (float32x4_t, float32x4_t); -# define PL_DECL_VD1(fun) VPCS_ATTR float64x2_t V_NAME_D1 (fun) (float64x2_t); -# define PL_DECL_VD2(fun) \ - VPCS_ATTR float64x2_t V_NAME_D2 (fun) (float64x2_t, float64x2_t); -#else -# define PL_DECL_VF1(fun) -# define PL_DECL_VF2(fun) -# define PL_DECL_VD1(fun) -# define PL_DECL_VD2(fun) -#endif - -#if WANT_SVE_MATH -# define PL_DECL_SVF1(fun) \ - svfloat32_t SV_NAME_F1 (fun) (svfloat32_t, svbool_t); -# define PL_DECL_SVF2(fun) \ - svfloat32_t SV_NAME_F2 (fun) (svfloat32_t, svfloat32_t, svbool_t); -# define PL_DECL_SVD1(fun) \ - svfloat64_t SV_NAME_D1 (fun) (svfloat64_t, svbool_t); -# define PL_DECL_SVD2(fun) \ - svfloat64_t SV_NAME_D2 (fun) (svfloat64_t, svfloat64_t, svbool_t); -#else -# define PL_DECL_SVF1(fun) -# define PL_DECL_SVF2(fun) -# define PL_DECL_SVD1(fun) -# define PL_DECL_SVD2(fun) -#endif - -/* For building the routines, emit function prototype from PL_SIG. This - ensures that the correct signature has been chosen (wrong one will be a - compile error). PL_SIG is defined differently by various components of the - build system to emit entries in the wrappers and entries for mathbench and - ulp. */ -#define PL_SIG(v, t, a, f, ...) PL_DECL_##v##t##a (f) diff --git a/contrib/arm-optimized-routines/pl/math/sv_acosh_3u5.c b/contrib/arm-optimized-routines/pl/math/sv_acosh_3u5.c deleted file mode 100644 index faf351331464..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_acosh_3u5.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Double-precision SVE acosh(x) function. - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" - -#define WANT_SV_LOG1P_K0_SHORTCUT 1 -#include "sv_log1p_inline.h" - -#define BigBoundTop 0x5fe /* top12 (asuint64 (0x1p511)). 
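0x5fe is the biased exponent of 0x1p511: 511 + 1023 = 1534 = 0x5fe.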
*/ -#define OneTop 0x3ff - -static NOINLINE svfloat64_t -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) -{ - return sv_call_f64 (acosh, x, y, special); -} - -/* SVE approximation for double-precision acosh, based on log1p. - The largest observed error is 3.19 ULP in the region where the - argument to log1p falls in the k=0 interval, i.e. x close to 1: - SV_NAME_D1 (acosh)(0x1.1e4388d4ca821p+0) got 0x1.ed23399f5137p-2 - want 0x1.ed23399f51373p-2. */ -svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg) -{ - svuint64_t itop = svlsr_x (pg, svreinterpret_u64 (x), 52); - /* (itop - OneTop) >= (BigBoundTop - OneTop). */ - svbool_t special = svcmpge (pg, svsub_x (pg, itop, OneTop), sv_u64 (0x1ff)); - - svfloat64_t xm1 = svsub_x (pg, x, 1); - svfloat64_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1)); - svfloat64_t y = sv_log1p_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg); - - /* Fall back to scalar routine for special lanes. */ - if (unlikely (svptest_any (pg, special))) - return special_case (x, y, special); - - return y; -} - -PL_SIG (SV, D, 1, acosh, 1.0, 10.0) -PL_TEST_ULP (SV_NAME_D1 (acosh), 2.69) -PL_TEST_INTERVAL (SV_NAME_D1 (acosh), 1, 0x1p511, 90000) -PL_TEST_INTERVAL (SV_NAME_D1 (acosh), 0x1p511, inf, 10000) -PL_TEST_INTERVAL (SV_NAME_D1 (acosh), 0, 1, 1000) -PL_TEST_INTERVAL (SV_NAME_D1 (acosh), -0, -inf, 10000) diff --git a/contrib/arm-optimized-routines/pl/math/sv_acoshf_2u8.c b/contrib/arm-optimized-routines/pl/math/sv_acoshf_2u8.c deleted file mode 100644 index f527083af40a..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_acoshf_2u8.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Single-precision SVE acosh(x) function. - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" - -#define One 0x3f800000 -#define Thres 0x20000000 /* asuint(0x1p64) - One. */ - -#include "sv_log1pf_inline.h" - -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) -{ - return sv_call_f32 (acoshf, x, y, special); -} - -/* Single-precision SVE acosh(x) routine. Implements the same algorithm as - vector acoshf and log1p. - - Maximum error is 2.78 ULPs: - SV_NAME_F1 (acosh) (0x1.01e996p+0) got 0x1.f45b42p-4 - want 0x1.f45b3cp-4. */ -svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg) -{ - svuint32_t ix = svreinterpret_u32 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres); - - svfloat32_t xm1 = svsub_x (pg, x, 1.0f); - svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f)); - svfloat32_t y = sv_log1pf_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg); - - if (unlikely (svptest_any (pg, special))) - return special_case (x, y, special); - return y; -} - -PL_SIG (SV, F, 1, acosh, 1.0, 10.0) -PL_TEST_ULP (SV_NAME_F1 (acosh), 2.29) -PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 0, 1, 500) -PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 1, 0x1p64, 100000) -PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 0x1p64, inf, 1000) -PL_TEST_INTERVAL (SV_NAME_F1 (acosh), -0, -inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/sv_asinh_3u0.c b/contrib/arm-optimized-routines/pl/math/sv_asinh_3u0.c deleted file mode 100644 index 711f0dfdbedc..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_asinh_3u0.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Double-precision SVE asinh(x) function. - * - * Copyright (c) 2022-2023, Arm Limited. 
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "poly_sve_f64.h" -#include "pl_sig.h" -#include "pl_test.h" - -#define OneTop sv_u64 (0x3ff) /* top12(asuint64(1.0f)). */ -#define HugeBound sv_u64 (0x5fe) /* top12(asuint64(0x1p511)). */ -#define TinyBound (0x3e5) /* top12(asuint64(0x1p-26)). */ -#define SignMask (0x8000000000000000) - -/* Constants & data for log. */ -#define A(i) __v_log_data.poly[i] -#define Ln2 (0x1.62e42fefa39efp-1) -#define N (1 << V_LOG_TABLE_BITS) -#define OFF (0x3fe6900900000000) - -static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) -{ - return sv_call_f64 (asinh, x, y, special); -} - -static inline svfloat64_t -__sv_log_inline (svfloat64_t x, const svbool_t pg) -{ - /* Double-precision SVE log, copied from pl/math/sv_log_2u5.c with some - cosmetic modification and special-cases removed. See that file for details - of the algorithm used. */ - svuint64_t ix = svreinterpret_u64 (x); - svuint64_t tmp = svsub_x (pg, ix, OFF); - svuint64_t i - = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1); - svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52); - svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)); - svfloat64_t z = svreinterpret_f64 (iz); - svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i); - svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i); - svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), invc, z); - svfloat64_t kd = svcvt_f64_x (pg, k); - svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, Ln2); - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = svmla_x (pg, sv_f64 (A (2)), r, A (3)); - svfloat64_t p = svmla_x (pg, sv_f64 (A (0)), r, A (1)); - y = svmla_x (pg, y, r2, A (4)); - y = svmla_x (pg, p, r2, y); - y = svmla_x (pg, hi, r2, y); - return y; -} - -/* Double-precision implementation of SVE asinh(x). - asinh is very sensitive around 1, so it is impractical to devise a single - low-cost algorithm which is sufficiently accurate on a wide range of input. - Instead we use two different algorithms: - asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1)) if |x| >= 1 - = sign(x) * (|x| + |x|^3 * P(x^2)) otherwise - where log(x) is an optimized log approximation, and P(x) is a polynomial - shared with the scalar routine. The greatest observed error is 2.51 ULP, in - |x| >= 1: - _ZGVsMxv_asinh(0x1.170469d024505p+0) got 0x1.e3181c43b0f36p-1 - want 0x1.e3181c43b0f39p-1. */ -svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg) -{ - svuint64_t ix = svreinterpret_u64 (x); - svuint64_t iax = svbic_x (pg, ix, SignMask); - svuint64_t sign = svand_x (pg, ix, SignMask); - svfloat64_t ax = svreinterpret_f64 (iax); - svuint64_t top12 = svlsr_x (pg, iax, 52); - - svbool_t ge1 = svcmpge (pg, top12, OneTop); - svbool_t special = svcmpge (pg, top12, HugeBound); - - /* Option 1: |x| >= 1. - Compute asinh(x) according to asinh(x) = log(x + sqrt(x^2 + 1)). */ - svfloat64_t option_1 = sv_f64 (0); - if (likely (svptest_any (pg, ge1))) - { - svfloat64_t axax = svmul_x (pg, ax, ax); - option_1 = __sv_log_inline ( - svadd_x (pg, ax, svsqrt_x (pg, svadd_x (pg, axax, 1))), pg); - } - - /* Option 2: |x| < 1. - Compute asinh(x) using a polynomial. - The largest observed error in this region is 1.51 ULPs: - _ZGVsMxv_asinh(0x1.fe12bf8c616a2p-1) got 0x1.c1e649ee2681bp-1 - want 0x1.c1e649ee2681dp-1.
*/ - svfloat64_t option_2 = sv_f64 (0); - if (likely (svptest_any (pg, svnot_z (pg, ge1)))) - { - svfloat64_t x2 = svmul_x (pg, ax, ax); - svfloat64_t z2 = svmul_x (pg, x2, x2); - svfloat64_t z4 = svmul_x (pg, z2, z2); - svfloat64_t z8 = svmul_x (pg, z4, z4); - svfloat64_t z16 = svmul_x (pg, z8, z8); - svfloat64_t p - = sv_estrin_17_f64_x (pg, x2, z2, z4, z8, z16, __asinh_data.poly); - option_2 = svmla_x (pg, ax, p, svmul_x (pg, x2, ax)); - } - - /* Choose the right option for each lane. */ - svfloat64_t y = svsel (ge1, option_1, option_2); - - /* Apply sign of x to y. */ - y = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)); - - if (unlikely (svptest_any (pg, special))) - return special_case (x, y, special); - return y; -} - -PL_SIG (SV, D, 1, asinh, -10.0, 10.0) -PL_TEST_ULP (SV_NAME_D1 (asinh), 2.52) -/* Test vector asinh 3 times, with control lane < 1, > 1 and special. - Ensures the svsel is choosing the right option in all cases. */ -#define SV_ASINH_INTERVAL(lo, hi, n) \ - PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (asinh), lo, hi, n, 0.5) \ - PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (asinh), lo, hi, n, 2) \ - PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (asinh), lo, hi, n, 0x1p600) -SV_ASINH_INTERVAL (0, 0x1p-26, 50000) -SV_ASINH_INTERVAL (0x1p-26, 1, 50000) -SV_ASINH_INTERVAL (1, 0x1p511, 50000) -SV_ASINH_INTERVAL (0x1p511, inf, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/sv_coshf_2u.c b/contrib/arm-optimized-routines/pl/math/sv_coshf_2u.c deleted file mode 100644 index 81680fef318e..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_coshf_2u.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Single-precision SVE cosh(x) function. - * - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" - -#include "sv_expf_inline.h" - -static const struct data -{ - struct sv_expf_data expf_consts; - uint32_t special_bound; -} data = { - .expf_consts = SV_EXPF_DATA, - /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */ - .special_bound = 0x42ad496c, -}; - -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t pg) -{ - return sv_call_f32 (coshf, x, y, pg); -} - -/* Single-precision vector cosh, using vector expf. - Maximum error is 1.89 ULP: - _ZGVsMxv_coshf (-0x1.65898cp+6) got 0x1.f00aep+127 - want 0x1.f00adcp+127. */ -svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg) -{ - const struct data *d = ptr_barrier (&data); - - svfloat32_t ax = svabs_x (pg, x); - svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->special_bound); - - /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. 
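Since exp(-x) = 1/exp(x), this is t/2 + (1/t)/2 = t/2 + 0.5/t for t = exp(|x|); svdivr below computes the 0.5/t term.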
*/ - svfloat32_t t = expf_inline (ax, pg, &d->expf_consts); - svfloat32_t half_t = svmul_x (pg, t, 0.5); - svfloat32_t half_over_t = svdivr_x (pg, t, 0.5); - - if (unlikely (svptest_any (pg, special))) - return special_case (x, svadd_x (pg, half_t, half_over_t), special); - - return svadd_x (pg, half_t, half_over_t); -} - -PL_SIG (SV, F, 1, cosh, -10.0, 10.0) -PL_TEST_ULP (SV_NAME_F1 (cosh), 1.39) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0, 0x1p-63, 100) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0, 0x1.5a92d8p+6, 80000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0x1.5a92d8p+6, inf, 2000) diff --git a/contrib/arm-optimized-routines/pl/math/sv_erf_data.c b/contrib/arm-optimized-routines/pl/math/sv_erf_data.c deleted file mode 100644 index 7244aceda5a5..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_erf_data.c +++ /dev/null @@ -1,1558 +0,0 @@ -/* - * Data for approximation of erf. - * - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -/* Lookup table used in vector erf. - For each possible rounded input r (multiples of 1/128), between - r = 0.0 and r = 6.0 (769 values): - - the first entry __erf_data.tab.erf contains the values of erf(r), - - the second entry __erf_data.tab.scale contains the values of - 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the - algorithm, since lookup is performed only for x >= 1/64-1/512. */ -const struct sv_erf_data __sv_erf_data = { - .erf = { 0x0.0000000000000p+0, - 0x1.20dbf3deb1340p-7, - 0x1.20d77083f17a0p-6, - 0x1.b137e0cf584dcp-6, - 0x1.20c5645dd2538p-5, - 0x1.68e5d3bbc9526p-5, - 0x1.b0fafef135745p-5, - 0x1.f902a77bd3821p-5, - 0x1.207d480e90658p-4, - 0x1.44703e87e8593p-4, - 0x1.68591a1e83b5dp-4, - 0x1.8c36beb8a8d23p-4, - 0x1.b0081148a873ap-4, - 0x1.d3cbf7e70a4b3p-4, - 0x1.f78159ec8bb50p-4, - 0x1.0d939005f65e5p-3, - 0x1.1f5e1a35c3b89p-3, - 0x1.311fc15f56d14p-3, - 0x1.42d7fc2f64959p-3, - 0x1.548642321d7c6p-3, - 0x1.662a0bdf7a89fp-3, - 0x1.77c2d2a765f9ep-3, - 0x1.895010fdbdbfdp-3, - 0x1.9ad142662e14dp-3, - 0x1.ac45e37fe2526p-3, - 0x1.bdad72110a648p-3, - 0x1.cf076d1233237p-3, - 0x1.e05354b96ff36p-3, - 0x1.f190aa85540e2p-3, - 0x1.015f78a3dcf3dp-2, - 0x1.09eed6982b948p-2, - 0x1.127631eb8de32p-2, - 0x1.1af54e232d609p-2, - 0x1.236bef825d9a2p-2, - 0x1.2bd9db0f7827fp-2, - 0x1.343ed6989b7d9p-2, - 0x1.3c9aa8b84bedap-2, - 0x1.44ed18d9f6462p-2, - 0x1.4d35ef3e5372ep-2, - 0x1.5574f4ffac98ep-2, - 0x1.5da9f415ff23fp-2, - 0x1.65d4b75b00471p-2, - 0x1.6df50a8dff772p-2, - 0x1.760aba57a76bfp-2, - 0x1.7e15944d9d3e4p-2, - 0x1.861566f5fd3c0p-2, - 0x1.8e0a01cab516bp-2, - 0x1.95f3353cbb146p-2, - 0x1.9dd0d2b721f39p-2, - 0x1.a5a2aca209394p-2, - 0x1.ad68966569a87p-2, - 0x1.b522646bbda68p-2, - 0x1.bccfec24855b8p-2, - 0x1.c4710406a65fcp-2, - 0x1.cc058392a6d2dp-2, - 0x1.d38d4354c3bd0p-2, - 0x1.db081ce6e2a48p-2, - 0x1.e275eaf25e458p-2, - 0x1.e9d68931ae650p-2, - 0x1.f129d471eabb1p-2, - 0x1.f86faa9428f9dp-2, - 0x1.ffa7ea8eb5fd0p-2, - 0x1.03693a371519cp-1, - 0x1.06f794ab2cae7p-1, - 0x1.0a7ef5c18edd2p-1, - 0x1.0dff4f247f6c6p-1, - 0x1.1178930ada115p-1, - 0x1.14eab43841b55p-1, - 0x1.1855a5fd3dd50p-1, - 0x1.1bb95c3746199p-1, - 0x1.1f15cb50bc4dep-1, - 0x1.226ae840d4d70p-1, - 0x1.25b8a88b6dd7fp-1, - 0x1.28ff0240d52cdp-1, - 0x1.2c3debfd7d6c1p-1, - 0x1.2f755ce9a21f4p-1, - 0x1.32a54cb8db67bp-1, - 0x1.35cdb3a9a144dp-1, - 0x1.38ee8a84beb71p-1, - 0x1.3c07ca9cb4f9ep-1, - 0x1.3f196dcd0f135p-1, - 0x1.42236e79a5fa6p-1, - 0x1.4525c78dd5966p-1, - 0x1.4820747ba2dc2p-1, - 0x1.4b13713ad3513p-1, 
- 0x1.4dfeba47f63ccp-1, - 0x1.50e24ca35fd2cp-1, - 0x1.53be25d016a4fp-1, - 0x1.569243d2b3a9bp-1, - 0x1.595ea53035283p-1, - 0x1.5c2348ecc4dc3p-1, - 0x1.5ee02e8a71a53p-1, - 0x1.61955607dd15dp-1, - 0x1.6442bfdedd397p-1, - 0x1.66e86d0312e82p-1, - 0x1.69865ee075011p-1, - 0x1.6c1c9759d0e5fp-1, - 0x1.6eab18c74091bp-1, - 0x1.7131e5f496a5ap-1, - 0x1.73b1021fc0cb8p-1, - 0x1.762870f720c6fp-1, - 0x1.78983697dc96fp-1, - 0x1.7b00578c26037p-1, - 0x1.7d60d8c979f7bp-1, - 0x1.7fb9bfaed8078p-1, - 0x1.820b1202f27fbp-1, - 0x1.8454d5f25760dp-1, - 0x1.8697120d92a4ap-1, - 0x1.88d1cd474a2e0p-1, - 0x1.8b050ef253c37p-1, - 0x1.8d30debfc572ep-1, - 0x1.8f5544bd00c04p-1, - 0x1.91724951b8fc6p-1, - 0x1.9387f53df5238p-1, - 0x1.959651980da31p-1, - 0x1.979d67caa6631p-1, - 0x1.999d4192a5715p-1, - 0x1.9b95e8fd26abap-1, - 0x1.9d8768656cc42p-1, - 0x1.9f71ca72cffb6p-1, - 0x1.a1551a16aaeafp-1, - 0x1.a331628a45b92p-1, - 0x1.a506af4cc00f4p-1, - 0x1.a6d50c20fa293p-1, - 0x1.a89c850b7d54dp-1, - 0x1.aa5d265064366p-1, - 0x1.ac16fc7143263p-1, - 0x1.adca142b10f98p-1, - 0x1.af767a741088bp-1, - 0x1.b11c3c79bb424p-1, - 0x1.b2bb679ead19cp-1, - 0x1.b4540978921eep-1, - 0x1.b5e62fce16095p-1, - 0x1.b771e894d602ep-1, - 0x1.b8f741ef54f83p-1, - 0x1.ba764a2af2b78p-1, - 0x1.bbef0fbde6221p-1, - 0x1.bd61a1453ab44p-1, - 0x1.bece0d82d1a5cp-1, - 0x1.c034635b66e23p-1, - 0x1.c194b1d49a184p-1, - 0x1.c2ef0812fc1bdp-1, - 0x1.c443755820d64p-1, - 0x1.c5920900b5fd1p-1, - 0x1.c6dad2829ec62p-1, - 0x1.c81de16b14cefp-1, - 0x1.c95b455cce69dp-1, - 0x1.ca930e0e2a825p-1, - 0x1.cbc54b476248dp-1, - 0x1.ccf20ce0c0d27p-1, - 0x1.ce1962c0e0d8bp-1, - 0x1.cf3b5cdaf0c39p-1, - 0x1.d0580b2cfd249p-1, - 0x1.d16f7dbe41ca0p-1, - 0x1.d281c49d818d0p-1, - 0x1.d38eefdf64fddp-1, - 0x1.d4970f9ce00d9p-1, - 0x1.d59a33f19ed42p-1, - 0x1.d6986cfa798e7p-1, - 0x1.d791cad3eff01p-1, - 0x1.d8865d98abe01p-1, - 0x1.d97635600bb89p-1, - 0x1.da61623cb41e0p-1, - 0x1.db47f43b2980dp-1, - 0x1.dc29fb60715afp-1, - 0x1.dd0787a8bb39dp-1, - 0x1.dde0a90611a0dp-1, - 0x1.deb56f5f12d28p-1, - 0x1.df85ea8db188ep-1, - 0x1.e0522a5dfda73p-1, - 0x1.e11a3e8cf4eb8p-1, - 0x1.e1de36c75ba58p-1, - 0x1.e29e22a89d766p-1, - 0x1.e35a11b9b61cep-1, - 0x1.e4121370224ccp-1, - 0x1.e4c6372cd8927p-1, - 0x1.e5768c3b4a3fcp-1, - 0x1.e62321d06c5e0p-1, - 0x1.e6cc0709c8a0dp-1, - 0x1.e7714aec96534p-1, - 0x1.e812fc64db369p-1, - 0x1.e8b12a44944a8p-1, - 0x1.e94be342e6743p-1, - 0x1.e9e335fb56f87p-1, - 0x1.ea7730ed0bbb9p-1, - 0x1.eb07e27a133aap-1, - 0x1.eb9558e6b42cep-1, - 0x1.ec1fa258c4beap-1, - 0x1.eca6ccd709544p-1, - 0x1.ed2ae6489ac1ep-1, - 0x1.edabfc7453e63p-1, - 0x1.ee2a1d004692cp-1, - 0x1.eea5557137ae0p-1, - 0x1.ef1db32a2277cp-1, - 0x1.ef93436bc2daap-1, - 0x1.f006135426b26p-1, - 0x1.f0762fde45ee6p-1, - 0x1.f0e3a5e1a1788p-1, - 0x1.f14e8211e8c55p-1, - 0x1.f1b6d0fea5f4dp-1, - 0x1.f21c9f12f0677p-1, - 0x1.f27ff89525acfp-1, - 0x1.f2e0e9a6a8b09p-1, - 0x1.f33f7e43a706bp-1, - 0x1.f39bc242e43e6p-1, - 0x1.f3f5c1558b19ep-1, - 0x1.f44d870704911p-1, - 0x1.f4a31ebcd47dfp-1, - 0x1.f4f693b67bd77p-1, - 0x1.f547f10d60597p-1, - 0x1.f59741b4b97cfp-1, - 0x1.f5e4907982a07p-1, - 0x1.f62fe80272419p-1, - 0x1.f67952cff6282p-1, - 0x1.f6c0db3c34641p-1, - 0x1.f7068b7b10fd9p-1, - 0x1.f74a6d9a38383p-1, - 0x1.f78c8b812d498p-1, - 0x1.f7cceef15d631p-1, - 0x1.f80ba18636f07p-1, - 0x1.f848acb544e95p-1, - 0x1.f88419ce4e184p-1, - 0x1.f8bdf1fb78370p-1, - 0x1.f8f63e416ebffp-1, - 0x1.f92d077f8d56dp-1, - 0x1.f96256700da8ep-1, - 0x1.f99633a838a57p-1, - 0x1.f9c8a7989af0dp-1, - 0x1.f9f9ba8d3c733p-1, - 0x1.fa2974addae45p-1, - 0x1.fa57ddfe27376p-1, - 0x1.fa84fe5e05c8dp-1, - 0x1.fab0dd89d1309p-1, - 
0x1.fadb831a9f9c3p-1, - 0x1.fb04f6868a944p-1, - 0x1.fb2d3f20f9101p-1, - 0x1.fb54641aebbc9p-1, - 0x1.fb7a6c834b5a2p-1, - 0x1.fb9f5f4739170p-1, - 0x1.fbc3433260ca5p-1, - 0x1.fbe61eef4cf6ap-1, - 0x1.fc07f907bc794p-1, - 0x1.fc28d7e4f9cd0p-1, - 0x1.fc48c1d033c7ap-1, - 0x1.fc67bcf2d7b8fp-1, - 0x1.fc85cf56ecd38p-1, - 0x1.fca2fee770c79p-1, - 0x1.fcbf5170b578bp-1, - 0x1.fcdacca0bfb73p-1, - 0x1.fcf57607a6e7cp-1, - 0x1.fd0f5317f582fp-1, - 0x1.fd2869270a56fp-1, - 0x1.fd40bd6d7a785p-1, - 0x1.fd58550773cb5p-1, - 0x1.fd6f34f52013ap-1, - 0x1.fd85621b0876dp-1, - 0x1.fd9ae142795e3p-1, - 0x1.fdafb719e6a69p-1, - 0x1.fdc3e835500b3p-1, - 0x1.fdd7790ea5bc0p-1, - 0x1.fdea6e062d0c9p-1, - 0x1.fdfccb62e52d3p-1, - 0x1.fe0e9552ebdd6p-1, - 0x1.fe1fcfebe2083p-1, - 0x1.fe307f2b503d0p-1, - 0x1.fe40a6f70af4bp-1, - 0x1.fe504b1d9696cp-1, - 0x1.fe5f6f568b301p-1, - 0x1.fe6e1742f7cf6p-1, - 0x1.fe7c466dc57a1p-1, - 0x1.fe8a004c19ae6p-1, - 0x1.fe97483db8670p-1, - 0x1.fea4218d6594ap-1, - 0x1.feb08f7146046p-1, - 0x1.febc950b3fa75p-1, - 0x1.fec835695932ep-1, - 0x1.fed37386190fbp-1, - 0x1.fede5248e38f4p-1, - 0x1.fee8d486585eep-1, - 0x1.fef2fd00af31ap-1, - 0x1.fefcce6813974p-1, - 0x1.ff064b5afffbep-1, - 0x1.ff0f766697c76p-1, - 0x1.ff18520700971p-1, - 0x1.ff20e0a7ba8c2p-1, - 0x1.ff2924a3f7a83p-1, - 0x1.ff312046f2339p-1, - 0x1.ff38d5cc4227fp-1, - 0x1.ff404760319b4p-1, - 0x1.ff47772010262p-1, - 0x1.ff4e671a85425p-1, - 0x1.ff55194fe19dfp-1, - 0x1.ff5b8fb26f5f6p-1, - 0x1.ff61cc26c1578p-1, - 0x1.ff67d08401202p-1, - 0x1.ff6d9e943c231p-1, - 0x1.ff733814af88cp-1, - 0x1.ff789eb6130c9p-1, - 0x1.ff7dd41ce2b4dp-1, - 0x1.ff82d9e1a76d8p-1, - 0x1.ff87b1913e853p-1, - 0x1.ff8c5cad200a5p-1, - 0x1.ff90dcaba4096p-1, - 0x1.ff9532f846ab0p-1, - 0x1.ff9960f3eb327p-1, - 0x1.ff9d67f51ddbap-1, - 0x1.ffa14948549a7p-1, - 0x1.ffa506302ebaep-1, - 0x1.ffa89fe5b3625p-1, - 0x1.ffac17988ef4bp-1, - 0x1.ffaf6e6f4f5c0p-1, - 0x1.ffb2a5879f35ep-1, - 0x1.ffb5bdf67fe6fp-1, - 0x1.ffb8b8c88295fp-1, - 0x1.ffbb970200110p-1, - 0x1.ffbe599f4f9d9p-1, - 0x1.ffc10194fcb64p-1, - 0x1.ffc38fcffbb7cp-1, - 0x1.ffc60535dd7f5p-1, - 0x1.ffc862a501fd7p-1, - 0x1.ffcaa8f4c9beap-1, - 0x1.ffccd8f5c66d1p-1, - 0x1.ffcef371ea4d7p-1, - 0x1.ffd0f92cb6ba7p-1, - 0x1.ffd2eae369a07p-1, - 0x1.ffd4c94d29fdbp-1, - 0x1.ffd6951b33686p-1, - 0x1.ffd84ef9009eep-1, - 0x1.ffd9f78c7524ap-1, - 0x1.ffdb8f7605ee7p-1, - 0x1.ffdd1750e1220p-1, - 0x1.ffde8fb314ebfp-1, - 0x1.ffdff92db56e5p-1, - 0x1.ffe1544d01ccbp-1, - 0x1.ffe2a1988857cp-1, - 0x1.ffe3e19349dc7p-1, - 0x1.ffe514bbdc197p-1, - 0x1.ffe63b8c8b5f7p-1, - 0x1.ffe7567b7b5e1p-1, - 0x1.ffe865fac722bp-1, - 0x1.ffe96a78a04a9p-1, - 0x1.ffea645f6d6dap-1, - 0x1.ffeb5415e7c44p-1, - 0x1.ffec39ff380b9p-1, - 0x1.ffed167b12ac2p-1, - 0x1.ffede9e5d3262p-1, - 0x1.ffeeb49896c6dp-1, - 0x1.ffef76e956a9fp-1, - 0x1.fff0312b010b5p-1, - 0x1.fff0e3ad91ec2p-1, - 0x1.fff18ebe2b0e1p-1, - 0x1.fff232a72b48ep-1, - 0x1.fff2cfb0453d9p-1, - 0x1.fff3661e9569dp-1, - 0x1.fff3f634b79f9p-1, - 0x1.fff48032dbe40p-1, - 0x1.fff50456dab8cp-1, - 0x1.fff582dc48d30p-1, - 0x1.fff5fbfc8a439p-1, - 0x1.fff66feee5129p-1, - 0x1.fff6dee89352ep-1, - 0x1.fff7491cd4af6p-1, - 0x1.fff7aebcff755p-1, - 0x1.fff80ff8911fdp-1, - 0x1.fff86cfd3e657p-1, - 0x1.fff8c5f702ccfp-1, - 0x1.fff91b102fca8p-1, - 0x1.fff96c717b695p-1, - 0x1.fff9ba420e834p-1, - 0x1.fffa04a7928b1p-1, - 0x1.fffa4bc63ee9ap-1, - 0x1.fffa8fc0e5f33p-1, - 0x1.fffad0b901755p-1, - 0x1.fffb0ecebee1bp-1, - 0x1.fffb4a210b172p-1, - 0x1.fffb82cd9dcbfp-1, - 0x1.fffbb8f1049c6p-1, - 0x1.fffbeca6adbe9p-1, - 0x1.fffc1e08f25f5p-1, - 0x1.fffc4d3120aa1p-1, - 0x1.fffc7a37857d2p-1, - 
0x1.fffca53375ce3p-1, - 0x1.fffcce3b57bffp-1, - 0x1.fffcf564ab6b7p-1, - 0x1.fffd1ac4135f9p-1, - 0x1.fffd3e6d5cd87p-1, - 0x1.fffd607387b07p-1, - 0x1.fffd80e8ce0dap-1, - 0x1.fffd9fdeabccep-1, - 0x1.fffdbd65e5ad0p-1, - 0x1.fffdd98e903b2p-1, - 0x1.fffdf46816833p-1, - 0x1.fffe0e0140857p-1, - 0x1.fffe26683972ap-1, - 0x1.fffe3daa95b18p-1, - 0x1.fffe53d558ae9p-1, - 0x1.fffe68f4fa777p-1, - 0x1.fffe7d156d244p-1, - 0x1.fffe904222101p-1, - 0x1.fffea2860ee1ep-1, - 0x1.fffeb3ebb267bp-1, - 0x1.fffec47d19457p-1, - 0x1.fffed443e2787p-1, - 0x1.fffee34943b15p-1, - 0x1.fffef1960d85dp-1, - 0x1.fffeff32af7afp-1, - 0x1.ffff0c273bea2p-1, - 0x1.ffff187b6bc0ep-1, - 0x1.ffff2436a21dcp-1, - 0x1.ffff2f5fefcaap-1, - 0x1.ffff39fe16963p-1, - 0x1.ffff44178c8d2p-1, - 0x1.ffff4db27f146p-1, - 0x1.ffff56d4d5e5ep-1, - 0x1.ffff5f8435efcp-1, - 0x1.ffff67c604180p-1, - 0x1.ffff6f9f67e55p-1, - 0x1.ffff77154e0d6p-1, - 0x1.ffff7e2c6aea2p-1, - 0x1.ffff84e93cd75p-1, - 0x1.ffff8b500e77cp-1, - 0x1.ffff9164f8e46p-1, - 0x1.ffff972be5c59p-1, - 0x1.ffff9ca891572p-1, - 0x1.ffffa1de8c582p-1, - 0x1.ffffa6d13de73p-1, - 0x1.ffffab83e54b8p-1, - 0x1.ffffaff99bac4p-1, - 0x1.ffffb43555b5fp-1, - 0x1.ffffb839e52f3p-1, - 0x1.ffffbc09fa7cdp-1, - 0x1.ffffbfa82616bp-1, - 0x1.ffffc316d9ed0p-1, - 0x1.ffffc6586abf6p-1, - 0x1.ffffc96f1165ep-1, - 0x1.ffffcc5cec0c1p-1, - 0x1.ffffcf23ff5fcp-1, - 0x1.ffffd1c637b2bp-1, - 0x1.ffffd4456a10dp-1, - 0x1.ffffd6a3554a1p-1, - 0x1.ffffd8e1a2f22p-1, - 0x1.ffffdb01e8546p-1, - 0x1.ffffdd05a75eap-1, - 0x1.ffffdeee4f810p-1, - 0x1.ffffe0bd3e852p-1, - 0x1.ffffe273c15b7p-1, - 0x1.ffffe41314e06p-1, - 0x1.ffffe59c6698bp-1, - 0x1.ffffe710d565ep-1, - 0x1.ffffe8717232dp-1, - 0x1.ffffe9bf4098cp-1, - 0x1.ffffeafb377d5p-1, - 0x1.ffffec2641a9ep-1, - 0x1.ffffed413e5b7p-1, - 0x1.ffffee4d01cd6p-1, - 0x1.ffffef4a55bd4p-1, - 0x1.fffff039f9e8fp-1, - 0x1.fffff11ca4876p-1, - 0x1.fffff1f302bc1p-1, - 0x1.fffff2bdb904dp-1, - 0x1.fffff37d63a36p-1, - 0x1.fffff43297019p-1, - 0x1.fffff4dde0118p-1, - 0x1.fffff57fc4a95p-1, - 0x1.fffff618c3da6p-1, - 0x1.fffff6a956450p-1, - 0x1.fffff731ee681p-1, - 0x1.fffff7b2f8ed6p-1, - 0x1.fffff82cdcf1bp-1, - 0x1.fffff89ffc4aap-1, - 0x1.fffff90cb3c81p-1, - 0x1.fffff9735b73bp-1, - 0x1.fffff9d446cccp-1, - 0x1.fffffa2fc5015p-1, - 0x1.fffffa8621251p-1, - 0x1.fffffad7a2652p-1, - 0x1.fffffb248c39dp-1, - 0x1.fffffb6d1e95dp-1, - 0x1.fffffbb196132p-1, - 0x1.fffffbf22c1e2p-1, - 0x1.fffffc2f171e3p-1, - 0x1.fffffc688a9cfp-1, - 0x1.fffffc9eb76acp-1, - 0x1.fffffcd1cbc28p-1, - 0x1.fffffd01f36afp-1, - 0x1.fffffd2f57d68p-1, - 0x1.fffffd5a2041fp-1, - 0x1.fffffd8271d12p-1, - 0x1.fffffda86faa9p-1, - 0x1.fffffdcc3b117p-1, - 0x1.fffffdedf37edp-1, - 0x1.fffffe0db6b91p-1, - 0x1.fffffe2ba0ea5p-1, - 0x1.fffffe47ccb60p-1, - 0x1.fffffe62534d4p-1, - 0x1.fffffe7b4c81ep-1, - 0x1.fffffe92ced93p-1, - 0x1.fffffea8ef9cfp-1, - 0x1.fffffebdc2ec6p-1, - 0x1.fffffed15bcbap-1, - 0x1.fffffee3cc32cp-1, - 0x1.fffffef5251c2p-1, - 0x1.ffffff0576917p-1, - 0x1.ffffff14cfb92p-1, - 0x1.ffffff233ee1dp-1, - 0x1.ffffff30d18e8p-1, - 0x1.ffffff3d9480fp-1, - 0x1.ffffff4993c46p-1, - 0x1.ffffff54dab72p-1, - 0x1.ffffff5f74141p-1, - 0x1.ffffff6969fb8p-1, - 0x1.ffffff72c5fb6p-1, - 0x1.ffffff7b91176p-1, - 0x1.ffffff83d3d07p-1, - 0x1.ffffff8b962bep-1, - 0x1.ffffff92dfba2p-1, - 0x1.ffffff99b79d2p-1, - 0x1.ffffffa0248e8p-1, - 0x1.ffffffa62ce54p-1, - 0x1.ffffffabd69b4p-1, - 0x1.ffffffb127525p-1, - 0x1.ffffffb624592p-1, - 0x1.ffffffbad2affp-1, - 0x1.ffffffbf370cdp-1, - 0x1.ffffffc355dfdp-1, - 0x1.ffffffc733572p-1, - 0x1.ffffffcad3626p-1, - 0x1.ffffffce39b67p-1, - 0x1.ffffffd169d0cp-1, - 
0x1.ffffffd466fa5p-1, - 0x1.ffffffd7344aap-1, - 0x1.ffffffd9d4aabp-1, - 0x1.ffffffdc4ad7ap-1, - 0x1.ffffffde9964ep-1, - 0x1.ffffffe0c2bf0p-1, - 0x1.ffffffe2c92dbp-1, - 0x1.ffffffe4aed5ep-1, - 0x1.ffffffe675bbdp-1, - 0x1.ffffffe81fc4ep-1, - 0x1.ffffffe9aeb97p-1, - 0x1.ffffffeb24467p-1, - 0x1.ffffffec81ff2p-1, - 0x1.ffffffedc95e7p-1, - 0x1.ffffffeefbc85p-1, - 0x1.fffffff01a8b6p-1, - 0x1.fffffff126e1ep-1, - 0x1.fffffff221f30p-1, - 0x1.fffffff30cd3fp-1, - 0x1.fffffff3e8892p-1, - 0x1.fffffff4b606fp-1, - 0x1.fffffff57632dp-1, - 0x1.fffffff629e44p-1, - 0x1.fffffff6d1e56p-1, - 0x1.fffffff76ef3fp-1, - 0x1.fffffff801c1fp-1, - 0x1.fffffff88af67p-1, - 0x1.fffffff90b2e3p-1, - 0x1.fffffff982fc1p-1, - 0x1.fffffff9f2e9fp-1, - 0x1.fffffffa5b790p-1, - 0x1.fffffffabd229p-1, - 0x1.fffffffb18582p-1, - 0x1.fffffffb6d844p-1, - 0x1.fffffffbbd0aap-1, - 0x1.fffffffc0748fp-1, - 0x1.fffffffc4c96cp-1, - 0x1.fffffffc8d462p-1, - 0x1.fffffffcc9a41p-1, - 0x1.fffffffd01f89p-1, - 0x1.fffffffd36871p-1, - 0x1.fffffffd678edp-1, - 0x1.fffffffd954aep-1, - 0x1.fffffffdbff2ap-1, - 0x1.fffffffde7ba0p-1, - 0x1.fffffffe0cd16p-1, - 0x1.fffffffe2f664p-1, - 0x1.fffffffe4fa30p-1, - 0x1.fffffffe6daf7p-1, - 0x1.fffffffe89b0cp-1, - 0x1.fffffffea3c9ap-1, - 0x1.fffffffebc1a9p-1, - 0x1.fffffffed2c21p-1, - 0x1.fffffffee7dc8p-1, - 0x1.fffffffefb847p-1, - 0x1.ffffffff0dd2bp-1, - 0x1.ffffffff1ede9p-1, - 0x1.ffffffff2ebdap-1, - 0x1.ffffffff3d843p-1, - 0x1.ffffffff4b453p-1, - 0x1.ffffffff58126p-1, - 0x1.ffffffff63fc3p-1, - 0x1.ffffffff6f121p-1, - 0x1.ffffffff79626p-1, - 0x1.ffffffff82fabp-1, - 0x1.ffffffff8be77p-1, - 0x1.ffffffff94346p-1, - 0x1.ffffffff9bec8p-1, - 0x1.ffffffffa319fp-1, - 0x1.ffffffffa9c63p-1, - 0x1.ffffffffaffa4p-1, - 0x1.ffffffffb5be5p-1, - 0x1.ffffffffbb1a2p-1, - 0x1.ffffffffc014ep-1, - 0x1.ffffffffc4b56p-1, - 0x1.ffffffffc901cp-1, - 0x1.ffffffffccfffp-1, - 0x1.ffffffffd0b56p-1, - 0x1.ffffffffd4271p-1, - 0x1.ffffffffd759dp-1, - 0x1.ffffffffda520p-1, - 0x1.ffffffffdd13cp-1, - 0x1.ffffffffdfa2dp-1, - 0x1.ffffffffe202dp-1, - 0x1.ffffffffe4371p-1, - 0x1.ffffffffe642ap-1, - 0x1.ffffffffe8286p-1, - 0x1.ffffffffe9eb0p-1, - 0x1.ffffffffeb8d0p-1, - 0x1.ffffffffed10ap-1, - 0x1.ffffffffee782p-1, - 0x1.ffffffffefc57p-1, - 0x1.fffffffff0fa7p-1, - 0x1.fffffffff218fp-1, - 0x1.fffffffff3227p-1, - 0x1.fffffffff4188p-1, - 0x1.fffffffff4fc9p-1, - 0x1.fffffffff5cfdp-1, - 0x1.fffffffff6939p-1, - 0x1.fffffffff748ep-1, - 0x1.fffffffff7f0dp-1, - 0x1.fffffffff88c5p-1, - 0x1.fffffffff91c6p-1, - 0x1.fffffffff9a1bp-1, - 0x1.fffffffffa1d2p-1, - 0x1.fffffffffa8f6p-1, - 0x1.fffffffffaf92p-1, - 0x1.fffffffffb5b0p-1, - 0x1.fffffffffbb58p-1, - 0x1.fffffffffc095p-1, - 0x1.fffffffffc56dp-1, - 0x1.fffffffffc9e8p-1, - 0x1.fffffffffce0dp-1, - 0x1.fffffffffd1e1p-1, - 0x1.fffffffffd56cp-1, - 0x1.fffffffffd8b3p-1, - 0x1.fffffffffdbbap-1, - 0x1.fffffffffde86p-1, - 0x1.fffffffffe11dp-1, - 0x1.fffffffffe380p-1, - 0x1.fffffffffe5b6p-1, - 0x1.fffffffffe7c0p-1, - 0x1.fffffffffe9a2p-1, - 0x1.fffffffffeb60p-1, - 0x1.fffffffffecfbp-1, - 0x1.fffffffffee77p-1, - 0x1.fffffffffefd6p-1, - 0x1.ffffffffff11ap-1, - 0x1.ffffffffff245p-1, - 0x1.ffffffffff359p-1, - 0x1.ffffffffff457p-1, - 0x1.ffffffffff542p-1, - 0x1.ffffffffff61bp-1, - 0x1.ffffffffff6e3p-1, - 0x1.ffffffffff79bp-1, - 0x1.ffffffffff845p-1, - 0x1.ffffffffff8e2p-1, - 0x1.ffffffffff973p-1, - 0x1.ffffffffff9f8p-1, - 0x1.ffffffffffa73p-1, - 0x1.ffffffffffae4p-1, - 0x1.ffffffffffb4cp-1, - 0x1.ffffffffffbadp-1, - 0x1.ffffffffffc05p-1, - 0x1.ffffffffffc57p-1, - 0x1.ffffffffffca2p-1, - 0x1.ffffffffffce7p-1, - 0x1.ffffffffffd27p-1, - 
0x1.ffffffffffd62p-1, - 0x1.ffffffffffd98p-1, - 0x1.ffffffffffdcap-1, - 0x1.ffffffffffdf8p-1, - 0x1.ffffffffffe22p-1, - 0x1.ffffffffffe49p-1, - 0x1.ffffffffffe6cp-1, - 0x1.ffffffffffe8dp-1, - 0x1.ffffffffffeabp-1, - 0x1.ffffffffffec7p-1, - 0x1.ffffffffffee1p-1, - 0x1.ffffffffffef8p-1, - 0x1.fffffffffff0ep-1, - 0x1.fffffffffff22p-1, - 0x1.fffffffffff34p-1, - 0x1.fffffffffff45p-1, - 0x1.fffffffffff54p-1, - 0x1.fffffffffff62p-1, - 0x1.fffffffffff6fp-1, - 0x1.fffffffffff7bp-1, - 0x1.fffffffffff86p-1, - 0x1.fffffffffff90p-1, - 0x1.fffffffffff9ap-1, - 0x1.fffffffffffa2p-1, - 0x1.fffffffffffaap-1, - 0x1.fffffffffffb1p-1, - 0x1.fffffffffffb8p-1, - 0x1.fffffffffffbep-1, - 0x1.fffffffffffc3p-1, - 0x1.fffffffffffc8p-1, - 0x1.fffffffffffcdp-1, - 0x1.fffffffffffd1p-1, - 0x1.fffffffffffd5p-1, - 0x1.fffffffffffd9p-1, - 0x1.fffffffffffdcp-1, - 0x1.fffffffffffdfp-1, - 0x1.fffffffffffe2p-1, - 0x1.fffffffffffe4p-1, - 0x1.fffffffffffe7p-1, - 0x1.fffffffffffe9p-1, - 0x1.fffffffffffebp-1, - 0x1.fffffffffffedp-1, - 0x1.fffffffffffeep-1, - 0x1.ffffffffffff0p-1, - 0x1.ffffffffffff1p-1, - 0x1.ffffffffffff3p-1, - 0x1.ffffffffffff4p-1, - 0x1.ffffffffffff5p-1, - 0x1.ffffffffffff6p-1, - 0x1.ffffffffffff7p-1, - 0x1.ffffffffffff7p-1, - 0x1.ffffffffffff8p-1, - 0x1.ffffffffffff9p-1, - 0x1.ffffffffffff9p-1, - 0x1.ffffffffffffap-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffcp-1, - 0x1.ffffffffffffcp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - }, - .scale = { 0x1.20dd750429b6dp+0, - 0x1.20d8f1975c85dp+0, - 0x1.20cb67bd452c7p+0, - 0x1.20b4d8bac36c1p+0, - 0x1.209546ad13ccfp+0, - 0x1.206cb4897b148p+0, - 0x1.203b261cd0052p+0, - 0x1.2000a00ae3804p+0, - 0x1.1fbd27cdc72d3p+0, - 0x1.1f70c3b4f2cc7p+0, - 0x1.1f1b7ae44867fp+0, - 0x1.1ebd5552f795bp+0, - 0x1.1e565bca400d4p+0, - 0x1.1de697e413d28p+0, - 0x1.1d6e14099944ap+0, - 0x1.1cecdb718d61cp+0, - 0x1.1c62fa1e869b6p+0, - 0x1.1bd07cdd189acp+0, - 0x1.1b357141d95d5p+0, - 0x1.1a91e5a748165p+0, - 0x1.19e5e92b964abp+0, - 0x1.19318bae53a04p+0, - 0x1.1874ddcdfce24p+0, - 0x1.17aff0e56ec10p+0, - 0x1.16e2d7093cd8cp+0, - 0x1.160da304ed92fp+0, - 0x1.153068581b781p+0, - 0x1.144b3b337c90cp+0, - 0x1.135e3075d076bp+0, - 0x1.12695da8b5bdep+0, - 0x1.116cd8fd67618p+0, - 0x1.1068b94962e5ep+0, - 0x1.0f5d1602f7e41p+0, - 0x1.0e4a073dc1b91p+0, - 0x1.0d2fa5a70c168p+0, - 0x1.0c0e0a8223359p+0, - 0x1.0ae54fa490722p+0, - 0x1.09b58f724416bp+0, - 0x1.087ee4d9ad247p+0, - 0x1.07416b4fbfe7cp+0, - 0x1.05fd3ecbec297p+0, - 0x1.04b27bc403d30p+0, - 0x1.03613f2812dafp+0, - 0x1.0209a65e29545p+0, - 0x1.00abcf3e187a9p+0, - 0x1.fe8fb01a47307p-1, - 0x1.fbbbbef34b4b2p-1, - 0x1.f8dc092d58ff8p-1, - 0x1.f5f0cdaf15313p-1, - 0x1.f2fa4c16c0019p-1, - 0x1.eff8c4b1375dbp-1, - 0x1.ecec7870ebca7p-1, - 0x1.e9d5a8e4c934ep-1, - 0x1.e6b4982f158b9p-1, - 0x1.e38988fc46e72p-1, - 
0x1.e054be79d3042p-1, - 0x1.dd167c4cf9d2ap-1, - 0x1.d9cf06898cdafp-1, - 0x1.d67ea1a8b5368p-1, - 0x1.d325927fb9d89p-1, - 0x1.cfc41e36c7df9p-1, - 0x1.cc5a8a3fbea40p-1, - 0x1.c8e91c4d01368p-1, - 0x1.c5701a484ef9dp-1, - 0x1.c1efca49a5011p-1, - 0x1.be68728e29d5dp-1, - 0x1.bada596f25436p-1, - 0x1.b745c55905bf8p-1, - 0x1.b3aafcc27502ep-1, - 0x1.b00a46237d5bep-1, - 0x1.ac63e7ecc1411p-1, - 0x1.a8b8287ec6a09p-1, - 0x1.a5074e2157620p-1, - 0x1.a1519efaf889ep-1, - 0x1.9d97610879642p-1, - 0x1.99d8da149c13fp-1, - 0x1.96164fafd8de3p-1, - 0x1.925007283d7aap-1, - 0x1.8e86458169af8p-1, - 0x1.8ab94f6caa71dp-1, - 0x1.86e9694134b9ep-1, - 0x1.8316d6f48133dp-1, - 0x1.7f41dc12c9e89p-1, - 0x1.7b6abbb7aaf19p-1, - 0x1.7791b886e7403p-1, - 0x1.73b714a552763p-1, - 0x1.6fdb11b1e0c34p-1, - 0x1.6bfdf0beddaf5p-1, - 0x1.681ff24b4ab04p-1, - 0x1.6441563c665d4p-1, - 0x1.60625bd75d07bp-1, - 0x1.5c8341bb23767p-1, - 0x1.58a445da7c74cp-1, - 0x1.54c5a57629db0p-1, - 0x1.50e79d1749ac9p-1, - 0x1.4d0a6889dfd9fp-1, - 0x1.492e42d78d2c5p-1, - 0x1.4553664273d24p-1, - 0x1.417a0c4049fd0p-1, - 0x1.3da26d759aef5p-1, - 0x1.39ccc1b136d5ap-1, - 0x1.35f93fe7d1b3dp-1, - 0x1.32281e2fd1a92p-1, - 0x1.2e5991bd4cbfcp-1, - 0x1.2a8dcede3673bp-1, - 0x1.26c508f6bd0ffp-1, - 0x1.22ff727dd6f7bp-1, - 0x1.1f3d3cf9ffe5ap-1, - 0x1.1b7e98fe26217p-1, - 0x1.17c3b626c7a11p-1, - 0x1.140cc3173f007p-1, - 0x1.1059ed7740313p-1, - 0x1.0cab61f084b93p-1, - 0x1.09014c2ca74dap-1, - 0x1.055bd6d32e8d7p-1, - 0x1.01bb2b87c6968p-1, - 0x1.fc3ee5d1524b0p-2, - 0x1.f511a91a67d2ap-2, - 0x1.edeeee0959518p-2, - 0x1.e6d6ffaa65a25p-2, - 0x1.dfca26f5bbf88p-2, - 0x1.d8c8aace11e63p-2, - 0x1.d1d2cfff91594p-2, - 0x1.cae8d93f1d7b6p-2, - 0x1.c40b0729ed547p-2, - 0x1.bd3998457afdap-2, - 0x1.b674c8ffc6283p-2, - 0x1.afbcd3afe8ab6p-2, - 0x1.a911f096fbc26p-2, - 0x1.a27455e14c93cp-2, - 0x1.9be437a7de946p-2, - 0x1.9561c7f23a47bp-2, - 0x1.8eed36b886d93p-2, - 0x1.8886b1e5ecfd1p-2, - 0x1.822e655b417e6p-2, - 0x1.7be47af1f5d89p-2, - 0x1.75a91a7f4d2edp-2, - 0x1.6f7c69d7d3ef8p-2, - 0x1.695e8cd31867ep-2, - 0x1.634fa54fa285fp-2, - 0x1.5d4fd33729015p-2, - 0x1.575f3483021c3p-2, - 0x1.517de540ce2a3p-2, - 0x1.4babff975a04cp-2, - 0x1.45e99bcbb7915p-2, - 0x1.4036d0468a7a2p-2, - 0x1.3a93b1998736cp-2, - 0x1.35005285227f1p-2, - 0x1.2f7cc3fe6f423p-2, - 0x1.2a09153529381p-2, - 0x1.24a55399ea239p-2, - 0x1.1f518ae487dc8p-2, - 0x1.1a0dc51a9934dp-2, - 0x1.14da0a961fd14p-2, - 0x1.0fb6620c550afp-2, - 0x1.0aa2d09497f2bp-2, - 0x1.059f59af7a906p-2, - 0x1.00abff4dec7a3p-2, - 0x1.f79183b101c5bp-3, - 0x1.edeb406d9c824p-3, - 0x1.e4652fadcb6b2p-3, - 0x1.daff4969c0b04p-3, - 0x1.d1b982c501370p-3, - 0x1.c893ce1dcbef7p-3, - 0x1.bf8e1b1ca2279p-3, - 0x1.b6a856c3ed54fp-3, - 0x1.ade26b7fbed95p-3, - 0x1.a53c4135a6526p-3, - 0x1.9cb5bd549b111p-3, - 0x1.944ec2e4f5630p-3, - 0x1.8c07329874652p-3, - 0x1.83deeada4d25ap-3, - 0x1.7bd5c7df3fe9cp-3, - 0x1.73eba3b5b07b7p-3, - 0x1.6c205655be71fp-3, - 0x1.6473b5b15a7a1p-3, - 0x1.5ce595c455b0ap-3, - 0x1.5575c8a468361p-3, - 0x1.4e241e912c305p-3, - 0x1.46f066040a832p-3, - 0x1.3fda6bc016994p-3, - 0x1.38e1fae1d6a9dp-3, - 0x1.3206dceef5f87p-3, - 0x1.2b48d9e5dea1cp-3, - 0x1.24a7b84d38971p-3, - 0x1.1e233d434b813p-3, - 0x1.17bb2c8d41535p-3, - 0x1.116f48a6476ccp-3, - 0x1.0b3f52ce8c383p-3, - 0x1.052b0b1a174eap-3, - 0x1.fe6460fef4680p-4, - 0x1.f2a901ccafb37p-4, - 0x1.e723726b824a9p-4, - 0x1.dbd32ac4c99b0p-4, - 0x1.d0b7a0f921e7cp-4, - 0x1.c5d0497c09e74p-4, - 0x1.bb1c972f23e50p-4, - 0x1.b09bfb7d11a83p-4, - 0x1.a64de673e8837p-4, - 0x1.9c31c6df3b1b8p-4, - 0x1.92470a61b6965p-4, - 0x1.888d1d8e510a3p-4, - 0x1.7f036c0107294p-4, - 
0x1.75a96077274bap-4, - 0x1.6c7e64e7281cbp-4, - 0x1.6381e2980956bp-4, - 0x1.5ab342383d177p-4, - 0x1.5211ebf41880bp-4, - 0x1.499d478bca735p-4, - 0x1.4154bc68d75c3p-4, - 0x1.3937b1b319259p-4, - 0x1.31458e6542847p-4, - 0x1.297db960e4f63p-4, - 0x1.21df9981f8e53p-4, - 0x1.1a6a95b1e786fp-4, - 0x1.131e14fa1625dp-4, - 0x1.0bf97e95f2a64p-4, - 0x1.04fc3a0481321p-4, - 0x1.fc4b5e32d6259p-5, - 0x1.eeea8c1b1db93p-5, - 0x1.e1d4cf1e2450ap-5, - 0x1.d508f9a1ea64ep-5, - 0x1.c885df3451a07p-5, - 0x1.bc4a54a84e834p-5, - 0x1.b055303221015p-5, - 0x1.a4a549829587ep-5, - 0x1.993979e14fffdp-5, - 0x1.8e109c4622913p-5, - 0x1.83298d717210ep-5, - 0x1.78832c03aa2b1p-5, - 0x1.6e1c5893c380bp-5, - 0x1.63f3f5c4de13bp-5, - 0x1.5a08e85af27e0p-5, - 0x1.505a174e9c929p-5, - 0x1.46e66be002240p-5, - 0x1.3dacd1a8d8ccdp-5, - 0x1.34ac36ad8dafep-5, - 0x1.2be38b6d92415p-5, - 0x1.2351c2f2d1449p-5, - 0x1.1af5d2e04f3f6p-5, - 0x1.12ceb37ff9bc3p-5, - 0x1.0adb5fcfa8c75p-5, - 0x1.031ad58d56279p-5, - 0x1.f7182a851bca2p-6, - 0x1.e85c449e377f2p-6, - 0x1.da0005e5f28dfp-6, - 0x1.cc0180af00a8bp-6, - 0x1.be5ecd2fcb5f9p-6, - 0x1.b1160991ff737p-6, - 0x1.a4255a00b9f03p-6, - 0x1.978ae8b55ce1bp-6, - 0x1.8b44e6031383ep-6, - 0x1.7f5188610ddc8p-6, - 0x1.73af0c737bb45p-6, - 0x1.685bb5134ef13p-6, - 0x1.5d55cb54cd53ap-6, - 0x1.529b9e8cf9a1ep-6, - 0x1.482b8455dc491p-6, - 0x1.3e03d891b37dep-6, - 0x1.3422fd6d12e2bp-6, - 0x1.2a875b5ffab56p-6, - 0x1.212f612dee7fbp-6, - 0x1.181983e5133ddp-6, - 0x1.0f443edc5ce49p-6, - 0x1.06ae13b0d3255p-6, - 0x1.fcab1483ea7fcp-7, - 0x1.ec72615a894c4p-7, - 0x1.dcaf3691fc448p-7, - 0x1.cd5ec93c12431p-7, - 0x1.be7e5ac24963bp-7, - 0x1.b00b38d6b3575p-7, - 0x1.a202bd6372dcep-7, - 0x1.94624e78e0fafp-7, - 0x1.87275e3a6869dp-7, - 0x1.7a4f6aca256cbp-7, - 0x1.6dd7fe3358230p-7, - 0x1.61beae53b72b7p-7, - 0x1.56011cc3b036dp-7, - 0x1.4a9cf6bda3f4cp-7, - 0x1.3f8ff5042a88ep-7, - 0x1.34d7dbc76d7e5p-7, - 0x1.2a727a89a3f14p-7, - 0x1.205dac02bd6b9p-7, - 0x1.1697560347b25p-7, - 0x1.0d1d69569b82dp-7, - 0x1.03ede1a45bfeep-7, - 0x1.f60d8aa2a88f2p-8, - 0x1.e4cc4abf7d065p-8, - 0x1.d4143a9dfe965p-8, - 0x1.c3e1a5f5c077cp-8, - 0x1.b430ecf4a83a8p-8, - 0x1.a4fe83fb9db25p-8, - 0x1.9646f35a76623p-8, - 0x1.8806d70b2fc36p-8, - 0x1.7a3ade6c8b3e4p-8, - 0x1.6cdfcbfc1e263p-8, - 0x1.5ff2750fe7820p-8, - 0x1.536fc18f7ce5cp-8, - 0x1.4754abacdf1dcp-8, - 0x1.3b9e3f9d06e3fp-8, - 0x1.30499b503957fp-8, - 0x1.2553ee2a336bfp-8, - 0x1.1aba78ba3af89p-8, - 0x1.107a8c7323a6ep-8, - 0x1.06918b6355624p-8, - 0x1.f9f9cfd9c3035p-9, - 0x1.e77448fb66bb9p-9, - 0x1.d58da68fd1170p-9, - 0x1.c4412bf4b8f0bp-9, - 0x1.b38a3af2e55b4p-9, - 0x1.a3645330550ffp-9, - 0x1.93cb11a30d765p-9, - 0x1.84ba3004a50d0p-9, - 0x1.762d84469c18fp-9, - 0x1.6821000795a03p-9, - 0x1.5a90b00981d93p-9, - 0x1.4d78bba8ca5fdp-9, - 0x1.40d564548fad7p-9, - 0x1.34a305080681fp-9, - 0x1.28de11c5031ebp-9, - 0x1.1d83170fbf6fbp-9, - 0x1.128eb96be8798p-9, - 0x1.07fdb4dafea5fp-9, - 0x1.fb99b8b8279e1p-10, - 0x1.e7f232d9e2630p-10, - 0x1.d4fed7195d7e8p-10, - 0x1.c2b9cf7f893bfp-10, - 0x1.b11d702b3deb1p-10, - 0x1.a024365f771bdp-10, - 0x1.8fc8c794b03b5p-10, - 0x1.8005f08d6f1efp-10, - 0x1.70d6a46e07ddap-10, - 0x1.6235fbd7a4345p-10, - 0x1.541f340697987p-10, - 0x1.468dadf4080abp-10, - 0x1.397ced7af2b15p-10, - 0x1.2ce898809244ep-10, - 0x1.20cc76202c5fap-10, - 0x1.15246dda49d47p-10, - 0x1.09ec86c75d497p-10, - 0x1.fe41cd9bb4eeep-11, - 0x1.e97ba3b77f306p-11, - 0x1.d57f524723822p-11, - 0x1.c245d4b998479p-11, - 0x1.afc85e0f82e12p-11, - 0x1.9e005769dbc1dp-11, - 0x1.8ce75e9f6f8a0p-11, - 0x1.7c7744d9378f7p-11, - 0x1.6caa0d3582fe9p-11, - 0x1.5d79eb71e893bp-11, - 
0x1.4ee1429bf7cc0p-11, - 0x1.40daa3c89f5b6p-11, - 0x1.3360ccd23db3ap-11, - 0x1.266ea71d4f71ap-11, - 0x1.19ff4663ae9dfp-11, - 0x1.0e0de78654d1ep-11, - 0x1.0295ef6591848p-11, - 0x1.ef25d37f49fe1p-12, - 0x1.da01102b5f851p-12, - 0x1.c5b5412dcafadp-12, - 0x1.b23a5a23e4210p-12, - 0x1.9f8893d8fd1c1p-12, - 0x1.8d986a4187285p-12, - 0x1.7c629a822bc9ep-12, - 0x1.6be02102b3520p-12, - 0x1.5c0a378c90bcap-12, - 0x1.4cda5374ea275p-12, - 0x1.3e4a23d1f4702p-12, - 0x1.30538fbb77ecdp-12, - 0x1.22f0b496539bdp-12, - 0x1.161be46ad3b50p-12, - 0x1.09cfa445b00ffp-12, - 0x1.fc0d55470cf51p-13, - 0x1.e577bbcd49935p-13, - 0x1.cfd4a5adec5bfp-13, - 0x1.bb1a9657ce465p-13, - 0x1.a740684026555p-13, - 0x1.943d4a1d1ed39p-13, - 0x1.8208bc334a6a5p-13, - 0x1.709a8db59f25cp-13, - 0x1.5feada379d8b7p-13, - 0x1.4ff207314a102p-13, - 0x1.40a8c1949f75ep-13, - 0x1.3207fb7420eb9p-13, - 0x1.2408e9ba3327fp-13, - 0x1.16a501f0e42cap-13, - 0x1.09d5f819c9e29p-13, - 0x1.fb2b792b40a22p-14, - 0x1.e3bcf436a1a95p-14, - 0x1.cd55277c18d05p-14, - 0x1.b7e94604479dcp-14, - 0x1.a36eec00926ddp-14, - 0x1.8fdc1b2dcf7b9p-14, - 0x1.7d2737527c3f9p-14, - 0x1.6b4702d7d5849p-14, - 0x1.5a329b7d30748p-14, - 0x1.49e17724f4d41p-14, - 0x1.3a4b60ba9aa4dp-14, - 0x1.2b6875310f785p-14, - 0x1.1d312098e9dbap-14, - 0x1.0f9e1b4dd36dfp-14, - 0x1.02a8673a94691p-14, - 0x1.ec929a665b449p-15, - 0x1.d4f4b4c8e09edp-15, - 0x1.be6abbb10a5aap-15, - 0x1.a8e8cc1fadef6p-15, - 0x1.94637d5bacfdbp-15, - 0x1.80cfdc72220cfp-15, - 0x1.6e2367dc27f95p-15, - 0x1.5c540b4936fd2p-15, - 0x1.4b581b8d170fcp-15, - 0x1.3b2652b06c2b2p-15, - 0x1.2bb5cc22e5db6p-15, - 0x1.1cfe010e2052dp-15, - 0x1.0ef6c4c84a0fep-15, - 0x1.01984165a5f36p-15, - 0x1.e9b5e8d00ce76p-16, - 0x1.d16f5716c6c1ap-16, - 0x1.ba4f035d60e02p-16, - 0x1.a447b7b03f045p-16, - 0x1.8f4ccca7fc90dp-16, - 0x1.7b5223dac7336p-16, - 0x1.684c227fcacefp-16, - 0x1.562fac4329b48p-16, - 0x1.44f21e49054f2p-16, - 0x1.34894a5e24657p-16, - 0x1.24eb7254ccf83p-16, - 0x1.160f438c70913p-16, - 0x1.07ebd2a2d2844p-16, - 0x1.f4f12e9ab070ap-17, - 0x1.db5ad0b27805cp-17, - 0x1.c304efa2c6f4ep-17, - 0x1.abe09e9144b5ep-17, - 0x1.95df988e76644p-17, - 0x1.80f439b4ee04bp-17, - 0x1.6d11788a69c64p-17, - 0x1.5a2adfa0b4bc4p-17, - 0x1.4834877429b8fp-17, - 0x1.37231085c7d9ap-17, - 0x1.26eb9daed6f7ep-17, - 0x1.1783ceac28910p-17, - 0x1.08e1badf0fcedp-17, - 0x1.f5f7d88472604p-18, - 0x1.db92b5212fb8dp-18, - 0x1.c282cd3957edap-18, - 0x1.aab7abace48dcp-18, - 0x1.94219bfcb4928p-18, - 0x1.7eb1a2075864dp-18, - 0x1.6a597219a93d9p-18, - 0x1.570b69502f313p-18, - 0x1.44ba864670882p-18, - 0x1.335a62115bce2p-18, - 0x1.22df298214423p-18, - 0x1.133d96ae7e0ddp-18, - 0x1.046aeabcfcdecp-18, - 0x1.ecb9cfe1d8642p-19, - 0x1.d21397ead99cbp-19, - 0x1.b8d094c86d374p-19, - 0x1.a0df0f0c626dcp-19, - 0x1.8a2e269750a39p-19, - 0x1.74adc8f4064d3p-19, - 0x1.604ea819f007cp-19, - 0x1.4d0231928c6f9p-19, - 0x1.3aba85fe22e1fp-19, - 0x1.296a70f414053p-19, - 0x1.1905613b3abf2p-19, - 0x1.097f6156f32c5p-19, - 0x1.f59a20caf6695p-20, - 0x1.d9c73698fb1dcp-20, - 0x1.bf716c6168baep-20, - 0x1.a6852c6b58392p-20, - 0x1.8eefd70594a88p-20, - 0x1.789fb715aae95p-20, - 0x1.6383f726a8e04p-20, - 0x1.4f8c96f26a26ap-20, - 0x1.3caa61607f920p-20, - 0x1.2acee2f5ecdb8p-20, - 0x1.19ec60b1242edp-20, - 0x1.09f5cf4dd2877p-20, - 0x1.f5bd95d8730d8p-21, - 0x1.d9371e2ff7c35p-21, - 0x1.be41de54d155ap-21, - 0x1.a4c89e08ef4f3p-21, - 0x1.8cb738399b12cp-21, - 0x1.75fa8dbc84becp-21, - 0x1.608078a70dcbcp-21, - 0x1.4c37c0394d094p-21, - 0x1.39100d5687bfep-21, - 0x1.26f9df8519bd6p-21, - 0x1.15e6827001f18p-21, - 0x1.05c803e4831c1p-21, - 0x1.ed22548cffd35p-22, - 
0x1.d06ad6ecdf971p-22, - 0x1.b551c847fbc96p-22, - 0x1.9bc09f112b494p-22, - 0x1.83a1ff0aa239dp-22, - 0x1.6ce1aa3fd7bddp-22, - 0x1.576c72b514859p-22, - 0x1.43302cc4a0da8p-22, - 0x1.301ba221dc9bbp-22, - 0x1.1e1e857adc568p-22, - 0x1.0d2966b1746f7p-22, - 0x1.fa5b4f49cc6b2p-23, - 0x1.dc3ae30b55c16p-23, - 0x1.bfd7555a3bd68p-23, - 0x1.a517d9e61628ap-23, - 0x1.8be4f8f6c951fp-23, - 0x1.74287ded49339p-23, - 0x1.5dcd669f2cd34p-23, - 0x1.48bfd38302870p-23, - 0x1.34ecf8a3c124ap-23, - 0x1.22430f521cbcfp-23, - 0x1.10b1488aeb235p-23, - 0x1.0027c00a263a6p-23, - 0x1.e12ee004efc37p-24, - 0x1.c3e44ae32b16bp-24, - 0x1.a854ea14102a8p-24, - 0x1.8e6761569f45dp-24, - 0x1.7603bac345f65p-24, - 0x1.5f1353cdad001p-24, - 0x1.4980cb3c80949p-24, - 0x1.3537f00b6ad4dp-24, - 0x1.2225b12bffc68p-24, - 0x1.10380e1adb7e9p-24, - 0x1.febc107d5efaap-25, - 0x1.df0f2a0ee6946p-25, - 0x1.c14b2188bcee4p-25, - 0x1.a553644f7f07dp-25, - 0x1.8b0cfce0579dfp-25, - 0x1.725e7c5dd20f7p-25, - 0x1.5b2fe547a1340p-25, - 0x1.456a974e92e93p-25, - 0x1.30f93c3699078p-25, - 0x1.1dc7b5b978cf8p-25, - 0x1.0bc30c5d52f15p-25, - 0x1.f5b2be65a0c7fp-26, - 0x1.d5f3a8dea7357p-26, - 0x1.b82915b03515bp-26, - 0x1.9c3517e789488p-26, - 0x1.81fb7df06136ep-26, - 0x1.6961b8d641d06p-26, - 0x1.524ec4d916caep-26, - 0x1.3cab1343d18d1p-26, - 0x1.2860757487a01p-26, - 0x1.155a09065d4f7p-26, - 0x1.0384250e4c9fcp-26, - 0x1.e59890b926c78p-27, - 0x1.c642116a8a9e3p-27, - 0x1.a8e405e651ab6p-27, - 0x1.8d5f98114f872p-27, - 0x1.7397c5a66e307p-27, - 0x1.5b71456c5a4c4p-27, - 0x1.44d26de513197p-27, - 0x1.2fa31d6371537p-27, - 0x1.1bcca373b7b43p-27, - 0x1.0939ab853339fp-27, - 0x1.efac5187b2863p-28, - 0x1.cf1e86235d0e6p-28, - 0x1.b0a68a2128babp-28, - 0x1.9423165bc4444p-28, - 0x1.7974e743dea3cp-28, - 0x1.607e9eacd1050p-28, - 0x1.4924a74dec728p-28, - 0x1.334d19e0c2160p-28, - 0x1.1edfa3c5f5ccap-28, - 0x1.0bc56f1b54701p-28, - 0x1.f3d2185e047d9p-29, - 0x1.d26cb87945e87p-29, - 0x1.b334fac4b9f99p-29, - 0x1.96076f7918d1cp-29, - 0x1.7ac2d72fc2c63p-29, - 0x1.614801550319ep-29, - 0x1.4979ac8b28926p-29, - 0x1.333c68e2d0548p-29, - 0x1.1e767bce37dd7p-29, - 0x1.0b0fc5b6d05a0p-29, - 0x1.f1e3523b41d7dp-30, - 0x1.d00de6608effep-30, - 0x1.b0778b7b3301ap-30, - 0x1.92fb04ec0f6cfp-30, - 0x1.77756ec9f78fap-30, - 0x1.5dc61922d5a06p-30, - 0x1.45ce65699ff6dp-30, - 0x1.2f71a5f159970p-30, - 0x1.1a94ff571654fp-30, - 0x1.071f4bbea09ecp-30, - 0x1.e9f1ff8ddd774p-31, - 0x1.c818223a202c7p-31, - 0x1.a887bd2b4404dp-31, - 0x1.8b1a336c5eb6bp-31, - 0x1.6fab63324088ap-31, - 0x1.56197e30205bap-31, - 0x1.3e44e45301b92p-31, - 0x1.281000bfe4c3fp-31, - 0x1.135f28f2d50b4p-31, - 0x1.00187dded5975p-31, - 0x1.dc479de0ef001p-32, - 0x1.bad4fdad3caa1p-32, - 0x1.9baed3ed27ab8p-32, - 0x1.7ead9ce4285bbp-32, - 0x1.63ac6b4edc88ep-32, - 0x1.4a88be2a6390cp-32, - 0x1.332259185f1a0p-32, - 0x1.1d5b1f3793044p-32, - 0x1.0916f04b6e18bp-32, - 0x1.ec77101de6926p-33, - 0x1.c960bf23153e0p-33, - 0x1.a8bd20fc65ef7p-33, - 0x1.8a61745ec7d1dp-33, - 0x1.6e25d0e756261p-33, - 0x1.53e4f7d1666cbp-33, - 0x1.3b7c27a7ddb0ep-33, - 0x1.24caf2c32af14p-33, - 0x1.0fb3186804d0fp-33, - 0x1.f830c0bb41fd7p-34, - 0x1.d3c0f1a91c846p-34, - 0x1.b1e5acf351d87p-34, - 0x1.92712d259ce66p-34, - 0x1.7538c60a04476p-34, - 0x1.5a14b04b47879p-34, - 0x1.40dfd87456f4cp-34, - 0x1.2977b1172b9d5p-34, - 0x1.13bc07e891491p-34, - 0x1.ff1dbb4300811p-35, - 0x1.d9a880f306bd8p-35, - 0x1.b6e45220b55e0p-35, - 0x1.96a0b33f2c4dap-35, - 0x1.78b07e9e924acp-35, - 0x1.5ce9ab1670dd2p-35, - 0x1.4325167006bb0p-35, - 0x1.2b3e53538ff3fp-35, - 0x1.15137a7f44864p-35, - 0x1.0084ff125639dp-35, - 0x1.daeb0b7311ec7p-36, - 
0x1.b7937d1c40c52p-36, - 0x1.96d082f59ab06p-36, - 0x1.7872d9fa10aadp-36, - 0x1.5c4e8e37bc7d0p-36, - 0x1.423ac0df49a40p-36, - 0x1.2a117230ad284p-36, - 0x1.13af4f04f9998p-36, - 0x1.fde703724e560p-37, - 0x1.d77f0c82e7641p-37, - 0x1.b3ee02611d7ddp-37, - 0x1.92ff33023d5bdp-37, - 0x1.7481a9e69f53fp-37, - 0x1.5847eda620959p-37, - 0x1.3e27c1fcc74bdp-37, - 0x1.25f9ee0b923dcp-37, - 0x1.0f9a0686531ffp-37, - 0x1.f5cc7718082afp-38, - 0x1.cf7e53d6a2ca5p-38, - 0x1.ac0f5f3229372p-38, - 0x1.8b498644847eap-38, - 0x1.6cfa9bcca59dcp-38, - 0x1.50f411d4fd2cdp-38, - 0x1.370ab8327af5ep-38, - 0x1.1f167f88c6b6ep-38, - 0x1.08f24085d4597p-38, - 0x1.e8f70e181d619p-39, - 0x1.c324c20e337dcp-39, - 0x1.a03261574b54ep-39, - 0x1.7fe903cdf5855p-39, - 0x1.6215c58da3450p-39, - 0x1.46897d4b69fc6p-39, - 0x1.2d1877d731b7bp-39, - 0x1.159a386b11517p-39, - 0x1.ffd27ae9393cep-40, - 0x1.d7c593130dd0bp-40, - 0x1.b2cd607c79bcfp-40, - 0x1.90ae4d3405651p-40, - 0x1.71312dd1759e2p-40, - 0x1.5422ef5d8949dp-40, - 0x1.39544b0ecc957p-40, - 0x1.20997f73e73ddp-40, - 0x1.09ca0eaacd277p-40, - 0x1.e9810295890ecp-41, - 0x1.c2b45b5aa4a1dp-41, - 0x1.9eee068fa7596p-41, - 0x1.7df2b399c10a8p-41, - 0x1.5f8b87a31bd85p-41, - 0x1.4385c96e9a2d9p-41, - 0x1.29b2933ef4cbcp-41, - 0x1.11e68a6378f8ap-41, - 0x1.f7f338086a86bp-42, - 0x1.cf8d7d9ce040ap-42, - 0x1.aa577251ae484p-42, - 0x1.8811d739efb5ep-42, - 0x1.68823e52970bep-42, - 0x1.4b72ae68e8b4cp-42, - 0x1.30b14dbe876bcp-42, - 0x1.181012ef86610p-42, - 0x1.01647ba798744p-42, - 0x1.d90e917701675p-43, - 0x1.b2a87e86d0c8ap-43, - 0x1.8f53dcb377293p-43, - 0x1.6ed2f2515e933p-43, - 0x1.50ecc9ed47f19p-43, - 0x1.356cd5ce7799ep-43, - 0x1.1c229a587ab78p-43, - 0x1.04e15ecc7f3f6p-43, - 0x1.deffc7e6a6017p-44, - 0x1.b7b040832f310p-44, - 0x1.938e021f36d76p-44, - 0x1.7258610b3b233p-44, - 0x1.53d3bfc82a909p-44, - 0x1.37c92babdc2fdp-44, - 0x1.1e06010120f6ap-44, - 0x1.065b9616170d4p-44, - 0x1.e13dd96b3753ap-45, - 0x1.b950d32467392p-45, - 0x1.94a72263259a5p-45, - 0x1.72fd93e036cdcp-45, - 0x1.54164576929abp-45, - 0x1.37b83c521fe96p-45, - 0x1.1daf033182e96p-45, - 0x1.05ca50205d26ap-45, - 0x1.dfbb6235639fap-46, - 0x1.b7807e294781fp-46, - 0x1.9298add70a734p-46, - 0x1.70beaf9c7ffb6p-46, - 0x1.51b2cd6709222p-46, - 0x1.353a6cf7f7fffp-46, - 0x1.1b1fa8cbe84a7p-46, - 0x1.0330f0fd69921p-46, - 0x1.da81670f96f9bp-47, - 0x1.b24a16b4d09aap-47, - 0x1.8d6eeb6efdbd6p-47, - 0x1.6ba91ac734785p-47, - 0x1.4cb7966770ab5p-47, - 0x1.305e9721d0981p-47, - 0x1.1667311fff70ap-47, - 0x1.fd3de10d62855p-48, - 0x1.d1aefbcd48d0cp-48, - 0x1.a9cc93c25aca9p-48, - 0x1.85487ee3ea735p-48, - 0x1.63daf8b4b1e0cp-48, - 0x1.45421e69a6ca1p-48, - 0x1.294175802d99ap-48, - 0x1.0fa17bf41068fp-48, - 0x1.f05e82aae2bb9p-49, - 0x1.c578101b29058p-49, - 0x1.9e39dc5dd2f7cp-49, - 0x1.7a553a728bbf2p-49, - 0x1.5982008db1304p-49, - 0x1.3b7e00422e51bp-49, - 0x1.200c898d9ee3ep-49, - 0x1.06f5f7eb65a56p-49, - 0x1.e00e9148a1d25p-50, - 0x1.b623734024e92p-50, - 0x1.8fd4e01891bf8p-50, - 0x1.6cd44c7470d89p-50, - 0x1.4cd9c04158cd7p-50, - 0x1.2fa34bf5c8344p-50, - 0x1.14f4890ff2461p-50, - 0x1.f92c49dfa4df5p-51, - 0x1.ccaaea71ab0dfp-51, - 0x1.a40829f001197p-51, - 0x1.7eef13b59e96cp-51, - 0x1.5d11e1a252bf5p-51, - 0x1.3e296303b2297p-51, - 0x1.21f47009f43cep-51, - 0x1.083768c5e4541p-51, - 0x1.e1777d831265ep-52, - 0x1.b69f10b0191b5p-52, - 0x1.8f8a3a05b5b52p-52, - 0x1.6be573c40c8e7p-52, - 0x1.4b645ba991fdbp-52, - 0x1.2dc119095729fp-52, - }, -}; diff --git a/contrib/arm-optimized-routines/pl/math/sv_erff_data.c b/contrib/arm-optimized-routines/pl/math/sv_erff_data.c deleted file mode 100644 index 
154d3c188874..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_erff_data.c +++ /dev/null @@ -1,1046 +0,0 @@ -/* - * Data for approximation of vector erff. - * - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -/* Lookup table used in SVE erff. - For each possible rounded input r (multiples of 1/128), between - r = 0.0 and r = 4.0 (513 values): - - __erff_data.erf contains the values of erf(r), - - __erff_data.scale contains the values of 2/sqrt(pi)*exp(-r^2). - Note that indices 0 and 1 are never hit by the algorithm, since lookup is - performed only for x >= 1/64-1/512. */ -const struct sv_erff_data __sv_erff_data = { - .erf = { 0x0.000000p+0, - 0x1.20dbf4p-7, - 0x1.20d770p-6, - 0x1.b137e0p-6, - 0x1.20c564p-5, - 0x1.68e5d4p-5, - 0x1.b0fafep-5, - 0x1.f902a8p-5, - 0x1.207d48p-4, - 0x1.44703ep-4, - 0x1.68591ap-4, - 0x1.8c36bep-4, - 0x1.b00812p-4, - 0x1.d3cbf8p-4, - 0x1.f7815ap-4, - 0x1.0d9390p-3, - 0x1.1f5e1ap-3, - 0x1.311fc2p-3, - 0x1.42d7fcp-3, - 0x1.548642p-3, - 0x1.662a0cp-3, - 0x1.77c2d2p-3, - 0x1.895010p-3, - 0x1.9ad142p-3, - 0x1.ac45e4p-3, - 0x1.bdad72p-3, - 0x1.cf076ep-3, - 0x1.e05354p-3, - 0x1.f190aap-3, - 0x1.015f78p-2, - 0x1.09eed6p-2, - 0x1.127632p-2, - 0x1.1af54ep-2, - 0x1.236bf0p-2, - 0x1.2bd9dcp-2, - 0x1.343ed6p-2, - 0x1.3c9aa8p-2, - 0x1.44ed18p-2, - 0x1.4d35f0p-2, - 0x1.5574f4p-2, - 0x1.5da9f4p-2, - 0x1.65d4b8p-2, - 0x1.6df50ap-2, - 0x1.760abap-2, - 0x1.7e1594p-2, - 0x1.861566p-2, - 0x1.8e0a02p-2, - 0x1.95f336p-2, - 0x1.9dd0d2p-2, - 0x1.a5a2acp-2, - 0x1.ad6896p-2, - 0x1.b52264p-2, - 0x1.bccfecp-2, - 0x1.c47104p-2, - 0x1.cc0584p-2, - 0x1.d38d44p-2, - 0x1.db081cp-2, - 0x1.e275eap-2, - 0x1.e9d68ap-2, - 0x1.f129d4p-2, - 0x1.f86faap-2, - 0x1.ffa7eap-2, - 0x1.03693ap-1, - 0x1.06f794p-1, - 0x1.0a7ef6p-1, - 0x1.0dff50p-1, - 0x1.117894p-1, - 0x1.14eab4p-1, - 0x1.1855a6p-1, - 0x1.1bb95cp-1, - 0x1.1f15ccp-1, - 0x1.226ae8p-1, - 0x1.25b8a8p-1, - 0x1.28ff02p-1, - 0x1.2c3decp-1, - 0x1.2f755cp-1, - 0x1.32a54cp-1, - 0x1.35cdb4p-1, - 0x1.38ee8ap-1, - 0x1.3c07cap-1, - 0x1.3f196ep-1, - 0x1.42236ep-1, - 0x1.4525c8p-1, - 0x1.482074p-1, - 0x1.4b1372p-1, - 0x1.4dfebap-1, - 0x1.50e24cp-1, - 0x1.53be26p-1, - 0x1.569244p-1, - 0x1.595ea6p-1, - 0x1.5c2348p-1, - 0x1.5ee02ep-1, - 0x1.619556p-1, - 0x1.6442c0p-1, - 0x1.66e86ep-1, - 0x1.69865ep-1, - 0x1.6c1c98p-1, - 0x1.6eab18p-1, - 0x1.7131e6p-1, - 0x1.73b102p-1, - 0x1.762870p-1, - 0x1.789836p-1, - 0x1.7b0058p-1, - 0x1.7d60d8p-1, - 0x1.7fb9c0p-1, - 0x1.820b12p-1, - 0x1.8454d6p-1, - 0x1.869712p-1, - 0x1.88d1cep-1, - 0x1.8b050ep-1, - 0x1.8d30dep-1, - 0x1.8f5544p-1, - 0x1.91724ap-1, - 0x1.9387f6p-1, - 0x1.959652p-1, - 0x1.979d68p-1, - 0x1.999d42p-1, - 0x1.9b95e8p-1, - 0x1.9d8768p-1, - 0x1.9f71cap-1, - 0x1.a1551ap-1, - 0x1.a33162p-1, - 0x1.a506b0p-1, - 0x1.a6d50cp-1, - 0x1.a89c86p-1, - 0x1.aa5d26p-1, - 0x1.ac16fcp-1, - 0x1.adca14p-1, - 0x1.af767ap-1, - 0x1.b11c3cp-1, - 0x1.b2bb68p-1, - 0x1.b4540ap-1, - 0x1.b5e630p-1, - 0x1.b771e8p-1, - 0x1.b8f742p-1, - 0x1.ba764ap-1, - 0x1.bbef10p-1, - 0x1.bd61a2p-1, - 0x1.bece0ep-1, - 0x1.c03464p-1, - 0x1.c194b2p-1, - 0x1.c2ef08p-1, - 0x1.c44376p-1, - 0x1.c5920ap-1, - 0x1.c6dad2p-1, - 0x1.c81de2p-1, - 0x1.c95b46p-1, - 0x1.ca930ep-1, - 0x1.cbc54cp-1, - 0x1.ccf20cp-1, - 0x1.ce1962p-1, - 0x1.cf3b5cp-1, - 0x1.d0580cp-1, - 0x1.d16f7ep-1, - 0x1.d281c4p-1, - 0x1.d38ef0p-1, - 0x1.d49710p-1, - 0x1.d59a34p-1, - 0x1.d6986cp-1, - 0x1.d791cap-1, - 0x1.d8865ep-1, - 0x1.d97636p-1, - 0x1.da6162p-1, - 0x1.db47f4p-1, - 0x1.dc29fcp-1, - 
0x1.dd0788p-1, - 0x1.dde0aap-1, - 0x1.deb570p-1, - 0x1.df85eap-1, - 0x1.e0522ap-1, - 0x1.e11a3ep-1, - 0x1.e1de36p-1, - 0x1.e29e22p-1, - 0x1.e35a12p-1, - 0x1.e41214p-1, - 0x1.e4c638p-1, - 0x1.e5768cp-1, - 0x1.e62322p-1, - 0x1.e6cc08p-1, - 0x1.e7714ap-1, - 0x1.e812fcp-1, - 0x1.e8b12ap-1, - 0x1.e94be4p-1, - 0x1.e9e336p-1, - 0x1.ea7730p-1, - 0x1.eb07e2p-1, - 0x1.eb9558p-1, - 0x1.ec1fa2p-1, - 0x1.eca6ccp-1, - 0x1.ed2ae6p-1, - 0x1.edabfcp-1, - 0x1.ee2a1ep-1, - 0x1.eea556p-1, - 0x1.ef1db4p-1, - 0x1.ef9344p-1, - 0x1.f00614p-1, - 0x1.f07630p-1, - 0x1.f0e3a6p-1, - 0x1.f14e82p-1, - 0x1.f1b6d0p-1, - 0x1.f21ca0p-1, - 0x1.f27ff8p-1, - 0x1.f2e0eap-1, - 0x1.f33f7ep-1, - 0x1.f39bc2p-1, - 0x1.f3f5c2p-1, - 0x1.f44d88p-1, - 0x1.f4a31ep-1, - 0x1.f4f694p-1, - 0x1.f547f2p-1, - 0x1.f59742p-1, - 0x1.f5e490p-1, - 0x1.f62fe8p-1, - 0x1.f67952p-1, - 0x1.f6c0dcp-1, - 0x1.f7068cp-1, - 0x1.f74a6ep-1, - 0x1.f78c8cp-1, - 0x1.f7cceep-1, - 0x1.f80ba2p-1, - 0x1.f848acp-1, - 0x1.f8841ap-1, - 0x1.f8bdf2p-1, - 0x1.f8f63ep-1, - 0x1.f92d08p-1, - 0x1.f96256p-1, - 0x1.f99634p-1, - 0x1.f9c8a8p-1, - 0x1.f9f9bap-1, - 0x1.fa2974p-1, - 0x1.fa57dep-1, - 0x1.fa84fep-1, - 0x1.fab0dep-1, - 0x1.fadb84p-1, - 0x1.fb04f6p-1, - 0x1.fb2d40p-1, - 0x1.fb5464p-1, - 0x1.fb7a6cp-1, - 0x1.fb9f60p-1, - 0x1.fbc344p-1, - 0x1.fbe61ep-1, - 0x1.fc07fap-1, - 0x1.fc28d8p-1, - 0x1.fc48c2p-1, - 0x1.fc67bcp-1, - 0x1.fc85d0p-1, - 0x1.fca2fep-1, - 0x1.fcbf52p-1, - 0x1.fcdaccp-1, - 0x1.fcf576p-1, - 0x1.fd0f54p-1, - 0x1.fd286ap-1, - 0x1.fd40bep-1, - 0x1.fd5856p-1, - 0x1.fd6f34p-1, - 0x1.fd8562p-1, - 0x1.fd9ae2p-1, - 0x1.fdafb8p-1, - 0x1.fdc3e8p-1, - 0x1.fdd77ap-1, - 0x1.fdea6ep-1, - 0x1.fdfcccp-1, - 0x1.fe0e96p-1, - 0x1.fe1fd0p-1, - 0x1.fe3080p-1, - 0x1.fe40a6p-1, - 0x1.fe504cp-1, - 0x1.fe5f70p-1, - 0x1.fe6e18p-1, - 0x1.fe7c46p-1, - 0x1.fe8a00p-1, - 0x1.fe9748p-1, - 0x1.fea422p-1, - 0x1.feb090p-1, - 0x1.febc96p-1, - 0x1.fec836p-1, - 0x1.fed374p-1, - 0x1.fede52p-1, - 0x1.fee8d4p-1, - 0x1.fef2fep-1, - 0x1.fefccep-1, - 0x1.ff064cp-1, - 0x1.ff0f76p-1, - 0x1.ff1852p-1, - 0x1.ff20e0p-1, - 0x1.ff2924p-1, - 0x1.ff3120p-1, - 0x1.ff38d6p-1, - 0x1.ff4048p-1, - 0x1.ff4778p-1, - 0x1.ff4e68p-1, - 0x1.ff551ap-1, - 0x1.ff5b90p-1, - 0x1.ff61ccp-1, - 0x1.ff67d0p-1, - 0x1.ff6d9ep-1, - 0x1.ff7338p-1, - 0x1.ff789ep-1, - 0x1.ff7dd4p-1, - 0x1.ff82dap-1, - 0x1.ff87b2p-1, - 0x1.ff8c5cp-1, - 0x1.ff90dcp-1, - 0x1.ff9532p-1, - 0x1.ff9960p-1, - 0x1.ff9d68p-1, - 0x1.ffa14ap-1, - 0x1.ffa506p-1, - 0x1.ffa8a0p-1, - 0x1.ffac18p-1, - 0x1.ffaf6ep-1, - 0x1.ffb2a6p-1, - 0x1.ffb5bep-1, - 0x1.ffb8b8p-1, - 0x1.ffbb98p-1, - 0x1.ffbe5ap-1, - 0x1.ffc102p-1, - 0x1.ffc390p-1, - 0x1.ffc606p-1, - 0x1.ffc862p-1, - 0x1.ffcaa8p-1, - 0x1.ffccd8p-1, - 0x1.ffcef4p-1, - 0x1.ffd0fap-1, - 0x1.ffd2eap-1, - 0x1.ffd4cap-1, - 0x1.ffd696p-1, - 0x1.ffd84ep-1, - 0x1.ffd9f8p-1, - 0x1.ffdb90p-1, - 0x1.ffdd18p-1, - 0x1.ffde90p-1, - 0x1.ffdffap-1, - 0x1.ffe154p-1, - 0x1.ffe2a2p-1, - 0x1.ffe3e2p-1, - 0x1.ffe514p-1, - 0x1.ffe63cp-1, - 0x1.ffe756p-1, - 0x1.ffe866p-1, - 0x1.ffe96ap-1, - 0x1.ffea64p-1, - 0x1.ffeb54p-1, - 0x1.ffec3ap-1, - 0x1.ffed16p-1, - 0x1.ffedeap-1, - 0x1.ffeeb4p-1, - 0x1.ffef76p-1, - 0x1.fff032p-1, - 0x1.fff0e4p-1, - 0x1.fff18ep-1, - 0x1.fff232p-1, - 0x1.fff2d0p-1, - 0x1.fff366p-1, - 0x1.fff3f6p-1, - 0x1.fff480p-1, - 0x1.fff504p-1, - 0x1.fff582p-1, - 0x1.fff5fcp-1, - 0x1.fff670p-1, - 0x1.fff6dep-1, - 0x1.fff74ap-1, - 0x1.fff7aep-1, - 0x1.fff810p-1, - 0x1.fff86cp-1, - 0x1.fff8c6p-1, - 0x1.fff91cp-1, - 0x1.fff96cp-1, - 0x1.fff9bap-1, - 0x1.fffa04p-1, - 0x1.fffa4cp-1, - 0x1.fffa90p-1, - 0x1.fffad0p-1, - 0x1.fffb0ep-1, - 
0x1.fffb4ap-1, - 0x1.fffb82p-1, - 0x1.fffbb8p-1, - 0x1.fffbecp-1, - 0x1.fffc1ep-1, - 0x1.fffc4ep-1, - 0x1.fffc7ap-1, - 0x1.fffca6p-1, - 0x1.fffccep-1, - 0x1.fffcf6p-1, - 0x1.fffd1ap-1, - 0x1.fffd3ep-1, - 0x1.fffd60p-1, - 0x1.fffd80p-1, - 0x1.fffda0p-1, - 0x1.fffdbep-1, - 0x1.fffddap-1, - 0x1.fffdf4p-1, - 0x1.fffe0ep-1, - 0x1.fffe26p-1, - 0x1.fffe3ep-1, - 0x1.fffe54p-1, - 0x1.fffe68p-1, - 0x1.fffe7ep-1, - 0x1.fffe90p-1, - 0x1.fffea2p-1, - 0x1.fffeb4p-1, - 0x1.fffec4p-1, - 0x1.fffed4p-1, - 0x1.fffee4p-1, - 0x1.fffef2p-1, - 0x1.ffff00p-1, - 0x1.ffff0cp-1, - 0x1.ffff18p-1, - 0x1.ffff24p-1, - 0x1.ffff30p-1, - 0x1.ffff3ap-1, - 0x1.ffff44p-1, - 0x1.ffff4ep-1, - 0x1.ffff56p-1, - 0x1.ffff60p-1, - 0x1.ffff68p-1, - 0x1.ffff70p-1, - 0x1.ffff78p-1, - 0x1.ffff7ep-1, - 0x1.ffff84p-1, - 0x1.ffff8cp-1, - 0x1.ffff92p-1, - 0x1.ffff98p-1, - 0x1.ffff9cp-1, - 0x1.ffffa2p-1, - 0x1.ffffa6p-1, - 0x1.ffffacp-1, - 0x1.ffffb0p-1, - 0x1.ffffb4p-1, - 0x1.ffffb8p-1, - 0x1.ffffbcp-1, - 0x1.ffffc0p-1, - 0x1.ffffc4p-1, - 0x1.ffffc6p-1, - 0x1.ffffcap-1, - 0x1.ffffccp-1, - 0x1.ffffd0p-1, - 0x1.ffffd2p-1, - 0x1.ffffd4p-1, - 0x1.ffffd6p-1, - 0x1.ffffd8p-1, - 0x1.ffffdcp-1, - 0x1.ffffdep-1, - 0x1.ffffdep-1, - 0x1.ffffe0p-1, - 0x1.ffffe2p-1, - 0x1.ffffe4p-1, - 0x1.ffffe6p-1, - 0x1.ffffe8p-1, - 0x1.ffffe8p-1, - 0x1.ffffeap-1, - 0x1.ffffeap-1, - 0x1.ffffecp-1, - 0x1.ffffeep-1, - 0x1.ffffeep-1, - 0x1.fffff0p-1, - 0x1.fffff0p-1, - 0x1.fffff2p-1, - 0x1.fffff2p-1, - 0x1.fffff2p-1, - 0x1.fffff4p-1, - 0x1.fffff4p-1, - 0x1.fffff4p-1, - 0x1.fffff6p-1, - 0x1.fffff6p-1, - 0x1.fffff6p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - }, - .scale = { 0x1.20dd76p+0, - 0x1.20d8f2p+0, - 0x1.20cb68p+0, - 0x1.20b4d8p+0, - 0x1.209546p+0, - 0x1.206cb4p+0, - 0x1.203b26p+0, - 0x1.2000a0p+0, - 0x1.1fbd28p+0, - 0x1.1f70c4p+0, - 0x1.1f1b7ap+0, - 0x1.1ebd56p+0, - 0x1.1e565cp+0, - 0x1.1de698p+0, - 0x1.1d6e14p+0, - 0x1.1cecdcp+0, - 0x1.1c62fap+0, - 0x1.1bd07cp+0, - 0x1.1b3572p+0, - 0x1.1a91e6p+0, - 0x1.19e5eap+0, - 0x1.19318cp+0, - 0x1.1874dep+0, - 0x1.17aff0p+0, - 0x1.16e2d8p+0, - 0x1.160da4p+0, - 0x1.153068p+0, - 0x1.144b3cp+0, - 0x1.135e30p+0, - 0x1.12695ep+0, - 0x1.116cd8p+0, - 0x1.1068bap+0, - 0x1.0f5d16p+0, - 0x1.0e4a08p+0, - 0x1.0d2fa6p+0, - 0x1.0c0e0ap+0, - 0x1.0ae550p+0, - 0x1.09b590p+0, - 0x1.087ee4p+0, - 0x1.07416cp+0, - 0x1.05fd3ep+0, - 0x1.04b27cp+0, - 0x1.036140p+0, - 0x1.0209a6p+0, - 0x1.00abd0p+0, - 0x1.fe8fb0p-1, - 0x1.fbbbbep-1, - 0x1.f8dc0ap-1, - 0x1.f5f0cep-1, - 0x1.f2fa4cp-1, - 0x1.eff8c4p-1, - 0x1.ecec78p-1, - 0x1.e9d5a8p-1, - 0x1.e6b498p-1, - 0x1.e38988p-1, - 0x1.e054bep-1, - 0x1.dd167cp-1, - 0x1.d9cf06p-1, - 0x1.d67ea2p-1, - 0x1.d32592p-1, - 0x1.cfc41ep-1, - 0x1.cc5a8ap-1, - 0x1.c8e91cp-1, - 0x1.c5701ap-1, - 0x1.c1efcap-1, - 0x1.be6872p-1, - 0x1.bada5ap-1, - 0x1.b745c6p-1, - 0x1.b3aafcp-1, - 
0x1.b00a46p-1, - 0x1.ac63e8p-1, - 0x1.a8b828p-1, - 0x1.a5074ep-1, - 0x1.a1519ep-1, - 0x1.9d9762p-1, - 0x1.99d8dap-1, - 0x1.961650p-1, - 0x1.925008p-1, - 0x1.8e8646p-1, - 0x1.8ab950p-1, - 0x1.86e96ap-1, - 0x1.8316d6p-1, - 0x1.7f41dcp-1, - 0x1.7b6abcp-1, - 0x1.7791b8p-1, - 0x1.73b714p-1, - 0x1.6fdb12p-1, - 0x1.6bfdf0p-1, - 0x1.681ff2p-1, - 0x1.644156p-1, - 0x1.60625cp-1, - 0x1.5c8342p-1, - 0x1.58a446p-1, - 0x1.54c5a6p-1, - 0x1.50e79ep-1, - 0x1.4d0a68p-1, - 0x1.492e42p-1, - 0x1.455366p-1, - 0x1.417a0cp-1, - 0x1.3da26ep-1, - 0x1.39ccc2p-1, - 0x1.35f940p-1, - 0x1.32281ep-1, - 0x1.2e5992p-1, - 0x1.2a8dcep-1, - 0x1.26c508p-1, - 0x1.22ff72p-1, - 0x1.1f3d3cp-1, - 0x1.1b7e98p-1, - 0x1.17c3b6p-1, - 0x1.140cc4p-1, - 0x1.1059eep-1, - 0x1.0cab62p-1, - 0x1.09014cp-1, - 0x1.055bd6p-1, - 0x1.01bb2cp-1, - 0x1.fc3ee6p-2, - 0x1.f511aap-2, - 0x1.edeeeep-2, - 0x1.e6d700p-2, - 0x1.dfca26p-2, - 0x1.d8c8aap-2, - 0x1.d1d2d0p-2, - 0x1.cae8dap-2, - 0x1.c40b08p-2, - 0x1.bd3998p-2, - 0x1.b674c8p-2, - 0x1.afbcd4p-2, - 0x1.a911f0p-2, - 0x1.a27456p-2, - 0x1.9be438p-2, - 0x1.9561c8p-2, - 0x1.8eed36p-2, - 0x1.8886b2p-2, - 0x1.822e66p-2, - 0x1.7be47ap-2, - 0x1.75a91ap-2, - 0x1.6f7c6ap-2, - 0x1.695e8cp-2, - 0x1.634fa6p-2, - 0x1.5d4fd4p-2, - 0x1.575f34p-2, - 0x1.517de6p-2, - 0x1.4bac00p-2, - 0x1.45e99cp-2, - 0x1.4036d0p-2, - 0x1.3a93b2p-2, - 0x1.350052p-2, - 0x1.2f7cc4p-2, - 0x1.2a0916p-2, - 0x1.24a554p-2, - 0x1.1f518ap-2, - 0x1.1a0dc6p-2, - 0x1.14da0ap-2, - 0x1.0fb662p-2, - 0x1.0aa2d0p-2, - 0x1.059f5ap-2, - 0x1.00ac00p-2, - 0x1.f79184p-3, - 0x1.edeb40p-3, - 0x1.e46530p-3, - 0x1.daff4ap-3, - 0x1.d1b982p-3, - 0x1.c893cep-3, - 0x1.bf8e1cp-3, - 0x1.b6a856p-3, - 0x1.ade26cp-3, - 0x1.a53c42p-3, - 0x1.9cb5bep-3, - 0x1.944ec2p-3, - 0x1.8c0732p-3, - 0x1.83deeap-3, - 0x1.7bd5c8p-3, - 0x1.73eba4p-3, - 0x1.6c2056p-3, - 0x1.6473b6p-3, - 0x1.5ce596p-3, - 0x1.5575c8p-3, - 0x1.4e241ep-3, - 0x1.46f066p-3, - 0x1.3fda6cp-3, - 0x1.38e1fap-3, - 0x1.3206dcp-3, - 0x1.2b48dap-3, - 0x1.24a7b8p-3, - 0x1.1e233ep-3, - 0x1.17bb2cp-3, - 0x1.116f48p-3, - 0x1.0b3f52p-3, - 0x1.052b0cp-3, - 0x1.fe6460p-4, - 0x1.f2a902p-4, - 0x1.e72372p-4, - 0x1.dbd32ap-4, - 0x1.d0b7a0p-4, - 0x1.c5d04ap-4, - 0x1.bb1c98p-4, - 0x1.b09bfcp-4, - 0x1.a64de6p-4, - 0x1.9c31c6p-4, - 0x1.92470ap-4, - 0x1.888d1ep-4, - 0x1.7f036cp-4, - 0x1.75a960p-4, - 0x1.6c7e64p-4, - 0x1.6381e2p-4, - 0x1.5ab342p-4, - 0x1.5211ecp-4, - 0x1.499d48p-4, - 0x1.4154bcp-4, - 0x1.3937b2p-4, - 0x1.31458ep-4, - 0x1.297dbap-4, - 0x1.21df9ap-4, - 0x1.1a6a96p-4, - 0x1.131e14p-4, - 0x1.0bf97ep-4, - 0x1.04fc3ap-4, - 0x1.fc4b5ep-5, - 0x1.eeea8cp-5, - 0x1.e1d4d0p-5, - 0x1.d508fap-5, - 0x1.c885e0p-5, - 0x1.bc4a54p-5, - 0x1.b05530p-5, - 0x1.a4a54ap-5, - 0x1.99397ap-5, - 0x1.8e109cp-5, - 0x1.83298ep-5, - 0x1.78832cp-5, - 0x1.6e1c58p-5, - 0x1.63f3f6p-5, - 0x1.5a08e8p-5, - 0x1.505a18p-5, - 0x1.46e66cp-5, - 0x1.3dacd2p-5, - 0x1.34ac36p-5, - 0x1.2be38cp-5, - 0x1.2351c2p-5, - 0x1.1af5d2p-5, - 0x1.12ceb4p-5, - 0x1.0adb60p-5, - 0x1.031ad6p-5, - 0x1.f7182ap-6, - 0x1.e85c44p-6, - 0x1.da0006p-6, - 0x1.cc0180p-6, - 0x1.be5ecep-6, - 0x1.b1160ap-6, - 0x1.a4255ap-6, - 0x1.978ae8p-6, - 0x1.8b44e6p-6, - 0x1.7f5188p-6, - 0x1.73af0cp-6, - 0x1.685bb6p-6, - 0x1.5d55ccp-6, - 0x1.529b9ep-6, - 0x1.482b84p-6, - 0x1.3e03d8p-6, - 0x1.3422fep-6, - 0x1.2a875cp-6, - 0x1.212f62p-6, - 0x1.181984p-6, - 0x1.0f443ep-6, - 0x1.06ae14p-6, - 0x1.fcab14p-7, - 0x1.ec7262p-7, - 0x1.dcaf36p-7, - 0x1.cd5ecap-7, - 0x1.be7e5ap-7, - 0x1.b00b38p-7, - 0x1.a202bep-7, - 0x1.94624ep-7, - 0x1.87275ep-7, - 0x1.7a4f6ap-7, - 0x1.6dd7fep-7, - 0x1.61beaep-7, - 0x1.56011cp-7, - 
0x1.4a9cf6p-7, - 0x1.3f8ff6p-7, - 0x1.34d7dcp-7, - 0x1.2a727ap-7, - 0x1.205dacp-7, - 0x1.169756p-7, - 0x1.0d1d6ap-7, - 0x1.03ede2p-7, - 0x1.f60d8ap-8, - 0x1.e4cc4ap-8, - 0x1.d4143ap-8, - 0x1.c3e1a6p-8, - 0x1.b430ecp-8, - 0x1.a4fe84p-8, - 0x1.9646f4p-8, - 0x1.8806d8p-8, - 0x1.7a3adep-8, - 0x1.6cdfccp-8, - 0x1.5ff276p-8, - 0x1.536fc2p-8, - 0x1.4754acp-8, - 0x1.3b9e40p-8, - 0x1.30499cp-8, - 0x1.2553eep-8, - 0x1.1aba78p-8, - 0x1.107a8cp-8, - 0x1.06918cp-8, - 0x1.f9f9d0p-9, - 0x1.e77448p-9, - 0x1.d58da6p-9, - 0x1.c4412cp-9, - 0x1.b38a3ap-9, - 0x1.a36454p-9, - 0x1.93cb12p-9, - 0x1.84ba30p-9, - 0x1.762d84p-9, - 0x1.682100p-9, - 0x1.5a90b0p-9, - 0x1.4d78bcp-9, - 0x1.40d564p-9, - 0x1.34a306p-9, - 0x1.28de12p-9, - 0x1.1d8318p-9, - 0x1.128ebap-9, - 0x1.07fdb4p-9, - 0x1.fb99b8p-10, - 0x1.e7f232p-10, - 0x1.d4fed8p-10, - 0x1.c2b9d0p-10, - 0x1.b11d70p-10, - 0x1.a02436p-10, - 0x1.8fc8c8p-10, - 0x1.8005f0p-10, - 0x1.70d6a4p-10, - 0x1.6235fcp-10, - 0x1.541f34p-10, - 0x1.468daep-10, - 0x1.397ceep-10, - 0x1.2ce898p-10, - 0x1.20cc76p-10, - 0x1.15246ep-10, - 0x1.09ec86p-10, - 0x1.fe41cep-11, - 0x1.e97ba4p-11, - 0x1.d57f52p-11, - 0x1.c245d4p-11, - 0x1.afc85ep-11, - 0x1.9e0058p-11, - 0x1.8ce75ep-11, - 0x1.7c7744p-11, - 0x1.6caa0ep-11, - 0x1.5d79ecp-11, - 0x1.4ee142p-11, - 0x1.40daa4p-11, - 0x1.3360ccp-11, - 0x1.266ea8p-11, - 0x1.19ff46p-11, - 0x1.0e0de8p-11, - 0x1.0295f0p-11, - 0x1.ef25d4p-12, - 0x1.da0110p-12, - 0x1.c5b542p-12, - 0x1.b23a5ap-12, - 0x1.9f8894p-12, - 0x1.8d986ap-12, - 0x1.7c629ap-12, - 0x1.6be022p-12, - 0x1.5c0a38p-12, - 0x1.4cda54p-12, - 0x1.3e4a24p-12, - 0x1.305390p-12, - 0x1.22f0b4p-12, - 0x1.161be4p-12, - 0x1.09cfa4p-12, - 0x1.fc0d56p-13, - 0x1.e577bcp-13, - 0x1.cfd4a6p-13, - 0x1.bb1a96p-13, - 0x1.a74068p-13, - 0x1.943d4ap-13, - 0x1.8208bcp-13, - 0x1.709a8ep-13, - 0x1.5feadap-13, - 0x1.4ff208p-13, - 0x1.40a8c2p-13, - 0x1.3207fcp-13, - 0x1.2408eap-13, - 0x1.16a502p-13, - 0x1.09d5f8p-13, - 0x1.fb2b7ap-14, - 0x1.e3bcf4p-14, - 0x1.cd5528p-14, - 0x1.b7e946p-14, - 0x1.a36eecp-14, - 0x1.8fdc1cp-14, - 0x1.7d2738p-14, - 0x1.6b4702p-14, - 0x1.5a329cp-14, - 0x1.49e178p-14, - 0x1.3a4b60p-14, - 0x1.2b6876p-14, - 0x1.1d3120p-14, - 0x1.0f9e1cp-14, - 0x1.02a868p-14, - 0x1.ec929ap-15, - 0x1.d4f4b4p-15, - 0x1.be6abcp-15, - 0x1.a8e8ccp-15, - 0x1.94637ep-15, - 0x1.80cfdcp-15, - 0x1.6e2368p-15, - 0x1.5c540cp-15, - 0x1.4b581cp-15, - 0x1.3b2652p-15, - 0x1.2bb5ccp-15, - 0x1.1cfe02p-15, - 0x1.0ef6c4p-15, - 0x1.019842p-15, - 0x1.e9b5e8p-16, - 0x1.d16f58p-16, - 0x1.ba4f04p-16, - 0x1.a447b8p-16, - 0x1.8f4cccp-16, - 0x1.7b5224p-16, - 0x1.684c22p-16, - 0x1.562facp-16, - 0x1.44f21ep-16, - 0x1.34894ap-16, - 0x1.24eb72p-16, - 0x1.160f44p-16, - 0x1.07ebd2p-16, - 0x1.f4f12ep-17, - 0x1.db5ad0p-17, - 0x1.c304f0p-17, - 0x1.abe09ep-17, - 0x1.95df98p-17, - 0x1.80f43ap-17, - 0x1.6d1178p-17, - 0x1.5a2ae0p-17, - 0x1.483488p-17, - 0x1.372310p-17, - 0x1.26eb9ep-17, - 0x1.1783cep-17, - 0x1.08e1bap-17, - 0x1.f5f7d8p-18, - 0x1.db92b6p-18, - 0x1.c282cep-18, - 0x1.aab7acp-18, - 0x1.94219cp-18, - 0x1.7eb1a2p-18, - 0x1.6a5972p-18, - 0x1.570b6ap-18, - 0x1.44ba86p-18, - 0x1.335a62p-18, - 0x1.22df2ap-18, - 0x1.133d96p-18, - 0x1.046aeap-18, - 0x1.ecb9d0p-19, - 0x1.d21398p-19, - 0x1.b8d094p-19, - 0x1.a0df10p-19, - 0x1.8a2e26p-19, - 0x1.74adc8p-19, - 0x1.604ea8p-19, - 0x1.4d0232p-19, - 0x1.3aba86p-19, - 0x1.296a70p-19, - 0x1.190562p-19, - 0x1.097f62p-19, - 0x1.f59a20p-20, - 0x1.d9c736p-20, - 0x1.bf716cp-20, - 0x1.a6852cp-20, - 0x1.8eefd8p-20, - 0x1.789fb8p-20, - 0x1.6383f8p-20, - 0x1.4f8c96p-20, - 0x1.3caa62p-20, - 0x1.2acee2p-20, - 0x1.19ec60p-20, - 
0x1.09f5d0p-20, - 0x1.f5bd96p-21, - 0x1.d9371ep-21, - 0x1.be41dep-21, - 0x1.a4c89ep-21, - 0x1.8cb738p-21, - 0x1.75fa8ep-21, - 0x1.608078p-21, - 0x1.4c37c0p-21, - 0x1.39100ep-21, - 0x1.26f9e0p-21, - 0x1.15e682p-21, - 0x1.05c804p-21, - 0x1.ed2254p-22, - 0x1.d06ad6p-22, - 0x1.b551c8p-22, - 0x1.9bc0a0p-22, - 0x1.83a200p-22, - 0x1.6ce1aap-22, - 0x1.576c72p-22, - 0x1.43302cp-22, - 0x1.301ba2p-22, - 0x1.1e1e86p-22, - 0x1.0d2966p-22, - 0x1.fa5b50p-23, - 0x1.dc3ae4p-23, - 0x1.bfd756p-23, - 0x1.a517dap-23, - 0x1.8be4f8p-23, - 0x1.74287ep-23, - 0x1.5dcd66p-23, - 0x1.48bfd4p-23, - 0x1.34ecf8p-23, - 0x1.224310p-23, - 0x1.10b148p-23, - }, -}; diff --git a/contrib/arm-optimized-routines/pl/math/sv_exp10f_1u5.c b/contrib/arm-optimized-routines/pl/math/sv_exp10f_1u5.c deleted file mode 100644 index 9ecde8f1aa52..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_exp10f_1u5.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Single-precision SVE 2^x function. - * - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "include/mathlib.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f32.h" - -/* For x < -SpecialBound, the result is subnormal and not handled correctly by - FEXPA. */ -#define SpecialBound 37.9 - -static const struct data -{ - float poly[5]; - float shift, log10_2, log2_10_hi, log2_10_lo, special_bound; -} data = { - /* Coefficients generated using Remez algorithm with minimisation of relative - error. - rel error: 0x1.89dafa3p-24 - abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2] - maxerr: 0.52 +0.5 ulp. */ - .poly = { 0x1.26bb16p+1f, 0x1.5350d2p+1f, 0x1.04744ap+1f, 0x1.2d8176p+0f, - 0x1.12b41ap-1f }, - /* 1.5*2^17 + 127, a shift value suitable for FEXPA. */ - .shift = 0x1.903f8p17f, - .log10_2 = 0x1.a934fp+1, - .log2_10_hi = 0x1.344136p-2, - .log2_10_lo = -0x1.ec10cp-27, - .special_bound = SpecialBound, -}; - -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) -{ - return sv_call_f32 (exp10f, x, y, special); -} - -/* Single-precision SVE exp10f routine. Implements the same algorithm - as AdvSIMD exp10f. - Worst case error is 1.02 ULPs. - _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1 - want 0x1.ba5f9cp-1. */ -svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg) -{ - const struct data *d = ptr_barrier (&data); - /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)), - with poly(r) in [1/sqrt(2), sqrt(2)] and - x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N]. */ - - /* Load some constants in quad-word chunks to minimise memory access (last - lane is wasted). */ - svfloat32_t log10_2_and_inv = svld1rq (svptrue_b32 (), &d->log10_2); - - /* n = round(x/(log10(2)/N)). */ - svfloat32_t shift = sv_f32 (d->shift); - svfloat32_t z = svmla_lane (shift, x, log10_2_and_inv, 0); - svfloat32_t n = svsub_x (pg, z, shift); - - /* r = x - n*log10(2)/N. */ - svfloat32_t r = svmls_lane (x, n, log10_2_and_inv, 1); - r = svmls_lane (r, n, log10_2_and_inv, 2); - - svbool_t special = svacgt (pg, x, d->special_bound); - svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - - /* Polynomial evaluation: poly(r) ~ exp10(r)-1. 
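The n = round(x/(log10(2)/N)) step above, like the exp and exp2 variants later in this patch, avoids an explicit integer conversion: under round-to-nearest, adding the large shift constant quantizes the sum at the unit place, and subtracting it back leaves the rounded value (the extra +127 in 1.5*2^17 + 127 positions the bits that FEXPA consumes). A minimal scalar sketch of the rounding trick, using an illustrative plain 1.5*2^23 shift rather than the FEXPA-specific constant:

#include <stdio.h>

/* Round t to the nearest integer (ties-to-even) by adding and then
   subtracting a large power-of-two shift.  While the shift is added,
   the fractional bits of t fall below the ulp and are rounded away.
   Valid for |t| < 2^22 in single precision. */
static float
round_via_shift (float t)
{
  const float shift = 0x1.8p23f; /* 1.5 * 2^23 */
  float z = t + shift;           /* rounding happens here */
  return z - shift;              /* round(t), still as a float */
}

int
main (void)
{
  printf ("%g %g\n", round_via_shift (2.7f), round_via_shift (-3.2f));
  return 0;
}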
*/ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t poly - = svmla_x (pg, svmul_x (pg, r, d->poly[0]), - sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1), r2); - - if (unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (pg, scale, scale, poly), special); - - return svmla_x (pg, scale, scale, poly); -} - -PL_SIG (SV, F, 1, exp10, -9.9, 9.9) -PL_TEST_ULP (SV_NAME_F1 (exp10), 0.52) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp10), 0, SpecialBound, 50000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp10), SpecialBound, inf, 50000) diff --git a/contrib/arm-optimized-routines/pl/math/sv_exp2f_1u6.c b/contrib/arm-optimized-routines/pl/math/sv_exp2f_1u6.c deleted file mode 100644 index 9698ff6f0682..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_exp2f_1u6.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Single-precision SVE 2^x function. - * - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "poly_sve_f32.h" -#include "pl_sig.h" -#include "pl_test.h" - -static const struct data -{ - float poly[5]; - float shift, thres; -} data = { - /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. */ - .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f, - 0x1.59977ap-10f }, - /* 1.5*2^17 + 127. */ - .shift = 0x1.903f8p17f, - /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled - correctly by FEXPA. */ - .thres = 0x1.5d5e2ap+6f, -}; - -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) -{ - return sv_call_f32 (exp2f, x, y, special); -} - -/* Single-precision SVE exp2f routine. Implements the same algorithm - as AdvSIMD exp2f. - Worst case error is 1.04 ULPs. - SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0 - want 0x1.ba7ebp+0. */ -svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) -{ - const struct data *d = ptr_barrier (&data); - /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = n + r, with r in [-1/2, 1/2]. */ - svfloat32_t shift = sv_f32 (d->shift); - svfloat32_t z = svadd_x (pg, x, shift); - svfloat32_t n = svsub_x (pg, z, shift); - svfloat32_t r = svsub_x (pg, x, n); - - svbool_t special = svacgt (pg, x, d->thres); - svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - - /* Polynomial evaluation: poly(r) ~ exp2(r)-1. - Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for - coefficients 1 to 4, and apply most significant coefficient directly. 
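The hybrid scheme named here has a direct scalar analogue: coefficients 1 to 4 are folded pairwise so the FMA chains are independent, and the leading coefficient multiplies r on its own. A sketch under the same coefficient layout (c[0] applied directly, c[1]..c[4] pairwise), mirroring the p0/p14 split in the code below:

#include <math.h>

/* poly(r) ~= exp2(r) - 1, evaluated as
   c0*r + r^2*((c1 + c2*r) + r^2*(c3 + c4*r)).
   p12 and p34 are independent and can issue in parallel. */
static float
poly_exp2m1 (float r, const float c[5])
{
  float r2 = r * r;
  float p12 = fmaf (c[2], r, c[1]);
  float p34 = fmaf (c[4], r, c[3]);
  float p14 = fmaf (p34, r2, p12);
  return fmaf (p14, r2, c[0] * r);
}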
*/ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1); - svfloat32_t p0 = svmul_x (pg, r, d->poly[0]); - svfloat32_t poly = svmla_x (pg, p0, r2, p14); - - if (unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (pg, scale, scale, poly), special); - - return svmla_x (pg, scale, scale, poly); -} - -PL_SIG (SV, F, 1, exp2, -9.9, 9.9) -PL_TEST_ULP (SV_NAME_F1 (exp2), 0.55) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), 0, Thres, 40000) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, 1, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), 1, Thres, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, inf, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, -0x1p-23, 40000) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p-23, -1, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, -0x1p23, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p23, -inf, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, ScaleThres, 40000) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -1, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, ScaleThres, 50000) -PL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -inf, 50000) diff --git a/contrib/arm-optimized-routines/pl/math/sv_expf_2u.c b/contrib/arm-optimized-routines/pl/math/sv_expf_2u.c deleted file mode 100644 index 93d705ce420a..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_expf_2u.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Single-precision vector e^x function. - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" - -static const struct data -{ - float poly[5]; - float inv_ln2, ln2_hi, ln2_lo, shift, thres; -} data = { - /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. */ - .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, - 0x1.0e4020p-7f }, - .inv_ln2 = 0x1.715476p+0f, - .ln2_hi = 0x1.62e4p-1f, - .ln2_lo = 0x1.7f7d1cp-20f, - /* 1.5*2^17 + 127. */ - .shift = 0x1.903f8p17f, - /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled - correctly by FEXPA. */ - .thres = 0x1.5d5e2ap+6f, -}; - -#define C(i) sv_f32 (d->poly[i]) -#define ExponentBias 0x3f800000 - -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) -{ - return sv_call_f32 (expf, x, y, special); -} - -/* Optimised single-precision SVE exp function. - Worst-case error is 1.04 ulp: - SV_NAME_F1 (exp)(0x1.a8eda4p+1) got 0x1.ba74bcp+4 - want 0x1.ba74bap+4. */ -svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg) -{ - const struct data *d = ptr_barrier (&data); - - /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - - /* Load some constants in quad-word chunks to minimise memory access (last - lane is wasted). */ - svfloat32_t invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->inv_ln2); - - /* n = round(x/(ln2/N)). */ - svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, invln2_and_ln2, 0); - svfloat32_t n = svsub_x (pg, z, d->shift); - - /* r = x - n*ln2/N. */ - svfloat32_t r = svmls_lane (x, n, invln2_and_ln2, 1); - r = svmls_lane (r, n, invln2_and_ln2, 2); - - /* scale = 2^(n/N). */ - svbool_t is_special_case = svacgt (pg, x, d->thres); - svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - - /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. 
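After the polynomial, all three exp variants reconstruct with a single fused multiply-add, scale + scale*poly, which forms scale*(1 + poly) without first rounding 1 + poly and losing the low bits of poly. A scalar equivalent, with ldexpf standing in for FEXPA (illustrative only, since FEXPA also folds in a fractional-power lookup):

#include <math.h>

/* exp(x) ~= scale * (1 + poly), with scale = 2^n and
   poly ~= exp(r) - 1; one fma keeps the low bits of poly. */
static float
exp_reconstruct (int n, float poly)
{
  float scale = ldexpf (1.0f, n);
  return fmaf (scale, poly, scale);
}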
*/ - svfloat32_t p12 = svmla_x (pg, C (1), C (2), r); - svfloat32_t p34 = svmla_x (pg, C (3), C (4), r); - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t p14 = svmla_x (pg, p12, p34, r2); - svfloat32_t p0 = svmul_x (pg, r, C (0)); - svfloat32_t poly = svmla_x (pg, p0, r2, p14); - - if (unlikely (svptest_any (pg, is_special_case))) - return special_case (x, svmla_x (pg, scale, scale, poly), is_special_case); - - return svmla_x (pg, scale, scale, poly); -} - -PL_SIG (SV, F, 1, exp, -9.9, 9.9) -PL_TEST_ULP (SV_NAME_F1 (exp), 0.55) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0, 0x1p-23, 40000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0x1p-23, 1, 50000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 1, 0x1p23, 50000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0x1p23, inf, 50000) diff --git a/contrib/arm-optimized-routines/pl/math/sv_expf_inline.h b/contrib/arm-optimized-routines/pl/math/sv_expf_inline.h deleted file mode 100644 index 0ef4e0fda946..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_expf_inline.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * SVE helper for single-precision routines which calculate exp(x) and do - * not need special-case handling - * - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#ifndef PL_MATH_SV_EXPF_INLINE_H -#define PL_MATH_SV_EXPF_INLINE_H - -#include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" - -struct sv_expf_data -{ - float poly[5]; - float inv_ln2, ln2_hi, ln2_lo, shift; -}; - -/* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. Shift is 1.5*2^17 + 127. */ -#define SV_EXPF_DATA \ - { \ - .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, \ - 0x1.0e4020p-7f }, \ - \ - .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \ - .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f, \ - } - -#define C(i) sv_f32 (d->poly[i]) - -static inline svfloat32_t -expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) -{ - /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - - /* Load some constants in quad-word chunks to minimise memory access. */ - svfloat32_t c4_invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->poly[4]); - - /* n = round(x/(ln2/N)). */ - svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, c4_invln2_and_ln2, 1); - svfloat32_t n = svsub_x (pg, z, d->shift); - - /* r = x - n*ln2/N. */ - svfloat32_t r = svmls_lane (x, n, c4_invln2_and_ln2, 2); - r = svmls_lane (r, n, c4_invln2_and_ln2, 3); - - /* scale = 2^(n/N). */ - svfloat32_t scale = svexpa (svreinterpret_u32_f32 (z)); - - /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */ - svfloat32_t p12 = svmla_x (pg, C (1), C (2), r); - svfloat32_t p34 = svmla_lane (C (3), r, c4_invln2_and_ln2, 0); - svfloat32_t r2 = svmul_f32_x (pg, r, r); - svfloat32_t p14 = svmla_x (pg, p12, p34, r2); - svfloat32_t p0 = svmul_f32_x (pg, r, C (0)); - svfloat32_t poly = svmla_x (pg, p0, r2, p14); - - return svmla_x (pg, scale, scale, poly); -} - -#endif // PL_MATH_SV_EXPF_INLINE_H \ No newline at end of file diff --git a/contrib/arm-optimized-routines/pl/math/sv_log10_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_log10_2u5.c deleted file mode 100644 index f55e068fd442..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_log10_2u5.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Double-precision SVE log10(x) function. - * - * Copyright (c) 2022-2023, Arm Limited. 
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f64.h" - -#define Min 0x0010000000000000 -#define Max 0x7ff0000000000000 -#define Thres 0x7fe0000000000000 /* Max - Min. */ -#define Off 0x3fe6900900000000 -#define N (1 << V_LOG10_TABLE_BITS) - -static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) -{ - return sv_call_f64 (log10, x, y, special); -} - -/* SVE log10 algorithm. - Maximum measured error is 2.46 ulps. - SV_NAME_D1 (log10)(0x1.131956cd4b627p+0) got 0x1.fffbdf6eaa669p-6 - want 0x1.fffbdf6eaa667p-6. */ -svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg) -{ - svuint64_t ix = svreinterpret_u64 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres); - - /* x = 2^k z; where z is in range [Off,2*Off) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - svuint64_t tmp = svsub_x (pg, ix, Off); - svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG10_TABLE_BITS); - i = svand_x (pg, i, (N - 1) << 1); - svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); - svfloat64_t z = svreinterpret_f64 ( - svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52))); - - /* log(x) = k*log(2) + log(c) + log(z/c). */ - svfloat64_t invc = svld1_gather_index (pg, &__v_log10_data.table[0].invc, i); - svfloat64_t logc - = svld1_gather_index (pg, &__v_log10_data.table[0].log10c, i); - - /* We approximate log(z/c) with a polynomial P(x) ~= log(x + 1): - r = z/c - 1 (we look up precomputed 1/c) - log(z/c) ~= P(r). */ - svfloat64_t r = svmad_x (pg, invc, z, -1.0); - - /* hi = log(c) + k*log(2). */ - svfloat64_t w = svmla_x (pg, logc, r, __v_log10_data.invln10); - svfloat64_t hi = svmla_x (pg, w, k, __v_log10_data.log10_2); - - /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = sv_pw_horner_4_f64_x (pg, r, r2, __v_log10_data.poly); - - if (unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y), - special); - return svmla_x (pg, hi, r2, y); -} - -PL_SIG (SV, D, 1, log10, 0.01, 11.1) -PL_TEST_ULP (SV_NAME_D1 (log10), 1.97) -PL_TEST_INTERVAL (SV_NAME_D1 (log10), -0.0, -0x1p126, 100) -PL_TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-149, 0x1p-126, 4000) -PL_TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-126, 0x1p-23, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-23, 1.0, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (log10), 1.0, 100, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (log10), 100, inf, 50000) diff --git a/contrib/arm-optimized-routines/pl/math/sv_log1pf_1u3.c b/contrib/arm-optimized-routines/pl/math/sv_log1pf_1u3.c deleted file mode 100644 index ea1a3dbf723a..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_log1pf_1u3.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Single-precision vector log(x + 1) function. - * - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f32.h" - -static const struct data -{ - float poly[8]; - float ln2, exp_bias; - uint32_t four, three_quarters; -} data = {.poly = {/* Do not store first term of polynomial, which is -0.5, as - this can be fmov-ed directly instead of including it in - the main load-and-mla polynomial schedule. 
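In scalar terms the trick reads as follows: the -0.5 coefficient becomes an fma immediate instead of a ninth table entry, so it never enters the load-and-mla schedule. A shape-only sketch, where p_high stands for the already-evaluated higher-order part of the polynomial (c0 + c1*m + ...; the names are illustrative, not from this file):

#include <math.h>

/* log1p(m) ~= m - 0.5*m^2 + m^3 * p_high(m): the -0.5 term is
   supplied as an fma addend rather than stored with the other
   coefficients. */
static float
log1p_tail (float m, float p_high)
{
  float p = fmaf (m, p_high, -0.5f); /* -0.5 + m*p_high */
  return fmaf (m * m, p, m);         /* m + m^2 * p */
}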
*/ - 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f, - -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, - 0x1.abcb6p-4f, -0x1.6f0d5ep-5f}, - .ln2 = 0x1.62e43p-1f, - .exp_bias = 0x1p-23f, - .four = 0x40800000, - .three_quarters = 0x3f400000}; - -#define SignExponentMask 0xff800000 - -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) -{ - return sv_call_f32 (log1pf, x, y, special); -} - -/* Vector log1pf approximation using polynomial on reduced interval. Worst-case - error is 1.27 ULP very close to 0.5. - _ZGVsMxv_log1pf(0x1.fffffep-2) got 0x1.9f324p-2 - want 0x1.9f323ep-2. */ -svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg) -{ - const struct data *d = ptr_barrier (&data); - /* x < -1, Inf/Nan. */ - svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000); - special = svorn_z (pg, special, svcmpge (pg, x, -1)); - - /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m - is in [-0.25, 0.5]): - log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). - - We approximate log1p(m) with a polynomial, then scale by - k*log(2). Instead of doing this directly, we use an intermediate - scale factor s = 4*k*log(2) to ensure the scale is representable - as a normalised fp32 number. */ - svfloat32_t m = svadd_x (pg, x, 1); - - /* Choose k to scale x to the range [-1/4, 1/2]. */ - svint32_t k - = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters), - sv_s32 (SignExponentMask)); - - /* Scale x by exponent manipulation. */ - svfloat32_t m_scale = svreinterpret_f32 ( - svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k))); - - /* Scale up to ensure that the scale factor is representable as normalised - fp32 number, and scale m down accordingly. */ - svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four)); - m_scale = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1), s, 0.25)); - - /* Evaluate polynomial on reduced interval. */ - svfloat32_t ms2 = svmul_x (pg, m_scale, m_scale), - ms4 = svmul_x (pg, ms2, ms2); - svfloat32_t p = sv_estrin_7_f32_x (pg, m_scale, ms2, ms4, d->poly); - p = svmad_x (pg, m_scale, p, -0.5); - p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p)); - - /* The scale factor to be applied back at the end - by multiplying float(k) - by 2^-23 we get the unbiased exponent of k. */ - svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->exp_bias); - - /* Apply the scaling back. */ - svfloat32_t y = svmla_x (pg, p, scale_back, d->ln2); - - if (unlikely (svptest_any (pg, special))) - return special_case (x, y, special); - - return y; -} - -PL_SIG (SV, F, 1, log1p, -0.9, 10.0) -PL_TEST_ULP (SV_NAME_F1 (log1p), 0.77) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0, 0x1p-23, 5000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0x1p-23, 1, 5000) -PL_TEST_INTERVAL (SV_NAME_F1 (log1p), 1, inf, 10000) -PL_TEST_INTERVAL (SV_NAME_F1 (log1p), -1, -inf, 10) diff --git a/contrib/arm-optimized-routines/pl/math/sv_log1pf_inline.h b/contrib/arm-optimized-routines/pl/math/sv_log1pf_inline.h deleted file mode 100644 index d13b094f6b5d..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_log1pf_inline.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Helper for SVE routines which calculate log(1 + x) and do not - * need special-case handling - * - * Copyright (c) 2023, Arm Limited. 
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#ifndef PL_MATH_SV_LOG1PF_INLINE_H -#define PL_MATH_SV_LOG1PF_INLINE_H - -#include "v_math.h" -#include "math_config.h" -#include "poly_sve_f32.h" - -static const struct sv_log1pf_data -{ - float32_t poly[9]; - float32_t ln2; - float32_t scale_back; -} sv_log1pf_data = { - /* Polynomial generated using FPMinimax in [-0.25, 0.5]. */ - .poly = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f, - -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f, - -0x1.6f0d5ep-5f }, - .scale_back = 0x1.0p-23f, - .ln2 = 0x1.62e43p-1f, -}; - -static inline svfloat32_t -eval_poly (svfloat32_t m, const float32_t *c, svbool_t pg) -{ - svfloat32_t p_12 = svmla_x (pg, sv_f32 (c[0]), m, sv_f32 (c[1])); - svfloat32_t m2 = svmul_x (pg, m, m); - svfloat32_t q = svmla_x (pg, m, m2, p_12); - svfloat32_t p = sv_pw_horner_6_f32_x (pg, m, m2, c + 2); - p = svmul_x (pg, m2, p); - - return svmla_x (pg, q, m2, p); -} - -static inline svfloat32_t -sv_log1pf_inline (svfloat32_t x, svbool_t pg) -{ - const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data); - - svfloat32_t m = svadd_x (pg, x, 1.0f); - - svint32_t ks = svsub_x (pg, svreinterpret_s32 (m), - svreinterpret_s32 (svdup_f32 (0.75f))); - ks = svand_x (pg, ks, 0xff800000); - svuint32_t k = svreinterpret_u32 (ks); - svfloat32_t s = svreinterpret_f32 ( - svsub_x (pg, svreinterpret_u32 (svdup_f32 (4.0f)), k)); - - svfloat32_t m_scale - = svreinterpret_f32 (svsub_x (pg, svreinterpret_u32 (x), k)); - m_scale - = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1.0f), sv_f32 (0.25f), s)); - svfloat32_t p = eval_poly (m_scale, d->poly, pg); - svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->scale_back); - return svmla_x (pg, p, scale_back, d->ln2); -} - -#endif // PL_MATH_SV_LOG1PF_INLINE_H \ No newline at end of file diff --git a/contrib/arm-optimized-routines/pl/math/sv_log2_3u.c b/contrib/arm-optimized-routines/pl/math/sv_log2_3u.c deleted file mode 100644 index 0775a39cc85d..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_log2_3u.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Double-precision SVE log2 function. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_sve_f64.h" - -#define N (1 << V_LOG2_TABLE_BITS) -#define Off 0x3fe6900900000000 -#define Max (0x7ff0000000000000) -#define Min (0x0010000000000000) -#define Thresh (0x7fe0000000000000) /* Max - Min. */ - -static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp) -{ - return sv_call_f64 (log2, x, y, cmp); -} - -/* Double-precision SVE log2 routine. - Implements the same algorithm as AdvSIMD log10, with coefficients and table - entries scaled in extended precision. - The maximum observed error is 2.58 ULP: - SV_NAME_D1 (log2)(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5 - want 0x1.fffb34198d9ddp-5. */ -svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg) -{ - svuint64_t ix = svreinterpret_u64 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh); - - /* x = 2^k z; where z is in range [Off,2*Off) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. 
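The split itself is pure integer manipulation of the bit pattern, shared by the log, log2 and log10 routines in this patch. A scalar sketch (TABLE_BITS and the helper name are illustrative; the offset is the Off defined above, and the index is halved relative to the vector code, which doubles it to address interleaved table pairs):

#include <stdint.h>
#include <string.h>

#define TABLE_BITS 7
#define OFF 0x3fe6900900000000ULL

/* Decompose x = 2^k * z with z in [Off, 2*Off), exactly; idx picks
   the subinterval whose centre c has 1/c and log(c) tabulated. */
static void
log_split (double x, int64_t *k, uint64_t *idx, double *z)
{
  uint64_t ix;
  memcpy (&ix, &x, sizeof ix);
  uint64_t tmp = ix - OFF;
  *idx = (tmp >> (52 - TABLE_BITS)) % (1 << TABLE_BITS);
  *k = (int64_t) tmp >> 52;              /* arithmetic shift */
  uint64_t iz = ix - (tmp & (0xfffULL << 52));
  memcpy (z, &iz, sizeof iz);
}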
*/ - svuint64_t tmp = svsub_x (pg, ix, Off); - svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG2_TABLE_BITS); - i = svand_x (pg, i, (N - 1) << 1); - svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); - svfloat64_t z = svreinterpret_f64 ( - svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52))); - - svfloat64_t invc = svld1_gather_index (pg, &__v_log2_data.table[0].invc, i); - svfloat64_t log2c - = svld1_gather_index (pg, &__v_log2_data.table[0].log2c, i); - - /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. */ - - svfloat64_t r = svmad_x (pg, invc, z, -1.0); - svfloat64_t w = svmla_x (pg, log2c, r, __v_log2_data.invln2); - - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = sv_pw_horner_4_f64_x (pg, r, r2, __v_log2_data.poly); - w = svadd_x (pg, k, w); - - if (unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), w, r2, y), - special); - return svmla_x (pg, w, r2, y); -} - -PL_SIG (SV, D, 1, log2, 0.01, 11.1) -PL_TEST_ULP (SV_NAME_D1 (log2), 2.09) -PL_TEST_EXPECT_FENV_ALWAYS (SV_NAME_D1 (log2)) -PL_TEST_INTERVAL (SV_NAME_D1 (log2), -0.0, -0x1p126, 1000) -PL_TEST_INTERVAL (SV_NAME_D1 (log2), 0.0, 0x1p-126, 4000) -PL_TEST_INTERVAL (SV_NAME_D1 (log2), 0x1p-126, 0x1p-23, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (log2), 0x1p-23, 1.0, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (log2), 1.0, 100, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (log2), 100, inf, 50000) diff --git a/contrib/arm-optimized-routines/pl/math/sv_log_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_log_2u5.c deleted file mode 100644 index 2530c9e3f62c..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_log_2u5.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Double-precision SVE log(x) function. - * - * Copyright (c) 2020-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" - -#define P(i) sv_f64 (__v_log_data.poly[i]) -#define N (1 << V_LOG_TABLE_BITS) -#define Off (0x3fe6900900000000) -#define MaxTop (0x7ff) -#define MinTop (0x001) -#define ThreshTop (0x7fe) /* MaxTop - MinTop. */ - -static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp) -{ - return sv_call_f64 (log, x, y, cmp); -} - -/* SVE port of AdvSIMD log algorithm. - Maximum measured error is 2.17 ulp: - SV_NAME_D1 (log)(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2 - want 0x1.ffffff1cca045p-2. */ -svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg) -{ - svuint64_t ix = svreinterpret_u64 (x); - svuint64_t top = svlsr_x (pg, ix, 52); - svbool_t cmp = svcmpge (pg, svsub_x (pg, top, MinTop), sv_u64 (ThreshTop)); - - /* x = 2^k z; where z is in range [Off,2*Off) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - svuint64_t tmp = svsub_x (pg, ix, Off); - /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N. - The actual value of i is double this due to table layout. */ - svuint64_t i - = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1); - svint64_t k - = svasr_x (pg, svreinterpret_s64 (tmp), 52); /* Arithmetic shift. */ - svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)); - svfloat64_t z = svreinterpret_f64 (iz); - /* Lookup in 2 global lists (length N). */ - svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i); - svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i); - - /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. 
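A scalar model of this split may help (sketch only, valid for positive normal x; the helper name is illustrative). The routines pick invc = 1/c and logc = log(c) from the __v_log_data table so that r = z/c - 1 stays tiny; the model below degenerates to c = 1 and leans on libm log1p instead.

#include <math.h>
#include <stdint.h>

#define OFF 0x3fe6900900000000ULL

static double
log_split_model (double x)
{
  union { double f; uint64_t u; } u = { .f = x };
  uint64_t tmp = u.u - OFF;
  int64_t k = (int64_t) tmp >> 52;   /* arithmetic shift: exponent of x/z.  */
  u.u -= tmp & (0xfffULL << 52);     /* z = x / 2^k, exact, z in [Off, 2*Off).  */
  double r = u.f - 1.0;              /* r = z/c - 1 with c = 1.  */
  /* log(x) = log1p(r) + log(c) + k*ln2, with log(c) = 0 here.  */
  return log1p (r) + (double) k * 0x1.62e42fefa39efp-1;
}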
 */ - svfloat64_t r = svmad_x (pg, invc, z, -1); - svfloat64_t kd = svcvt_f64_x (pg, k); - /* hi = r + log(c) + k*Ln2. */ - svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, __v_log_data.ln2); - /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = svmla_x (pg, P (2), r, P (3)); - svfloat64_t p = svmla_x (pg, P (0), r, P (1)); - y = svmla_x (pg, y, r2, P (4)); - y = svmla_x (pg, p, r2, y); - - if (unlikely (svptest_any (pg, cmp))) - return special_case (x, svmla_x (svnot_z (pg, cmp), hi, r2, y), cmp); - return svmla_x (pg, hi, r2, y); -} - -PL_SIG (SV, D, 1, log, 0.01, 11.1) -PL_TEST_ULP (SV_NAME_D1 (log), 1.68) -PL_TEST_INTERVAL (SV_NAME_D1 (log), -0.0, -inf, 1000) -PL_TEST_INTERVAL (SV_NAME_D1 (log), 0, 0x1p-149, 1000) -PL_TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-149, 0x1p-126, 4000) -PL_TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-126, 0x1p-23, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-23, 1.0, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (log), 1.0, 100, 50000) -PL_TEST_INTERVAL (SV_NAME_D1 (log), 100, inf, 50000) diff --git a/contrib/arm-optimized-routines/pl/math/sv_tan_3u5.c b/contrib/arm-optimized-routines/pl/math/sv_tan_3u5.c deleted file mode 100644 index 746396e98a10..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_tan_3u5.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Double-precision SVE tan(x) function. - * - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "poly_sve_f64.h" -#include "pl_sig.h" -#include "pl_test.h" - -static const struct data -{ - double poly[9]; - double half_pi_hi, half_pi_lo, inv_half_pi, range_val, shift; -} data = { - /* Polynomial generated with FPMinimax. */ - .poly = { 0x1.5555555555556p-2, 0x1.1111111110a63p-3, 0x1.ba1ba1bb46414p-5, - 0x1.664f47e5b5445p-6, 0x1.226e5e5ecdfa3p-7, 0x1.d6c7ddbf87047p-9, - 0x1.7ea75d05b583ep-10, 0x1.289f22964a03cp-11, - 0x1.4e4fd14147622p-12, }, - .half_pi_hi = 0x1.921fb54442d18p0, - .half_pi_lo = 0x1.1a62633145c07p-54, - .inv_half_pi = 0x1.45f306dc9c883p-1, - .range_val = 0x1p23, - .shift = 0x1.8p52, -}; - -static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) -{ - return sv_call_f64 (tan, x, y, special); -} - -/* Vector approximation for double-precision tan. - Maximum measured error is 3.48 ULP: - _ZGVsMxv_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37 - want -0x1.f6ccd8ecf7deap+37. */ -svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg) -{ - const struct data *dat = ptr_barrier (&data); - - /* Invert condition to catch NaNs and Infs as well as large values. */ - svbool_t special = svnot_z (pg, svaclt (pg, x, dat->range_val)); - - /* q = nearest integer to 2 * x / pi. */ - svfloat64_t shift = sv_f64 (dat->shift); - svfloat64_t q = svmla_x (pg, shift, x, dat->inv_half_pi); - q = svsub_x (pg, q, shift); - svint64_t qi = svcvt_s64_x (pg, q); - - /* Use q to reduce x to r in [-pi/4, pi/4], by: - r = x - q * pi/2, in extended precision. */ - svfloat64_t r = x; - svfloat64_t half_pi = svld1rq (svptrue_b64 (), &dat->half_pi_hi); - r = svmls_lane (r, q, half_pi, 0); - r = svmls_lane (r, q, half_pi, 1); - /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle - formula. */ - r = svmul_x (pg, r, 0.5); - - /* Approximate tan(r) using order 8 polynomial. - tan(x) is odd, so polynomial has the form: - tan(x) ~= x + C0 * x^3 + C1 * x^5 + C2 * x^7 + ... 
- Hence we first approximate P(r) = C1 + C2 * r^2 + C3 * r^4 + ... - Then compute the approximation by: - tan(r) ~= r + r^3 * (C0 + r^2 * P(r)). */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t r4 = svmul_x (pg, r2, r2); - svfloat64_t r8 = svmul_x (pg, r4, r4); - /* Use offset version coeff array by 1 to evaluate from C1 onwards. */ - svfloat64_t p = sv_estrin_7_f64_x (pg, r2, r4, r8, dat->poly + 1); - p = svmad_x (pg, p, r2, dat->poly[0]); - p = svmla_x (pg, r, r2, svmul_x (pg, p, r)); - - /* Recombination uses double-angle formula: - tan(2x) = 2 * tan(x) / (1 - (tan(x))^2) - and reciprocity around pi/2: - tan(x) = 1 / (tan(pi/2 - x)) - to assemble result using change-of-sign and conditional selection of - numerator/denominator dependent on odd/even-ness of q (hence quadrant). */ - svbool_t use_recip - = svcmpeq (pg, svand_x (pg, svreinterpret_u64 (qi), 1), 0); - - svfloat64_t n = svmad_x (pg, p, p, -1); - svfloat64_t d = svmul_x (pg, p, 2); - svfloat64_t swap = n; - n = svneg_m (n, use_recip, d); - d = svsel (use_recip, swap, d); - if (unlikely (svptest_any (pg, special))) - return special_case (x, svdiv_x (svnot_z (pg, special), n, d), special); - return svdiv_x (pg, n, d); -} - -PL_SIG (SV, D, 1, tan, -3.1, 3.1) -PL_TEST_ULP (SV_NAME_D1 (tan), 2.99) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tan), 0, 0x1p23, 500000) -PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tan), 0x1p23, inf, 5000) diff --git a/contrib/arm-optimized-routines/pl/math/sv_tanhf_2u6.c b/contrib/arm-optimized-routines/pl/math/sv_tanhf_2u6.c deleted file mode 100644 index 988a56de0b2e..000000000000 --- a/contrib/arm-optimized-routines/pl/math/sv_tanhf_2u6.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Single-precision SVE tanh(x) function. - * - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "sv_math.h" -#include "pl_sig.h" -#include "pl_test.h" - -#include "sv_expm1f_inline.h" - -static const struct data -{ - struct sv_expm1f_data expm1f_consts; - uint32_t boring_bound, onef; -} data = { - .expm1f_consts = SV_EXPM1F_DATA, - /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */ - .boring_bound = 0x41102cb3, - .onef = 0x3f800000, -}; - -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) -{ - return sv_call_f32 (tanhf, x, y, special); -} - -/* Approximation for single-precision SVE tanh(x), using a simplified - version of expm1f. The maximum error is 2.57 ULP: - _ZGVsMxv_tanhf (0x1.fc1832p-5) got 0x1.fb71a4p-5 - want 0x1.fb71aap-5. */ -svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg) -{ - const struct data *d = ptr_barrier (&data); - - svfloat32_t ax = svabs_x (pg, x); - svuint32_t iax = svreinterpret_u32 (ax); - svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax); - svbool_t is_boring = svcmpgt (pg, iax, d->boring_bound); - svfloat32_t boring = svreinterpret_f32 (svorr_x (pg, sign, d->onef)); - - svbool_t special = svcmpgt (pg, iax, 0x7f800000); - - /* tanh(x) = (e^2x - 1) / (e^2x + 1). 
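Looking back at the tan routine above, the recombination step can be modelled in scalar C. This is a sketch only: the helper name is illustrative, libm tan stands in for the polynomial, and the constants are the inv_half_pi and half_pi_hi values from the data struct (the routine also removes a half_pi_lo part).

#include <math.h>

/* Model of the reconstruction: with q = round(2x/pi) and
   r = (x - q * pi/2) / 2 in [-pi/8, pi/8], let p ~= tan(r).  Then
   tan(2r) = 2p / (1 - p^2), and for odd q, tan(x) = -1 / tan(2r).  */
static double
tan_recombine_model (double x)
{
  double q = round (x * 0x1.45f306dc9c883p-1);    /* 2/pi.  */
  double r = (x - q * 0x1.921fb54442d18p0) / 2.0; /* pi/2 hi part only.  */
  double p = tan (r);                             /* poly stands in.  */
  return ((long) q & 1) ? (p * p - 1.0) / (2.0 * p)
                        : (2.0 * p) / (1.0 - p * p);
}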
 */ - svfloat32_t q = expm1f_inline (svmul_x (pg, x, 2.0), pg, &d->expm1f_consts); - svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0)); - if (unlikely (svptest_any (pg, special))) - return special_case (x, svsel_f32 (is_boring, boring, y), special); - return svsel_f32 (is_boring, boring, y); -} - -PL_SIG (SV, F, 1, tanh, -10.0, 10.0) -PL_TEST_ULP (SV_NAME_F1 (tanh), 2.07) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0, 0x1p-23, 1000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0x1p-23, 0x1.205966p+3, 100000) -PL_TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0x1.205966p+3, inf, 100) diff --git a/contrib/arm-optimized-routines/pl/math/test/mathbench_funcs.h b/contrib/arm-optimized-routines/pl/math/test/mathbench_funcs.h deleted file mode 100644 index f2710a979d40..000000000000 --- a/contrib/arm-optimized-routines/pl/math/test/mathbench_funcs.h +++ /dev/null @@ -1,87 +0,0 @@ -// clang-format off -/* - * Function entries for mathbench. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#define _ZSF1(fun, a, b) F(fun##f, a, b) -#define _ZSD1(f, a, b) D(f, a, b) - -#if defined(__vpcs) && __aarch64__ - -#define _ZVF1(fun, a, b) VNF(_ZGVnN4v_##fun##f, a, b) -#define _ZVD1(f, a, b) VND(_ZGVnN2v_##f, a, b) - -#else - -#define _ZVF1(f, a, b) -#define _ZVD1(f, a, b) - -#endif - -#if WANT_SVE_MATH - -#define _ZSVF1(fun, a, b) SVF(_ZGVsMxv_##fun##f, a, b) -#define _ZSVD1(f, a, b) SVD(_ZGVsMxv_##f, a, b) - -#else - -#define _ZSVF1(f, a, b) -#define _ZSVD1(f, a, b) - -#endif - -/* No auto-generated wrappers for binary functions - they have to be - manually defined in mathbench_wrappers.h. We have to define silent - macros for them anyway as they will be emitted by PL_SIG. */ -#define _ZSF2(...) -#define _ZSD2(...) -#define _ZVF2(...) -#define _ZVD2(...) -#define _ZSVF2(...) -#define _ZSVD2(...) - -#include "mathbench_funcs_gen.h" - -/* PL_SIG only emits entries for unary functions, since if a function - needs to be wrapped in mathbench there is no way for it to know the - name of the wrapper. Add entries for binary functions, or any other - exotic signatures that need wrapping, below. 
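For the tanh routine above, the same identity reads naturally in scalar form. A minimal sketch, with libm expm1f standing in for the inlined SVE helper and a hypothetical function name:

#include <math.h>

/* tanh(x) = (e^2x - 1) / (e^2x + 1) = expm1(2x) / (expm1(2x) + 2).  */
static float
tanhf_model (float x)
{
  float q = expm1f (2.0f * x);
  return q / (q + 2.0f);
}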
*/ - -{"atan2f", 'f', 0, -10.0, 10.0, {.f = atan2f_wrap}}, -{"atan2", 'd', 0, -10.0, 10.0, {.d = atan2_wrap}}, -{"powi", 'd', 0, 0.01, 11.1, {.d = powi_wrap}}, - -{"_ZGVnN4vv_atan2f", 'f', 'n', -10.0, 10.0, {.vnf = _Z_atan2f_wrap}}, -{"_ZGVnN2vv_atan2", 'd', 'n', -10.0, 10.0, {.vnd = _Z_atan2_wrap}}, -{"_ZGVnN4vv_hypotf", 'f', 'n', -10.0, 10.0, {.vnf = _Z_hypotf_wrap}}, -{"_ZGVnN2vv_hypot", 'd', 'n', -10.0, 10.0, {.vnd = _Z_hypot_wrap}}, -{"_ZGVnN2vv_pow", 'd', 'n', -10.0, 10.0, {.vnd = xy_Z_pow}}, -{"x_ZGVnN2vv_pow", 'd', 'n', -10.0, 10.0, {.vnd = x_Z_pow}}, -{"y_ZGVnN2vv_pow", 'd', 'n', -10.0, 10.0, {.vnd = y_Z_pow}}, -{"_ZGVnN4vl4l4_sincosf", 'f', 'n', -3.1, 3.1, {.vnf = _Z_sincosf_wrap}}, -{"_ZGVnN2vl8l8_sincos", 'd', 'n', -3.1, 3.1, {.vnd = _Z_sincos_wrap}}, -{"_ZGVnN4v_cexpif", 'f', 'n', -3.1, 3.1, {.vnf = _Z_cexpif_wrap}}, -{"_ZGVnN2v_cexpi", 'd', 'n', -3.1, 3.1, {.vnd = _Z_cexpi_wrap}}, - -#if WANT_SVE_MATH -{"_ZGVsMxvv_atan2f", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_atan2f_wrap}}, -{"_ZGVsMxvv_atan2", 'd', 's', -10.0, 10.0, {.svd = _Z_sv_atan2_wrap}}, -{"_ZGVsMxvv_hypotf", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_hypotf_wrap}}, -{"_ZGVsMxvv_hypot", 'd', 's', -10.0, 10.0, {.svd = _Z_sv_hypot_wrap}}, -{"_ZGVsMxvv_powi", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_powi_wrap}}, -{"_ZGVsMxvv_powk", 'd', 's', -10.0, 10.0, {.svd = _Z_sv_powk_wrap}}, -{"_ZGVsMxvv_powf", 'f', 's', -10.0, 10.0, {.svf = xy_Z_sv_powf}}, -{"x_ZGVsMxvv_powf", 'f', 's', -10.0, 10.0, {.svf = x_Z_sv_powf}}, -{"y_ZGVsMxvv_powf", 'f', 's', -10.0, 10.0, {.svf = y_Z_sv_powf}}, -{"_ZGVsMxvv_pow", 'd', 's', -10.0, 10.0, {.svd = xy_Z_sv_pow}}, -{"x_ZGVsMxvv_pow", 'd', 's', -10.0, 10.0, {.svd = x_Z_sv_pow}}, -{"y_ZGVsMxvv_pow", 'd', 's', -10.0, 10.0, {.svd = y_Z_sv_pow}}, -{"_ZGVsMxvl4l4_sincosf", 'f', 's', -3.1, 3.1, {.svf = _Z_sv_sincosf_wrap}}, -{"_ZGVsMxvl8l8_sincos", 'd', 's', -3.1, 3.1, {.svd = _Z_sv_sincos_wrap}}, -{"_ZGVsMxv_cexpif", 'f', 's', -3.1, 3.1, {.svf = _Z_sv_cexpif_wrap}}, -{"_ZGVsMxv_cexpi", 'd', 's', -3.1, 3.1, {.svd = _Z_sv_cexpi_wrap}}, -#endif - // clang-format on diff --git a/contrib/arm-optimized-routines/pl/math/test/mathbench_wrappers.h b/contrib/arm-optimized-routines/pl/math/test/mathbench_wrappers.h deleted file mode 100644 index fe7f8963cdee..000000000000 --- a/contrib/arm-optimized-routines/pl/math/test/mathbench_wrappers.h +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Function wrappers for mathbench. - * - * Copyright (c) 2022-2023, Arm Limited. 
 - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -static double -atan2_wrap (double x) -{ - return atan2 (5.0, x); -} - -static float -atan2f_wrap (float x) -{ - return atan2f (5.0f, x); -} - -static double -powi_wrap (double x) -{ - return __builtin_powi (x, (int) round (x)); -} - -#if __aarch64__ && defined(__vpcs) - -__vpcs static v_double -_Z_atan2_wrap (v_double x) -{ - return _ZGVnN2vv_atan2 (v_double_dup (5.0), x); -} - -__vpcs static v_float -_Z_atan2f_wrap (v_float x) -{ - return _ZGVnN4vv_atan2f (v_float_dup (5.0f), x); -} - -__vpcs static v_float -_Z_hypotf_wrap (v_float x) -{ - return _ZGVnN4vv_hypotf (v_float_dup (5.0f), x); -} - -__vpcs static v_double -_Z_hypot_wrap (v_double x) -{ - return _ZGVnN2vv_hypot (v_double_dup (5.0), x); -} - -__vpcs static v_double -xy_Z_pow (v_double x) -{ - return _ZGVnN2vv_pow (x, x); -} - -__vpcs static v_double -x_Z_pow (v_double x) -{ - return _ZGVnN2vv_pow (x, v_double_dup (23.4)); -} - -__vpcs static v_double -y_Z_pow (v_double x) -{ - return _ZGVnN2vv_pow (v_double_dup (2.34), x); -} - -__vpcs static v_float -_Z_sincosf_wrap (v_float x) -{ - v_float s, c; - _ZGVnN4vl4l4_sincosf (x, &s, &c); - return s + c; -} - -__vpcs static v_float -_Z_cexpif_wrap (v_float x) -{ - __f32x4x2_t sc = _ZGVnN4v_cexpif (x); - return sc.val[0] + sc.val[1]; -} - -__vpcs static v_double -_Z_sincos_wrap (v_double x) -{ - v_double s, c; - _ZGVnN2vl8l8_sincos (x, &s, &c); - return s + c; -} - -__vpcs static v_double -_Z_cexpi_wrap (v_double x) -{ - __f64x2x2_t sc = _ZGVnN2v_cexpi (x); - return sc.val[0] + sc.val[1]; -} - -#endif // __aarch64__ && __vpcs - -#if WANT_SVE_MATH - -static sv_float -_Z_sv_atan2f_wrap (sv_float x, sv_bool pg) -{ - return _ZGVsMxvv_atan2f (x, svdup_f32 (5.0f), pg); -} - -static sv_double -_Z_sv_atan2_wrap (sv_double x, sv_bool pg) -{ - return _ZGVsMxvv_atan2 (x, svdup_f64 (5.0), pg); -} - -static sv_float -_Z_sv_hypotf_wrap (sv_float x, sv_bool pg) -{ - return _ZGVsMxvv_hypotf (x, svdup_f32 (5.0), pg); -} - -static sv_double -_Z_sv_hypot_wrap (sv_double x, sv_bool pg) -{ - return _ZGVsMxvv_hypot (x, svdup_f64 (5.0), pg); -} - -static sv_float -_Z_sv_powi_wrap (sv_float x, sv_bool pg) -{ - return _ZGVsMxvv_powi (x, svcvt_s32_f32_x (pg, x), pg); -} - -static sv_double -_Z_sv_powk_wrap (sv_double x, sv_bool pg) -{ - return _ZGVsMxvv_powk (x, svcvt_s64_f64_x (pg, x), pg); -} - -static sv_float -xy_Z_sv_powf (sv_float x, sv_bool pg) -{ - return _ZGVsMxvv_powf (x, x, pg); -} - -static sv_float -x_Z_sv_powf (sv_float x, sv_bool pg) -{ - return _ZGVsMxvv_powf (x, svdup_f32 (23.4f), pg); -} - -static sv_float -y_Z_sv_powf (sv_float x, sv_bool pg) -{ - return _ZGVsMxvv_powf (svdup_f32 (2.34f), x, pg); -} - -static sv_double -xy_Z_sv_pow (sv_double x, sv_bool pg) -{ - return _ZGVsMxvv_pow (x, x, pg); -} - -static sv_double -x_Z_sv_pow (sv_double x, sv_bool pg) -{ - return _ZGVsMxvv_pow (x, svdup_f64 (23.4), pg); -} - -static sv_double -y_Z_sv_pow (sv_double x, sv_bool pg) -{ - return _ZGVsMxvv_pow (svdup_f64 (2.34), x, pg); -} - -static sv_float -_Z_sv_sincosf_wrap (sv_float x, sv_bool pg) -{ - float s[svcntw ()], c[svcntw ()]; - _ZGVsMxvl4l4_sincosf (x, s, c, pg); - return svadd_x (pg, svld1 (pg, s), svld1 (pg, c)); -} - -static sv_float -_Z_sv_cexpif_wrap (sv_float x, sv_bool pg) -{ - svfloat32x2_t sc = _ZGVsMxv_cexpif (x, pg); - return svadd_x (pg, svget2 (sc, 0), svget2 (sc, 1)); -} - -static sv_double -_Z_sv_sincos_wrap (sv_double x, sv_bool pg) -{ - double s[svcntd ()], c[svcntd ()]; - _ZGVsMxvl8l8_sincos (x, s, c, pg); 
 - return svadd_x (pg, svld1 (pg, s), svld1 (pg, c)); -} - -static sv_double -_Z_sv_cexpi_wrap (sv_double x, sv_bool pg) -{ - svfloat64x2_t sc = _ZGVsMxv_cexpi (x, pg); - return svadd_x (pg, svget2 (sc, 0), svget2 (sc, 1)); -} - -#endif // WANT_SVE_MATH diff --git a/contrib/arm-optimized-routines/pl/math/test/pl_test.h b/contrib/arm-optimized-routines/pl/math/test/pl_test.h deleted file mode 100644 index e7ed4eed634e..000000000000 --- a/contrib/arm-optimized-routines/pl/math/test/pl_test.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * PL macros for emitting various details about routines for consumption by - * runulp.sh. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception. - */ - -/* Emit the max ULP threshold, l, for routine f. Piggy-back PL_TEST_EXPECT_FENV - on PL_TEST_ULP to add EXPECT_FENV to all scalar routines. */ -#if WANT_VMATH || defined(IGNORE_SCALAR_FENV) -# define PL_TEST_ULP(f, l) PL_TEST_ULP f l -#else -# define PL_TEST_ULP(f, l) \ - PL_TEST_EXPECT_FENV_ALWAYS (f) \ - PL_TEST_ULP f l -#endif - -/* Emit routine name if e == 1 and f is expected to correctly trigger fenv - exceptions. e allows declaration to be emitted conditionally upon certain - build flags - defer expansion by one pass to allow those flags to be expanded - properly. */ -#define PL_TEST_EXPECT_FENV(f, e) PL_TEST_EXPECT_FENV_ (f, e) -#define PL_TEST_EXPECT_FENV_(f, e) PL_TEST_EXPECT_FENV_##e (f) -#define PL_TEST_EXPECT_FENV_1(f) PL_TEST_EXPECT_FENV_ENABLED f -#define PL_TEST_EXPECT_FENV_ALWAYS(f) PL_TEST_EXPECT_FENV (f, 1) - -#define PL_TEST_INTERVAL(f, lo, hi, n) PL_TEST_INTERVAL f lo hi n -#define PL_TEST_SYM_INTERVAL(f, lo, hi, n) \ - PL_TEST_INTERVAL (f, lo, hi, n) \ - PL_TEST_INTERVAL (f, -lo, -hi, n) -#define PL_TEST_INTERVAL_C(f, lo, hi, n, c) PL_TEST_INTERVAL f lo hi n c -#define PL_TEST_SYM_INTERVAL_C(f, lo, hi, n, c) \ - PL_TEST_INTERVAL_C (f, lo, hi, n, c) \ - PL_TEST_INTERVAL_C (f, -lo, -hi, n, c) -// clang-format off -#define PL_TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n) \ - PL_TEST_INTERVAL f xlo,ylo xhi,yhi n -// clang-format on diff --git a/contrib/arm-optimized-routines/pl/math/test/runulp.sh b/contrib/arm-optimized-routines/pl/math/test/runulp.sh deleted file mode 100755 index 0f5a41f76b25..000000000000 --- a/contrib/arm-optimized-routines/pl/math/test/runulp.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash - -# ULP error check script. -# -# Copyright (c) 2019-2023, Arm Limited. -# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - -#set -x -set -eu - -# cd to bin directory. 
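To make the emission scheme of the pl_test.h macros above concrete: they do not expand to C at all. After preprocessing they leave bare plain-text markers, which runulp.sh greps out of the preprocessor output to build ulp invocations. A hypothetical expansion for the tanh entries seen earlier, assuming SV_NAME_F1 (tanh) resolves to _ZGVsMxv_tanhf as the vector naming elsewhere in these files suggests:

/* PL_TEST_ULP (SV_NAME_F1 (tanh), 2.07) leaves the marker:

     PL_TEST_ULP _ZGVsMxv_tanhf 2.07

   and PL_TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0, 0x1p-23, 1000) leaves:

     PL_TEST_INTERVAL _ZGVsMxv_tanhf 0 0x1p-23 1000
     PL_TEST_INTERVAL _ZGVsMxv_tanhf -0 -0x1p-23 1000

   The expansions are not recursive because the macro names reappear
   without an argument list.  runulp.sh then matches these lines (via the
   LIMITS and INTERVALS files) to drive ./ulp.  */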
-cd "${0%/*}" - -flags="${ULPFLAGS:--q}" -emu="$@" - -# Enable SVE testing -WANT_SVE_MATH=${WANT_SVE_MATH:-0} - -FAIL=0 -PASS=0 - -t() { - routine=$1 - L=$(cat $LIMITS | grep "^$routine " | awk '{print $2}') - [[ $L =~ ^[0-9]+\.[0-9]+$ ]] - extra_flags= - [[ -z "${5:-}" ]] || extra_flags="$extra_flags -c $5" - grep -q "^$routine$" $FENV || extra_flags="$extra_flags -f" - IFS=',' read -ra LO <<< "$2" - IFS=',' read -ra HI <<< "$3" - ITV="${LO[0]} ${HI[0]}" - for i in "${!LO[@]}"; do - [[ "$i" -eq "0" ]] || ITV="$ITV x ${LO[$i]} ${HI[$i]}" - done - # Add -z flag to ignore zero sign for vector routines - { echo $routine | grep -q "ZGV"; } && extra_flags="$extra_flags -z" - $emu ./ulp -e $L $flags ${extra_flags} $routine $ITV $4 && PASS=$((PASS+1)) || FAIL=$((FAIL+1)) -} - -check() { - $emu ./ulp -f -q "$@" #>/dev/null -} - -if [ "$FUNC" == "atan2" ] || [ -z "$FUNC" ]; then - # Regression-test for correct NaN handling in atan2 - check atan2 0x1p-1022 0x1p-1000 x 0 0x1p-1022 40000 - check atan2 0x1.7887a0a717aefp+1017 0x1.7887a0a717aefp+1017 x -nan -nan - check atan2 nan nan x -nan -nan -fi - -# vector functions -flags="${ULPFLAGS:--q}" -runsv= -if [ $WANT_SVE_MATH -eq 1 ]; then -# No guarantees about powi accuracy, so regression-test for exactness -# w.r.t. the custom reference impl in ulp_wrappers.h -check -q -f -e 0 _ZGVsMxvv_powi 0 inf x 0 1000 100000 && runsv=1 -check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x 0 1000 100000 && runsv=1 -check -q -f -e 0 _ZGVsMxvv_powi 0 inf x -0 -1000 100000 && runsv=1 -check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x -0 -1000 100000 && runsv=1 -check -q -f -e 0 _ZGVsMxvv_powk 0 inf x 0 1000 100000 && runsv=1 -check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x 0 1000 100000 && runsv=1 -check -q -f -e 0 _ZGVsMxvv_powk 0 inf x -0 -1000 100000 && runsv=1 -check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x -0 -1000 100000 && runsv=1 -fi - -while read F LO HI N C -do - t $F $LO $HI $N $C -done << EOF -$(cat $INTERVALS | grep "\b$FUNC\b") -EOF - -[ 0 -eq $FAIL ] || { - echo "FAILED $FAIL PASSED $PASS" - exit 1 -} diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erff.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erff.tst deleted file mode 100644 index 9b1d3d5114ae..000000000000 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erff.tst +++ /dev/null @@ -1,17 +0,0 @@ -; erff.tst -; -; Copyright (c) 2007-2023, Arm Limited. -; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - -func=erff op1=7fc00001 result=7fc00001 errno=0 -func=erff op1=ffc00001 result=7fc00001 errno=0 -func=erff op1=7f800001 result=7fc00001 errno=0 status=i -func=erff op1=ff800001 result=7fc00001 errno=0 status=i -func=erff op1=7f800000 result=3f800000 errno=0 -func=erff op1=ff800000 result=bf800000 errno=0 -func=erff op1=00000000 result=00000000 errno=ERANGE -func=erff op1=80000000 result=80000000 errno=ERANGE -func=erff op1=00000001 result=00000001 errno=0 status=ux -func=erff op1=80000001 result=80000001 errno=0 status=ux -func=erff op1=3f800000 result=3f57bb3d.3a0 errno=0 -func=erff op1=bf800000 result=bf57bb3d.3a0 errno=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2.tst deleted file mode 100644 index 5d1eb9b877e8..000000000000 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2.tst +++ /dev/null @@ -1,21 +0,0 @@ -; Directed test cases for log2 -; -; Copyright (c) 2018-2023, Arm Limited. 
-; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - -func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 -func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 -func=log2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i -func=log2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i -func=log2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 -func=log2 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i -func=log2 op1=7fefffff.ffffffff result=408fffff.ffffffff.ffa errno=0 -func=log2 op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i -func=log2 op1=3ff00000.00000000 result=00000000.00000000 errno=0 -func=log2 op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i -func=log2 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z -func=log2 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z -func=log2 op1=00000000.00000001 result=c090c800.00000000 errno=0 -func=log2 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i -func=log2 op1=40000000.00000000 result=3ff00000.00000000 errno=0 -func=log2 op1=3fe00000.00000000 result=bff00000.00000000 errno=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2f.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2f.tst deleted file mode 100644 index 4e08110878d6..000000000000 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2f.tst +++ /dev/null @@ -1,27 +0,0 @@ -; log2f.tst - Directed test cases for log2f -; -; Copyright (c) 2017-2023, Arm Limited. -; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - -func=log2f op1=7fc00001 result=7fc00001 errno=0 -func=log2f op1=ffc00001 result=7fc00001 errno=0 -func=log2f op1=7f800001 result=7fc00001 errno=0 status=i -func=log2f op1=ff800001 result=7fc00001 errno=0 status=i -func=log2f op1=ff810000 result=7fc00001 errno=0 status=i -func=log2f op1=7f800000 result=7f800000 errno=0 -func=log2f op1=ff800000 result=7fc00001 errno=EDOM status=i -func=log2f op1=3f800000 result=00000000 errno=0 -func=log2f op1=00000000 result=ff800000 errno=ERANGE status=z -func=log2f op1=80000000 result=ff800000 errno=ERANGE status=z -func=log2f op1=80000001 result=7fc00001 errno=EDOM status=i - -func=log2f op1=3f7d70a4 result=bc6d8f8b.7d4 error=0 -func=log2f op1=3f604189 result=be4394c8.395 error=0 -func=log2f op1=3f278034 result=bf1caa73.88e error=0 -func=log2f op1=3edd3c36 result=bf9af3b9.619 error=0 -func=log2f op1=3e61259a result=c00bdb95.650 error=0 -func=log2f op1=3f8147ae result=3c6b3267.d6a error=0 -func=log2f op1=3f8fbe77 result=3e2b5fe2.a1c error=0 -func=log2f op1=3fac3eea result=3edb4d5e.1fc error=0 -func=log2f op1=3fd6e632 result=3f3f5d3a.827 error=0 -func=log2f op1=40070838 result=3f89e055.a0a error=0 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/random/double.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/random/double.tst deleted file mode 100644 index d83283ef7864..000000000000 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/random/double.tst +++ /dev/null @@ -1,6 +0,0 @@ -!! double.tst - Random test case specification for DP functions -!! -!! Copyright (c) 1999-2023, Arm Limited. -!! 
SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - -test log10 10000 diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/random/float.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/random/float.tst deleted file mode 100644 index fa77efecfabb..000000000000 --- a/contrib/arm-optimized-routines/pl/math/test/testcases/random/float.tst +++ /dev/null @@ -1,8 +0,0 @@ -!! float.tst - Random test case specification for SP functions -!! -!! Copyright (c) 2022-2023, Arm Limited. -!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - -test erff 10000 -test log10f 10000 -test tanf 10000 diff --git a/contrib/arm-optimized-routines/pl/math/test/ulp_funcs.h b/contrib/arm-optimized-routines/pl/math/test/ulp_funcs.h deleted file mode 100644 index 4929b481ffe1..000000000000 --- a/contrib/arm-optimized-routines/pl/math/test/ulp_funcs.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Function entries for ulp. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#if defined(__vpcs) && __aarch64__ - -#define _ZVF1(f) ZVF1 (f) -#define _ZVD1(f) ZVD1 (f) -#define _ZVF2(f) ZVF2 (f) -#define _ZVD2(f) ZVD2 (f) - -#else - -#define _ZVF1(f) -#define _ZVD1(f) -#define _ZVF2(f) -#define _ZVD2(f) - -#endif - -#if WANT_SVE_MATH - -#define _ZSVF1(f) ZSVF1 (f) -#define _ZSVF2(f) ZSVF2 (f) -#define _ZSVD1(f) ZSVD1 (f) -#define _ZSVD2(f) ZSVD2 (f) - -#else - -#define _ZSVF1(f) -#define _ZSVF2(f) -#define _ZSVD1(f) -#define _ZSVD2(f) - -#endif - -#define _ZSF1(f) F1 (f) -#define _ZSF2(f) F2 (f) -#define _ZSD1(f) D1 (f) -#define _ZSD2(f) D2 (f) - -#include "ulp_funcs_gen.h" - -F (_ZGVnN4v_sincosf_sin, v_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0) -F (_ZGVnN4v_sincosf_cos, v_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0) -F (_ZGVnN4v_cexpif_sin, v_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0) -F (_ZGVnN4v_cexpif_cos, v_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0) - -F (_ZGVnN2v_sincos_sin, v_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0) -F (_ZGVnN2v_sincos_cos, v_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0) -F (_ZGVnN2v_cexpi_sin, v_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0) -F (_ZGVnN2v_cexpi_cos, v_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0) - -#if WANT_SVE_MATH -F (_ZGVsMxvv_powk, Z_sv_powk, ref_powi, mpfr_powi, 2, 0, d2, 0) -F (_ZGVsMxvv_powi, Z_sv_powi, ref_powif, mpfr_powi, 2, 1, f2, 0) - -F (_ZGVsMxv_sincosf_sin, sv_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0) -F (_ZGVsMxv_sincosf_cos, sv_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0) -F (_ZGVsMxv_cexpif_sin, sv_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0) -F (_ZGVsMxv_cexpif_cos, sv_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0) - -F (_ZGVsMxv_sincos_sin, sv_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0) -F (_ZGVsMxv_sincos_cos, sv_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0) -F (_ZGVsMxv_cexpi_sin, sv_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0) -F (_ZGVsMxv_cexpi_cos, sv_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0) -#endif diff --git a/contrib/arm-optimized-routines/pl/math/test/ulp_wrappers.h b/contrib/arm-optimized-routines/pl/math/test/ulp_wrappers.h deleted file mode 100644 index 0f7b68949c7b..000000000000 --- a/contrib/arm-optimized-routines/pl/math/test/ulp_wrappers.h +++ /dev/null @@ -1,140 +0,0 @@ -// clang-format off -/* - * Function wrappers for ulp. - * - * Copyright (c) 2022-2023, Arm Limited. 
 - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#define _GNU_SOURCE -#include <stdbool.h> -#include <arm_neon.h> - -#if USE_MPFR -static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { - mpfr_cos(y, x, r); - return mpfr_sin(y, x, r); -} -static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { - mpfr_sin(y, x, r); - return mpfr_cos(y, x, r); -} -static int wrap_mpfr_powi(mpfr_t ret, const mpfr_t x, const mpfr_t y, mpfr_rnd_t rnd) { - mpfr_t y2; - mpfr_init(y2); - mpfr_trunc(y2, y); - return mpfr_pow(ret, x, y2, rnd); -} -#endif - -/* Our implementations of powi/powk are too imprecise to verify - against any established pow implementation. Instead we have the - following simple implementation, against which it is enough to - maintain bitwise reproducibility. Note the test framework expects - the reference impl to be of higher precision than the function - under test. For instance this means that the reference for - double-precision powi will be passed a long double, so to check - bitwise reproducibility we have to cast it back down to - double. This is fine since a round-trip to higher precision and - back down is correctly rounded. */ -#define DECL_POW_INT_REF(NAME, DBL_T, FLT_T, INT_T) \ - static DBL_T __attribute__((unused)) NAME (DBL_T in_val, DBL_T y) \ - { \ - INT_T n = (INT_T) round (y); \ - FLT_T acc = 1.0; \ - bool want_recip = n < 0; \ - n = n < 0 ? -n : n; \ - \ - for (FLT_T c = in_val; n; c *= c, n >>= 1) \ - { \ - if (n & 0x1) \ - { \ - acc *= c; \ - } \ - } \ - if (want_recip) \ - { \ - acc = 1.0 / acc; \ - } \ - return acc; \ - } - -DECL_POW_INT_REF(ref_powif, double, float, int) -DECL_POW_INT_REF(ref_powi, long double, double, int) - -#define ZVF1_WRAP(func) static float Z_##func##f(float x) { return _ZGVnN4v_##func##f(argf(x))[0]; } -#define ZVF2_WRAP(func) static float Z_##func##f(float x, float y) { return _ZGVnN4vv_##func##f(argf(x), argf(y))[0]; } -#define ZVD1_WRAP(func) static double Z_##func(double x) { return _ZGVnN2v_##func(argd(x))[0]; } -#define ZVD2_WRAP(func) static double Z_##func(double x, double y) { return _ZGVnN2vv_##func(argd(x), argd(y))[0]; } - -#if defined(__vpcs) && __aarch64__ - -#define ZVNF1_WRAP(func) ZVF1_WRAP(func) -#define ZVNF2_WRAP(func) ZVF2_WRAP(func) -#define ZVND1_WRAP(func) ZVD1_WRAP(func) -#define ZVND2_WRAP(func) ZVD2_WRAP(func) - -#else - -#define ZVNF1_WRAP(func) -#define ZVNF2_WRAP(func) -#define ZVND1_WRAP(func) -#define ZVND2_WRAP(func) - -#endif - -#define ZSVF1_WRAP(func) static float Z_sv_##func##f(float x) { return svretf(_ZGVsMxv_##func##f(svargf(x), svptrue_b32())); } -#define ZSVF2_WRAP(func) static float Z_sv_##func##f(float x, float y) { return svretf(_ZGVsMxvv_##func##f(svargf(x), svargf(y), svptrue_b32())); } -#define ZSVD1_WRAP(func) static double Z_sv_##func(double x) { return svretd(_ZGVsMxv_##func(svargd(x), svptrue_b64())); } -#define ZSVD2_WRAP(func) static double Z_sv_##func(double x, double y) { return svretd(_ZGVsMxvv_##func(svargd(x), svargd(y), svptrue_b64())); } - -#if WANT_SVE_MATH - -#define ZSVNF1_WRAP(func) ZSVF1_WRAP(func) -#define ZSVNF2_WRAP(func) ZSVF2_WRAP(func) -#define ZSVND1_WRAP(func) ZSVD1_WRAP(func) -#define ZSVND2_WRAP(func) ZSVD2_WRAP(func) - -#else - -#define ZSVNF1_WRAP(func) -#define ZSVNF2_WRAP(func) -#define ZSVND1_WRAP(func) -#define ZSVND2_WRAP(func) - -#endif - -/* No wrappers for scalar routines, but PL_SIG will emit them. 
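The square-and-multiply scheme behind DECL_POW_INT_REF above can be exercised standalone. A minimal sketch under an illustrative name (not part of the library), written out for double with an int exponent:

#include <stdbool.h>

/* Same exponentiation-by-squaring idea as DECL_POW_INT_REF.  */
static double
powi_by_squaring (double x, int n)
{
  bool recip = n < 0;
  unsigned m = recip ? 0u - (unsigned) n : (unsigned) n;
  double acc = 1.0;
  for (double c = x; m; c *= c, m >>= 1)
    if (m & 1)
      acc *= c;  /* multiply in c = x^(2^bit) for each set bit of m.  */
  return recip ? 1.0 / acc : acc;
}

/* e.g. powi_by_squaring (2.0, 10) == 1024.0 and
        powi_by_squaring (2.0, -2) == 0.25.  */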
*/ -#define ZSNF1_WRAP(func) -#define ZSNF2_WRAP(func) -#define ZSND1_WRAP(func) -#define ZSND2_WRAP(func) - -#include "ulp_wrappers_gen.h" - -float v_sincosf_sin(float x) { float32x4_t s, c; _ZGVnN4vl4l4_sincosf(vdupq_n_f32(x), &s, &c); return s[0]; } -float v_sincosf_cos(float x) { float32x4_t s, c; _ZGVnN4vl4l4_sincosf(vdupq_n_f32(x), &s, &c); return c[0]; } -float v_cexpif_sin(float x) { return _ZGVnN4v_cexpif(vdupq_n_f32(x)).val[0][0]; } -float v_cexpif_cos(float x) { return _ZGVnN4v_cexpif(vdupq_n_f32(x)).val[1][0]; } - -double v_sincos_sin(double x) { float64x2_t s, c; _ZGVnN2vl8l8_sincos(vdupq_n_f64(x), &s, &c); return s[0]; } -double v_sincos_cos(double x) { float64x2_t s, c; _ZGVnN2vl8l8_sincos(vdupq_n_f64(x), &s, &c); return c[0]; } -double v_cexpi_sin(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[0][0]; } -double v_cexpi_cos(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[1][0]; } - -#if WANT_SVE_MATH -static float Z_sv_powi(float x, float y) { return svretf(_ZGVsMxvv_powi(svargf(x), svdup_s32((int)round(y)), svptrue_b32())); } -static double Z_sv_powk(double x, double y) { return svretd(_ZGVsMxvv_powk(svargd(x), svdup_s64((long)round(y)), svptrue_b64())); } - -float sv_sincosf_sin(float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, svptrue_b32()); return s[0]; } -float sv_sincosf_cos(float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, svptrue_b32()); return c[0]; } -float sv_cexpif_sin(float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), svptrue_b32()), 0)); } -float sv_cexpif_cos(float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), svptrue_b32()), 1)); } - -double sv_sincos_sin(double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, svptrue_b64()); return s[0]; } -double sv_sincos_cos(double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, svptrue_b64()); return c[0]; } -double sv_cexpi_sin(double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), svptrue_b64()), 0)); } -double sv_cexpi_cos(double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), svptrue_b64()), 1)); } - -#endif -// clang-format on diff --git a/contrib/arm-optimized-routines/pl/math/trigpi_references.c b/contrib/arm-optimized-routines/pl/math/trigpi_references.c deleted file mode 100644 index 4b0514b6766a..000000000000 --- a/contrib/arm-optimized-routines/pl/math/trigpi_references.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Extended precision scalar reference functions for trigpi. - * - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#define _GNU_SOURCE -#include "math_config.h" -#include "mathlib.h" - -long double -sinpil (long double x) -{ - /* sin(inf) should return nan, as defined by C23. */ - if (isinf (x)) - return __math_invalid (x); - - long double ax = fabsl (x); - - /* Return 0 for all values above 2^64 to prevent - overflow when casting to uint64_t. */ - if (ax >= 0x1p64) - return 0; - - /* All integer cases should return 0. */ - if (ax == (uint64_t) ax) - return 0; - - return sinl (x * M_PIl); -} - -long double -cospil (long double x) -{ - /* cos(inf) should return nan, as defined by C23. */ - if (isinf (x)) - return __math_invalid (x); - - long double ax = fabsl (x); - - if (ax >= 0x1p64) - return 1; - - uint64_t m = (uint64_t) ax; - - /* Integer values of cospi(x) should return +/-1. - The sign depends on if x is odd or even. */ - if (m == ax) - return (m & 1) ? 
-1 : 1; - - /* Values of Integer + 0.5 should always return 0. */ - if (ax - 0.5 == m || ax + 0.5 == m) - return 0; - - return cosl (ax * M_PIl); -} \ No newline at end of file diff --git a/contrib/arm-optimized-routines/pl/math/v_asinh_3u5.c b/contrib/arm-optimized-routines/pl/math/v_asinh_3u5.c deleted file mode 100644 index 4862bef94861..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_asinh_3u5.c +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Double-precision vector asinh(x) function. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "poly_advsimd_f64.h" -#include "pl_sig.h" -#include "pl_test.h" - -#define A(i) v_f64 (__v_log_data.poly[i]) -#define N (1 << V_LOG_TABLE_BITS) - -const static struct data -{ - float64x2_t poly[18]; - uint64x2_t off, huge_bound, abs_mask; - float64x2_t ln2, tiny_bound; -} data = { - .off = V2 (0x3fe6900900000000), - .ln2 = V2 (0x1.62e42fefa39efp-1), - .huge_bound = V2 (0x5fe0000000000000), - .tiny_bound = V2 (0x1p-26), - .abs_mask = V2 (0x7fffffffffffffff), - /* Even terms of polynomial s.t. asinh(x) is approximated by - asinh(x) ~= x + x^3 * (C0 + C1 * x + C2 * x^2 + C3 * x^3 + ...). - Generated using Remez, f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2). */ - .poly = { V2 (-0x1.55555555554a7p-3), V2 (0x1.3333333326c7p-4), - V2 (-0x1.6db6db68332e6p-5), V2 (0x1.f1c71b26fb40dp-6), - V2 (-0x1.6e8b8b654a621p-6), V2 (0x1.1c4daa9e67871p-6), - V2 (-0x1.c9871d10885afp-7), V2 (0x1.7a16e8d9d2ecfp-7), - V2 (-0x1.3ddca533e9f54p-7), V2 (0x1.0becef748dafcp-7), - V2 (-0x1.b90c7099dd397p-8), V2 (0x1.541f2bb1ffe51p-8), - V2 (-0x1.d217026a669ecp-9), V2 (0x1.0b5c7977aaf7p-9), - V2 (-0x1.e0f37daef9127p-11), V2 (0x1.388b5fe542a6p-12), - V2 (-0x1.021a48685e287p-14), V2 (0x1.93d4ba83d34dap-18) }, -}; - -static float64x2_t NOINLINE VPCS_ATTR -special_case (float64x2_t x, float64x2_t y, uint64x2_t special) -{ - return v_call_f64 (asinh, x, y, special); -} - -struct entry -{ - float64x2_t invc; - float64x2_t logc; -}; - -static inline struct entry -lookup (uint64x2_t i) -{ - float64x2_t e0 = vld1q_f64 ( - &__v_log_data.table[(i[0] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc); - float64x2_t e1 = vld1q_f64 ( - &__v_log_data.table[(i[1] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc); - return (struct entry){ vuzp1q_f64 (e0, e1), vuzp2q_f64 (e0, e1) }; -} - -static inline float64x2_t -log_inline (float64x2_t x, const struct data *d) -{ - /* Double-precision vector log, copied from ordinary vector log with some - cosmetic modification and special-cases removed. */ - uint64x2_t ix = vreinterpretq_u64_f64 (x); - uint64x2_t tmp = vsubq_u64 (ix, d->off); - int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); - uint64x2_t iz - = vsubq_u64 (ix, vandq_u64 (tmp, vdupq_n_u64 (0xfffULL << 52))); - float64x2_t z = vreinterpretq_f64_u64 (iz); - struct entry e = lookup (tmp); - float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); - float64x2_t kd = vcvtq_f64_s64 (k); - float64x2_t hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2); - float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t y = vfmaq_f64 (A (2), A (3), r); - float64x2_t p = vfmaq_f64 (A (0), A (1), r); - y = vfmaq_f64 (y, A (4), r2); - y = vfmaq_f64 (p, y, r2); - y = vfmaq_f64 (hi, y, r2); - return y; -} - -/* Double-precision implementation of vector asinh(x). - asinh is very sensitive around 1, so it is impractical to devise a single - low-cost algorithm which is sufficiently accurate on a wide range of input. 
 - Instead we use two different algorithms: - asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1)) if |x| >= 1 - = sign(x) * (|x| + |x|^3 * P(x^2)) otherwise - where log(x) is an optimized log approximation, and P(x) is a polynomial - shared with the scalar routine. The greatest observed error is 3.29 ULP, in - |x| >= 1: - __v_asinh(0x1.2cd9d717e2c9bp+0) got 0x1.ffffcfd0e234fp-1 - want 0x1.ffffcfd0e2352p-1. */ -VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x) -{ - const struct data *d = ptr_barrier (&data); - - float64x2_t ax = vabsq_f64 (x); - uint64x2_t iax = vreinterpretq_u64_f64 (ax); - - uint64x2_t gt1 = vcgeq_f64 (ax, v_f64 (1)); - uint64x2_t special = vcgeq_u64 (iax, d->huge_bound); - -#if WANT_SIMD_EXCEPT - uint64x2_t tiny = vcltq_f64 (ax, d->tiny_bound); - special = vorrq_u64 (special, tiny); -#endif - - /* Option 1: |x| >= 1. - Compute asinh(x) according to asinh(x) = log(x + sqrt(x^2 + 1)). - If WANT_SIMD_EXCEPT is enabled, sidestep special values, which will - overflow, by setting special lanes to 1. These will be fixed later. */ - float64x2_t option_1 = v_f64 (0); - if (likely (v_any_u64 (gt1))) - { -#if WANT_SIMD_EXCEPT - float64x2_t xm = v_zerofy_f64 (ax, special); -#else - float64x2_t xm = ax; -#endif - option_1 = log_inline ( - vaddq_f64 (xm, vsqrtq_f64 (vfmaq_f64 (v_f64 (1), xm, xm))), d); - } - - /* Option 2: |x| < 1. - Compute asinh(x) using a polynomial. - If WANT_SIMD_EXCEPT is enabled, sidestep special lanes, which will - overflow, and tiny lanes, which will underflow, by setting them to 0. They - will be fixed later, either by selecting x or falling back to the scalar - special-case. The largest observed error in this region is 1.47 ULPs: - __v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1 - want 0x1.c1d6bf874019cp-1. */ - float64x2_t option_2 = v_f64 (0); - if (likely (v_any_u64 (vceqzq_u64 (gt1)))) - { -#if WANT_SIMD_EXCEPT - ax = v_zerofy_f64 (ax, vorrq_u64 (tiny, gt1)); -#endif - float64x2_t x2 = vmulq_f64 (ax, ax), x3 = vmulq_f64 (ax, x2), - z2 = vmulq_f64 (x2, x2), z4 = vmulq_f64 (z2, z2), - z8 = vmulq_f64 (z4, z4), z16 = vmulq_f64 (z8, z8); - float64x2_t p = v_estrin_17_f64 (x2, z2, z4, z8, z16, d->poly); - option_2 = vfmaq_f64 (ax, p, x3); -#if WANT_SIMD_EXCEPT - option_2 = vbslq_f64 (tiny, x, option_2); -#endif - } - - /* Choose the right option for each lane. */ - float64x2_t y = vbslq_f64 (gt1, option_1, option_2); - /* Copy sign. */ - y = vbslq_f64 (d->abs_mask, y, x); - - if (unlikely (v_any_u64 (special))) - return special_case (x, y, special); - return y; -} - -PL_SIG (V, D, 1, asinh, -10.0, 10.0) -PL_TEST_ULP (V_NAME_D1 (asinh), 2.80) -PL_TEST_EXPECT_FENV (V_NAME_D1 (asinh), WANT_SIMD_EXCEPT) -/* Test vector asinh 3 times, with control lane < 1, > 1 and special. - Ensures the v_sel is choosing the right option in all cases. */ -#define V_ASINH_INTERVAL(lo, hi, n) \ - PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (asinh), lo, hi, n, 0.5) \ - PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (asinh), lo, hi, n, 2) \ - PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (asinh), lo, hi, n, 0x1p600) -V_ASINH_INTERVAL (0, 0x1p-26, 50000) -V_ASINH_INTERVAL (0x1p-26, 1, 50000) -V_ASINH_INTERVAL (1, 0x1p511, 50000) -V_ASINH_INTERVAL (0x1p511, inf, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/v_asinhf_2u7.c b/contrib/arm-optimized-routines/pl/math/v_asinhf_2u7.c deleted file mode 100644 index 1723ba90d2f3..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_asinhf_2u7.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Single-precision vector asinh(x) function. 
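A scalar model of the double-precision branch selection above may help. Sketch only: libm calls stand in for log_inline and the shared polynomial, and the function name is illustrative.

#include <math.h>

/* |x| >= 1: asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1));
   |x| <  1: sign(x) * (|x| + |x|^3 * P(x^2)), modelled by libm asinh.  */
static double
asinh_model (double x)
{
  double ax = fabs (x);
  double y = ax >= 1.0 ? log (ax + sqrt (ax * ax + 1.0)) : asinh (ax);
  return copysign (y, x);
}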
- * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "v_log1pf_inline.h" - -#define SignMask v_u32 (0x80000000) - -const static struct data -{ - struct v_log1pf_data log1pf_consts; - uint32x4_t big_bound; -#if WANT_SIMD_EXCEPT - uint32x4_t tiny_bound; -#endif -} data = { - .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, - .big_bound = V4 (0x5f800000), /* asuint(0x1p64). */ -#if WANT_SIMD_EXCEPT - .tiny_bound = V4 (0x30800000) /* asuint(0x1p-30). */ -#endif -}; - -static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) -{ - return v_call_f32 (asinhf, x, y, special); -} - -/* Single-precision implementation of vector asinh(x), using vector log1p. - Worst-case error is 2.66 ULP, at roughly +/-0.25: - __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */ -VPCS_ATTR float32x4_t V_NAME_F1 (asinh) (float32x4_t x) -{ - const struct data *dat = ptr_barrier (&data); - uint32x4_t iax = vbicq_u32 (vreinterpretq_u32_f32 (x), SignMask); - float32x4_t ax = vreinterpretq_f32_u32 (iax); - uint32x4_t special = vcgeq_u32 (iax, dat->big_bound); - float32x4_t special_arg = x; - -#if WANT_SIMD_EXCEPT - /* Sidestep tiny and large values to avoid inadvertently triggering - under/overflow. */ - special = vorrq_u32 (special, vcltq_u32 (iax, dat->tiny_bound)); - if (unlikely (v_any_u32 (special))) - { - ax = v_zerofy_f32 (ax, special); - x = v_zerofy_f32 (x, special); - } -#endif - - /* asinh(x) = log(x + sqrt(x * x + 1)). - For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */ - float32x4_t d - = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (v_f32 (1), x, x))); - float32x4_t y = log1pf_inline ( - vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)), dat->log1pf_consts); - - if (unlikely (v_any_u32 (special))) - return special_case (special_arg, vbslq_f32 (SignMask, x, y), special); - return vbslq_f32 (SignMask, x, y); -} - -PL_SIG (V, F, 1, asinh, -10.0, 10.0) -PL_TEST_ULP (V_NAME_F1 (asinh), 2.17) -PL_TEST_EXPECT_FENV (V_NAME_F1 (asinh), WANT_SIMD_EXCEPT) -PL_TEST_INTERVAL (V_NAME_F1 (asinh), 0, 0x1p-12, 40000) -PL_TEST_INTERVAL (V_NAME_F1 (asinh), 0x1p-12, 1.0, 40000) -PL_TEST_INTERVAL (V_NAME_F1 (asinh), 1.0, 0x1p11, 40000) -PL_TEST_INTERVAL (V_NAME_F1 (asinh), 0x1p11, inf, 40000) -PL_TEST_INTERVAL (V_NAME_F1 (asinh), -0, -0x1p-12, 20000) -PL_TEST_INTERVAL (V_NAME_F1 (asinh), -0x1p-12, -1.0, 20000) -PL_TEST_INTERVAL (V_NAME_F1 (asinh), -1.0, -0x1p11, 20000) -PL_TEST_INTERVAL (V_NAME_F1 (asinh), -0x1p11, -inf, 20000) diff --git a/contrib/arm-optimized-routines/pl/math/v_atan2_3u.c b/contrib/arm-optimized-routines/pl/math/v_atan2_3u.c deleted file mode 100644 index f24667682dec..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_atan2_3u.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Double-precision vector atan2(x) function. - * - * Copyright (c) 2021-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_advsimd_f64.h" - -static const struct data -{ - float64x2_t pi_over_2; - float64x2_t poly[20]; -} data = { - /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on - the interval [2**-1022, 1.0]. 
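Returning to the single-precision asinh above, the algebraic rewrite it relies on is worth spelling out. A scalar sketch, assuming ax = |x| is finite and the caller restores the sign, as the vector code does with vbslq:

#include <math.h>

/* x + sqrt(x^2 + 1) = 1 + (x + x^2 / (1 + sqrt(x^2 + 1))), so for x >= 0
   asinh(x) = log1p(x + x*x / (1 + sqrt(x*x + 1))),
   which stays accurate for small x where plain log() would cancel.  */
static float
asinhf_model (float ax)
{
  float d = 1.0f + sqrtf (ax * ax + 1.0f);
  return log1pf (ax + ax * ax / d);
}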
*/ - .poly = { V2 (-0x1.5555555555555p-2), V2 (0x1.99999999996c1p-3), - V2 (-0x1.2492492478f88p-3), V2 (0x1.c71c71bc3951cp-4), - V2 (-0x1.745d160a7e368p-4), V2 (0x1.3b139b6a88ba1p-4), - V2 (-0x1.11100ee084227p-4), V2 (0x1.e1d0f9696f63bp-5), - V2 (-0x1.aebfe7b418581p-5), V2 (0x1.842dbe9b0d916p-5), - V2 (-0x1.5d30140ae5e99p-5), V2 (0x1.338e31eb2fbbcp-5), - V2 (-0x1.00e6eece7de8p-5), V2 (0x1.860897b29e5efp-6), - V2 (-0x1.0051381722a59p-6), V2 (0x1.14e9dc19a4a4ep-7), - V2 (-0x1.d0062b42fe3bfp-9), V2 (0x1.17739e210171ap-10), - V2 (-0x1.ab24da7be7402p-13), V2 (0x1.358851160a528p-16), }, - .pi_over_2 = V2 (0x1.921fb54442d18p+0), -}; - -#define SignMask v_u64 (0x8000000000000000) - -/* Special cases i.e. 0, infinity, NaN (fall back to scalar calls). */ -static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t y, float64x2_t x, float64x2_t ret, uint64x2_t cmp) -{ - return v_call2_f64 (atan2, y, x, ret, cmp); -} - -/* Returns 1 if input is the bit representation of 0, infinity or nan. */ -static inline uint64x2_t -zeroinfnan (uint64x2_t i) -{ - /* (2 * i - 1) >= (2 * asuint64 (INFINITY) - 1). */ - return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)), - v_u64 (2 * asuint64 (INFINITY) - 1)); -} - -/* Fast implementation of vector atan2. - Maximum observed error is 2.8 ulps: - _ZGVnN2vv_atan2 (0x1.9651a429a859ap+5, 0x1.953075f4ee26p+5) - got 0x1.92d628ab678ccp-1 - want 0x1.92d628ab678cfp-1. */ -float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) -{ - const struct data *data_ptr = ptr_barrier (&data); - - uint64x2_t ix = vreinterpretq_u64_f64 (x); - uint64x2_t iy = vreinterpretq_u64_f64 (y); - - uint64x2_t special_cases = vorrq_u64 (zeroinfnan (ix), zeroinfnan (iy)); - - uint64x2_t sign_x = vandq_u64 (ix, SignMask); - uint64x2_t sign_y = vandq_u64 (iy, SignMask); - uint64x2_t sign_xy = veorq_u64 (sign_x, sign_y); - - float64x2_t ax = vabsq_f64 (x); - float64x2_t ay = vabsq_f64 (y); - - uint64x2_t pred_xlt0 = vcltzq_f64 (x); - uint64x2_t pred_aygtax = vcgtq_f64 (ay, ax); - - /* Set up z for call to atan. */ - float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay); - float64x2_t d = vbslq_f64 (pred_aygtax, ay, ax); - float64x2_t z = vdivq_f64 (n, d); - - /* Work out the correct shift. */ - float64x2_t shift = vreinterpretq_f64_u64 ( - vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-2.0)))); - shift = vbslq_f64 (pred_aygtax, vaddq_f64 (shift, v_f64 (1.0)), shift); - shift = vmulq_f64 (shift, data_ptr->pi_over_2); - - /* Calculate the polynomial approximation. - Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of - full scheme to avoid underflow in x^16. - The order 19 polynomial P approximates - (atan(sqrt(x))-sqrt(x))/x^(3/2). */ - float64x2_t z2 = vmulq_f64 (z, z); - float64x2_t x2 = vmulq_f64 (z2, z2); - float64x2_t x4 = vmulq_f64 (x2, x2); - float64x2_t x8 = vmulq_f64 (x4, x4); - float64x2_t ret - = vfmaq_f64 (v_estrin_7_f64 (z2, x2, x4, data_ptr->poly), - v_estrin_11_f64 (z2, x2, x4, x8, data_ptr->poly + 8), x8); - - /* Finalize. y = shift + z + z^3 * P(z^2). */ - ret = vfmaq_f64 (z, ret, vmulq_f64 (z2, z)); - ret = vaddq_f64 (ret, shift); - - /* Account for the sign of x and y. */ - ret = vreinterpretq_f64_u64 ( - veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); - - if (unlikely (v_any_u64 (special_cases))) - return special_case (y, x, ret, special_cases); - - return ret; -} - -/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. 
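A scalar model of the quadrant bookkeeping above (sketch only, valid for finite nonzero inputs; libm atan stands in for the degree-19 polynomial, and the pi/2 constant is the pi_over_2 value from the data struct):

#include <math.h>

static double
atan2_model (double y, double x)
{
  double ay = fabs (y), ax = fabs (x);
  int aygtax = ay > ax;                     /* pred_aygtax */
  double z = aygtax ? -ax / ay : ay / ax;   /* z in [-1, 1] */
  double shift = (x < 0.0 ? -2.0 : 0.0) + (aygtax ? 1.0 : 0.0);
  double ret = shift * 0x1.921fb54442d18p+0 + atan (z);
  /* sign_xy: flip the result when x and y have opposite signs.  */
  return signbit (x) != signbit (y) ? -ret : ret;
}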
*/ -PL_SIG (V, D, 2, atan2) -// TODO tighten this once __v_atan2 is fixed -PL_TEST_ULP (V_NAME_D2 (atan2), 2.9) -PL_TEST_INTERVAL (V_NAME_D2 (atan2), -10.0, 10.0, 50000) -PL_TEST_INTERVAL (V_NAME_D2 (atan2), -1.0, 1.0, 40000) -PL_TEST_INTERVAL (V_NAME_D2 (atan2), 0.0, 1.0, 40000) -PL_TEST_INTERVAL (V_NAME_D2 (atan2), 1.0, 100.0, 40000) -PL_TEST_INTERVAL (V_NAME_D2 (atan2), 1e6, 1e32, 40000) diff --git a/contrib/arm-optimized-routines/pl/math/v_exp_data.c b/contrib/arm-optimized-routines/pl/math/v_exp_data.c deleted file mode 100644 index fd01cf27606f..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_exp_data.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Scale values for vector exp and exp2 - * - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -/* 2^(j/N), j=0..N, N=2^7=128. Copied from math/v_exp_data.c. */ -const uint64_t __v_exp_data[] = { - 0x3ff0000000000000, 0x3feff63da9fb3335, 0x3fefec9a3e778061, - 0x3fefe315e86e7f85, 0x3fefd9b0d3158574, 0x3fefd06b29ddf6de, - 0x3fefc74518759bc8, 0x3fefbe3ecac6f383, 0x3fefb5586cf9890f, - 0x3fefac922b7247f7, 0x3fefa3ec32d3d1a2, 0x3fef9b66affed31b, - 0x3fef9301d0125b51, 0x3fef8abdc06c31cc, 0x3fef829aaea92de0, - 0x3fef7a98c8a58e51, 0x3fef72b83c7d517b, 0x3fef6af9388c8dea, - 0x3fef635beb6fcb75, 0x3fef5be084045cd4, 0x3fef54873168b9aa, - 0x3fef4d5022fcd91d, 0x3fef463b88628cd6, 0x3fef3f49917ddc96, - 0x3fef387a6e756238, 0x3fef31ce4fb2a63f, 0x3fef2b4565e27cdd, - 0x3fef24dfe1f56381, 0x3fef1e9df51fdee1, 0x3fef187fd0dad990, - 0x3fef1285a6e4030b, 0x3fef0cafa93e2f56, 0x3fef06fe0a31b715, - 0x3fef0170fc4cd831, 0x3feefc08b26416ff, 0x3feef6c55f929ff1, - 0x3feef1a7373aa9cb, 0x3feeecae6d05d866, 0x3feee7db34e59ff7, - 0x3feee32dc313a8e5, 0x3feedea64c123422, 0x3feeda4504ac801c, - 0x3feed60a21f72e2a, 0x3feed1f5d950a897, 0x3feece086061892d, - 0x3feeca41ed1d0057, 0x3feec6a2b5c13cd0, 0x3feec32af0d7d3de, - 0x3feebfdad5362a27, 0x3feebcb299fddd0d, 0x3feeb9b2769d2ca7, - 0x3feeb6daa2cf6642, 0x3feeb42b569d4f82, 0x3feeb1a4ca5d920f, - 0x3feeaf4736b527da, 0x3feead12d497c7fd, 0x3feeab07dd485429, - 0x3feea9268a5946b7, 0x3feea76f15ad2148, 0x3feea5e1b976dc09, - 0x3feea47eb03a5585, 0x3feea34634ccc320, 0x3feea23882552225, - 0x3feea155d44ca973, 0x3feea09e667f3bcd, 0x3feea012750bdabf, - 0x3fee9fb23c651a2f, 0x3fee9f7df9519484, 0x3fee9f75e8ec5f74, - 0x3fee9f9a48a58174, 0x3fee9feb564267c9, 0x3feea0694fde5d3f, - 0x3feea11473eb0187, 0x3feea1ed0130c132, 0x3feea2f336cf4e62, - 0x3feea427543e1a12, 0x3feea589994cce13, 0x3feea71a4623c7ad, - 0x3feea8d99b4492ed, 0x3feeaac7d98a6699, 0x3feeace5422aa0db, - 0x3feeaf3216b5448c, 0x3feeb1ae99157736, 0x3feeb45b0b91ffc6, - 0x3feeb737b0cdc5e5, 0x3feeba44cbc8520f, 0x3feebd829fde4e50, - 0x3feec0f170ca07ba, 0x3feec49182a3f090, 0x3feec86319e32323, - 0x3feecc667b5de565, 0x3feed09bec4a2d33, 0x3feed503b23e255d, - 0x3feed99e1330b358, 0x3feede6b5579fdbf, 0x3feee36bbfd3f37a, - 0x3feee89f995ad3ad, 0x3feeee07298db666, 0x3feef3a2b84f15fb, - 0x3feef9728de5593a, 0x3feeff76f2fb5e47, 0x3fef05b030a1064a, - 0x3fef0c1e904bc1d2, 0x3fef12c25bd71e09, 0x3fef199bdd85529c, - 0x3fef20ab5fffd07a, 0x3fef27f12e57d14b, 0x3fef2f6d9406e7b5, - 0x3fef3720dcef9069, 0x3fef3f0b555dc3fa, 0x3fef472d4a07897c, - 0x3fef4f87080d89f2, 0x3fef5818dcfba487, 0x3fef60e316c98398, - 0x3fef69e603db3285, 0x3fef7321f301b460, 0x3fef7c97337b9b5f, - 0x3fef864614f5a129, 0x3fef902ee78b3ff6, 0x3fef9a51fbc74c83, - 0x3fefa4afa2a490da, 0x3fefaf482d8e67f1, 0x3fefba1bee615a27, - 0x3fefc52b376bba97, 0x3fefd0765b6e4540, 0x3fefdbfdad9cbe14, - 
0x3fefe7c1819e90d8, 0x3feff3c22b8f71f1, -}; diff --git a/contrib/arm-optimized-routines/pl/math/v_exp_tail.h b/contrib/arm-optimized-routines/pl/math/v_exp_tail.h deleted file mode 100644 index 903f1fd95717..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_exp_tail.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Constants for double-precision e^(x+tail) vector function. - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -#define C1_scal 0x1.fffffffffffd4p-2 -#define C2_scal 0x1.5555571d6b68cp-3 -#define C3_scal 0x1.5555576a59599p-5 -#define InvLn2_scal 0x1.71547652b82fep8 /* N/ln2. */ -#define Ln2hi_scal 0x1.62e42fefa39efp-9 /* ln2/N. */ -#define Ln2lo_scal 0x1.abc9e3b39803f3p-64 - -#define N (1 << V_EXP_TAIL_TABLE_BITS) -#define Tab __v_exp_tail_data -#define IndexMask_scal (N - 1) -#define Shift_scal 0x1.8p+52 -#define Thres_scal 704.0 diff --git a/contrib/arm-optimized-routines/pl/math/v_exp_tail_inline.h b/contrib/arm-optimized-routines/pl/math/v_exp_tail_inline.h deleted file mode 100644 index 76ecc6b0a33a..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_exp_tail_inline.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Double-precision vector e^(x+tail) function. - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ -#ifndef PL_MATH_V_EXP_TAIL_INLINE_H -#define PL_MATH_V_EXP_TAIL_INLINE_H - -#include "v_math.h" -#include "poly_advsimd_f64.h" - -#ifndef WANT_V_EXP_TAIL_SPECIALCASE -#error \ - "Cannot use v_exp_tail_inline.h without specifying whether you need the special case computation." -#endif - -#define N (1 << V_EXP_TAIL_TABLE_BITS) - -static const struct data -{ - float64x2_t poly[4]; -#if WANT_V_EXP_TAIL_SPECIALCASE - float64x2_t big_bound, huge_bound; -#endif - float64x2_t shift, invln2, ln2_hi, ln2_lo; -} data = { -#if WANT_V_EXP_TAIL_SPECIALCASE - .big_bound = V2 (704.0), - .huge_bound = V2 (1280.0 * N), -#endif - .shift = V2 (0x1.8p52), - .invln2 = V2 (0x1.71547652b82fep8), /* N/ln2. */ - .ln2_hi = V2 (0x1.62e42fefa39efp-9), /* ln2/N. */ - .ln2_lo = V2 (0x1.abc9e3b39803f3p-64), - .poly = { V2 (1.0), V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6b68cp-3), - V2 (0x1.5555576a59599p-5) }, -}; - -static inline uint64x2_t -lookup_sbits (uint64x2_t i) -{ - return (uint64x2_t){__v_exp_tail_data[i[0]], __v_exp_tail_data[i[1]]}; -} - -#if WANT_V_EXP_TAIL_SPECIALCASE -#define SpecialOffset v_u64 (0x6000000000000000) /* 0x1p513. */ -/* The following 2 bias when combined form the exponent bias: - SpecialBias1 - SpecialBias2 = asuint64(1.0). */ -#define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */ -#define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */ -static float64x2_t VPCS_ATTR -v_exp_tail_special_case (float64x2_t s, float64x2_t y, float64x2_t n, - const struct data *d) -{ - /* 2^(n/N) may overflow, break it up into s1*s2. 
*/ - uint64x2_t b = vandq_u64 (vclezq_f64 (n), SpecialOffset); - float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (SpecialBias1, b)); - float64x2_t s2 = vreinterpretq_f64_u64 ( - vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), SpecialBias2), b)); - uint64x2_t oflow = vcagtq_f64 (n, d->huge_bound); - float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, y, s2), s1); - float64x2_t r1 = vmulq_f64 (s1, s1); - return vbslq_f64 (oflow, r1, r0); -} -#endif - -static inline float64x2_t VPCS_ATTR -v_exp_tail_inline (float64x2_t x, float64x2_t xtail) -{ - const struct data *d = ptr_barrier (&data); -#if WANT_V_EXP_TAIL_SPECIALCASE - uint64x2_t special = vcgtq_f64 (vabsq_f64 (x), d->big_bound); -#endif - /* n = round(x/(ln2/N)). */ - float64x2_t z = vfmaq_f64 (d->shift, x, d->invln2); - uint64x2_t u = vreinterpretq_u64_f64 (z); - float64x2_t n = vsubq_f64 (z, d->shift); - - /* r = x - n*ln2/N. */ - float64x2_t r = x; - r = vfmsq_f64 (r, d->ln2_hi, n); - r = vfmsq_f64 (r, d->ln2_lo, n); - - uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TAIL_TABLE_BITS); - uint64x2_t i = vandq_u64 (u, v_u64 (N - 1)); - - /* y = tail + exp(r) - 1 ~= r + C1 r^2 + C2 r^3 + C3 r^4, using Horner. */ - float64x2_t y = v_horner_3_f64 (r, d->poly); - y = vfmaq_f64 (xtail, y, r); - - /* s = 2^(n/N). */ - u = lookup_sbits (i); - float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e)); - -#if WANT_V_EXP_TAIL_SPECIALCASE - if (unlikely (v_any_u64 (special))) - return v_exp_tail_special_case (s, y, n, d); -#endif - return vfmaq_f64 (s, y, s); -} -#endif // PL_MATH_V_EXP_TAIL_INLINE_H diff --git a/contrib/arm-optimized-routines/pl/math/v_expf_inline.h b/contrib/arm-optimized-routines/pl/math/v_expf_inline.h deleted file mode 100644 index 166683726b4d..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_expf_inline.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Helper for single-precision routines which calculate exp(x) and do not - * need special-case handling - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#ifndef PL_MATH_V_EXPF_INLINE_H -#define PL_MATH_V_EXPF_INLINE_H - -#include "v_math.h" - -struct v_expf_data -{ - float32x4_t poly[5]; - float32x4_t shift, invln2_and_ln2; -}; - -/* maxerr: 1.45358 +0.5 ulp. */ -#define V_EXPF_DATA \ - { \ - .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), \ - V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, \ - .shift = V4 (0x1.8p23f), \ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \ - } - -#define ExponentBias v_u32 (0x3f800000) /* asuint(1.0f). */ -#define C(i) d->poly[i] - -static inline float32x4_t -v_expf_inline (float32x4_t x, const struct v_expf_data *d) -{ - /* Helper routine for calculating exp(x). - Copied from v_expf.c, with all special-case handling removed - the - calling routine should handle special values if required. */ - - /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - float32x4_t n, r, z; - z = vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0); - n = vsubq_f32 (z, d->shift); - r = vfmsq_laneq_f32 (x, n, d->invln2_and_ln2, 1); - r = vfmsq_laneq_f32 (r, n, d->invln2_and_ln2, 2); - uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); - float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias)); - - /* Custom order-4 Estrin avoids building high order monomial. 
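For reference, a plain-C sketch of the difference between Horner and Estrin evaluation of a degree-4 polynomial (the comment above refers to a custom variant of the latter; the coefficient names here are generic, not the routine's):

static double
horner4 (double r, const double c[5])
{
  /* One serial chain of dependent fused operations. */
  return c[0] + r * (c[1] + r * (c[2] + r * (c[3] + r * c[4])));
}

static double
estrin4 (double r, const double c[5])
{
  /* Independent halves can be evaluated in parallel once r^2 is known. */
  double r2 = r * r;
  double p01 = c[0] + r * c[1];
  double p23 = c[2] + r * c[3];
  return p01 + r2 * (p23 + r2 * c[4]);
}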
*/ - float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t p, q, poly; - p = vfmaq_f32 (C (1), C (0), r); - q = vfmaq_f32 (C (3), C (2), r); - q = vfmaq_f32 (q, p, r2); - p = vmulq_f32 (C (4), r); - poly = vfmaq_f32 (p, q, r2); - return vfmaq_f32 (scale, poly, scale); -} - -#endif // PL_MATH_V_EXPF_INLINE_H diff --git a/contrib/arm-optimized-routines/pl/math/v_expm1_2u5.c b/contrib/arm-optimized-routines/pl/math/v_expm1_2u5.c deleted file mode 100644 index dd255472cec0..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_expm1_2u5.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Double-precision vector exp(x) - 1 function. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "poly_advsimd_f64.h" -#include "pl_sig.h" -#include "pl_test.h" - -static const struct data -{ - float64x2_t poly[11]; - float64x2_t invln2, ln2, shift; - int64x2_t exponent_bias; -#if WANT_SIMD_EXCEPT - uint64x2_t thresh, tiny_bound; -#else - float64x2_t oflow_bound; -#endif -} data = { - /* Generated using fpminimax, with degree=12 in [log(2)/2, log(2)/2]. */ - .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5), - V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10), - V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16), - V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22), - V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) }, - .invln2 = V2 (0x1.71547652b82fep0), - .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 }, - .shift = V2 (0x1.8p52), - .exponent_bias = V2 (0x3ff0000000000000), -#if WANT_SIMD_EXCEPT - /* asuint64(oflow_bound) - asuint64(0x1p-51), shifted left by 1 for abs - compare. */ - .thresh = V2 (0x78c56fa6d34b552), - /* asuint64(0x1p-51) << 1. */ - .tiny_bound = V2 (0x3cc0000000000000 << 1), -#else - /* Value above which expm1(x) should overflow. Absolute value of the - underflow bound is greater than this, so it catches both cases - there is - a small window where fallbacks are triggered unnecessarily. */ - .oflow_bound = V2 (0x1.62b7d369a5aa9p+9), -#endif -}; - -static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, uint64x2_t special) -{ - return v_call_f64 (expm1, x, y, special); -} - -/* Double-precision vector exp(x) - 1 function. - The maximum error observed error is 2.18 ULP: - _ZGVnN2v_expm1 (0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2 - want 0x1.a8b9ea8d66e2p-2. */ -float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x) -{ - const struct data *d = ptr_barrier (&data); - - uint64x2_t ix = vreinterpretq_u64_f64 (x); - -#if WANT_SIMD_EXCEPT - /* If fp exceptions are to be triggered correctly, fall back to scalar for - |x| < 2^-51, |x| > oflow_bound, Inf & NaN. Add ix to itself for - shift-left by 1, and compare with thresh which was left-shifted offline - - this is effectively an absolute compare. */ - uint64x2_t special - = vcgeq_u64 (vsubq_u64 (vaddq_u64 (ix, ix), d->tiny_bound), d->thresh); - if (unlikely (v_any_u64 (special))) - x = v_zerofy_f64 (x, special); -#else - /* Large input, NaNs and Infs. */ - uint64x2_t special = vcageq_f64 (x, d->oflow_bound); -#endif - - /* Reduce argument to smaller range: - Let i = round(x / ln2) - and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. - exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 - where 2^i is exact because i is an integer. 
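A scalar sketch of the shift-based reduction described above, assuming round-to-nearest mode and |x/ln2| well below 2^51, where the trick is valid (fma from math.h stands in for the vector fused multiply ops; link with -lm):

#include <math.h>

static double
reduce (double x, double *f)
{
  const double Shift = 0x1.8p52;
  const double InvLn2 = 0x1.71547652b82fep0;
  const double Ln2Hi = 0x1.62e42fefa39efp-1;
  const double Ln2Lo = 0x1.abc9e3b39803fp-56;
  /* Adding and subtracting 1.5*2^52 rounds x/ln2 to the nearest integer. */
  double n = fma (x, InvLn2, Shift) - Shift;
  /* Remove n*ln2 in two parts so the subtraction stays accurate. */
  *f = fma (-n, Ln2Lo, fma (-n, Ln2Hi, x));
  return n;
}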
*/ - float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift); - int64x2_t i = vcvtq_s64_f64 (n); - float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0); - f = vfmsq_laneq_f64 (f, n, d->ln2, 1); - - /* Approximate expm1(f) using polynomial. - Taylor expansion for expm1(x) has the form: - x + ax^2 + bx^3 + cx^4 .... - So we calculate the polynomial P(f) = a + bf + cf^2 + ... - and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - float64x2_t f2 = vmulq_f64 (f, f); - float64x2_t f4 = vmulq_f64 (f2, f2); - float64x2_t f8 = vmulq_f64 (f4, f4); - float64x2_t p = vfmaq_f64 (f, f2, v_estrin_10_f64 (f, f2, f4, f8, d->poly)); - - /* Assemble the result. - expm1(x) ~= 2^i * (p + 1) - 1 - Let t = 2^i. */ - int64x2_t u = vaddq_s64 (vshlq_n_s64 (i, 52), d->exponent_bias); - float64x2_t t = vreinterpretq_f64_s64 (u); - - if (unlikely (v_any_u64 (special))) - return special_case (vreinterpretq_f64_u64 (ix), - vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t), - special); - - /* expm1(x) ~= p * t + (t - 1). */ - return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t); -} - -PL_SIG (V, D, 1, expm1, -9.9, 9.9) -PL_TEST_ULP (V_NAME_D1 (expm1), 1.68) -PL_TEST_EXPECT_FENV (V_NAME_D1 (expm1), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0, 0x1p-51, 1000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0x1p-51, 0x1.62b7d369a5aa9p+9, 100000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0x1.62b7d369a5aa9p+9, inf, 100) diff --git a/contrib/arm-optimized-routines/pl/math/v_expm1f_1u6.c b/contrib/arm-optimized-routines/pl/math/v_expm1f_1u6.c deleted file mode 100644 index 6b282d0cc00f..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_expm1f_1u6.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Single-precision vector exp(x) - 1 function. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "poly_advsimd_f32.h" -#include "pl_sig.h" -#include "pl_test.h" - -static const struct data -{ - float32x4_t poly[5]; - float32x4_t invln2_and_ln2; - float32x4_t shift; - int32x4_t exponent_bias; -#if WANT_SIMD_EXCEPT - uint32x4_t thresh; -#else - float32x4_t oflow_bound; -#endif -} data = { - /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2]. */ - .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), - V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, - /* Stores constants: invln2, ln2_hi, ln2_lo, 0. */ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, - .shift = V4 (0x1.8p23f), - .exponent_bias = V4 (0x3f800000), -#if !WANT_SIMD_EXCEPT - /* Value above which expm1f(x) should overflow. Absolute value of the - underflow bound is greater than this, so it catches both cases - there is - a small window where fallbacks are triggered unnecessarily. */ - .oflow_bound = V4 (0x1.5ebc4p+6), -#else - /* asuint(oflow_bound) - asuint(0x1p-23), shifted left by 1 for absolute - compare. */ - .thresh = V4 (0x1d5ebc40), -#endif -}; - -/* asuint(0x1p-23), shifted by 1 for abs compare. */ -#define TinyBound v_u32 (0x34000000 << 1) - -static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) -{ - return v_call_f32 (expm1f, x, y, special); -} - -/* Single-precision vector exp(x) - 1 function. - The maximum error is 1.51 ULP: - _ZGVnN4v_expm1f (0x1.8baa96p-2) got 0x1.e2fb9p-2 - want 0x1.e2fb94p-2. 
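The error quotes above compare a rounded result against the correctly rounded target. A sketch of how such a ULP distance between two finite, same-sign floats can be counted (note the quoted 1.51 ULP is measured against the infinitely precise value, so it need not be an integer):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t
asuint (float x)
{
  uint32_t u;
  memcpy (&u, &x, sizeof u);
  return u;
}

int
main (void)
{
  /* Adjacent floats have adjacent bit patterns, so the payload difference
     counts representable steps between got and want.  */
  int32_t steps
      = (int32_t) (asuint (0x1.e2fb9p-2f) - asuint (0x1.e2fb94p-2f));
  printf ("%d\n", steps); /* prints -2 */
  return 0;
}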
*/ -float32x4_t VPCS_ATTR V_NAME_F1 (expm1) (float32x4_t x) -{ - const struct data *d = ptr_barrier (&data); - uint32x4_t ix = vreinterpretq_u32_f32 (x); - -#if WANT_SIMD_EXCEPT - /* If fp exceptions are to be triggered correctly, fall back to scalar for - |x| < 2^-23, |x| > oflow_bound, Inf & NaN. Add ix to itself for - shift-left by 1, and compare with thresh which was left-shifted offline - - this is effectively an absolute compare. */ - uint32x4_t special - = vcgeq_u32 (vsubq_u32 (vaddq_u32 (ix, ix), TinyBound), d->thresh); - if (unlikely (v_any_u32 (special))) - x = v_zerofy_f32 (x, special); -#else - /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */ - uint32x4_t special = vcagtq_f32 (x, d->oflow_bound); -#endif - - /* Reduce argument to smaller range: - Let i = round(x / ln2) - and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. - exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 - where 2^i is exact because i is an integer. */ - float32x4_t j = vsubq_f32 ( - vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift); - int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2); - - /* Approximate expm1(f) using polynomial. - Taylor expansion for expm1(x) has the form: - x + ax^2 + bx^3 + cx^4 .... - So we calculate the polynomial P(f) = a + bf + cf^2 + ... - and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - float32x4_t p = v_horner_4_f32 (f, d->poly); - p = vfmaq_f32 (f, vmulq_f32 (f, f), p); - - /* Assemble the result. - expm1(x) ~= 2^i * (p + 1) - 1 - Let t = 2^i. */ - int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias); - float32x4_t t = vreinterpretq_f32_s32 (u); - - if (unlikely (v_any_u32 (special))) - return special_case (vreinterpretq_f32_u32 (ix), - vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t), - special); - - /* expm1(x) ~= p * t + (t - 1). */ - return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t); -} - -PL_SIG (V, F, 1, expm1, -9.9, 9.9) -PL_TEST_ULP (V_NAME_F1 (expm1), 1.02) -PL_TEST_EXPECT_FENV (V_NAME_F1 (expm1), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (expm1), 0, 0x1p-23, 1000) -PL_TEST_INTERVAL (V_NAME_F1 (expm1), -0x1p-23, 0x1.5ebc4p+6, 1000000) -PL_TEST_INTERVAL (V_NAME_F1 (expm1), -0x1p-23, -0x1.9bbabcp+6, 1000000) -PL_TEST_INTERVAL (V_NAME_F1 (expm1), 0x1.5ebc4p+6, inf, 1000) -PL_TEST_INTERVAL (V_NAME_F1 (expm1), -0x1.9bbabcp+6, -inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/v_expm1f_inline.h b/contrib/arm-optimized-routines/pl/math/v_expm1f_inline.h deleted file mode 100644 index 6ae94c452de2..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_expm1f_inline.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Helper for single-precision routines which calculate exp(x) - 1 and do not - * need special-case handling - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#ifndef PL_MATH_V_EXPM1F_INLINE_H -#define PL_MATH_V_EXPM1F_INLINE_H - -#include "v_math.h" -#include "math_config.h" -#include "poly_advsimd_f32.h" - -struct v_expm1f_data -{ - float32x4_t poly[5]; - float32x4_t invln2_and_ln2, shift; - int32x4_t exponent_bias; -}; - -/* Coefficients generated using fpminimax with degree=5 in [-log(2)/2, - log(2)/2]. Exponent bias is asuint(1.0f). - invln2_and_ln2 Stores constants: invln2, ln2_hi, ln2_lo, 0.
*/ -#define V_EXPM1F_DATA \ - { \ - .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), \ - V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, \ - .shift = V4 (0x1.8p23f), .exponent_bias = V4 (0x3f800000), \ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \ - } - -static inline float32x4_t -expm1f_inline (float32x4_t x, const struct v_expm1f_data *d) -{ - /* Helper routine for calculating exp(x) - 1. - Copied from v_expm1f_1u6.c, with all special-case handling removed - the - calling routine should handle special values if required. */ - - /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ - float32x4_t j = vsubq_f32 ( - vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift); - int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2); - - /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). - Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses - Horner. */ - float32x4_t f2 = vmulq_f32 (f, f); - float32x4_t f4 = vmulq_f32 (f2, f2); - float32x4_t p = v_estrin_4_f32 (f, f2, f4, d->poly); - p = vfmaq_f32 (f, f2, p); - - /* t = 2^i. */ - int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias); - float32x4_t t = vreinterpretq_f32_s32 (u); - /* expm1(x) ~= p * t + (t - 1). */ - return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t); -} - -#endif // PL_MATH_V_EXPM1F_INLINE_H diff --git a/contrib/arm-optimized-routines/pl/math/v_log10_2u5.c b/contrib/arm-optimized-routines/pl/math/v_log10_2u5.c deleted file mode 100644 index 35dd62fe5e3e..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_log10_2u5.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Double-precision vector log10(x) function. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_advsimd_f64.h" - -#define N (1 << V_LOG10_TABLE_BITS) - -static const struct data -{ - uint64x2_t min_norm; - uint32x4_t special_bound; - float64x2_t poly[5]; - float64x2_t invln10, log10_2, ln2; - uint64x2_t sign_exp_mask; -} data = { - /* Computed from log coefficients divided by log(10) then rounded to double - precision. */ - .poly = { V2 (-0x1.bcb7b1526e506p-3), V2 (0x1.287a7636be1d1p-3), - V2 (-0x1.bcb7b158af938p-4), V2 (0x1.63c78734e6d07p-4), - V2 (-0x1.287461742fee4p-4) }, - .ln2 = V2 (0x1.62e42fefa39efp-1), - .invln10 = V2 (0x1.bcb7b1526e50ep-2), - .log10_2 = V2 (0x1.34413509f79ffp-2), - .min_norm = V2 (0x0010000000000000), /* asuint64(0x1p-1022). */ - .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. 
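The min_norm/special_bound pair above enables the classic single-compare special-case filter. A scalar sketch, assuming the usual IEEE binary64 layout (the vector code performs the equivalent compare on the narrowed high 32 bits):

#include <stdint.h>
#include <string.h>

static int
is_special (double x)
{
  uint64_t ix;
  memcpy (&ix, &x, sizeof ix);
  /* Positive normals satisfy min_norm <= ix < asuint64(inf), so after the
     subtraction they fall below the bound; zero, subnormals, negatives
     (sign bit set), inf and NaN all wrap around or exceed it.  */
  return ix - 0x0010000000000000ULL >= 0x7fe0000000000000ULL;
}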
*/ - .sign_exp_mask = V2 (0xfff0000000000000), -}; - -#define Off v_u64 (0x3fe6900900000000) -#define IndexMask (N - 1) - -#define T(s, i) __v_log10_data.s[i] - -struct entry -{ - float64x2_t invc; - float64x2_t log10c; -}; - -static inline struct entry -lookup (uint64x2_t i) -{ - struct entry e; - uint64_t i0 = (i[0] >> (52 - V_LOG10_TABLE_BITS)) & IndexMask; - uint64_t i1 = (i[1] >> (52 - V_LOG10_TABLE_BITS)) & IndexMask; - float64x2_t e0 = vld1q_f64 (&__v_log10_data.table[i0].invc); - float64x2_t e1 = vld1q_f64 (&__v_log10_data.table[i1].invc); - e.invc = vuzp1q_f64 (e0, e1); - e.log10c = vuzp2q_f64 (e0, e1); - return e; -} - -static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2, - uint32x2_t special) -{ - return v_call_f64 (log10, x, vfmaq_f64 (hi, r2, y), vmovl_u32 (special)); -} - -/* Fast implementation of double-precision vector log10 - is a slight modification of double-precision vector log. - Max ULP error: < 2.5 ulp (nearest rounding.) - Maximum measured at 2.46 ulp for x in [0.96, 0.97] - _ZGVnN2v_log10(0x1.13192407fcb46p+0) got 0x1.fff6be3cae4bbp-6 - want 0x1.fff6be3cae4b9p-6. */ -float64x2_t VPCS_ATTR V_NAME_D1 (log10) (float64x2_t x) -{ - const struct data *d = ptr_barrier (&data); - uint64x2_t ix = vreinterpretq_u64_f64 (x); - uint32x2_t special = vcge_u32 (vsubhn_u64 (ix, d->min_norm), - vget_low_u32 (d->special_bound)); - - /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - uint64x2_t tmp = vsubq_u64 (ix, Off); - int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); - uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask)); - float64x2_t z = vreinterpretq_f64_u64 (iz); - - struct entry e = lookup (tmp); - - /* log10(x) = log1p(z/c-1)/log(10) + log10(c) + k*log10(2). */ - float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); - float64x2_t kd = vcvtq_f64_s64 (k); - - /* hi = r / log(10) + log10(c) + k*log10(2). - Constants in v_log10_data.c are computed (in extended precision) as - e.log10c := e.logc * ivln10. */ - float64x2_t w = vfmaq_f64 (e.log10c, r, d->invln10); - - /* y = log10(1+r) + n * log10(2). */ - float64x2_t hi = vfmaq_f64 (w, kd, d->log10_2); - - /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ - float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t y = v_pw_horner_4_f64 (r, r2, d->poly); - - if (unlikely (v_any_u32h (special))) - return special_case (x, y, hi, r2, special); - return vfmaq_f64 (hi, r2, y); -} - -PL_SIG (V, D, 1, log10, 0.01, 11.1) -PL_TEST_ULP (V_NAME_D1 (log10), 1.97) -PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (log10)) -PL_TEST_INTERVAL (V_NAME_D1 (log10), -0.0, -inf, 1000) -PL_TEST_INTERVAL (V_NAME_D1 (log10), 0, 0x1p-149, 1000) -PL_TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-149, 0x1p-126, 4000) -PL_TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-126, 0x1p-23, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-23, 1.0, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (log10), 1.0, 100, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (log10), 100, inf, 50000) diff --git a/contrib/arm-optimized-routines/pl/math/v_log10f_3u5.c b/contrib/arm-optimized-routines/pl/math/v_log10f_3u5.c deleted file mode 100644 index 92bc50ba5bd9..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_log10f_3u5.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Single-precision vector log10 function. - * - * Copyright (c) 2020-2023, Arm Limited. 
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "poly_advsimd_f32.h" -#include "pl_sig.h" -#include "pl_test.h" - -static const struct data -{ - uint32x4_t min_norm; - uint16x8_t special_bound; - float32x4_t poly[8]; - float32x4_t inv_ln10, ln2; - uint32x4_t off, mantissa_mask; -} data = { - /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in - [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25. */ - .poly = { V4 (-0x1.bcb79cp-3f), V4 (0x1.2879c8p-3f), V4 (-0x1.bcd472p-4f), - V4 (0x1.6408f8p-4f), V4 (-0x1.246f8p-4f), V4 (0x1.f0e514p-5f), - V4 (-0x1.0fc92cp-4f), V4 (0x1.f5f76ap-5f) }, - .ln2 = V4 (0x1.62e43p-1f), - .inv_ln10 = V4 (0x1.bcb7b2p-2f), - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ - .off = V4 (0x3f2aaaab), /* 0.666667. */ - .mantissa_mask = V4 (0x007fffff), -}; - -static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2, - uint16x4_t cmp) -{ - /* Fall back to scalar code. */ - return v_call_f32 (log10f, x, vfmaq_f32 (y, p, r2), vmovl_u16 (cmp)); -} - -/* Fast implementation of AdvSIMD log10f, - uses a similar approach as AdvSIMD logf with the same offset (i.e., 2/3) and - an order 9 polynomial. - Maximum error: 3.305ulps (nearest rounding.) - _ZGVnN4v_log10f(0x1.555c16p+0) got 0x1.ffe2fap-4 - want 0x1.ffe2f4p-4. */ -float32x4_t VPCS_ATTR V_NAME_F1 (log10) (float32x4_t x) -{ - const struct data *d = ptr_barrier (&data); - uint32x4_t u = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); - - /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); - float32x4_t n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ - u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off); - float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); - - /* y = log10(1+r) + n * log10(2). */ - float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t poly = v_pw_horner_7_f32 (r, r2, d->poly); - /* y = Log10(2) * n + poly * InvLn(10). */ - float32x4_t y = vfmaq_f32 (r, d->ln2, n); - y = vmulq_f32 (y, d->inv_ln10); - - if (unlikely (v_any_u16h (special))) - return special_case (x, y, poly, r2, special); - return vfmaq_f32 (y, poly, r2); -} - -PL_SIG (V, F, 1, log10, 0.01, 11.1) -PL_TEST_ULP (V_NAME_F1 (log10), 2.81) -PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_F1 (log10)) -PL_TEST_INTERVAL (V_NAME_F1 (log10), -0.0, -inf, 100) -PL_TEST_INTERVAL (V_NAME_F1 (log10), 0, 0x1p-126, 100) -PL_TEST_INTERVAL (V_NAME_F1 (log10), 0x1p-126, 0x1p-23, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (log10), 0x1p-23, 1.0, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (log10), 1.0, 100, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (log10), 100, inf, 50000) diff --git a/contrib/arm-optimized-routines/pl/math/v_log1p_2u5.c b/contrib/arm-optimized-routines/pl/math/v_log1p_2u5.c deleted file mode 100644 index face02ddc6c3..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_log1p_2u5.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Double-precision vector log(1+x) function. - * - * Copyright (c) 2022-2023, Arm Limited. 
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "poly_advsimd_f64.h" -#include "pl_sig.h" -#include "pl_test.h" - -const static struct data -{ - float64x2_t poly[19], ln2[2]; - uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask, inf, minus_one; - int64x2_t one_top; -} data = { - /* Generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. */ - .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2), - V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3), - V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3), - V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4), - V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4), - V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4), - V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4), - V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5), - V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4), - V2 (-0x1.cfa7385bdb37ep-6) }, - .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) }, - /* top32(asuint64(sqrt(2)/2)) << 32. */ - .hf_rt2_top = V2 (0x3fe6a09e00000000), - /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32. */ - .one_m_hf_rt2_top = V2 (0x00095f6200000000), - .umask = V2 (0x000fffff00000000), - .one_top = V2 (0x3ff), - .inf = V2 (0x7ff0000000000000), - .minus_one = V2 (0xbff0000000000000) -}; - -#define BottomMask v_u64 (0xffffffff) - -static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, uint64x2_t special) -{ - return v_call_f64 (log1p, x, y, special); -} - -/* Vector log1p approximation using polynomial on reduced interval. Routine is - a modification of the algorithm used in scalar log1p, with no shortcut for - k=0 and no narrowing for f and k. Maximum observed error is 2.45 ULP: - _ZGVnN2v_log1p(0x1.658f7035c4014p+11) got 0x1.fd61d0727429dp+2 - want 0x1.fd61d0727429fp+2 . */ -VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x) -{ - const struct data *d = ptr_barrier (&data); - uint64x2_t ix = vreinterpretq_u64_f64 (x); - uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); - uint64x2_t special = vcgeq_u64 (ia, d->inf); - -#if WANT_SIMD_EXCEPT - special = vorrq_u64 (special, - vcgeq_u64 (ix, vreinterpretq_u64_f64 (v_f64 (-1)))); - if (unlikely (v_any_u64 (special))) - x = v_zerofy_f64 (x, special); -#else - special = vorrq_u64 (special, vcleq_f64 (x, v_f64 (-1))); -#endif - - /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f - is in [sqrt(2)/2, sqrt(2)]): - log1p(x) = k*log(2) + log1p(f). - - f may not be representable exactly, so we need a correction term: - let m = round(1 + x), c = (1 + x) - m. - c << m: at very small x, log1p(x) ~ x, hence: - log(1+x) - log(m) ~ c/m. - - We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */ - - /* Obtain correctly scaled k by manipulation in the exponent. - The scalar algorithm casts down to 32-bit at this point to calculate k and - u_red. We stay in double-width to obtain f and k, using the same constants - as the scalar algorithm but shifted left by 32. */ - float64x2_t m = vaddq_f64 (x, v_f64 (1)); - uint64x2_t mi = vreinterpretq_u64_f64 (m); - uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top); - - int64x2_t ki - = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top); - float64x2_t k = vcvtq_f64_s64 (ki); - - /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. 
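A scalar sketch of the scheme laid out in the comments above, leaning on libm's log1p for the reduced part purely to show how the k*log2, log1p(f) and c/m terms assemble (assumes x > -1, round-to-nearest, and the POSIX M_SQRT1_2/M_LN2 constants):

#include <math.h>

static double
log1p_sketch (double x)
{
  double m = x + 1.0; /* rounded 1 + x */
  int k;
  double f = frexp (m, &k); /* m = f * 2^k, f in [0.5, 1) */
  if (f < M_SQRT1_2)
    {
      f *= 2.0;
      k--;
    } /* now f is in [sqrt(2)/2, sqrt(2)) */
  double c = x - (m - 1.0); /* rounding error of computing 1 + x */
  return k * M_LN2 + log1p (f - 1.0) + c / m;
}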
*/ - uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top); - uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask)); - float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1)); - - /* Correction term c/m. */ - float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m); - - /* Approximate log1p(x) on the reduced input using a polynomial. Because - log1p(0)=0 we choose an approximation of the form: - x + C0*x^2 + C1*x^3 + C2x^4 + ... - Hence approximation has the form f + f^2 * P(f) - where P(x) = C0 + C1*x + C2x^2 + ... - Assembling this all correctly is dealt with at the final step. */ - float64x2_t f2 = vmulq_f64 (f, f); - float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly); - - float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]); - float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]); - float64x2_t y = vaddq_f64 (ylo, yhi); - - if (unlikely (v_any_u64 (special))) - return special_case (vreinterpretq_f64_u64 (ix), vfmaq_f64 (y, f2, p), - special); - - return vfmaq_f64 (y, f2, p); -} - -PL_SIG (V, D, 1, log1p, -0.9, 10.0) -PL_TEST_ULP (V_NAME_D1 (log1p), 1.97) -PL_TEST_EXPECT_FENV (V_NAME_D1 (log1p), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0.0, 0x1p-23, 50000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0x1p-23, 0.001, 50000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0.001, 1.0, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (log1p), 1, inf, 40000) -PL_TEST_INTERVAL (V_NAME_D1 (log1p), -1.0, -inf, 500) diff --git a/contrib/arm-optimized-routines/pl/math/v_log1p_inline.h b/contrib/arm-optimized-routines/pl/math/v_log1p_inline.h deleted file mode 100644 index bd57bfc6fe6e..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_log1p_inline.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Helper for vector double-precision routines which calculate log(1 + x) and do - * not need special-case handling - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ -#ifndef PL_MATH_V_LOG1P_INLINE_H -#define PL_MATH_V_LOG1P_INLINE_H - -#include "v_math.h" -#include "poly_advsimd_f64.h" - -struct v_log1p_data -{ - float64x2_t poly[19], ln2[2]; - uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask; - int64x2_t one_top; -}; - -/* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. */ -#define V_LOG1P_CONSTANTS_TABLE \ - { \ - .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2), \ - V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3), \ - V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3), \ - V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4), \ - V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4), \ - V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4), \ - V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4), \ - V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5), \ - V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4), \ - V2 (-0x1.cfa7385bdb37ep-6) }, \ - .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) }, \ - .hf_rt2_top = V2 (0x3fe6a09e00000000), \ - .one_m_hf_rt2_top = V2 (0x00095f6200000000), \ - .umask = V2 (0x000fffff00000000), .one_top = V2 (0x3ff) \ - } - -#define BottomMask v_u64 (0xffffffff) - -static inline float64x2_t -log1p_inline (float64x2_t x, const struct v_log1p_data *d) -{ - /* Helper for calculating log(x + 1). Copied from v_log1p_2u5.c, with several - modifications: - - No special-case handling - this should be dealt with by the caller. - - Pairwise Horner polynomial evaluation for improved accuracy. 
- - Optionally simulate the shortcut for k=0, used in the scalar routine, - using v_sel, for improved accuracy when the argument to log1p is close to - 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1 in - the source of the caller before including this file. - See v_log1pf_2u1.c for details of the algorithm. */ - float64x2_t m = vaddq_f64 (x, v_f64 (1)); - uint64x2_t mi = vreinterpretq_u64_f64 (m); - uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top); - - int64x2_t ki - = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top); - float64x2_t k = vcvtq_f64_s64 (ki); - - /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ - uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top); - uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask)); - float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1)); - - /* Correction term c/m. */ - float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m); - -#ifndef WANT_V_LOG1P_K0_SHORTCUT -#error \ - "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0" -#elif WANT_V_LOG1P_K0_SHORTCUT - /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is - that the approximation is solely the polynomial. */ - uint64x2_t k0 = vceqzq_f64 (k); - cm = v_zerofy_f64 (cm, k0); - f = vbslq_f64 (k0, x, f); -#endif - - /* Approximate log1p(f) on the reduced input using a polynomial. */ - float64x2_t f2 = vmulq_f64 (f, f); - float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly); - - /* Assemble log1p(x) = k * log2 + log1p(f) + c/m. */ - float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]); - float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]); - return vfmaq_f64 (vaddq_f64 (ylo, yhi), f2, p); -} - -#endif // PL_MATH_V_LOG1P_INLINE_H diff --git a/contrib/arm-optimized-routines/pl/math/v_log1pf_2u1.c b/contrib/arm-optimized-routines/pl/math/v_log1pf_2u1.c deleted file mode 100644 index 153c88da9c88..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_log1pf_2u1.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Single-precision vector log(1+x) function. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_advsimd_f32.h" - -const static struct data -{ - float32x4_t poly[8], ln2; - uint32x4_t tiny_bound, minus_one, four, thresh; - int32x4_t three_quarters; -} data = { - .poly = { /* Generated using FPMinimax in [-0.25, 0.5]. First two coefficients - (1, -0.5) are not stored as they can be generated more - efficiently. */ - V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), - V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), - V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, - .ln2 = V4 (0x1.62e43p-1f), - .tiny_bound = V4 (0x34000000), /* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. */ - .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - tiny_bound. */ - .minus_one = V4 (0xbf800000), - .four = V4 (0x40800000), - .three_quarters = V4 (0x3f400000) -}; - -static inline float32x4_t -eval_poly (float32x4_t m, const float32x4_t *p) -{ - /* Approximate log(1+m) on [-0.25, 0.5] using split Estrin scheme. 
*/ - float32x4_t p_12 = vfmaq_f32 (v_f32 (-0.5), m, p[0]); - float32x4_t p_34 = vfmaq_f32 (p[1], m, p[2]); - float32x4_t p_56 = vfmaq_f32 (p[3], m, p[4]); - float32x4_t p_78 = vfmaq_f32 (p[5], m, p[6]); - - float32x4_t m2 = vmulq_f32 (m, m); - float32x4_t p_02 = vfmaq_f32 (m, m2, p_12); - float32x4_t p_36 = vfmaq_f32 (p_34, m2, p_56); - float32x4_t p_79 = vfmaq_f32 (p_78, m2, p[7]); - - float32x4_t m4 = vmulq_f32 (m2, m2); - float32x4_t p_06 = vfmaq_f32 (p_02, m4, p_36); - return vfmaq_f32 (p_06, m4, vmulq_f32 (m4, p_79)); -} - -static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) -{ - return v_call_f32 (log1pf, x, y, special); -} - -/* Vector log1pf approximation using polynomial on reduced interval. Accuracy - is roughly 2.02 ULP: - log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */ -VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x) -{ - const struct data *d = ptr_barrier (&data); - - uint32x4_t ix = vreinterpretq_u32_f32 (x); - uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x)); - uint32x4_t special_cases - = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, d->tiny_bound), d->thresh), - vcgeq_u32 (ix, d->minus_one)); - float32x4_t special_arg = x; - -#if WANT_SIMD_EXCEPT - if (unlikely (v_any_u32 (special_cases))) - /* Side-step special lanes so fenv exceptions are not triggered - inadvertently. */ - x = v_zerofy_f32 (x, special_cases); -#endif - - /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m - is in [-0.25, 0.5]): - log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). - - We approximate log1p(m) with a polynomial, then scale by - k*log(2). Instead of doing this directly, we use an intermediate - scale factor s = 4*k*log(2) to ensure the scale is representable - as a normalised fp32 number. */ - - float32x4_t m = vaddq_f32 (x, v_f32 (1.0f)); - - /* Choose k to scale x to the range [-1/4, 1/2]. */ - int32x4_t k - = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters), - v_s32 (0xff800000)); - uint32x4_t ku = vreinterpretq_u32_s32 (k); - - /* Scale x by exponent manipulation. */ - float32x4_t m_scale - = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku)); - - /* Scale up to ensure that the scale factor is representable as normalised - fp32 number, and scale m down accordingly. */ - float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku)); - m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s)); - - /* Evaluate polynomial on the reduced interval. */ - float32x4_t p = eval_poly (m_scale, d->poly); - - /* The scale factor to be applied back at the end - by multiplying float(k) - by 2^-23 we get the unbiased exponent of k. */ - float32x4_t scale_back = vcvtq_f32_s32 (vshrq_n_s32 (k, 23)); - - /* Apply the scaling back. 
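The scale_back step just above relies on k already holding the exponent adjustment pre-shifted into the top bits. A two-line sketch of why converting k to float and multiplying by 2^-23 recovers the exponent value (the shift-by-23 in the vector code is the same thing):

#include <stdio.h>

int
main (void)
{
  int k = 5 << 23; /* exponent field sitting at bit 23 and up */
  float e = (float) k * 0x1p-23f; /* exactly 5.0f; -3 << 23 gives -3.0f */
  printf ("%g\n", e);
  return 0;
}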
*/ - float32x4_t y = vfmaq_f32 (p, scale_back, d->ln2); - - if (unlikely (v_any_u32 (special_cases))) - return special_case (special_arg, y, special_cases); - return y; -} - -PL_SIG (V, F, 1, log1p, -0.9, 10.0) -PL_TEST_ULP (V_NAME_F1 (log1p), 1.53) -PL_TEST_EXPECT_FENV (V_NAME_F1 (log1p), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (log1p), 0.0, 0x1p-23, 30000) -PL_TEST_SYM_INTERVAL (V_NAME_F1 (log1p), 0x1p-23, 1, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (log1p), 1, inf, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (log1p), -1.0, -inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/v_log1pf_inline.h b/contrib/arm-optimized-routines/pl/math/v_log1pf_inline.h deleted file mode 100644 index c654c6bad08f..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_log1pf_inline.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Helper for single-precision routines which calculate log(1 + x) and do not - * need special-case handling - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#ifndef PL_MATH_V_LOG1PF_INLINE_H -#define PL_MATH_V_LOG1PF_INLINE_H - -#include "v_math.h" -#include "poly_advsimd_f32.h" - -struct v_log1pf_data -{ - float32x4_t poly[8], ln2; - uint32x4_t four; - int32x4_t three_quarters; -}; - -/* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients - (1, -0.5) are not stored as they can be generated more efficiently. */ -#define V_LOG1PF_CONSTANTS_TABLE \ - { \ - .poly \ - = { V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), \ - V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), \ - V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, \ - .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \ - .three_quarters = V4 (0x3f400000) \ - } - -static inline float32x4_t -eval_poly (float32x4_t m, const float32x4_t *c) -{ - /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner (main routine - uses split Estrin, but this way reduces register pressure in the calling - routine). */ - float32x4_t q = vfmaq_f32 (v_f32 (-0.5), m, c[0]); - float32x4_t m2 = vmulq_f32 (m, m); - q = vfmaq_f32 (m, m2, q); - float32x4_t p = v_pw_horner_6_f32 (m, m2, c + 1); - p = vmulq_f32 (m2, p); - return vfmaq_f32 (q, m2, p); -} - -static inline float32x4_t -log1pf_inline (float32x4_t x, const struct v_log1pf_data d) -{ - /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no - special-case handling. See that file for details of the algorithm. */ - float32x4_t m = vaddq_f32 (x, v_f32 (1.0f)); - int32x4_t k - = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d.three_quarters), - v_s32 (0xff800000)); - uint32x4_t ku = vreinterpretq_u32_s32 (k); - float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d.four, ku)); - float32x4_t m_scale - = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku)); - m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s)); - float32x4_t p = eval_poly (m_scale, d.poly); - float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f)); - return vfmaq_f32 (p, scale_back, d.ln2); -} - -#endif // PL_MATH_V_LOG1PF_INLINE_H diff --git a/contrib/arm-optimized-routines/pl/math/v_log2_3u.c b/contrib/arm-optimized-routines/pl/math/v_log2_3u.c deleted file mode 100644 index 2dd2c34b7c97..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_log2_3u.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Double-precision vector log2 function. - * - * Copyright (c) 2022-2023, Arm Limited. 
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "pl_sig.h" -#include "pl_test.h" -#include "poly_advsimd_f64.h" - -#define N (1 << V_LOG2_TABLE_BITS) - -static const struct data -{ - uint64x2_t min_norm; - uint32x4_t special_bound; - float64x2_t poly[5]; - float64x2_t invln2; - uint64x2_t sign_exp_mask; -} data = { - /* Each coefficient was generated to approximate log(r) for |r| < 0x1.fp-9 - and N = 128, then scaled by log2(e) in extended precision and rounded back - to double precision. */ - .poly = { V2 (-0x1.71547652b83p-1), V2 (0x1.ec709dc340953p-2), - V2 (-0x1.71547651c8f35p-2), V2 (0x1.2777ebe12dda5p-2), - V2 (-0x1.ec738d616fe26p-3) }, - .invln2 = V2 (0x1.71547652b82fep0), - .min_norm = V2 (0x0010000000000000), /* asuint64(0x1p-1022). */ - .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. */ - .sign_exp_mask = V2 (0xfff0000000000000), -}; - -#define Off v_u64 (0x3fe6900900000000) -#define IndexMask (N - 1) - -struct entry -{ - float64x2_t invc; - float64x2_t log2c; -}; - -static inline struct entry -lookup (uint64x2_t i) -{ - struct entry e; - uint64_t i0 = (i[0] >> (52 - V_LOG2_TABLE_BITS)) & IndexMask; - uint64_t i1 = (i[1] >> (52 - V_LOG2_TABLE_BITS)) & IndexMask; - float64x2_t e0 = vld1q_f64 (&__v_log2_data.table[i0].invc); - float64x2_t e1 = vld1q_f64 (&__v_log2_data.table[i1].invc); - e.invc = vuzp1q_f64 (e0, e1); - e.log2c = vuzp2q_f64 (e0, e1); - return e; -} - -static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, float64x2_t w, float64x2_t r2, - uint32x2_t special) -{ - return v_call_f64 (log2, x, vfmaq_f64 (w, r2, y), vmovl_u32 (special)); -} - -/* Double-precision vector log2 routine. Implements the same algorithm as - vector log10, with coefficients and table entries scaled in extended - precision. The maximum observed error is 2.58 ULP: - _ZGVnN2v_log2(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5 - want 0x1.fffb34198d9ddp-5. */ -float64x2_t VPCS_ATTR V_NAME_D1 (log2) (float64x2_t x) -{ - const struct data *d = ptr_barrier (&data); - uint64x2_t ix = vreinterpretq_u64_f64 (x); - uint32x2_t special = vcge_u32 (vsubhn_u64 (ix, d->min_norm), - vget_low_u32 (d->special_bound)); - - /* x = 2^k z; where z is in range [Off,2*Off) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - uint64x2_t tmp = vsubq_u64 (ix, Off); - int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); - uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask)); - float64x2_t z = vreinterpretq_f64_u64 (iz); - - struct entry e = lookup (tmp); - - /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. 
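A scalar sketch of the decomposition in the comment above, with libm standing in for the polynomial and a fixed c standing in for the table lookup (the real table stores 128 precomputed (1/c, log2(c)) pairs so that c is always close to z):

#include <math.h>

static double
log2_sketch (double x)
{
  int k;
  double z = frexp (x, &k); /* x = z * 2^k, z in [0.5, 1) */
  double c = 0.75;          /* stand-in for the table entry nearest z */
  double r = z * (1.0 / c) - 1.0; /* small when c is close to z */
  return (double) k + log2 (c) + log2 (1.0 + r);
}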
*/ - - float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); - float64x2_t kd = vcvtq_f64_s64 (k); - float64x2_t w = vfmaq_f64 (e.log2c, r, d->invln2); - - float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t y = v_pw_horner_4_f64 (r, r2, d->poly); - w = vaddq_f64 (kd, w); - - if (unlikely (v_any_u32h (special))) - return special_case (x, y, w, r2, special); - return vfmaq_f64 (w, r2, y); -} - -PL_SIG (V, D, 1, log2, 0.01, 11.1) -PL_TEST_ULP (V_NAME_D1 (log2), 2.09) -PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (log2)) -PL_TEST_INTERVAL (V_NAME_D1 (log2), -0.0, -0x1p126, 100) -PL_TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-149, 0x1p-126, 4000) -PL_TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-126, 0x1p-23, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-23, 1.0, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (log2), 1.0, 100, 50000) -PL_TEST_INTERVAL (V_NAME_D1 (log2), 100, inf, 50000) diff --git a/contrib/arm-optimized-routines/pl/math/v_log2f_2u5.c b/contrib/arm-optimized-routines/pl/math/v_log2f_2u5.c deleted file mode 100644 index c64d88742136..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_log2f_2u5.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Single-precision vector log2 function. - * - * Copyright (c) 2022-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "poly_advsimd_f32.h" -#include "pl_sig.h" -#include "pl_test.h" - -static const struct data -{ - uint32x4_t min_norm; - uint16x8_t special_bound; - uint32x4_t off, mantissa_mask; - float32x4_t poly[9]; -} data = { - /* Coefficients generated using Remez algorithm approximate - log2(1+r)/r for r in [ -1/3, 1/3 ]. - rel error: 0x1.c4c4b0cp-26. */ - .poly = { V4 (0x1.715476p0f), /* (float)(1 / ln(2)). */ - V4 (-0x1.715458p-1f), V4 (0x1.ec701cp-2f), V4 (-0x1.7171a4p-2f), - V4 (0x1.27a0b8p-2f), V4 (-0x1.e5143ep-3f), V4 (0x1.9d8ecap-3f), - V4 (-0x1.c675bp-3f), V4 (0x1.9e495p-3f) }, - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ - .off = V4 (0x3f2aaaab), /* 0.666667. */ - .mantissa_mask = V4 (0x007fffff), -}; - -static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r, - uint16x4_t cmp) -{ - /* Fall back to scalar code. */ - return v_call_f32 (log2f, x, vfmaq_f32 (n, p, r), vmovl_u16 (cmp)); -} - -/* Fast implementation for single precision AdvSIMD log2, - relies on same argument reduction as AdvSIMD logf. - Maximum error: 2.48 ULPs - _ZGVnN4v_log2f(0x1.558174p+0) got 0x1.a9be84p-2 - want 0x1.a9be8p-2. */ -float32x4_t VPCS_ATTR V_NAME_F1 (log2) (float32x4_t x) -{ - const struct data *d = ptr_barrier (&data); - uint32x4_t u = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); - - /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); - float32x4_t n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ - u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off); - float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); - - /* y = log2(1+r) + n. 
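The single-precision reduction above ("x = 2^n * (1+r), 2/3 < 1+r < 4/3") works by re-biasing the raw bits around asuint(2/3). A scalar sketch of the same manipulation:

#include <stdint.h>
#include <string.h>

static float
reduce_offset (float x, float *r)
{
  const uint32_t Off = 0x3f2aaaab; /* asuint(0.666667) */
  uint32_t u;
  memcpy (&u, &x, sizeof u);
  u -= Off;
  /* Arithmetic shift sign-extends n, as the vector code assumes. */
  int32_t n = (int32_t) u >> 23;
  u = (u & 0x007fffff) + Off; /* mantissa recentred so 1+r is near 1 */
  float z;
  memcpy (&z, &u, sizeof z);
  *r = z - 1.0f;
  return (float) n;
}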
*/ - float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t p = v_pw_horner_8_f32 (r, r2, d->poly); - - if (unlikely (v_any_u16h (special))) - return special_case (x, n, p, r, special); - return vfmaq_f32 (n, p, r); -} - -PL_SIG (V, F, 1, log2, 0.01, 11.1) -PL_TEST_ULP (V_NAME_F1 (log2), 1.99) -PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_F1 (log2)) -PL_TEST_INTERVAL (V_NAME_F1 (log2), -0.0, -0x1p126, 100) -PL_TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-149, 0x1p-126, 4000) -PL_TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-126, 0x1p-23, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-23, 1.0, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (log2), 1.0, 100, 50000) -PL_TEST_INTERVAL (V_NAME_F1 (log2), 100, inf, 50000) diff --git a/contrib/arm-optimized-routines/pl/math/v_log_data.c b/contrib/arm-optimized-routines/pl/math/v_log_data.c deleted file mode 100644 index a26e8a051d97..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_log_data.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Lookup table for double-precision log(x) vector function. - * - * Copyright (c) 2019-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "math_config.h" - -const struct v_log_data __v_log_data = { - /* Worst-case error: 1.17 + 0.5 ulp. - Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */ - .poly = { -0x1.ffffffffffff7p-2, 0x1.55555555170d4p-2, -0x1.0000000399c27p-2, - 0x1.999b2e90e94cap-3, -0x1.554e550bd501ep-3 }, - .ln2 = 0x1.62e42fefa39efp-1, - /* Algorithm: - - x = 2^k z - log(x) = k ln2 + log(c) + poly(z/c - 1) - - where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1, - N=128) and log(c) and 1/c for the ith subinterval comes from two lookup - tables: - - table[i].invc = 1/c - table[i].logc = (double)log(c) - - where c is near the center of the subinterval and is chosen by trying - several floating point invc candidates around 1/center and selecting one - for which the error in (double)log(c) is minimized (< 0x1p-74), except the - subinterval that contains 1 and the previous one got tweaked to avoid - cancellation. 
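For reference, a naive sketch of how entries like the ones below could be produced (the real table, per the comment above, tries several invc candidates per subinterval and checks the rounding error of log(c) in extended precision, which plain double code cannot reproduce):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  const int N = 128;
  const double a = 0x1.69009p-1; /* start of [a, 2a) */
  for (int i = 0; i < N; i++)
    {
      double c = a * (1.0 + (i + 0.5) / N); /* near the subinterval centre */
      printf ("{ %a, %a },\n", 1.0 / c, log (c));
    }
  return 0;
}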
*/ - .table = { { 0x1.6a133d0dec120p+0, -0x1.62fe995eb963ap-2 }, - { 0x1.6815f2f3e42edp+0, -0x1.5d5a48dad6b67p-2 }, - { 0x1.661e39be1ac9ep+0, -0x1.57bde257d2769p-2 }, - { 0x1.642bfa30ac371p+0, -0x1.52294fbf2af55p-2 }, - { 0x1.623f1d916f323p+0, -0x1.4c9c7b598aa38p-2 }, - { 0x1.60578da220f65p+0, -0x1.47174fc5ff560p-2 }, - { 0x1.5e75349dea571p+0, -0x1.4199b7fa7b5cap-2 }, - { 0x1.5c97fd387a75ap+0, -0x1.3c239f48cfb99p-2 }, - { 0x1.5abfd2981f200p+0, -0x1.36b4f154d2aebp-2 }, - { 0x1.58eca051dc99cp+0, -0x1.314d9a0ff32fbp-2 }, - { 0x1.571e526d9df12p+0, -0x1.2bed85cca3cffp-2 }, - { 0x1.5554d555b3fcbp+0, -0x1.2694a11421af9p-2 }, - { 0x1.539015e2a20cdp+0, -0x1.2142d8d014fb2p-2 }, - { 0x1.51d0014ee0164p+0, -0x1.1bf81a2c77776p-2 }, - { 0x1.50148538cd9eep+0, -0x1.16b452a39c6a4p-2 }, - { 0x1.4e5d8f9f698a1p+0, -0x1.11776ffa6c67ep-2 }, - { 0x1.4cab0edca66bep+0, -0x1.0c416035020e0p-2 }, - { 0x1.4afcf1a9db874p+0, -0x1.071211aa10fdap-2 }, - { 0x1.495327136e16fp+0, -0x1.01e972e293b1bp-2 }, - { 0x1.47ad9e84af28fp+0, -0x1.f98ee587fd434p-3 }, - { 0x1.460c47b39ae15p+0, -0x1.ef5800ad716fbp-3 }, - { 0x1.446f12b278001p+0, -0x1.e52e160484698p-3 }, - { 0x1.42d5efdd720ecp+0, -0x1.db1104b19352ep-3 }, - { 0x1.4140cfe001a0fp+0, -0x1.d100ac59e0bd6p-3 }, - { 0x1.3fafa3b421f69p+0, -0x1.c6fced287c3bdp-3 }, - { 0x1.3e225c9c8ece5p+0, -0x1.bd05a7b317c29p-3 }, - { 0x1.3c98ec29a211ap+0, -0x1.b31abd229164fp-3 }, - { 0x1.3b13442a413fep+0, -0x1.a93c0edadb0a3p-3 }, - { 0x1.399156baa3c54p+0, -0x1.9f697ee30d7ddp-3 }, - { 0x1.38131639b4cdbp+0, -0x1.95a2efa9aa40ap-3 }, - { 0x1.36987540fbf53p+0, -0x1.8be843d796044p-3 }, - { 0x1.352166b648f61p+0, -0x1.82395ecc477edp-3 }, - { 0x1.33adddb3eb575p+0, -0x1.7896240966422p-3 }, - { 0x1.323dcd99fc1d3p+0, -0x1.6efe77aca8c55p-3 }, - { 0x1.30d129fefc7d2p+0, -0x1.65723e117ec5cp-3 }, - { 0x1.2f67e6b72fe7dp+0, -0x1.5bf15c0955706p-3 }, - { 0x1.2e01f7cf8b187p+0, -0x1.527bb6c111da1p-3 }, - { 0x1.2c9f518ddc86ep+0, -0x1.491133c939f8fp-3 }, - { 0x1.2b3fe86e5f413p+0, -0x1.3fb1b90c7fc58p-3 }, - { 0x1.29e3b1211b25cp+0, -0x1.365d2cc485f8dp-3 }, - { 0x1.288aa08b373cfp+0, -0x1.2d13758970de7p-3 }, - { 0x1.2734abcaa8467p+0, -0x1.23d47a721fd47p-3 }, - { 0x1.25e1c82459b81p+0, -0x1.1aa0229f25ec2p-3 }, - { 0x1.2491eb1ad59c5p+0, -0x1.117655ddebc3bp-3 }, - { 0x1.23450a54048b5p+0, -0x1.0856fbf83ab6bp-3 }, - { 0x1.21fb1bb09e578p+0, -0x1.fe83fabbaa106p-4 }, - { 0x1.20b415346d8f7p+0, -0x1.ec6e8507a56cdp-4 }, - { 0x1.1f6fed179a1acp+0, -0x1.da6d68c7cc2eap-4 }, - { 0x1.1e2e99b93c7b3p+0, -0x1.c88078462be0cp-4 }, - { 0x1.1cf011a7a882ap+0, -0x1.b6a786a423565p-4 }, - { 0x1.1bb44b97dba5ap+0, -0x1.a4e2676ac7f85p-4 }, - { 0x1.1a7b3e66cdd4fp+0, -0x1.9330eea777e76p-4 }, - { 0x1.1944e11dc56cdp+0, -0x1.8192f134d5ad9p-4 }, - { 0x1.18112aebb1a6ep+0, -0x1.70084464f0538p-4 }, - { 0x1.16e013231b7e9p+0, -0x1.5e90bdec5cb1fp-4 }, - { 0x1.15b1913f156cfp+0, -0x1.4d2c3433c5536p-4 }, - { 0x1.14859cdedde13p+0, -0x1.3bda7e219879ap-4 }, - { 0x1.135c2dc68cfa4p+0, -0x1.2a9b732d27194p-4 }, - { 0x1.12353bdb01684p+0, -0x1.196eeb2b10807p-4 }, - { 0x1.1110bf25b85b4p+0, -0x1.0854be8ef8a7ep-4 }, - { 0x1.0feeafd2f8577p+0, -0x1.ee998cb277432p-5 }, - { 0x1.0ecf062c51c3bp+0, -0x1.ccadb79919fb9p-5 }, - { 0x1.0db1baa076c8bp+0, -0x1.aae5b1d8618b0p-5 }, - { 0x1.0c96c5bb3048ep+0, -0x1.89413015d7442p-5 }, - { 0x1.0b7e20263e070p+0, -0x1.67bfe7bf158dep-5 }, - { 0x1.0a67c2acd0ce3p+0, -0x1.46618f83941bep-5 }, - { 0x1.0953a6391e982p+0, -0x1.2525df1b0618ap-5 }, - { 0x1.0841c3caea380p+0, -0x1.040c8e2f77c6ap-5 }, - { 0x1.07321489b13eap+0, -0x1.c62aad39f738ap-6 }, - { 
0x1.062491aee9904p+0, -0x1.847fe3bdead9cp-6 }, - { 0x1.05193497a7cc5p+0, -0x1.43183683400acp-6 }, - { 0x1.040ff6b5f5e9fp+0, -0x1.01f31c4e1d544p-6 }, - { 0x1.0308d19aa6127p+0, -0x1.82201d1e6b69ap-7 }, - { 0x1.0203beedb0c67p+0, -0x1.00dd0f3e1bfd6p-7 }, - { 0x1.010037d38bcc2p+0, -0x1.ff6fe1feb4e53p-9 }, - { 1.0, 0.0 }, - { 0x1.fc06d493cca10p-1, 0x1.fe91885ec8e20p-8 }, - { 0x1.f81e6ac3b918fp-1, 0x1.fc516f716296dp-7 }, - { 0x1.f44546ef18996p-1, 0x1.7bb4dd70a015bp-6 }, - { 0x1.f07b10382c84bp-1, 0x1.f84c99b34b674p-6 }, - { 0x1.ecbf7070e59d4p-1, 0x1.39f9ce4fb2d71p-5 }, - { 0x1.e91213f715939p-1, 0x1.7756c0fd22e78p-5 }, - { 0x1.e572a9a75f7b7p-1, 0x1.b43ee82db8f3ap-5 }, - { 0x1.e1e0e2c530207p-1, 0x1.f0b3fced60034p-5 }, - { 0x1.de5c72d8a8be3p-1, 0x1.165bd78d4878ep-4 }, - { 0x1.dae50fa5658ccp-1, 0x1.3425d2715ebe6p-4 }, - { 0x1.d77a71145a2dap-1, 0x1.51b8bd91b7915p-4 }, - { 0x1.d41c51166623ep-1, 0x1.6f15632c76a47p-4 }, - { 0x1.d0ca6ba0bb29fp-1, 0x1.8c3c88ecbe503p-4 }, - { 0x1.cd847e8e59681p-1, 0x1.a92ef077625dap-4 }, - { 0x1.ca4a499693e00p-1, 0x1.c5ed5745fa006p-4 }, - { 0x1.c71b8e399e821p-1, 0x1.e27876de1c993p-4 }, - { 0x1.c3f80faf19077p-1, 0x1.fed104fce4cdcp-4 }, - { 0x1.c0df92dc2b0ecp-1, 0x1.0d7bd9c17d78bp-3 }, - { 0x1.bdd1de3cbb542p-1, 0x1.1b76986cef97bp-3 }, - { 0x1.baceb9e1007a3p-1, 0x1.295913d24f750p-3 }, - { 0x1.b7d5ef543e55ep-1, 0x1.37239fa295d17p-3 }, - { 0x1.b4e749977d953p-1, 0x1.44d68dd78714bp-3 }, - { 0x1.b20295155478ep-1, 0x1.52722ebe5d780p-3 }, - { 0x1.af279f8e82be2p-1, 0x1.5ff6d12671f98p-3 }, - { 0x1.ac5638197fdf3p-1, 0x1.6d64c2389484bp-3 }, - { 0x1.a98e2f102e087p-1, 0x1.7abc4da40fddap-3 }, - { 0x1.a6cf5606d05c1p-1, 0x1.87fdbda1e8452p-3 }, - { 0x1.a4197fc04d746p-1, 0x1.95295b06a5f37p-3 }, - { 0x1.a16c80293dc01p-1, 0x1.a23f6d34abbc5p-3 }, - { 0x1.9ec82c4dc5bc9p-1, 0x1.af403a28e04f2p-3 }, - { 0x1.9c2c5a491f534p-1, 0x1.bc2c06a85721ap-3 }, - { 0x1.9998e1480b618p-1, 0x1.c903161240163p-3 }, - { 0x1.970d9977c6c2dp-1, 0x1.d5c5aa93287ebp-3 }, - { 0x1.948a5c023d212p-1, 0x1.e274051823fa9p-3 }, - { 0x1.920f0303d6809p-1, 0x1.ef0e656300c16p-3 }, - { 0x1.8f9b698a98b45p-1, 0x1.fb9509f05aa2ap-3 }, - { 0x1.8d2f6b81726f6p-1, 0x1.04041821f37afp-2 }, - { 0x1.8acae5bb55badp-1, 0x1.0a340a49b3029p-2 }, - { 0x1.886db5d9275b8p-1, 0x1.105a7918a126dp-2 }, - { 0x1.8617ba567c13cp-1, 0x1.1677819812b84p-2 }, - { 0x1.83c8d27487800p-1, 0x1.1c8b405b40c0ep-2 }, - { 0x1.8180de3c5dbe7p-1, 0x1.2295d16cfa6b1p-2 }, - { 0x1.7f3fbe71cdb71p-1, 0x1.28975066318a2p-2 }, - { 0x1.7d055498071c1p-1, 0x1.2e8fd855d86fcp-2 }, - { 0x1.7ad182e54f65ap-1, 0x1.347f83d605e59p-2 }, - { 0x1.78a42c3c90125p-1, 0x1.3a666d1244588p-2 }, - { 0x1.767d342f76944p-1, 0x1.4044adb6f8ec4p-2 }, - { 0x1.745c7ef26b00ap-1, 0x1.461a5f077558cp-2 }, - { 0x1.7241f15769d0fp-1, 0x1.4be799e20b9c8p-2 }, - { 0x1.702d70d396e41p-1, 0x1.51ac76a6b79dfp-2 }, - { 0x1.6e1ee3700cd11p-1, 0x1.57690d5744a45p-2 }, - { 0x1.6c162fc9cbe02p-1, 0x1.5d1d758e45217p-2 } } -}; diff --git a/contrib/arm-optimized-routines/pl/math/v_sinh_3u.c b/contrib/arm-optimized-routines/pl/math/v_sinh_3u.c deleted file mode 100644 index a644f54b4a0f..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_sinh_3u.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Double-precision vector sinh(x) function. - * - * Copyright (c) 2022-2023, Arm Limited. 
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "poly_advsimd_f64.h" -#include "pl_sig.h" -#include "pl_test.h" - -static const struct data -{ - float64x2_t poly[11]; - float64x2_t inv_ln2, m_ln2, shift; - uint64x2_t halff; - int64x2_t onef; -#if WANT_SIMD_EXCEPT - uint64x2_t tiny_bound, thresh; -#else - uint64x2_t large_bound; -#endif -} data = { - /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */ - .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5), - V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10), - V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16), - V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22), - V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), }, - - .inv_ln2 = V2 (0x1.71547652b82fep0), - .m_ln2 = (float64x2_t) {-0x1.62e42fefa39efp-1, -0x1.abc9e3b39803fp-56}, - .shift = V2 (0x1.8p52), - - .halff = V2 (0x3fe0000000000000), - .onef = V2 (0x3ff0000000000000), -#if WANT_SIMD_EXCEPT - /* 2^-26, below which sinh(x) rounds to x. */ - .tiny_bound = V2 (0x3e50000000000000), - /* asuint(large_bound) - asuint(tiny_bound). */ - .thresh = V2 (0x0230000000000000), -#else -/* 2^9. expm1 helper overflows for large input. */ - .large_bound = V2 (0x4080000000000000), -#endif -}; - -static inline float64x2_t -expm1_inline (float64x2_t x) -{ - const struct data *d = ptr_barrier (&data); - - /* Reduce argument: - exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 - where i = round(x / ln2) - and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */ - float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift); - int64x2_t i = vcvtq_s64_f64 (j); - float64x2_t f = vfmaq_laneq_f64 (x, j, d->m_ln2, 0); - f = vfmaq_laneq_f64 (f, j, d->m_ln2, 1); - /* Approximate expm1(f) using polynomial. */ - float64x2_t f2 = vmulq_f64 (f, f); - float64x2_t f4 = vmulq_f64 (f2, f2); - float64x2_t f8 = vmulq_f64 (f4, f4); - float64x2_t p = vfmaq_f64 (f, f2, v_estrin_10_f64 (f, f2, f4, f8, d->poly)); - /* t = 2^i. */ - float64x2_t t = vreinterpretq_f64_u64 ( - vreinterpretq_u64_s64 (vaddq_s64 (vshlq_n_s64 (i, 52), d->onef))); - /* expm1(x) ~= p * t + (t - 1). */ - return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t); -} - -static float64x2_t NOINLINE VPCS_ATTR -special_case (float64x2_t x) -{ - return v_call_f64 (sinh, x, x, v_u64 (-1)); -} - -/* Approximation for vector double-precision sinh(x) using expm1. - sinh(x) = (exp(x) - exp(-x)) / 2. - The greatest observed error is 2.57 ULP: - _ZGVnN2v_sinh (0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2 - want 0x1.ab34e59d678d9p-2. */ -float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x) -{ - const struct data *d = ptr_barrier (&data); - - float64x2_t ax = vabsq_f64 (x); - uint64x2_t sign - = veorq_u64 (vreinterpretq_u64_f64 (x), vreinterpretq_u64_f64 (ax)); - float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->halff)); - -#if WANT_SIMD_EXCEPT - uint64x2_t special = vcgeq_u64 ( - vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh); -#else - uint64x2_t special = vcgeq_u64 (vreinterpretq_u64_f64 (ax), d->large_bound); -#endif - - /* Fall back to scalar variant for all lanes if any of them are special. */ - if (unlikely (v_any_u64 (special))) - return special_case (x); - - /* Up to the point that expm1 overflows, we can use it to calculate sinh - using a slight rearrangement of the definition of sinh. This allows us to - retain acceptable accuracy for very small inputs. 
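The rearrangement referred to above follows from e^x - e^-x = t + t/(t+1), where t = expm1(|x|). A scalar sketch, with the special cases omitted and valid only below the expm1 overflow bound:

#include <math.h>

static double
sinh_sketch (double x)
{
  double t = expm1 (fabs (x));
  /* (e^|x| - e^-|x|)/2 = (t + t/(t+1))/2; copysign restores the odd
     symmetry, and for tiny x this degrades gracefully to x.  */
  return copysign (0.5, x) * (t + t / (t + 1.0));
}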
*/ - float64x2_t t = expm1_inline (ax); - t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0)))); - return vmulq_f64 (t, halfsign); -} - -PL_SIG (V, D, 1, sinh, -10.0, 10.0) -PL_TEST_ULP (V_NAME_D1 (sinh), 2.08) -PL_TEST_EXPECT_FENV (V_NAME_D1 (sinh), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0, 0x1p-26, 1000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p9, inf, 1000) diff --git a/contrib/arm-optimized-routines/pl/math/v_tanh_3u.c b/contrib/arm-optimized-routines/pl/math/v_tanh_3u.c deleted file mode 100644 index 5de85c68da2c..000000000000 --- a/contrib/arm-optimized-routines/pl/math/v_tanh_3u.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Double-precision vector tanh(x) function. - * Copyright (c) 2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#include "v_math.h" -#include "poly_advsimd_f64.h" -#include "mathlib.h" -#include "pl_sig.h" -#include "pl_test.h" - -static const struct data -{ - float64x2_t poly[11]; - float64x2_t inv_ln2, ln2_hi, ln2_lo, shift; - uint64x2_t onef; - uint64x2_t thresh, tiny_bound; -} data = { - /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */ - .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5), - V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10), - V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16), - V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22), - V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), }, - - .inv_ln2 = V2 (0x1.71547652b82fep0), - .ln2_hi = V2 (-0x1.62e42fefa39efp-1), - .ln2_lo = V2 (-0x1.abc9e3b39803fp-56), - .shift = V2 (0x1.8p52), - - .onef = V2 (0x3ff0000000000000), - .tiny_bound = V2 (0x3e40000000000000), /* asuint64 (0x1p-27). */ - /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound). */ - .thresh = V2 (0x01f241bf835f9d5f), -}; - -static inline float64x2_t -expm1_inline (float64x2_t x, const struct data *d) -{ - /* Helper routine for calculating exp(x) - 1. Vector port of the helper from - the scalar variant of tanh. */ - - /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ - float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift); - int64x2_t i = vcvtq_s64_f64 (j); - float64x2_t f = vfmaq_f64 (x, j, d->ln2_hi); - f = vfmaq_f64 (f, j, d->ln2_lo); - - /* Approximate expm1(f) using polynomial. */ - float64x2_t f2 = vmulq_f64 (f, f); - float64x2_t f4 = vmulq_f64 (f2, f2); - float64x2_t p = vfmaq_f64 ( - f, f2, v_estrin_10_f64 (f, f2, f4, vmulq_f64 (f4, f4), d->poly)); - - /* t = 2 ^ i. */ - float64x2_t t = vreinterpretq_f64_u64 ( - vaddq_u64 (vreinterpretq_u64_s64 (i << 52), d->onef)); - /* expm1(x) = p * t + (t - 1). */ - return vfmaq_f64 (vsubq_f64 (t, v_f64 (1)), p, t); -} - -static float64x2_t NOINLINE VPCS_ATTR -special_case (float64x2_t x, float64x2_t y, uint64x2_t special) -{ - return v_call_f64 (tanh, x, y, special); -} - -/* Vector approximation for double-precision tanh(x), using a simplified - version of expm1. The greatest observed error is 2.77 ULP: - _ZGVnN2v_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3 - want -0x1.bd6a21a163624p-3. */ -float64x2_t VPCS_ATTR V_NAME_D1 (tanh) (float64x2_t x) -{ - const struct data *d = ptr_barrier (&data); - - uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); - - float64x2_t u = x; - - /* Trigger special-cases for tiny, boring and infinity/NaN. 
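The detection below folds all three checks into one unsigned compare: subtracting tiny_bound makes any ia below it wrap to a huge value, and any ia past the boring bound exceeds thresh, so both tails of the range test true at once. A scalar model (illustrative only, using the constants from the data struct above):

   // ia = asuint64 (fabs (x))
   static inline int is_special (uint64_t ia)
   {
     const uint64_t tiny_bound = 0x3e40000000000000ULL; // asuint64 (0x1p-27)
     const uint64_t thresh     = 0x01f241bf835f9d5fULL;
     return (ia - tiny_bound) > thresh;                 // tiny or huge
   }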
*/ - uint64x2_t special = vcgtq_u64 (vsubq_u64 (ia, d->tiny_bound), d->thresh); -#if WANT_SIMD_EXCEPT - /* To trigger fp exceptions correctly, set special lanes to a neutral value. - They will be fixed up later by the special-case handler. */ - if (unlikely (v_any_u64 (special))) - u = v_zerofy_f64 (u, special); -#endif - - u = vaddq_f64 (u, u); - - /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ - float64x2_t q = expm1_inline (u, d); - float64x2_t qp2 = vaddq_f64 (q, v_f64 (2)); - - if (unlikely (v_any_u64 (special))) - return special_case (x, vdivq_f64 (q, qp2), special); - return vdivq_f64 (q, qp2); -} - -PL_SIG (V, D, 1, tanh, -10.0, 10.0) -PL_TEST_ULP (V_NAME_D1 (tanh), 2.27) -PL_TEST_EXPECT_FENV (V_NAME_D1 (tanh), WANT_SIMD_EXCEPT) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0, 0x1p-27, 5000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0x1p-27, 0x1.241bf835f9d5fp+4, 50000) -PL_TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0x1.241bf835f9d5fp+4, inf, 1000) diff --git a/contrib/arm-optimized-routines/string/Dir.mk b/contrib/arm-optimized-routines/string/Dir.mk index 40ff5acc093e..dd8283ec4977 100644 --- a/contrib/arm-optimized-routines/string/Dir.mk +++ b/contrib/arm-optimized-routines/string/Dir.mk @@ -1,113 +1,118 @@ # Makefile fragment - requires GNU make # # Copyright (c) 2019-2021, Arm Limited. # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception S := $(srcdir)/string B := build/string ifeq ($(ARCH),) all-string bench-string check-string install-string clean-string: @echo "*** Please set ARCH in config.mk. ***" @exit 1 else string-lib-srcs := $(wildcard $(S)/$(ARCH)/*.[cS]) +string-lib-srcs += $(wildcard $(S)/$(ARCH)/experimental/*.[cS]) string-test-srcs := $(wildcard $(S)/test/*.c) string-bench-srcs := $(wildcard $(S)/bench/*.c) +string-arch-include-dir := $(wildcard $(S)/$(ARCH)) +string-arch-includes := $(wildcard $(S)/$(ARCH)/*.h) string-includes := $(patsubst $(S)/%,build/%,$(wildcard $(S)/include/*.h)) string-libs := \ build/lib/libstringlib.so \ build/lib/libstringlib.a \ string-tests := \ build/bin/test/memcpy \ build/bin/test/memmove \ build/bin/test/memset \ build/bin/test/memchr \ build/bin/test/memrchr \ build/bin/test/memcmp \ build/bin/test/__mtag_tag_region \ build/bin/test/__mtag_tag_zero_region \ build/bin/test/strcpy \ build/bin/test/stpcpy \ build/bin/test/strcmp \ build/bin/test/strchr \ build/bin/test/strrchr \ build/bin/test/strchrnul \ build/bin/test/strlen \ build/bin/test/strnlen \ build/bin/test/strncmp string-benches := \ build/bin/bench/memcpy \ + build/bin/bench/memset \ build/bin/bench/strlen string-lib-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(string-lib-srcs))) string-test-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(string-test-srcs))) string-bench-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(string-bench-srcs))) string-objs := \ $(string-lib-objs) \ $(string-lib-objs:%.o=%.os) \ $(string-test-objs) \ $(string-bench-objs) string-files := \ $(string-objs) \ $(string-libs) \ $(string-tests) \ $(string-benches) \ $(string-includes) \ all-string: $(string-libs) $(string-tests) $(string-benches) $(string-includes) -$(string-objs): $(string-includes) -$(string-objs): CFLAGS_ALL += $(string-cflags) +$(string-objs): $(string-includes) $(string-arch-includes) +$(string-objs): CFLAGS_ALL += $(string-cflags) -I$(string-arch-include-dir) $(string-test-objs): CFLAGS_ALL += -D_GNU_SOURCE build/lib/libstringlib.so: $(string-lib-objs:%.o=%.os) $(CC) $(CFLAGS_ALL) $(LDFLAGS) -shared -o $@ $^ build/lib/libstringlib.a: $(string-lib-objs) rm -f $@ $(AR) rc $@ $^ 
$(RANLIB) $@ build/bin/test/%: $(B)/test/%.o build/lib/libstringlib.a $(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS) build/bin/bench/%: $(B)/bench/%.o build/lib/libstringlib.a $(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS) build/include/%.h: $(S)/include/%.h cp $< $@ build/bin/%.sh: $(S)/test/%.sh cp $< $@ string-tests-out = $(string-tests:build/bin/test/%=build/string/test/%.out) build/string/test/%.out: build/bin/test/% $(EMULATOR) $^ | tee $@.tmp mv $@.tmp $@ check-string: $(string-tests-out) ! grep FAIL $^ bench-string: $(string-benches) $(EMULATOR) build/bin/bench/strlen $(EMULATOR) build/bin/bench/memcpy + $(EMULATOR) build/bin/bench/memset install-string: \ $(string-libs:build/lib/%=$(DESTDIR)$(libdir)/%) \ $(string-includes:build/include/%=$(DESTDIR)$(includedir)/%) clean-string: rm -f $(string-files) endif .PHONY: all-string bench-string check-string install-string clean-string diff --git a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S index 207e22950c6d..34b5789240da 100644 --- a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S +++ b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S @@ -1,100 +1,97 @@ /* * __mtag_tag_region - tag memory * * Copyright (c) 2021-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, MTE, LP64 ABI. * * Interface contract: * Address is 16 byte aligned and size is multiple of 16. * Returns the passed pointer. * The memory region may remain untagged if tagging is not enabled. */ #include "asmdefs.h" #if __ARM_FEATURE_MEMORY_TAGGING #define dstin x0 #define count x1 #define dst x2 #define dstend x3 #define tmp x4 #define zva_val x4 ENTRY (__mtag_tag_region) - PTR_ARG (0) - SIZE_ARG (1) - add dstend, dstin, count cmp count, 96 b.hi L(set_long) tbnz count, 6, L(set96) /* Set 0, 16, 32, or 48 bytes. */ lsr tmp, count, 5 add tmp, dstin, tmp, lsl 4 cbz count, L(end) stg dstin, [dstin] stg dstin, [tmp] stg dstin, [dstend, -16] L(end): ret .p2align 4 /* Set 64..96 bytes. Write 64 bytes from the start and 32 bytes from the end. */ L(set96): st2g dstin, [dstin] st2g dstin, [dstin, 32] st2g dstin, [dstend, -32] ret .p2align 4 /* Size is > 96 bytes. */ L(set_long): cmp count, 160 b.lo L(no_zva) #ifndef SKIP_ZVA_CHECK mrs zva_val, dczid_el0 and zva_val, zva_val, 31 cmp zva_val, 4 /* ZVA size is 64 bytes. */ b.ne L(no_zva) #endif st2g dstin, [dstin] st2g dstin, [dstin, 32] bic dst, dstin, 63 sub count, dstend, dst /* Count is now 64 too large. */ sub count, count, 128 /* Adjust count and bias for loop. */ .p2align 4 L(zva_loop): add dst, dst, 64 dc gva, dst subs count, count, 64 b.hi L(zva_loop) st2g dstin, [dstend, -64] st2g dstin, [dstend, -32] ret L(no_zva): sub dst, dstin, 32 /* Dst is biased by -32. */ sub count, count, 64 /* Adjust count for loop. */ L(no_zva_loop): st2g dstin, [dst, 32] st2g dstin, [dst, 64]! 
subs count, count, 64 b.hi L(no_zva_loop) st2g dstin, [dstend, -64] st2g dstin, [dstend, -32] ret END (__mtag_tag_region) #endif diff --git a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S index 44b8e0114f42..2fa248e25621 100644 --- a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S +++ b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S @@ -1,100 +1,97 @@ /* * __mtag_tag_zero_region - tag memory and fill it with zero bytes * * Copyright (c) 2021-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, MTE, LP64 ABI. * * Interface contract: * Address is 16 byte aligned and size is multiple of 16. * Returns the passed pointer. * The memory region may remain untagged if tagging is not enabled. */ #include "asmdefs.h" #if __ARM_FEATURE_MEMORY_TAGGING #define dstin x0 #define count x1 #define dst x2 #define dstend x3 #define tmp x4 #define zva_val x4 ENTRY (__mtag_tag_zero_region) - PTR_ARG (0) - SIZE_ARG (1) - add dstend, dstin, count cmp count, 96 b.hi L(set_long) tbnz count, 6, L(set96) /* Set 0, 16, 32, or 48 bytes. */ lsr tmp, count, 5 add tmp, dstin, tmp, lsl 4 cbz count, L(end) stzg dstin, [dstin] stzg dstin, [tmp] stzg dstin, [dstend, -16] L(end): ret .p2align 4 /* Set 64..96 bytes. Write 64 bytes from the start and 32 bytes from the end. */ L(set96): stz2g dstin, [dstin] stz2g dstin, [dstin, 32] stz2g dstin, [dstend, -32] ret .p2align 4 /* Size is > 96 bytes. */ L(set_long): cmp count, 160 b.lo L(no_zva) #ifndef SKIP_ZVA_CHECK mrs zva_val, dczid_el0 and zva_val, zva_val, 31 cmp zva_val, 4 /* ZVA size is 64 bytes. */ b.ne L(no_zva) #endif stz2g dstin, [dstin] stz2g dstin, [dstin, 32] bic dst, dstin, 63 sub count, dstend, dst /* Count is now 64 too large. */ sub count, count, 128 /* Adjust count and bias for loop. */ .p2align 4 L(zva_loop): add dst, dst, 64 dc gzva, dst subs count, count, 64 b.hi L(zva_loop) stz2g dstin, [dstend, -64] stz2g dstin, [dstend, -32] ret L(no_zva): sub dst, dstin, 32 /* Dst is biased by -32. */ sub count, count, 64 /* Adjust count for loop. */ L(no_zva_loop): stz2g dstin, [dst, 32] stz2g dstin, [dst, 64]! subs count, count, 64 b.hi L(no_zva_loop) stz2g dstin, [dstend, -64] stz2g dstin, [dstend, -32] ret END (__mtag_tag_zero_region) #endif diff --git a/contrib/arm-optimized-routines/string/aarch64/asmdefs.h b/contrib/arm-optimized-routines/string/aarch64/asmdefs.h index 131b95e1fea9..90166676977a 100644 --- a/contrib/arm-optimized-routines/string/aarch64/asmdefs.h +++ b/contrib/arm-optimized-routines/string/aarch64/asmdefs.h @@ -1,106 +1,69 @@ /* * Macros for asm code. AArch64 version. * * Copyright (c) 2019-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef _ASMDEFS_H #define _ASMDEFS_H /* Branch Target Identification support. */ #define BTI_C hint 34 #define BTI_J hint 36 /* Return address signing support (pac-ret). */ #define PACIASP hint 25; .cfi_window_save #define AUTIASP hint 29; .cfi_window_save /* GNU_PROPERTY_AARCH64_* macros from elf.h. */ #define FEATURE_1_AND 0xc0000000 #define FEATURE_1_BTI 1 #define FEATURE_1_PAC 2 /* Add a NT_GNU_PROPERTY_TYPE_0 note.
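The words emitted by GNU_PROPERTY below correspond to this layout (a descriptive sketch of the 64-bit note, per the ELF gABI; it is not code that is compiled here):

   struct {
     uint32_t n_namesz;   // 4: "GNU" including the NUL
     uint32_t n_descsz;   // 16: pr_type + pr_datasz + value + pad
     uint32_t n_type;     // 5: NT_GNU_PROPERTY_TYPE_0
     char     n_name[4];  // "GNU"
     uint32_t pr_type;    // e.g. FEATURE_1_AND (0xc0000000)
     uint32_t pr_datasz;  // 4
     uint32_t pr_data;    // e.g. FEATURE_1_BTI | FEATURE_1_PAC
     uint32_t pr_pad;     // 0: keeps the note 8-byte aligned
   };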
*/ -#ifdef __ILP32__ -#define GNU_PROPERTY(type, value) \ - .section .note.gnu.property, "a"; \ - .p2align 2; \ - .word 4; \ - .word 12; \ - .word 5; \ - .asciz "GNU"; \ - .word type; \ - .word 4; \ - .word value; \ - .text -#else #define GNU_PROPERTY(type, value) \ .section .note.gnu.property, "a"; \ .p2align 3; \ .word 4; \ .word 16; \ .word 5; \ .asciz "GNU"; \ .word type; \ .word 4; \ .word value; \ .word 0; \ .text -#endif /* If set then the GNU Property Note section will be added to mark objects to support BTI and PAC-RET. */ #ifndef WANT_GNU_PROPERTY #define WANT_GNU_PROPERTY 1 #endif #if WANT_GNU_PROPERTY /* Add property note with supported features to all asm files. */ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC) #endif #define ENTRY_ALIGN(name, alignment) \ .global name; \ .type name,%function; \ .align alignment; \ name: \ .cfi_startproc; \ BTI_C; #define ENTRY(name) ENTRY_ALIGN(name, 6) #define ENTRY_ALIAS(name) \ .global name; \ .type name,%function; \ name: #define END(name) \ .cfi_endproc; \ .size name, .-name; #define L(l) .L ## l -#ifdef __ILP32__ - /* Sanitize padding bits of pointer arguments as per aapcs64 */ -#define PTR_ARG(n) mov w##n, w##n -#else -#define PTR_ARG(n) -#endif - -#ifdef __ILP32__ - /* Sanitize padding bits of size arguments as per aapcs64 */ -#define SIZE_ARG(n) mov w##n, w##n -#else -#define SIZE_ARG(n) -#endif - -/* Compiler supports SVE instructions */ -#ifndef HAVE_SVE -# if __aarch64__ && (__GNUC__ >= 8 || __clang_major__ >= 5) -# define HAVE_SVE 1 -# else -# define HAVE_SVE 0 -# endif -#endif - #endif diff --git a/contrib/arm-optimized-routines/string/aarch64/memchr-sve.S b/contrib/arm-optimized-routines/string/aarch64/experimental/memchr-sve.S similarity index 96% rename from contrib/arm-optimized-routines/string/aarch64/memchr-sve.S rename to contrib/arm-optimized-routines/string/aarch64/experimental/memchr-sve.S index b851cf31f238..b314551f3e0f 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memchr-sve.S +++ b/contrib/arm-optimized-routines/string/aarch64/experimental/memchr-sve.S @@ -1,64 +1,60 @@ /* * memchr - find a character in a memory zone * * Copyright (c) 2018-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" -#if __ARM_FEATURE_SVE +.arch armv8-a+sve + /* Assumptions: * * ARMv8-a, AArch64 * SVE Available. */ ENTRY (__memchr_aarch64_sve) - PTR_ARG (0) - SIZE_ARG (2) dup z1.b, w1 /* duplicate c to a vector */ setffr /* initialize FFR */ mov x3, 0 /* initialize off */ .p2align 4 0: whilelo p1.b, x3, x2 /* make sure off < max */ b.none 9f /* Read a vector's worth of bytes, bounded by max, stopping on first fault. */ ldff1b z0.b, p1/z, [x0, x3] rdffrs p0.b, p1/z b.nlast 2f /* First fault did not fail: the vector bounded by max is valid. Avoid depending on the contents of FFR beyond the branch. */ incb x3 /* speculate increment */ cmpeq p2.b, p1/z, z0.b, z1.b /* search for c */ b.none 0b decb x3 /* undo speculate */ /* Found C. */ 1: brkb p2.b, p1/z, p2.b /* find the first c */ add x0, x0, x3 /* form partial pointer */ incp x0, p2.b /* form final pointer to c */ ret /* First fault failed: only some of the vector is valid. Perform the comparison only on the valid bytes. */ 2: cmpeq p2.b, p0/z, z0.b, z1.b b.any 1b /* No C found. Re-init FFR, increment, and loop. */ setffr incp x3, p0.b b 0b /* Found end of count.
*/ 9: mov x0, 0 /* return null */ ret END (__memchr_aarch64_sve) - -#endif - diff --git a/contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S b/contrib/arm-optimized-routines/string/aarch64/experimental/memcmp-sve.S similarity index 93% rename from contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S rename to contrib/arm-optimized-routines/string/aarch64/experimental/memcmp-sve.S index d52ce4555344..ad3534836d04 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S +++ b/contrib/arm-optimized-routines/string/aarch64/experimental/memcmp-sve.S @@ -1,51 +1,46 @@ /* * memcmp - compare memory * * Copyright (c) 2018-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" -#if __ARM_FEATURE_SVE +.arch armv8-a+sve + /* Assumptions: * * ARMv8-a, AArch64 * SVE Available. */ ENTRY (__memcmp_aarch64_sve) - PTR_ARG (0) - PTR_ARG (1) - SIZE_ARG (2) mov x3, 0 /* initialize off */ 0: whilelo p0.b, x3, x2 /* while off < max */ b.none 9f ld1b z0.b, p0/z, [x0, x3] /* read vectors bounded by max. */ ld1b z1.b, p0/z, [x1, x3] /* Increment for a whole vector, even if we've only read a partial. This is significantly cheaper than INCP, and since OFF is not used after the loop it is ok to increment OFF past MAX. */ incb x3 cmpne p1.b, p0/z, z0.b, z1.b /* while no inequalities */ b.none 0b /* Found inequality. */ 1: brkb p1.b, p0/z, p1.b /* find first such */ lasta w0, p1, z0.b /* extract each byte */ lasta w1, p1, z1.b sub x0, x0, x1 /* return comparison */ ret /* Found end-of-count. */ 9: mov x0, 0 /* return equality */ ret END (__memcmp_aarch64_sve) - -#endif - diff --git a/contrib/arm-optimized-routines/string/aarch64/stpcpy-sve.S b/contrib/arm-optimized-routines/string/aarch64/experimental/stpcpy-sve.S similarity index 100% rename from contrib/arm-optimized-routines/string/aarch64/stpcpy-sve.S rename to contrib/arm-optimized-routines/string/aarch64/experimental/stpcpy-sve.S diff --git a/contrib/arm-optimized-routines/string/aarch64/strchr-sve.S b/contrib/arm-optimized-routines/string/aarch64/experimental/strchr-sve.S similarity index 97% rename from contrib/arm-optimized-routines/string/aarch64/strchr-sve.S rename to contrib/arm-optimized-routines/string/aarch64/experimental/strchr-sve.S index ff075167bfef..7d74ae9ff232 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strchr-sve.S +++ b/contrib/arm-optimized-routines/string/aarch64/experimental/strchr-sve.S @@ -1,70 +1,67 @@ /* * strchr/strchrnul - find a character in a string * * Copyright (c) 2018-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" -#if __ARM_FEATURE_SVE +.arch armv8-a+sve + /* Assumptions: * * ARMv8-a, AArch64 * SVE Available. */ /* To build as strchrnul, define BUILD_STRCHRNUL before compiling this file. */ #ifdef BUILD_STRCHRNUL #define FUNC __strchrnul_aarch64_sve #else #define FUNC __strchr_aarch64_sve #endif ENTRY (FUNC) - PTR_ARG (0) dup z1.b, w1 /* replicate byte across vector */ setffr /* initialize FFR */ ptrue p1.b /* all ones; loop invariant */ .p2align 4 /* Read a vector's worth of bytes, stopping on first fault. */ 0: ldff1b z0.b, p1/z, [x0, xzr] rdffrs p0.b, p1/z b.nlast 2f /* First fault did not fail: the whole vector is valid. Avoid depending on the contents of FFR beyond the branch. 
*/ incb x0 /* speculate increment */ cmpeq p2.b, p1/z, z0.b, z1.b /* search for c */ cmpeq p3.b, p1/z, z0.b, 0 /* search for 0 */ orrs p4.b, p1/z, p2.b, p3.b /* c | 0 */ b.none 0b decb x0 /* undo speculate */ /* Found C or 0. */ 1: brka p4.b, p1/z, p4.b /* find first such */ sub x0, x0, 1 /* adjust pointer for that byte */ incp x0, p4.b #ifndef BUILD_STRCHRNUL ptest p4, p2.b /* was first in c? */ csel x0, xzr, x0, none /* if there was no c, return null */ #endif ret /* First fault failed: only some of the vector is valid. Perform the comparison only on the valid bytes. */ 2: cmpeq p2.b, p0/z, z0.b, z1.b /* search for c */ cmpeq p3.b, p0/z, z0.b, 0 /* search for 0 */ orrs p4.b, p0/z, p2.b, p3.b /* c | 0 */ b.any 1b /* No C or 0 found. Re-init FFR, increment, and loop. */ setffr incp x0, p0.b b 0b END (FUNC) - -#endif - diff --git a/contrib/arm-optimized-routines/string/aarch64/strchrnul-sve.S b/contrib/arm-optimized-routines/string/aarch64/experimental/strchrnul-sve.S similarity index 100% rename from contrib/arm-optimized-routines/string/aarch64/strchrnul-sve.S rename to contrib/arm-optimized-routines/string/aarch64/experimental/strchrnul-sve.S diff --git a/contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S b/contrib/arm-optimized-routines/string/aarch64/experimental/strcmp-sve.S similarity index 96% rename from contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S rename to contrib/arm-optimized-routines/string/aarch64/experimental/strcmp-sve.S index eaf909a378f1..b6c249588534 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S +++ b/contrib/arm-optimized-routines/string/aarch64/experimental/strcmp-sve.S @@ -1,59 +1,55 @@ /* * __strcmp_aarch64_sve - compare two strings * * Copyright (c) 2018-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" -#if __ARM_FEATURE_SVE +.arch armv8-a+sve + /* Assumptions: * * ARMv8-a, AArch64 * SVE Available. */ ENTRY (__strcmp_aarch64_sve) - PTR_ARG (0) - PTR_ARG (1) setffr /* initialize FFR */ ptrue p1.b, all /* all ones; loop invariant */ mov x2, 0 /* initialize offset */ /* Read a vector's worth of bytes, stopping on first fault. */ .p2align 4 0: ldff1b z0.b, p1/z, [x0, x2] ldff1b z1.b, p1/z, [x1, x2] rdffrs p0.b, p1/z b.nlast 2f /* First fault did not fail: the whole vector is valid. Avoid depending on the contents of FFR beyond the branch. */ incb x2, all /* skip bytes for next round */ cmpeq p2.b, p1/z, z0.b, z1.b /* compare strings */ cmpne p3.b, p1/z, z0.b, 0 /* search for ~zero */ nands p2.b, p1/z, p2.b, p3.b /* ~(eq & ~zero) -> ne | zero */ b.none 0b /* Found end-of-string or inequality. */ 1: brkb p2.b, p1/z, p2.b /* find first such */ lasta w0, p2, z0.b /* extract each char */ lasta w1, p2, z1.b sub x0, x0, x1 /* return comparison */ ret /* First fault failed: only some of the vector is valid. Perform the comparison only on the valid bytes.
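As in the main loop above, the NANDS combination implements the stop condition: per lane, the loop must stop where the strings differ or where both hold the terminator. In scalar terms (an illustrative model, not library code):

   // stop = !(eq && nonzero) == (a != b) || (a == 0)
   static inline int lane_stop (unsigned char a, unsigned char b)
   {
     return !((a == b) && (a != 0));
   }

NANDS evaluates this over every active lane and sets the flags, so the following branch needs no separate test.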
*/ 2: incp x2, p0.b /* skip bytes for next round */ setffr /* re-init FFR for next round */ cmpeq p2.b, p0/z, z0.b, z1.b /* compare strings, as above */ cmpne p3.b, p0/z, z0.b, 0 nands p2.b, p0/z, p2.b, p3.b b.none 0b b 1b END (__strcmp_aarch64_sve) - -#endif - diff --git a/contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S b/contrib/arm-optimized-routines/string/aarch64/experimental/strcpy-sve.S similarity index 96% rename from contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S rename to contrib/arm-optimized-routines/string/aarch64/experimental/strcpy-sve.S index 00e72dce4451..57b77c8a00e7 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S +++ b/contrib/arm-optimized-routines/string/aarch64/experimental/strcpy-sve.S @@ -1,71 +1,67 @@ /* * strcpy/stpcpy - copy a string returning pointer to start/end. * * Copyright (c) 2018-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" -#if __ARM_FEATURE_SVE +.arch armv8-a+sve + /* Assumptions: * * ARMv8-a, AArch64 * SVE Available. */ /* To build as stpcpy, define BUILD_STPCPY before compiling this file. */ #ifdef BUILD_STPCPY #define FUNC __stpcpy_aarch64_sve #else #define FUNC __strcpy_aarch64_sve #endif ENTRY (FUNC) - PTR_ARG (0) - PTR_ARG (1) setffr /* initialize FFR */ ptrue p2.b, all /* all ones; loop invariant */ mov x2, 0 /* initialize offset */ .p2align 4 /* Read a vector's worth of bytes, stopping on first fault. */ 0: ldff1b z0.b, p2/z, [x1, x2] rdffrs p0.b, p2/z b.nlast 1f /* First fault did not fail: the whole vector is valid. Avoid depending on the contents of FFR beyond the branch. */ cmpeq p1.b, p2/z, z0.b, 0 /* search for zeros */ b.any 2f /* No zero found. Store the whole vector and loop. */ st1b z0.b, p2, [x0, x2] incb x2, all b 0b /* First fault failed: only some of the vector is valid. Perform the comparison only on the valid bytes. */ 1: cmpeq p1.b, p0/z, z0.b, 0 /* search for zeros */ b.any 2f /* No zero found. Store the valid portion of the vector and loop. */ setffr /* re-init FFR */ st1b z0.b, p0, [x0, x2] incp x2, p0.b b 0b /* Zero found. Crop the vector to the found zero and finish. */ 2: brka p0.b, p2/z, p1.b st1b z0.b, p0, [x0, x2] #ifdef BUILD_STPCPY add x0, x0, x2 sub x0, x0, 1 incp x0, p0.b #endif ret END (FUNC) - -#endif - diff --git a/contrib/arm-optimized-routines/string/aarch64/strlen-sve.S b/contrib/arm-optimized-routines/string/aarch64/experimental/strlen-sve.S similarity index 96% rename from contrib/arm-optimized-routines/string/aarch64/strlen-sve.S rename to contrib/arm-optimized-routines/string/aarch64/experimental/strlen-sve.S index 12ebbdba5c93..c83155052c07 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strlen-sve.S +++ b/contrib/arm-optimized-routines/string/aarch64/experimental/strlen-sve.S @@ -1,55 +1,52 @@ /* * __strlen_aarch64_sve - compute the length of a string * * Copyright (c) 2018-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" -#if __ARM_FEATURE_SVE +.arch armv8-a+sve + /* Assumptions: * * ARMv8-a, AArch64 * SVE Available. */ ENTRY (__strlen_aarch64_sve) - PTR_ARG (0) setffr /* initialize FFR */ ptrue p2.b /* all ones; loop invariant */ mov x1, 0 /* initialize length */ /* Read a vector's worth of bytes, stopping on first fault. */ .p2align 4 0: ldff1b z0.b, p2/z, [x0, x1] rdffrs p0.b, p2/z b.nlast 2f /* First fault did not fail: the whole vector is valid. Avoid depending on the contents of FFR beyond the branch.
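As a rough functional model of this first-fault idiom, written with ACLE intrinsics (a hypothetical helper assuming <arm_sve.h>, <stddef.h> and <stdint.h>, and an SVE-capable toolchain; not part of the library):

   static size_t strlen_sve_model (const char *s)
   {
     svbool_t all = svptrue_b8 ();
     size_t len = 0;
     svsetffr ();                             // make all FFR bits valid
     for (;;)
       {
         svuint8_t v = svldff1_u8 (all, (const uint8_t *) s + len);
         svbool_t ffr = svrdffr_z (all);      // lanes that actually loaded
         if (svptest_last (all, ffr))         // no fault: whole vector valid
           {
             svbool_t nul = svcmpeq_n_u8 (all, v, 0);
             if (svptest_any (all, nul))
               return len + svcntp_b8 (all, svbrkb_b_z (all, nul));
             len += svcntb ();
           }
         else                                 // partial: leading lanes only
           {
             svbool_t nul = svcmpeq_n_u8 (ffr, v, 0);
             if (svptest_any (ffr, nul))
               return len + svcntp_b8 (ffr, svbrkb_b_z (ffr, nul));
             len += svcntp_b8 (all, ffr);     // skip past the valid lanes
             svsetffr ();                     // re-arm FFR and retry
           }
       }
   }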
*/ incb x1, all /* speculate increment */ cmpeq p1.b, p2/z, z0.b, 0 /* loop if no zeros */ b.none 0b decb x1, all /* undo speculate */ /* Zero found. Select the bytes before the first and count them. */ 1: brkb p0.b, p2/z, p1.b incp x1, p0.b mov x0, x1 ret /* First fault failed: only some of the vector is valid. Perform the comparison only on the valid bytes. */ 2: cmpeq p1.b, p0/z, z0.b, 0 b.any 1b /* No zero found. Re-init FFR, increment, and loop. */ setffr incp x1, p0.b b 0b END (__strlen_aarch64_sve) - -#endif - diff --git a/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S b/contrib/arm-optimized-routines/string/aarch64/experimental/strncmp-sve.S similarity index 95% rename from contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S rename to contrib/arm-optimized-routines/string/aarch64/experimental/strncmp-sve.S index 6a9e9f7b6437..a281e642d8aa 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S +++ b/contrib/arm-optimized-routines/string/aarch64/experimental/strncmp-sve.S @@ -1,69 +1,64 @@ /* * strncmp - compare two strings with limit * * Copyright (c) 2018-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" -#if __ARM_FEATURE_SVE +.arch armv8-a+sve + /* Assumptions: * * ARMv8-a, AArch64 * SVE Available. */ ENTRY (__strncmp_aarch64_sve) - PTR_ARG (0) - PTR_ARG (1) - SIZE_ARG (2) setffr /* initialize FFR */ mov x3, 0 /* initialize off */ 0: whilelo p0.b, x3, x2 /* while off < max */ b.none 9f ldff1b z0.b, p0/z, [x0, x3] ldff1b z1.b, p0/z, [x1, x3] rdffrs p1.b, p0/z b.nlast 2f /* First fault did not fail: the vector up to max is valid. Avoid depending on the contents of FFR beyond the branch. Increment for a whole vector, even if we've only read a partial. This is significantly cheaper than INCP, and since OFF is not used after the loop it is ok to increment OFF past MAX. */ incb x3 cmpeq p1.b, p0/z, z0.b, z1.b /* compare strings */ cmpne p2.b, p0/z, z0.b, 0 /* search for ~zero */ nands p2.b, p0/z, p1.b, p2.b /* ~(eq & ~zero) -> ne | zero */ b.none 0b /* Found end-of-string or inequality. */ 1: brkb p2.b, p0/z, p2.b /* find first such */ lasta w0, p2, z0.b /* extract each char */ lasta w1, p2, z1.b sub x0, x0, x1 /* return comparison */ ret /* First fault failed: only some of the vector is valid. Perform the comparison only on the valid bytes. */ 2: cmpeq p2.b, p1/z, z0.b, z1.b /* compare strings, as above */ cmpne p3.b, p1/z, z0.b, 0 nands p2.b, p1/z, p2.b, p3.b b.any 1b /* No inequality or zero found. Re-init FFR, incr and loop. */ setffr incp x3, p1.b b 0b /* Found end-of-count. */ 9: mov x0, 0 /* return equal */ ret END (__strncmp_aarch64_sve) - -#endif - diff --git a/contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S b/contrib/arm-optimized-routines/string/aarch64/experimental/strnlen-sve.S similarity index 96% rename from contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S rename to contrib/arm-optimized-routines/string/aarch64/experimental/strnlen-sve.S index 6c43dc427da7..11d835a1b13c 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S +++ b/contrib/arm-optimized-routines/string/aarch64/experimental/strnlen-sve.S @@ -1,74 +1,70 @@ /* * strnlen - calculate the length of a string with limit. * * Copyright (c) 2019-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" -#if __ARM_FEATURE_SVE +.arch armv8-a+sve + /* Assumptions: * * ARMv8-a, AArch64 * SVE Available. 
*/ ENTRY (__strnlen_aarch64_sve) - PTR_ARG (0) - SIZE_ARG (1) setffr /* initialize FFR */ mov x2, 0 /* initialize len */ b 1f .p2align 4 /* We have off + vl <= max, and so may read the whole vector. */ 0: ldff1b z0.b, p0/z, [x0, x2] rdffrs p1.b, p0/z b.nlast 2f /* First fault did not fail: the whole vector is valid. Avoid depending on the contents of FFR beyond the branch. */ cmpeq p2.b, p0/z, z0.b, 0 b.any 8f incb x2 1: whilelo p0.b, x2, x1 b.last 0b /* We have off + vl < max. Test for off == max before proceeding. */ b.none 9f ldff1b z0.b, p0/z, [x0, x2] rdffrs p1.b, p0/z b.nlast 2f /* First fault did not fail: the vector up to max is valid. Avoid depending on the contents of FFR beyond the branch. Compare for end-of-string, but there are no more bytes. */ cmpeq p2.b, p0/z, z0.b, 0 /* Found end-of-string or zero. */ 8: brkb p2.b, p0/z, p2.b mov x0, x2 incp x0, p2.b ret /* First fault failed: only some of the vector is valid. Perform the comparison only on the valid bytes. */ 2: cmpeq p2.b, p1/z, z0.b, 0 b.any 8b /* No zero found. Re-init FFR, incr and loop. */ setffr incp x2, p1.b b 1b /* End of count. Return max. */ 9: mov x0, x1 ret END (__strnlen_aarch64_sve) - -#endif - diff --git a/contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S b/contrib/arm-optimized-routines/string/aarch64/experimental/strrchr-sve.S similarity index 98% rename from contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S rename to contrib/arm-optimized-routines/string/aarch64/experimental/strrchr-sve.S index 825a7384cfc1..731edaddf156 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S +++ b/contrib/arm-optimized-routines/string/aarch64/experimental/strrchr-sve.S @@ -1,84 +1,81 @@ /* * strrchr - find the last occurrence of a character in a string * * Copyright (c) 2019-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" -#if __ARM_FEATURE_SVE +.arch armv8-a+sve + /* Assumptions: * * ARMv8-a, AArch64 * SVE Available. */ ENTRY (__strrchr_aarch64_sve) - PTR_ARG (0) dup z1.b, w1 /* replicate byte across vector */ setffr /* initialize FFR */ ptrue p1.b /* all ones; loop invariant */ mov x2, 0 /* no match found so far */ pfalse p2.b .p2align 4 /* Read a vector's worth of bytes, stopping on first fault. */ 0: ldff1b z0.b, p1/z, [x0, xzr] rdffrs p0.b, p1/z b.nlast 1f /* First fault did not fail: the whole vector is valid. Avoid depending on the contents of FFR beyond the branch. */ incb x0, all /* skip bytes this round */ cmpeq p3.b, p1/z, z0.b, 0 /* search for 0 */ b.any 3f cmpeq p3.b, p1/z, z0.b, z1.b /* search for c; no eos */ b.none 0b mov x2, x0 /* save advanced base */ mov p2.b, p3.b /* save current search */ b 0b /* First fault failed: only some of the vector is valid. Perform the comparisons only on the valid bytes. */ 1: cmpeq p3.b, p0/z, z0.b, 0 /* search for 0 */ b.any 2f cmpeq p3.b, p0/z, z0.b, z1.b /* search for c; no eos */ mov x3, x0 incp x0, p0.b /* skip bytes this round */ setffr /* re-init FFR */ b.none 0b addvl x2, x3, 1 /* save advanced base */ mov p2.b, p3.b /* save current search */ b 0b /* Found end-of-string. */ 2: incb x0, all /* advance base */ 3: brka p3.b, p1/z, p3.b /* mask after first 0 */ cmpeq p3.b, p3/z, z0.b, z1.b /* search for c not after eos */ b.any 4f /* No C within last vector. Did we have one before? */ cbz x2, 5f mov x0, x2 /* restore advanced base */ mov p3.b, p2.b /* restore saved search */ /* Find the *last* match in the predicate.
This is slightly more complicated than finding the first match. */ 4: rev p3.b, p3.b /* reverse the bits */ brka p3.b, p1/z, p3.b /* find position of last match */ decp x0, p3.b /* retard pointer to last match */ ret /* No C whatsoever. Return NULL. */ 5: mov x0, 0 ret END (__strrchr_aarch64_sve) - -#endif - diff --git a/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S b/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S index 948c3cbc7dd4..68bd0af9a8c5 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S +++ b/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S @@ -1,110 +1,108 @@ /* * memchr - find a character in a memory zone * * Copyright (c) 2020-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. * MTE compatible. */ #include "asmdefs.h" #define srcin x0 #define chrin w1 #define cntin x2 #define result x0 #define src x3 #define cntrem x4 #define synd x5 #define shift x6 #define tmp x7 #define vrepchr v0 #define qdata q1 #define vdata v1 #define vhas_chr v2 #define vend v3 #define dend d3 /* Core algorithm: For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits per byte. We take 4 bits of every comparison byte with shift right and narrow by 4 instruction. Since the bits in the nibble mask reflect the order in which things occur in the original string, counting leading zeros identifies exactly which byte matched. */ ENTRY (__memchr_aarch64_mte) - PTR_ARG (0) - SIZE_ARG (2) bic src, srcin, 15 cbz cntin, L(nomatch) ld1 {vdata.16b}, [src] dup vrepchr.16b, chrin cmeq vhas_chr.16b, vdata.16b, vrepchr.16b lsl shift, srcin, 2 shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */ fmov synd, dend lsr synd, synd, shift cbz synd, L(start_loop) rbit synd, synd clz synd, synd cmp cntin, synd, lsr 2 add result, srcin, synd, lsr 2 csel result, result, xzr, hi ret .p2align 3 L(start_loop): sub tmp, src, srcin add tmp, tmp, 17 subs cntrem, cntin, tmp b.lo L(nomatch) /* Make sure that it won't overread by a 16-byte chunk */ tbz cntrem, 4, L(loop32_2) sub src, src, 16 .p2align 4 L(loop32): ldr qdata, [src, 32]! cmeq vhas_chr.16b, vdata.16b, vrepchr.16b umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */ fmov synd, dend cbnz synd, L(end) L(loop32_2): ldr qdata, [src, 16] cmeq vhas_chr.16b, vdata.16b, vrepchr.16b subs cntrem, cntrem, 32 b.lo L(end_2) umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */ fmov synd, dend cbz synd, L(loop32) L(end_2): add src, src, 16 L(end): shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */ sub cntrem, src, srcin fmov synd, dend sub cntrem, cntin, cntrem #ifndef __AARCH64EB__ rbit synd, synd #endif clz synd, synd cmp cntrem, synd, lsr 2 add result, src, synd, lsr 2 csel result, result, xzr, hi ret L(nomatch): mov result, 0 ret END (__memchr_aarch64_mte) diff --git a/contrib/arm-optimized-routines/string/aarch64/memchr.S b/contrib/arm-optimized-routines/string/aarch64/memchr.S index fe6cfe2bc0e2..d12a38abbc30 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memchr.S +++ b/contrib/arm-optimized-routines/string/aarch64/memchr.S @@ -1,146 +1,144 @@ /* * memchr - find a character in a memory zone * * Copyright (c) 2014-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64 * Neon Available. */ #include "asmdefs.h" /* Arguments and results. 
*/ #define srcin x0 #define chrin w1 #define cntin x2 #define result x0 #define src x3 #define tmp x4 #define wtmp2 w5 #define synd x6 #define soff x9 #define cntrem x10 #define vrepchr v0 #define vdata1 v1 #define vdata2 v2 #define vhas_chr1 v3 #define vhas_chr2 v4 #define vrepmask v5 #define vend v6 /* * Core algorithm: * * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits * per byte. For each tuple, bit 0 is set if the relevant byte matched the * requested character and bit 1 is not used (faster than using a 32-bit * syndrome). Since the bits in the syndrome reflect exactly the order in which * things occur in the original string, counting trailing zeros allows us to * identify exactly which byte matched. */ ENTRY (__memchr_aarch64) - PTR_ARG (0) - SIZE_ARG (2) /* Do not dereference srcin if no bytes to compare. */ cbz cntin, L(zero_length) /* * Magic constant 0x40100401 allows us to identify which lane matches * the requested byte. */ mov wtmp2, #0x0401 movk wtmp2, #0x4010, lsl #16 dup vrepchr.16b, chrin /* Work with aligned 32-byte chunks */ bic src, srcin, #31 dup vrepmask.4s, wtmp2 ands soff, srcin, #31 and cntrem, cntin, #31 b.eq L(loop) /* * Input string is not 32-byte aligned. We calculate the syndrome * value for the aligned 32-byte block containing the first bytes * and mask the irrelevant part. */ ld1 {vdata1.16b, vdata2.16b}, [src], #32 sub tmp, soff, #32 adds cntin, cntin, tmp cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ addp vend.16b, vend.16b, vend.16b /* 128->64 */ mov synd, vend.d[0] /* Clear the soff*2 lower bits */ lsl tmp, soff, #1 lsr synd, synd, tmp lsl synd, synd, tmp /* The first block can also be the last */ b.ls L(masklast) /* Have we found something already?
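(A scalar model of the syndrome being tested here, illustrative only: each byte contributes its two-bit tuple in memory order, so a count-trailing-zeros recovers the first matching byte.

   static inline int first_match_in_chunk (const unsigned char *p,
                                           unsigned char c, int n)
   {
     unsigned long long synd = 0;
     for (int i = 0; i < n; i++)   // n <= 32 bytes per chunk
       synd |= (unsigned long long) (p[i] == c) << (2 * i);
     return synd ? __builtin_ctzll (synd) / 2 : -1;
   }

With two bits per byte, a 32-byte chunk fills the 64-bit syndrome exactly.)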
*/ cbnz synd, L(tail) L(loop): ld1 {vdata1.16b, vdata2.16b}, [src], #32 subs cntin, cntin, #32 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b /* If we're out of data we finish regardless of the result */ b.ls L(end) /* Use a fast check for the termination condition */ orr vend.16b, vhas_chr1.16b, vhas_chr2.16b addp vend.2d, vend.2d, vend.2d mov synd, vend.d[0] /* We're not out of data, loop if we haven't found the character */ cbz synd, L(loop) L(end): /* Termination condition found, let's calculate the syndrome value */ and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ addp vend.16b, vend.16b, vend.16b /* 128->64 */ mov synd, vend.d[0] /* Only do the clear for the last possible block */ b.hs L(tail) L(masklast): /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */ add tmp, cntrem, soff and tmp, tmp, #31 sub tmp, tmp, #32 neg tmp, tmp, lsl #1 lsl synd, synd, tmp lsr synd, synd, tmp L(tail): /* Count the trailing zeros using bit reversing */ rbit synd, synd /* Compensate the last post-increment */ sub src, src, #32 /* Check that we have found a character */ cmp synd, #0 /* And count the leading zeros */ clz synd, synd /* Compute the potential result */ add result, src, synd, lsr #1 /* Select result or NULL */ csel result, xzr, result, eq ret L(zero_length): mov result, #0 ret END (__memchr_aarch64) diff --git a/contrib/arm-optimized-routines/string/aarch64/memcmp.S b/contrib/arm-optimized-routines/string/aarch64/memcmp.S index 35135e72cc8e..43439de4db69 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memcmp.S +++ b/contrib/arm-optimized-routines/string/aarch64/memcmp.S @@ -1,190 +1,186 @@ /* memcmp - compare memory * * Copyright (c) 2013-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. */ #include "asmdefs.h" #define src1 x0 #define src2 x1 #define limit x2 #define result w0 #define data1 x3 #define data1w w3 #define data2 x4 #define data2w w4 #define data3 x5 #define data3w w5 #define data4 x6 #define data4w w6 #define tmp x6 #define src1end x7 #define src2end x8 ENTRY (__memcmp_aarch64) - PTR_ARG (0) - PTR_ARG (1) - SIZE_ARG (2) - cmp limit, 16 b.lo L(less16) ldp data1, data3, [src1] ldp data2, data4, [src2] ccmp data1, data2, 0, ne ccmp data3, data4, 0, eq b.ne L(return2) add src1end, src1, limit add src2end, src2, limit cmp limit, 32 b.ls L(last_bytes) cmp limit, 160 b.hs L(loop_align) sub limit, limit, 32 .p2align 4 L(loop32): ldp data1, data3, [src1, 16] ldp data2, data4, [src2, 16] cmp data1, data2 ccmp data3, data4, 0, eq b.ne L(return2) cmp limit, 16 b.ls L(last_bytes) ldp data1, data3, [src1, 32] ldp data2, data4, [src2, 32] cmp data1, data2 ccmp data3, data4, 0, eq b.ne L(return2) add src1, src1, 32 add src2, src2, 32 L(last64): subs limit, limit, 32 b.hi L(loop32) /* Compare last 1-16 bytes using unaligned access. */ L(last_bytes): ldp data1, data3, [src1end, -16] ldp data2, data4, [src2end, -16] L(return2): cmp data1, data2 csel data1, data1, data3, ne csel data2, data2, data4, ne /* Compare data bytes and set return value to 0, -1 or 1. 
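On little-endian, the byte reversal below makes a plain integer compare agree with memcmp's left-to-right byte order. A scalar equivalent (illustrative only):

   static inline int cmp8 (unsigned long long a, unsigned long long b)
   {
     a = __builtin_bswap64 (a);   // first byte in memory becomes the MSB
     b = __builtin_bswap64 (b);
     return (a > b) - (a < b);    // 1, 0 or -1, like the cset/cneg below
   }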
*/ L(return): #ifndef __AARCH64EB__ rev data1, data1 rev data2, data2 #endif cmp data1, data2 cset result, ne cneg result, result, lo ret .p2align 4 L(less16): add src1end, src1, limit add src2end, src2, limit tbz limit, 3, L(less8) ldr data1, [src1] ldr data2, [src2] ldr data3, [src1end, -8] ldr data4, [src2end, -8] b L(return2) .p2align 4 L(less8): tbz limit, 2, L(less4) ldr data1w, [src1] ldr data2w, [src2] ldr data3w, [src1end, -4] ldr data4w, [src2end, -4] b L(return2) L(less4): tbz limit, 1, L(less2) ldrh data1w, [src1] ldrh data2w, [src2] cmp data1w, data2w b.ne L(return) L(less2): mov result, 0 tbz limit, 0, L(return_zero) ldrb data1w, [src1end, -1] ldrb data2w, [src2end, -1] sub result, data1w, data2w L(return_zero): ret L(loop_align): ldp data1, data3, [src1, 16] ldp data2, data4, [src2, 16] cmp data1, data2 ccmp data3, data4, 0, eq b.ne L(return2) /* Align src2 and adjust src1, src2 and limit. */ and tmp, src2, 15 sub tmp, tmp, 16 sub src2, src2, tmp add limit, limit, tmp sub src1, src1, tmp sub limit, limit, 64 + 16 .p2align 4 L(loop64): ldr q0, [src1, 16] ldr q1, [src2, 16] subs limit, limit, 64 ldr q2, [src1, 32] ldr q3, [src2, 32] eor v0.16b, v0.16b, v1.16b eor v1.16b, v2.16b, v3.16b ldr q2, [src1, 48] ldr q3, [src2, 48] umaxp v0.16b, v0.16b, v1.16b ldr q4, [src1, 64]! ldr q5, [src2, 64]! eor v1.16b, v2.16b, v3.16b eor v2.16b, v4.16b, v5.16b umaxp v1.16b, v1.16b, v2.16b umaxp v0.16b, v0.16b, v1.16b umaxp v0.16b, v0.16b, v0.16b fmov tmp, d0 ccmp tmp, 0, 0, hi b.eq L(loop64) /* If equal, process last 1-64 bytes using scalar loop. */ add limit, limit, 64 + 16 cbz tmp, L(last64) /* Determine the 8-byte aligned offset of the first difference. */ #ifdef __AARCH64EB__ rev16 tmp, tmp #endif rev tmp, tmp clz tmp, tmp bic tmp, tmp, 7 sub tmp, tmp, 48 ldr data1, [src1, tmp] ldr data2, [src2, tmp] #ifndef __AARCH64EB__ rev data1, data1 rev data2, data2 #endif mov result, 1 cmp data1, data2 cneg result, result, lo ret END (__memcmp_aarch64) diff --git a/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S b/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S index 9d3027d4d3cd..cbf4c581500e 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S +++ b/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S @@ -1,212 +1,209 @@ /* * memcpy - copy memory area * * Copyright (c) 2019-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. * */ #include "asmdefs.h" #define dstin x0 #define src x1 #define count x2 #define dst x3 #define srcend x4 #define dstend x5 #define A_l x6 #define A_lw w6 #define A_h x7 #define B_l x8 #define B_lw w8 #define B_h x9 #define C_lw w10 #define tmp1 x14 #define A_q q0 #define B_q q1 #define C_q q2 #define D_q q3 #define E_q q4 #define F_q q5 #define G_q q6 #define H_q q7 /* This implementation handles overlaps and supports both memcpy and memmove from a single entry point. It uses unaligned accesses and branchless sequences to keep the code small, simple and improve performance. Copies are split into 3 main cases: small copies of up to 32 bytes, medium copies of up to 128 bytes, and large copies. The overhead of the overlap check is negligible since it is only required for large copies. Large copies use a software pipelined loop processing 64 bytes per iteration. The source pointer is 16-byte aligned to minimize unaligned accesses. The loop tail is handled by always copying 64 bytes from the end. 
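The overlap check itself is one unsigned compare: if the distance from src to dstin is smaller than count, a forward copy would read bytes it has already overwritten. A sketch in C (illustrative; the real dispatch is the b.lo in the code below, and the two helpers are hypothetical):

   if ((uintptr_t) dst - (uintptr_t) src < count)
     copy_backwards (dst, src, count);  // hypothetical helper
   else
     copy_forwards (dst, src, count);   // hypothetical helper

When dst precedes src the subtraction wraps to a huge unsigned value, so the forward path is taken, as it safely may be.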
*/ ENTRY_ALIAS (__memmove_aarch64_simd) ENTRY (__memcpy_aarch64_simd) - PTR_ARG (0) - PTR_ARG (1) - SIZE_ARG (2) add srcend, src, count cmp count, 128 b.hi L(copy_long) add dstend, dstin, count cmp count, 32 b.hi L(copy32_128) nop /* Small copies: 0..32 bytes. */ cmp count, 16 b.lo L(copy16) ldr A_q, [src] ldr B_q, [srcend, -16] str A_q, [dstin] str B_q, [dstend, -16] ret .p2align 4 /* Medium copies: 33..128 bytes. */ L(copy32_128): ldp A_q, B_q, [src] ldp C_q, D_q, [srcend, -32] cmp count, 64 b.hi L(copy128) stp A_q, B_q, [dstin] stp C_q, D_q, [dstend, -32] ret .p2align 4 /* Copy 8-15 bytes. */ L(copy16): tbz count, 3, L(copy8) ldr A_l, [src] ldr A_h, [srcend, -8] str A_l, [dstin] str A_h, [dstend, -8] ret /* Copy 4-7 bytes. */ L(copy8): tbz count, 2, L(copy4) ldr A_lw, [src] ldr B_lw, [srcend, -4] str A_lw, [dstin] str B_lw, [dstend, -4] ret /* Copy 65..128 bytes. */ L(copy128): ldp E_q, F_q, [src, 32] cmp count, 96 b.ls L(copy96) ldp G_q, H_q, [srcend, -64] stp G_q, H_q, [dstend, -64] L(copy96): stp A_q, B_q, [dstin] stp E_q, F_q, [dstin, 32] stp C_q, D_q, [dstend, -32] ret /* Copy 0..3 bytes using a branchless sequence. */ L(copy4): cbz count, L(copy0) lsr tmp1, count, 1 ldrb A_lw, [src] ldrb C_lw, [srcend, -1] ldrb B_lw, [src, tmp1] strb A_lw, [dstin] strb B_lw, [dstin, tmp1] strb C_lw, [dstend, -1] L(copy0): ret .p2align 3 /* Copy more than 128 bytes. */ L(copy_long): add dstend, dstin, count /* Use backwards copy if there is an overlap. */ sub tmp1, dstin, src cmp tmp1, count b.lo L(copy_long_backwards) /* Copy 16 bytes and then align src to 16-byte alignment. */ ldr D_q, [src] and tmp1, src, 15 bic src, src, 15 sub dst, dstin, tmp1 add count, count, tmp1 /* Count is now 16 too large. */ ldp A_q, B_q, [src, 16] str D_q, [dstin] ldp C_q, D_q, [src, 48] subs count, count, 128 + 16 /* Test and readjust count. */ b.ls L(copy64_from_end) L(loop64): stp A_q, B_q, [dst, 16] ldp A_q, B_q, [src, 80] stp C_q, D_q, [dst, 48] ldp C_q, D_q, [src, 112] add src, src, 64 add dst, dst, 64 subs count, count, 64 b.hi L(loop64) /* Write the last iteration and copy 64 bytes from the end. */ L(copy64_from_end): ldp E_q, F_q, [srcend, -64] stp A_q, B_q, [dst, 16] ldp A_q, B_q, [srcend, -32] stp C_q, D_q, [dst, 48] stp E_q, F_q, [dstend, -64] stp A_q, B_q, [dstend, -32] ret .p2align 4 nop /* Large backwards copy for overlapping copies. Copy 16 bytes and then align srcend to 16-byte alignment. */ L(copy_long_backwards): cbz tmp1, L(copy0) ldr D_q, [srcend, -16] and tmp1, srcend, 15 bic srcend, srcend, 15 sub count, count, tmp1 ldp A_q, B_q, [srcend, -32] str D_q, [dstend, -16] ldp C_q, D_q, [srcend, -64] sub dstend, dstend, tmp1 subs count, count, 128 b.ls L(copy64_from_start) L(loop64_backwards): str B_q, [dstend, -16] str A_q, [dstend, -32] ldp A_q, B_q, [srcend, -96] str D_q, [dstend, -48] str C_q, [dstend, -64]! ldp C_q, D_q, [srcend, -128] sub srcend, srcend, 64 subs count, count, 64 b.hi L(loop64_backwards) /* Write the last iteration and copy 64 bytes from the start. 
*/ L(copy64_from_start): ldp E_q, F_q, [src, 32] stp A_q, B_q, [dstend, -32] ldp A_q, B_q, [src] stp C_q, D_q, [dstend, -64] stp E_q, F_q, [dstin, 32] stp A_q, B_q, [dstin] ret END (__memcpy_aarch64_simd) diff --git a/contrib/arm-optimized-routines/string/aarch64/memcpy-mops.S b/contrib/arm-optimized-routines/string/aarch64/memcpy-mops.S index b45c31418717..03ae95570c04 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memcpy-mops.S +++ b/contrib/arm-optimized-routines/string/aarch64/memcpy-mops.S @@ -1,21 +1,17 @@ /* * memcpy using MOPS extension. * * Copyright (c) 2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" ENTRY (__memcpy_aarch64_mops) - PTR_ARG (0) - PTR_ARG (1) - SIZE_ARG (2) - mov x3, x0 .inst 0x19010443 /* cpyfp [x3]!, [x1]!, x2! */ .inst 0x19410443 /* cpyfm [x3]!, [x1]!, x2! */ .inst 0x19810443 /* cpyfe [x3]!, [x1]!, x2! */ ret END (__memcpy_aarch64_mops) diff --git a/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S b/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S index e8a946d7db37..9b05cb2a58ee 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S +++ b/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S @@ -1,177 +1,169 @@ /* * memcpy - copy memory area * * Copyright (c) 2019-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD, SVE, unaligned accesses. * */ #include "asmdefs.h" -#ifdef HAVE_SVE - .arch armv8-a+sve #define dstin x0 #define src x1 #define count x2 #define dst x3 #define srcend x4 #define dstend x5 #define tmp1 x6 #define vlen x6 #define A_q q0 #define B_q q1 #define C_q q2 #define D_q q3 #define E_q q4 #define F_q q5 #define G_q q6 #define H_q q7 /* This implementation handles overlaps and supports both memcpy and memmove from a single entry point. It uses unaligned accesses and branchless sequences to keep the code small, simple and improve performance. SVE vectors are used to speedup small copies. Copies are split into 3 main cases: small copies of up to 32 bytes, medium copies of up to 128 bytes, and large copies. The overhead of the overlap check is negligible since it is only required for large copies. Large copies use a software pipelined loop processing 64 bytes per iteration. The source pointer is 16-byte aligned to minimize unaligned accesses. The loop tail is handled by always copying 64 bytes from the end. */ ENTRY_ALIAS (__memmove_aarch64_sve) ENTRY (__memcpy_aarch64_sve) - PTR_ARG (0) - PTR_ARG (1) - SIZE_ARG (2) - cmp count, 128 b.hi L(copy_long) cntb vlen cmp count, vlen, lsl 1 b.hi L(copy32_128) whilelo p0.b, xzr, count whilelo p1.b, vlen, count ld1b z0.b, p0/z, [src, 0, mul vl] ld1b z1.b, p1/z, [src, 1, mul vl] st1b z0.b, p0, [dstin, 0, mul vl] st1b z1.b, p1, [dstin, 1, mul vl] ret /* Medium copies: 33..128 bytes. */ L(copy32_128): add srcend, src, count add dstend, dstin, count ldp A_q, B_q, [src] ldp C_q, D_q, [srcend, -32] cmp count, 64 b.hi L(copy128) stp A_q, B_q, [dstin] stp C_q, D_q, [dstend, -32] ret /* Copy 65..128 bytes. */ L(copy128): ldp E_q, F_q, [src, 32] cmp count, 96 b.ls L(copy96) ldp G_q, H_q, [srcend, -64] stp G_q, H_q, [dstend, -64] L(copy96): stp A_q, B_q, [dstin] stp E_q, F_q, [dstin, 32] stp C_q, D_q, [dstend, -32] ret /* Copy more than 128 bytes. */ L(copy_long): add srcend, src, count add dstend, dstin, count /* Use backwards copy if there is an overlap. 
*/ sub tmp1, dstin, src cmp tmp1, count b.lo L(copy_long_backwards) /* Copy 16 bytes and then align src to 16-byte alignment. */ ldr D_q, [src] and tmp1, src, 15 bic src, src, 15 sub dst, dstin, tmp1 add count, count, tmp1 /* Count is now 16 too large. */ ldp A_q, B_q, [src, 16] str D_q, [dstin] ldp C_q, D_q, [src, 48] subs count, count, 128 + 16 /* Test and readjust count. */ b.ls L(copy64_from_end) L(loop64): stp A_q, B_q, [dst, 16] ldp A_q, B_q, [src, 80] stp C_q, D_q, [dst, 48] ldp C_q, D_q, [src, 112] add src, src, 64 add dst, dst, 64 subs count, count, 64 b.hi L(loop64) /* Write the last iteration and copy 64 bytes from the end. */ L(copy64_from_end): ldp E_q, F_q, [srcend, -64] stp A_q, B_q, [dst, 16] ldp A_q, B_q, [srcend, -32] stp C_q, D_q, [dst, 48] stp E_q, F_q, [dstend, -64] stp A_q, B_q, [dstend, -32] ret /* Large backwards copy for overlapping copies. Copy 16 bytes and then align srcend to 16-byte alignment. */ L(copy_long_backwards): cbz tmp1, L(return) ldr D_q, [srcend, -16] and tmp1, srcend, 15 bic srcend, srcend, 15 sub count, count, tmp1 ldp A_q, B_q, [srcend, -32] str D_q, [dstend, -16] ldp C_q, D_q, [srcend, -64] sub dstend, dstend, tmp1 subs count, count, 128 b.ls L(copy64_from_start) L(loop64_backwards): str B_q, [dstend, -16] str A_q, [dstend, -32] ldp A_q, B_q, [srcend, -96] str D_q, [dstend, -48] str C_q, [dstend, -64]! ldp C_q, D_q, [srcend, -128] sub srcend, srcend, 64 subs count, count, 64 b.hi L(loop64_backwards) /* Write the last iteration and copy 64 bytes from the start. */ L(copy64_from_start): ldp E_q, F_q, [src, 32] stp A_q, B_q, [dstend, -32] ldp A_q, B_q, [src] stp C_q, D_q, [dstend, -64] stp E_q, F_q, [dstin, 32] stp A_q, B_q, [dstin] L(return): ret END (__memcpy_aarch64_sve) - -#endif diff --git a/contrib/arm-optimized-routines/string/aarch64/memcpy.S b/contrib/arm-optimized-routines/string/aarch64/memcpy.S index 7c0606e2104a..351f1a11f097 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memcpy.S +++ b/contrib/arm-optimized-routines/string/aarch64/memcpy.S @@ -1,243 +1,240 @@ /* * memcpy - copy memory area * * Copyright (c) 2012-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, unaligned accesses. * */ #include "asmdefs.h" #define dstin x0 #define src x1 #define count x2 #define dst x3 #define srcend x4 #define dstend x5 #define A_l x6 #define A_lw w6 #define A_h x7 #define B_l x8 #define B_lw w8 #define B_h x9 #define C_l x10 #define C_lw w10 #define C_h x11 #define D_l x12 #define D_h x13 #define E_l x14 #define E_h x15 #define F_l x16 #define F_h x17 #define G_l count #define G_h dst #define H_l src #define H_h srcend #define tmp1 x14 /* This implementation handles overlaps and supports both memcpy and memmove from a single entry point. It uses unaligned accesses and branchless sequences to keep the code small, simple and improve performance. Copies are split into 3 main cases: small copies of up to 32 bytes, medium copies of up to 128 bytes, and large copies. The overhead of the overlap check is negligible since it is only required for large copies. Large copies use a software pipelined loop processing 64 bytes per iteration. The destination pointer is 16-byte aligned to minimize unaligned accesses. The loop tail is handled by always copying 64 bytes from the end. 
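Copying "64 bytes from the end" removes any variable-length epilogue: once fewer than 64 bytes remain, the tail is finished with unconditional loads and stores of the final 64 bytes, which simply overlap what the loop already wrote. In C terms (illustrative only):

   // count > 128 on this path, so [dstend - 64, dstend) is always valid.
   memcpy (dstend - 64, srcend - 64, 64);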
*/ ENTRY_ALIAS (__memmove_aarch64) ENTRY (__memcpy_aarch64) - PTR_ARG (0) - PTR_ARG (1) - SIZE_ARG (2) add srcend, src, count add dstend, dstin, count cmp count, 128 b.hi L(copy_long) cmp count, 32 b.hi L(copy32_128) /* Small copies: 0..32 bytes. */ cmp count, 16 b.lo L(copy16) ldp A_l, A_h, [src] ldp D_l, D_h, [srcend, -16] stp A_l, A_h, [dstin] stp D_l, D_h, [dstend, -16] ret /* Copy 8-15 bytes. */ L(copy16): tbz count, 3, L(copy8) ldr A_l, [src] ldr A_h, [srcend, -8] str A_l, [dstin] str A_h, [dstend, -8] ret .p2align 3 /* Copy 4-7 bytes. */ L(copy8): tbz count, 2, L(copy4) ldr A_lw, [src] ldr B_lw, [srcend, -4] str A_lw, [dstin] str B_lw, [dstend, -4] ret /* Copy 0..3 bytes using a branchless sequence. */ L(copy4): cbz count, L(copy0) lsr tmp1, count, 1 ldrb A_lw, [src] ldrb C_lw, [srcend, -1] ldrb B_lw, [src, tmp1] strb A_lw, [dstin] strb B_lw, [dstin, tmp1] strb C_lw, [dstend, -1] L(copy0): ret .p2align 4 /* Medium copies: 33..128 bytes. */ L(copy32_128): ldp A_l, A_h, [src] ldp B_l, B_h, [src, 16] ldp C_l, C_h, [srcend, -32] ldp D_l, D_h, [srcend, -16] cmp count, 64 b.hi L(copy128) stp A_l, A_h, [dstin] stp B_l, B_h, [dstin, 16] stp C_l, C_h, [dstend, -32] stp D_l, D_h, [dstend, -16] ret .p2align 4 /* Copy 65..128 bytes. */ L(copy128): ldp E_l, E_h, [src, 32] ldp F_l, F_h, [src, 48] cmp count, 96 b.ls L(copy96) ldp G_l, G_h, [srcend, -64] ldp H_l, H_h, [srcend, -48] stp G_l, G_h, [dstend, -64] stp H_l, H_h, [dstend, -48] L(copy96): stp A_l, A_h, [dstin] stp B_l, B_h, [dstin, 16] stp E_l, E_h, [dstin, 32] stp F_l, F_h, [dstin, 48] stp C_l, C_h, [dstend, -32] stp D_l, D_h, [dstend, -16] ret .p2align 4 /* Copy more than 128 bytes. */ L(copy_long): /* Use backwards copy if there is an overlap. */ sub tmp1, dstin, src cbz tmp1, L(copy0) cmp tmp1, count b.lo L(copy_long_backwards) /* Copy 16 bytes and then align dst to 16-byte alignment. */ ldp D_l, D_h, [src] and tmp1, dstin, 15 bic dst, dstin, 15 sub src, src, tmp1 add count, count, tmp1 /* Count is now 16 too large. */ ldp A_l, A_h, [src, 16] stp D_l, D_h, [dstin] ldp B_l, B_h, [src, 32] ldp C_l, C_h, [src, 48] ldp D_l, D_h, [src, 64]! subs count, count, 128 + 16 /* Test and readjust count. */ b.ls L(copy64_from_end) L(loop64): stp A_l, A_h, [dst, 16] ldp A_l, A_h, [src, 16] stp B_l, B_h, [dst, 32] ldp B_l, B_h, [src, 32] stp C_l, C_h, [dst, 48] ldp C_l, C_h, [src, 48] stp D_l, D_h, [dst, 64]! ldp D_l, D_h, [src, 64]! subs count, count, 64 b.hi L(loop64) /* Write the last iteration and copy 64 bytes from the end. */ L(copy64_from_end): ldp E_l, E_h, [srcend, -64] stp A_l, A_h, [dst, 16] ldp A_l, A_h, [srcend, -48] stp B_l, B_h, [dst, 32] ldp B_l, B_h, [srcend, -32] stp C_l, C_h, [dst, 48] ldp C_l, C_h, [srcend, -16] stp D_l, D_h, [dst, 64] stp E_l, E_h, [dstend, -64] stp A_l, A_h, [dstend, -48] stp B_l, B_h, [dstend, -32] stp C_l, C_h, [dstend, -16] ret .p2align 4 /* Large backwards copy for overlapping copies. Copy 16 bytes and then align dst to 16-byte alignment. */ L(copy_long_backwards): ldp D_l, D_h, [srcend, -16] and tmp1, dstend, 15 sub srcend, srcend, tmp1 sub count, count, tmp1 ldp A_l, A_h, [srcend, -16] stp D_l, D_h, [dstend, -16] ldp B_l, B_h, [srcend, -32] ldp C_l, C_h, [srcend, -48] ldp D_l, D_h, [srcend, -64]! sub dstend, dstend, tmp1 subs count, count, 128 b.ls L(copy64_from_start) L(loop64_backwards): stp A_l, A_h, [dstend, -16] ldp A_l, A_h, [srcend, -16] stp B_l, B_h, [dstend, -32] ldp B_l, B_h, [srcend, -32] stp C_l, C_h, [dstend, -48] ldp C_l, C_h, [srcend, -48] stp D_l, D_h, [dstend, -64]! 
ldp D_l, D_h, [srcend, -64]! subs count, count, 64 b.hi L(loop64_backwards) /* Write the last iteration and copy 64 bytes from the start. */ L(copy64_from_start): ldp G_l, G_h, [src, 48] stp A_l, A_h, [dstend, -16] ldp A_l, A_h, [src, 32] stp B_l, B_h, [dstend, -32] ldp B_l, B_h, [src, 16] stp C_l, C_h, [dstend, -48] ldp C_l, C_h, [src] stp D_l, D_h, [dstend, -64] stp G_l, G_h, [dstin, 48] stp A_l, A_h, [dstin, 32] stp B_l, B_h, [dstin, 16] stp C_l, C_h, [dstin] ret END (__memcpy_aarch64) diff --git a/contrib/arm-optimized-routines/string/aarch64/memmove-mops.S b/contrib/arm-optimized-routines/string/aarch64/memmove-mops.S index 6c73017bb16f..d9839f86e9b4 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memmove-mops.S +++ b/contrib/arm-optimized-routines/string/aarch64/memmove-mops.S @@ -1,21 +1,17 @@ /* * memmove using MOPS extension. * * Copyright (c) 2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" ENTRY (__memmove_aarch64_mops) - PTR_ARG (0) - PTR_ARG (1) - SIZE_ARG (2) - mov x3, x0 .inst 0x1d010443 /* cpyp [x3]!, [x1]!, x2! */ .inst 0x1d410443 /* cpym [x3]!, [x1]!, x2! */ .inst 0x1d810443 /* cpye [x3]!, [x1]!, x2! */ ret END (__memmove_aarch64_mops) diff --git a/contrib/arm-optimized-routines/string/aarch64/memrchr.S b/contrib/arm-optimized-routines/string/aarch64/memrchr.S index 6418bdf56f41..ed38478a6faa 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memrchr.S +++ b/contrib/arm-optimized-routines/string/aarch64/memrchr.S @@ -1,112 +1,111 @@ /* * memrchr - find last character in a memory zone. * * Copyright (c) 2020-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. * MTE compatible. */ #include "asmdefs.h" #define srcin x0 #define chrin w1 #define cntin x2 #define result x0 #define src x3 #define cntrem x4 #define synd x5 #define shift x6 #define tmp x7 #define end x8 #define endm1 x9 #define vrepchr v0 #define qdata q1 #define vdata v1 #define vhas_chr v2 #define vend v3 #define dend d3 /* Core algorithm: For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits per byte. We take 4 bits of every comparison byte with shift right and narrow by 4 instruction. Since the bits in the nibble mask reflect the order in which things occur in the original string, counting leading zeros identifies exactly which byte matched. */ ENTRY (__memrchr_aarch64) - PTR_ARG (0) add end, srcin, cntin sub endm1, end, 1 bic src, endm1, 15 cbz cntin, L(nomatch) ld1 {vdata.16b}, [src] dup vrepchr.16b, chrin cmeq vhas_chr.16b, vdata.16b, vrepchr.16b neg shift, end, lsl 2 shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */ fmov synd, dend lsl synd, synd, shift cbz synd, L(start_loop) clz synd, synd sub result, endm1, synd, lsr 2 cmp cntin, synd, lsr 2 csel result, result, xzr, hi ret nop L(start_loop): subs cntrem, src, srcin b.ls L(nomatch) /* Make sure that it won't overread by a 16-byte chunk */ sub cntrem, cntrem, 1 tbz cntrem, 4, L(loop32_2) add src, src, 16 .p2align 5 L(loop32): ldr qdata, [src, -32]! 
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */ fmov synd, dend cbnz synd, L(end) L(loop32_2): ldr qdata, [src, -16] subs cntrem, cntrem, 32 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b b.lo L(end_2) umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */ fmov synd, dend cbz synd, L(loop32) L(end_2): sub src, src, 16 L(end): shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */ fmov synd, dend add tmp, src, 15 #ifdef __AARCH64EB__ rbit synd, synd #endif clz synd, synd sub tmp, tmp, synd, lsr 2 cmp tmp, srcin csel result, tmp, xzr, hs ret L(nomatch): mov result, 0 ret END (__memrchr_aarch64) diff --git a/contrib/arm-optimized-routines/string/aarch64/memset-mops.S b/contrib/arm-optimized-routines/string/aarch64/memset-mops.S index ec791493bae9..00d8e7d2c05f 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memset-mops.S +++ b/contrib/arm-optimized-routines/string/aarch64/memset-mops.S @@ -1,20 +1,17 @@ /* * memset using MOPS extension. * * Copyright (c) 2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "asmdefs.h" ENTRY (__memset_aarch64_mops) - PTR_ARG (0) - SIZE_ARG (2) - mov x3, x0 .inst 0x19c10443 /* setp [x3]!, x2!, x1 */ .inst 0x19c14443 /* setm [x3]!, x2!, x1 */ .inst 0x19c18443 /* sete [x3]!, x2!, x1 */ ret END (__memset_aarch64_mops) diff --git a/contrib/arm-optimized-routines/string/aarch64/memset.S b/contrib/arm-optimized-routines/string/aarch64/memset-sve.S similarity index 56% copy from contrib/arm-optimized-routines/string/aarch64/memset.S copy to contrib/arm-optimized-routines/string/aarch64/memset-sve.S index 553b0fcaefea..efaeaece284e 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memset.S +++ b/contrib/arm-optimized-routines/string/aarch64/memset-sve.S @@ -1,117 +1,114 @@ /* * memset - fill memory with a constant byte * - * Copyright (c) 2012-2022, Arm Limited. + * Copyright (c) 2024-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * - * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. + * ARMv8-a, AArch64, Advanced SIMD, SVE, unaligned accesses. * */ #include "asmdefs.h" +.arch armv8-a+sve + #define dstin x0 #define val x1 #define valw w1 #define count x2 #define dst x3 #define dstend x4 #define zva_val x5 +#define vlen x5 +#define off x3 +#define dstend2 x5 -ENTRY (__memset_aarch64) - PTR_ARG (0) - SIZE_ARG (2) - +ENTRY (__memset_aarch64_sve) dup v0.16B, valw - add dstend, dstin, count - - cmp count, 96 - b.hi L(set_long) cmp count, 16 - b.hs L(set_medium) - mov val, v0.D[0] + b.lo L(set_16) - /* Set 0..15 bytes. */ - tbz count, 3, 1f - str val, [dstin] - str val, [dstend, -8] - ret - .p2align 4 -1: tbz count, 2, 2f - str valw, [dstin] - str valw, [dstend, -4] - ret -2: cbz count, 3f - strb valw, [dstin] - tbz count, 1, 3f - strh valw, [dstend, -2] -3: ret + add dstend, dstin, count + cmp count, 64 + b.hs L(set_128) - /* Set 17..96 bytes. */ -L(set_medium): + /* Set 16..63 bytes. */ + mov off, 16 + and off, off, count, lsr 1 + sub dstend2, dstend, off str q0, [dstin] - tbnz count, 6, L(set96) + str q0, [dstin, off] + str q0, [dstend2, -16] str q0, [dstend, -16] - tbz count, 5, 1f - str q0, [dstin, 16] - str q0, [dstend, -32] -1: ret + ret .p2align 4 - /* Set 64..96 bytes. Write 64 bytes from the start and - 32 bytes from the end. 
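The new 16..63 byte memset path added in the hunk above is branchless: four possibly overlapping 16-byte stores cover any length in that range. A minimal C sketch of the same arithmetic (illustrative names only):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Fill 16..63 bytes.  off is 16 when count >= 32 and 0 otherwise,
       computed without a branch exactly like "and off, off, count, lsr 1":
       bit 5 of count becomes bit 4 of count >> 1.  The four stores cover
       [0, 32) from the front and [count - 32, count) from the back.  */
    static void set_16_63 (uint8_t *dst, uint8_t c, size_t count)
    {
      uint8_t q[16];
      memset (q, c, 16);
      size_t off = 16 & (count >> 1);
      memcpy (dst, q, 16);
      memcpy (dst + off, q, 16);
      memcpy (dst + count - off - 16, q, 16);
      memcpy (dst + count - 16, q, 16);
    }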
*/ -L(set96): - str q0, [dstin, 16] +L(set_16): + whilelo p0.b, xzr, count + st1b z0.b, p0, [dstin] + ret + + .p2align 4 +L(set_128): + bic dst, dstin, 15 + cmp count, 128 + b.hi L(set_long) + stp q0, q0, [dstin] stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret .p2align 4 L(set_long): - and valw, valw, 255 - bic dst, dstin, 15 - str q0, [dstin] - cmp count, 160 - ccmp valw, 0, 0, hs + cmp count, 256 + b.lo L(no_zva) + tst valw, 255 b.ne L(no_zva) #ifndef SKIP_ZVA_CHECK mrs zva_val, dczid_el0 and zva_val, zva_val, 31 cmp zva_val, 4 /* ZVA size is 64 bytes. */ b.ne L(no_zva) #endif + str q0, [dstin] str q0, [dst, 16] + bic dst, dstin, 31 stp q0, q0, [dst, 32] - bic dst, dst, 63 + bic dst, dstin, 63 sub count, dstend, dst /* Count is now 64 too large. */ sub count, count, 128 /* Adjust count and bias for loop. */ + sub x8, dstend, 1 /* Write last bytes before ZVA loop. */ + bic x8, x8, 15 + stp q0, q0, [x8, -48] + str q0, [x8, -16] + str q0, [dstend, -16] + .p2align 4 -L(zva_loop): +L(zva64_loop): add dst, dst, 64 dc zva, dst subs count, count, 64 - b.hi L(zva_loop) - stp q0, q0, [dstend, -64] - stp q0, q0, [dstend, -32] + b.hi L(zva64_loop) ret L(no_zva): + str q0, [dstin] sub count, dstend, dst /* Count is 16 too large. */ - sub dst, dst, 16 /* Dst is biased by -32. */ sub count, count, 64 + 16 /* Adjust count and bias for loop. */ L(no_zva_loop): - stp q0, q0, [dst, 32] - stp q0, q0, [dst, 64]! + stp q0, q0, [dst, 16] + stp q0, q0, [dst, 48] + add dst, dst, 64 subs count, count, 64 b.hi L(no_zva_loop) stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret -END (__memset_aarch64) - +END (__memset_aarch64_sve) diff --git a/contrib/arm-optimized-routines/string/aarch64/memset.S b/contrib/arm-optimized-routines/string/aarch64/memset.S index 553b0fcaefea..906a4dcf46c6 100644 --- a/contrib/arm-optimized-routines/string/aarch64/memset.S +++ b/contrib/arm-optimized-routines/string/aarch64/memset.S @@ -1,117 +1,121 @@ /* * memset - fill memory with a constant byte * - * Copyright (c) 2012-2022, Arm Limited. + * Copyright (c) 2012-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. * */ #include "asmdefs.h" #define dstin x0 #define val x1 #define valw w1 #define count x2 #define dst x3 #define dstend x4 #define zva_val x5 +#define off x3 +#define dstend2 x5 ENTRY (__memset_aarch64) - PTR_ARG (0) - SIZE_ARG (2) - dup v0.16B, valw - add dstend, dstin, count - - cmp count, 96 - b.hi L(set_long) cmp count, 16 - b.hs L(set_medium) - mov val, v0.D[0] + b.lo L(set_small) - /* Set 0..15 bytes. */ - tbz count, 3, 1f - str val, [dstin] - str val, [dstend, -8] + add dstend, dstin, count + cmp count, 64 + b.hs L(set_128) + + /* Set 16..63 bytes. */ + mov off, 16 + and off, off, count, lsr 1 + sub dstend2, dstend, off + str q0, [dstin] + str q0, [dstin, off] + str q0, [dstend2, -16] + str q0, [dstend, -16] ret + .p2align 4 -1: tbz count, 2, 2f - str valw, [dstin] - str valw, [dstend, -4] + /* Set 0..15 bytes. */ +L(set_small): + add dstend, dstin, count + cmp count, 4 + b.lo 2f + lsr off, count, 3 + sub dstend2, dstend, off, lsl 2 + str s0, [dstin] + str s0, [dstin, off, lsl 2] + str s0, [dstend2, -4] + str s0, [dstend, -4] ret + + /* Set 0..3 bytes. */ 2: cbz count, 3f + lsr off, count, 1 strb valw, [dstin] - tbz count, 1, 3f - strh valw, [dstend, -2] + strb valw, [dstin, off] + strb valw, [dstend, -1] 3: ret - /* Set 17..96 bytes. 
*/ -L(set_medium): - str q0, [dstin] - tbnz count, 6, L(set96) - str q0, [dstend, -16] - tbz count, 5, 1f - str q0, [dstin, 16] - str q0, [dstend, -32] -1: ret - .p2align 4 - /* Set 64..96 bytes. Write 64 bytes from the start and - 32 bytes from the end. */ -L(set96): - str q0, [dstin, 16] +L(set_128): + bic dst, dstin, 15 + cmp count, 128 + b.hi L(set_long) + stp q0, q0, [dstin] stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret .p2align 4 L(set_long): - and valw, valw, 255 - bic dst, dstin, 15 str q0, [dstin] - cmp count, 160 - ccmp valw, 0, 0, hs + str q0, [dst, 16] + tst valw, 255 b.ne L(no_zva) - #ifndef SKIP_ZVA_CHECK mrs zva_val, dczid_el0 and zva_val, zva_val, 31 cmp zva_val, 4 /* ZVA size is 64 bytes. */ b.ne L(no_zva) #endif - str q0, [dst, 16] stp q0, q0, [dst, 32] - bic dst, dst, 63 + bic dst, dstin, 63 sub count, dstend, dst /* Count is now 64 too large. */ - sub count, count, 128 /* Adjust count and bias for loop. */ + sub count, count, 64 + 64 /* Adjust count and bias for loop. */ + + /* Write last bytes before ZVA loop. */ + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] .p2align 4 -L(zva_loop): +L(zva64_loop): add dst, dst, 64 dc zva, dst subs count, count, 64 - b.hi L(zva_loop) - stp q0, q0, [dstend, -64] - stp q0, q0, [dstend, -32] + b.hi L(zva64_loop) ret + .p2align 3 L(no_zva): - sub count, dstend, dst /* Count is 16 too large. */ - sub dst, dst, 16 /* Dst is biased by -32. */ - sub count, count, 64 + 16 /* Adjust count and bias for loop. */ + sub count, dstend, dst /* Count is 32 too large. */ + sub count, count, 64 + 32 /* Adjust count and bias for loop. */ L(no_zva_loop): stp q0, q0, [dst, 32] - stp q0, q0, [dst, 64]! + stp q0, q0, [dst, 64] + add dst, dst, 64 subs count, count, 64 b.hi L(no_zva_loop) stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret END (__memset_aarch64) - diff --git a/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S b/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S index 6ec08f7acc76..42b747311bc6 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S +++ b/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S @@ -1,101 +1,100 @@ /* * strchr - find a character in a string * * Copyright (c) 2020-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. * MTE compatible. */ #include "asmdefs.h" #define srcin x0 #define chrin w1 #define result x0 #define src x2 #define tmp1 x1 #define tmp2 x3 #define vrepchr v0 #define vdata v1 #define qdata q1 #define vhas_nul v2 #define vhas_chr v3 #define vrepmask v4 #define vend v5 #define dend d5 /* Core algorithm. For each 16-byte chunk we calculate a 64-bit syndrome value with four bits per byte. Bits 0-1 are set if the relevant byte matched the requested character, bits 2-3 are set if the byte is NUL or matched. Count trailing zeroes gives the position of the matching byte if it is a multiple of 4. If it is not a multiple of 4, there was no match. */ ENTRY (__strchr_aarch64_mte) - PTR_ARG (0) bic src, srcin, 15 dup vrepchr.16b, chrin ld1 {vdata.16b}, [src] movi vrepmask.16b, 0x33 cmeq vhas_nul.16b, vdata.16b, 0 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b lsl tmp2, srcin, 2 shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ fmov tmp1, dend lsr tmp1, tmp1, tmp2 cbz tmp1, L(loop) rbit tmp1, tmp1 clz tmp1, tmp1 /* Tmp1 is an even multiple of 2 if the target character was found first. 
Otherwise we've found the end of string. */ tst tmp1, 2 add result, srcin, tmp1, lsr 2 csel result, result, xzr, eq ret .p2align 4 L(loop): ldr qdata, [src, 16] cmeq vhas_chr.16b, vdata.16b, vrepchr.16b cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b umaxp vend.16b, vhas_nul.16b, vhas_nul.16b fmov tmp1, dend cbnz tmp1, L(end) ldr qdata, [src, 32]! cmeq vhas_chr.16b, vdata.16b, vrepchr.16b cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b umaxp vend.16b, vhas_nul.16b, vhas_nul.16b fmov tmp1, dend cbz tmp1, L(loop) sub src, src, 16 L(end): #ifdef __AARCH64EB__ bif vhas_nul.16b, vhas_chr.16b, vrepmask.16b shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ fmov tmp1, dend #else bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ fmov tmp1, dend rbit tmp1, tmp1 #endif add src, src, 16 clz tmp1, tmp1 /* Tmp1 is a multiple of 4 if the target character was found. */ tst tmp1, 2 add result, src, tmp1, lsr 2 csel result, result, xzr, eq ret END (__strchr_aarch64_mte) diff --git a/contrib/arm-optimized-routines/string/aarch64/strchr.S b/contrib/arm-optimized-routines/string/aarch64/strchr.S index 37193bd947a7..c1d01e9635b6 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strchr.S +++ b/contrib/arm-optimized-routines/string/aarch64/strchr.S @@ -1,126 +1,125 @@ /* * strchr - find a character in a string * * Copyright (c) 2014-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64 * Neon Available. */ #include "asmdefs.h" /* Arguments and results. */ #define srcin x0 #define chrin w1 #define result x0 #define src x2 #define tmp1 x3 #define wtmp2 w4 #define tmp3 x5 #define vrepchr v0 #define vdata1 v1 #define vdata2 v2 #define vhas_nul1 v3 #define vhas_nul2 v4 #define vhas_chr1 v5 #define vhas_chr2 v6 #define vrepmask_0 v7 #define vrepmask_c v16 #define vend1 v17 #define vend2 v18 /* Core algorithm. For each 32-byte hunk we calculate a 64-bit syndrome value, with two bits per byte (LSB is always in bits 0 and 1, for both big and little-endian systems). For each tuple, bit 0 is set iff the relevant byte matched the requested character; bit 1 is set iff the relevant byte matched the NUL end of string (we trigger off bit0 for the special case of looking for NUL). Since the bits in the syndrome reflect exactly the order in which things occur in the original string a count_trailing_zeros() operation will identify exactly which byte is causing the termination, and why. */ /* Locals and temporaries. */ ENTRY (__strchr_aarch64) - PTR_ARG (0) /* Magic constant 0xc0300c03 to allow us to identify which lane matches the requested byte. Even bits are set if the character matches, odd bits if either the char is NUL or matches. */ mov wtmp2, 0x0c03 movk wtmp2, 0xc030, lsl 16 dup vrepchr.16b, chrin bic src, srcin, #31 /* Work with aligned 32-byte hunks. */ dup vrepmask_c.4s, wtmp2 ands tmp1, srcin, #31 add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */ b.eq L(loop) /* Input string is not 32-byte aligned. Rather than forcing the padding bytes to a safe value, we calculate the syndrome for all the bytes, but then mask off those bits of the syndrome that are related to the padding. 
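In C terms, the padding trick this comment describes looks roughly like the sketch below, assuming the 2-bits-per-byte syndrome layout of this file; misalign stands for srcin & 31 and is non-zero on this path (the aligned case branches straight to the loop, so the shift never reaches 64):

    #include <stdint.h>

    /* Clear the syndrome bits that belong to the padding bytes loaded
       from before the start of the string, rather than sanitizing the
       loaded data itself.  */
    static uint64_t mask_padding (uint64_t synd, unsigned misalign)
    {
      uint64_t pad = ~0ULL >> (64 - 2 * misalign); /* low 2*misalign bits */
      return synd & ~pad;
    }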
*/ ld1 {vdata1.16b, vdata2.16b}, [src], #32 neg tmp1, tmp1 cmeq vhas_nul1.16b, vdata1.16b, #0 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b cmeq vhas_nul2.16b, vdata2.16b, #0 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b bif vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b bif vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b and vend1.16b, vhas_nul1.16b, vrepmask_c.16b and vend2.16b, vhas_nul2.16b, vrepmask_c.16b lsl tmp1, tmp1, #1 addp vend1.16b, vend1.16b, vend2.16b // 256->128 mov tmp3, #~0 addp vend1.16b, vend1.16b, vend2.16b // 128->64 lsr tmp1, tmp3, tmp1 mov tmp3, vend1.d[0] bic tmp1, tmp3, tmp1 // Mask padding bits. cbnz tmp1, L(tail) .p2align 4 L(loop): ld1 {vdata1.16b, vdata2.16b}, [src], #32 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b cmhs vhas_nul1.16b, vhas_chr1.16b, vdata1.16b cmhs vhas_nul2.16b, vhas_chr2.16b, vdata2.16b orr vend1.16b, vhas_nul1.16b, vhas_nul2.16b umaxp vend1.16b, vend1.16b, vend1.16b mov tmp1, vend1.d[0] cbz tmp1, L(loop) /* Termination condition found. Now need to establish exactly why we terminated. */ bif vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b bif vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b and vend1.16b, vhas_nul1.16b, vrepmask_c.16b and vend2.16b, vhas_nul2.16b, vrepmask_c.16b addp vend1.16b, vend1.16b, vend2.16b // 256->128 addp vend1.16b, vend1.16b, vend2.16b // 128->64 mov tmp1, vend1.d[0] L(tail): /* Count the trailing zeros, by bit reversing... */ rbit tmp1, tmp1 /* Re-bias source. */ sub src, src, #32 clz tmp1, tmp1 /* And counting the leading zeros. */ /* Tmp1 is even if the target character was found first. Otherwise we've found the end of string and we weren't looking for NUL. */ tst tmp1, #1 add result, src, tmp1, lsr #1 csel result, result, xzr, eq ret END (__strchr_aarch64) diff --git a/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S b/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S index 543ee88bb285..b3180cdf9e2c 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S +++ b/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S @@ -1,85 +1,84 @@ /* * strchrnul - find a character or nul in a string * * Copyright (c) 2020-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. * MTE compatible. */ #include "asmdefs.h" #define srcin x0 #define chrin w1 #define result x0 #define src x2 #define tmp1 x1 #define tmp2 x3 #define vrepchr v0 #define vdata v1 #define qdata q1 #define vhas_nul v2 #define vhas_chr v3 #define vend v4 #define dend d4 /* Core algorithm: For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits per byte. We take 4 bits of every comparison byte with shift right and narrow by 4 instruction. Since the bits in the nibble mask reflect the order in which things occur in the original string, counting leading zeros identifies exactly which byte matched. */ ENTRY (__strchrnul_aarch64_mte) - PTR_ARG (0) bic src, srcin, 15 dup vrepchr.16b, chrin ld1 {vdata.16b}, [src] cmeq vhas_chr.16b, vdata.16b, vrepchr.16b cmhs vhas_chr.16b, vhas_chr.16b, vdata.16b lsl tmp2, srcin, 2 shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */ fmov tmp1, dend lsr tmp1, tmp1, tmp2 /* Mask padding bits.
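The nibble-mask scheme described in the strchrnul-mte comment above is shared by most of the MTE-safe routines in this import, and it maps directly onto ACLE intrinsics. A minimal little-endian sketch, not taken from the source:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Build the 64-bit nibble mask for one 16-byte chunk: each byte of
       the cmeq result (0x00 or 0xff) contributes 4 bits via the
       shift-right-and-narrow, so the syndrome keeps bytes in string
       order and ctz(synd) / 4 is the offset of the first match.  */
    static inline uint64_t match_syndrome (const uint8_t *p, uint8_t c)
    {
      uint8x16_t data = vld1q_u8 (p);
      uint8x16_t cmp = vceqq_u8 (data, vdupq_n_u8 (c));
      uint8x8_t nibbles = vshrn_n_u16 (vreinterpretq_u16_u8 (cmp), 4);
      return vget_lane_u64 (vreinterpret_u64_u8 (nibbles), 0);
    }

    /* Byte offset of the first match, given a non-zero syndrome.  */
    static inline unsigned first_match (uint64_t synd)
    {
      return (unsigned) __builtin_ctzll (synd) >> 2;
    }

The "lsl tmp2, srcin, 2 / lsr tmp1, tmp1, tmp2" pair above then drops the nibbles of bytes before a misaligned string start, relying on the shift using only its low six bits.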
*/ cbz tmp1, L(loop) rbit tmp1, tmp1 clz tmp1, tmp1 add result, srcin, tmp1, lsr 2 ret .p2align 4 L(loop): ldr qdata, [src, 16] cmeq vhas_chr.16b, vdata.16b, vrepchr.16b cmhs vhas_chr.16b, vhas_chr.16b, vdata.16b umaxp vend.16b, vhas_chr.16b, vhas_chr.16b fmov tmp1, dend cbnz tmp1, L(end) ldr qdata, [src, 32]! cmeq vhas_chr.16b, vdata.16b, vrepchr.16b cmhs vhas_chr.16b, vhas_chr.16b, vdata.16b umaxp vend.16b, vhas_chr.16b, vhas_chr.16b fmov tmp1, dend cbz tmp1, L(loop) sub src, src, 16 L(end): shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */ add src, src, 16 fmov tmp1, dend #ifndef __AARCH64EB__ rbit tmp1, tmp1 #endif clz tmp1, tmp1 add result, src, tmp1, lsr 2 ret END (__strchrnul_aarch64_mte) diff --git a/contrib/arm-optimized-routines/string/aarch64/strchrnul.S b/contrib/arm-optimized-routines/string/aarch64/strchrnul.S index 666e8d0304c1..0a32c46c30c5 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strchrnul.S +++ b/contrib/arm-optimized-routines/string/aarch64/strchrnul.S @@ -1,114 +1,113 @@ /* * strchrnul - find a character or nul in a string * * Copyright (c) 2014-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64 * Neon Available. */ #include "asmdefs.h" /* Arguments and results. */ #define srcin x0 #define chrin w1 #define result x0 #define src x2 #define tmp1 x3 #define wtmp2 w4 #define tmp3 x5 #define vrepchr v0 #define vdata1 v1 #define vdata2 v2 #define vhas_nul1 v3 #define vhas_nul2 v4 #define vhas_chr1 v5 #define vhas_chr2 v6 #define vrepmask v7 #define vend1 v16 /* Core algorithm. For each 32-byte hunk we calculate a 64-bit syndrome value, with two bits per byte (LSB is always in bits 0 and 1, for both big and little-endian systems). For each tuple, bit 0 is set iff the relevant byte matched the requested character or nul. Since the bits in the syndrome reflect exactly the order in which things occur in the original string a count_trailing_zeros() operation will identify exactly which byte is causing the termination. */ /* Locals and temporaries. */ ENTRY (__strchrnul_aarch64) - PTR_ARG (0) /* Magic constant 0x40100401 to allow us to identify which lane matches the termination condition. */ mov wtmp2, #0x0401 movk wtmp2, #0x4010, lsl #16 dup vrepchr.16b, chrin bic src, srcin, #31 /* Work with aligned 32-byte hunks. */ dup vrepmask.4s, wtmp2 ands tmp1, srcin, #31 b.eq L(loop) /* Input string is not 32-byte aligned. Rather than forcing the padding bytes to a safe value, we calculate the syndrome for all the bytes, but then mask off those bits of the syndrome that are related to the padding. */ ld1 {vdata1.16b, vdata2.16b}, [src], #32 neg tmp1, tmp1 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b cmhs vhas_nul1.16b, vhas_chr1.16b, vdata1.16b cmhs vhas_nul2.16b, vhas_chr2.16b, vdata2.16b and vhas_chr1.16b, vhas_nul1.16b, vrepmask.16b and vhas_chr2.16b, vhas_nul2.16b, vrepmask.16b lsl tmp1, tmp1, #1 addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 mov tmp3, #~0 addp vend1.16b, vend1.16b, vend1.16b // 128->64 lsr tmp1, tmp3, tmp1 mov tmp3, vend1.d[0] bic tmp1, tmp3, tmp1 // Mask padding bits. 
cbnz tmp1, L(tail) .p2align 4 L(loop): ld1 {vdata1.16b, vdata2.16b}, [src], #32 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b cmhs vhas_nul1.16b, vhas_chr1.16b, vdata1.16b cmhs vhas_nul2.16b, vhas_chr2.16b, vdata2.16b orr vend1.16b, vhas_nul1.16b, vhas_nul2.16b umaxp vend1.16b, vend1.16b, vend1.16b mov tmp1, vend1.d[0] cbz tmp1, L(loop) /* Termination condition found. Now need to establish exactly why we terminated. */ and vhas_chr1.16b, vhas_nul1.16b, vrepmask.16b and vhas_chr2.16b, vhas_nul2.16b, vrepmask.16b addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 addp vend1.16b, vend1.16b, vend1.16b // 128->64 mov tmp1, vend1.d[0] L(tail): /* Count the trailing zeros, by bit reversing... */ rbit tmp1, tmp1 /* Re-bias source. */ sub src, src, #32 clz tmp1, tmp1 /* ... and counting the leading zeros. */ /* tmp1 is twice the offset into the fragment. */ add result, src, tmp1, lsr #1 ret END (__strchrnul_aarch64) diff --git a/contrib/arm-optimized-routines/string/aarch64/strcmp.S b/contrib/arm-optimized-routines/string/aarch64/strcmp.S index 137a9aa06681..7c0d0485a89b 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strcmp.S +++ b/contrib/arm-optimized-routines/string/aarch64/strcmp.S @@ -1,189 +1,187 @@ /* * strcmp - compare two strings * * Copyright (c) 2012-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64. * MTE compatible. */ #include "asmdefs.h" #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define src1 x0 #define src2 x1 #define result x0 #define data1 x2 #define data1w w2 #define data2 x3 #define data2w w3 #define has_nul x4 #define diff x5 #define off1 x5 #define syndrome x6 #define tmp x6 #define data3 x7 #define zeroones x8 #define shift x9 #define off2 x10 /* On big-endian early bytes are at MSB and on little-endian LSB. LS_FW means shifting towards early bytes. */ #ifdef __AARCH64EB__ # define LS_FW lsl #else # define LS_FW lsr #endif /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and can be done in parallel across the entire word. Since carry propagation makes 0x1 bytes before a NUL byte appear NUL too in big-endian, byte-reverse the data before the NUL check. */ ENTRY (__strcmp_aarch64) - PTR_ARG (0) - PTR_ARG (1) sub off2, src2, src1 mov zeroones, REP8_01 and tmp, src1, 7 tst off2, 7 b.ne L(misaligned8) cbnz tmp, L(mutual_align) .p2align 4 L(loop_aligned): ldr data2, [src1, off2] ldr data1, [src1], 8 L(start_realigned): #ifdef __AARCH64EB__ rev tmp, data1 sub has_nul, tmp, zeroones orr tmp, tmp, REP8_7f #else sub has_nul, data1, zeroones orr tmp, data1, REP8_7f #endif bics has_nul, has_nul, tmp /* Non-zero if NUL terminator. */ ccmp data1, data2, 0, eq b.eq L(loop_aligned) #ifdef __AARCH64EB__ rev has_nul, has_nul #endif eor diff, data1, data2 orr syndrome, diff, has_nul L(end): #ifndef __AARCH64EB__ rev syndrome, syndrome rev data1, data1 rev data2, data2 #endif clz shift, syndrome /* The most-significant-non-zero bit of the syndrome marks either the first bit that is different, or the top bit of the first zero byte. Shifting left now will bring the critical information into the top bits. */ lsl data1, data1, shift lsl data2, data2, shift /* But we need to zero-extend (char is unsigned) the value and then perform a signed 32-bit subtraction. 
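The NUL test used throughout this function is the word-at-a-time trick stated at the top of the hunk; as a hedged C sketch (helper name illustrative):

    #include <stdint.h>

    #define REP8_01 0x0101010101010101ULL
    #define REP8_7f 0x7f7f7f7f7f7f7f7fULL

    /* Non-zero iff some byte of x is zero: subtracting REP8_01 sets the
       top bit of a byte only by borrowing through 0x00, and the
       ~(x | REP8_7f) term keeps just those top bits.  Borrow propagation
       can spuriously flag 0x01 bytes above a real zero, so only the
       least-significant flagged byte is trusted; that is why the code
       above byte-reverses the data on big-endian before this test.  */
    static inline uint64_t has_zero_byte (uint64_t x)
    {
      return (x - REP8_01) & ~(x | REP8_7f);
    }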
*/ lsr data1, data1, 56 sub result, data1, data2, lsr 56 ret .p2align 4 L(mutual_align): /* Sources are mutually aligned, but are not currently at an alignment boundary. Round down the addresses and then mask off the bytes that precede the start point. */ bic src1, src1, 7 ldr data2, [src1, off2] ldr data1, [src1], 8 neg shift, src2, lsl 3 /* Bits to alignment -64. */ mov tmp, -1 LS_FW tmp, tmp, shift orr data1, data1, tmp orr data2, data2, tmp b L(start_realigned) L(misaligned8): /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always checking to make sure that we don't access beyond the end of SRC2. */ cbz tmp, L(src1_aligned) L(do_misaligned): ldrb data1w, [src1], 1 ldrb data2w, [src2], 1 cmp data1w, 0 ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */ b.ne L(done) tst src1, 7 b.ne L(do_misaligned) L(src1_aligned): neg shift, src2, lsl 3 bic src2, src2, 7 ldr data3, [src2], 8 #ifdef __AARCH64EB__ rev data3, data3 #endif lsr tmp, zeroones, shift orr data3, data3, tmp sub has_nul, data3, zeroones orr tmp, data3, REP8_7f bics has_nul, has_nul, tmp b.ne L(tail) sub off1, src2, src1 .p2align 4 L(loop_unaligned): ldr data3, [src1, off1] ldr data2, [src1, off2] #ifdef __AARCH64EB__ rev data3, data3 #endif sub has_nul, data3, zeroones orr tmp, data3, REP8_7f ldr data1, [src1], 8 bics has_nul, has_nul, tmp ccmp data1, data2, 0, eq b.eq L(loop_unaligned) lsl tmp, has_nul, shift #ifdef __AARCH64EB__ rev tmp, tmp #endif eor diff, data1, data2 orr syndrome, diff, tmp cbnz syndrome, L(end) L(tail): ldr data1, [src1] neg shift, shift lsr data2, data3, shift lsr has_nul, has_nul, shift #ifdef __AARCH64EB__ rev data2, data2 rev has_nul, has_nul #endif eor diff, data1, data2 orr syndrome, diff, has_nul b L(end) L(done): sub result, data1, data2 ret END (__strcmp_aarch64) diff --git a/contrib/arm-optimized-routines/string/aarch64/strcpy.S b/contrib/arm-optimized-routines/string/aarch64/strcpy.S index 97ae37ea4229..5852616e6024 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strcpy.S +++ b/contrib/arm-optimized-routines/string/aarch64/strcpy.S @@ -1,156 +1,154 @@ /* * strcpy/stpcpy - copy a string returning pointer to start/end. * * Copyright (c) 2020-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. * MTE compatible. */ #include "asmdefs.h" #define dstin x0 #define srcin x1 #define result x0 #define src x2 #define dst x3 #define len x4 #define synd x4 #define tmp x5 #define shift x5 #define data1 x6 #define dataw1 w6 #define data2 x7 #define dataw2 w7 #define dataq q0 #define vdata v0 #define vhas_nul v1 #define vend v2 #define dend d2 #define dataq2 q1 #ifdef BUILD_STPCPY # define STRCPY __stpcpy_aarch64 # define IFSTPCPY(X,...) X,__VA_ARGS__ #else # define STRCPY __strcpy_aarch64 # define IFSTPCPY(X,...) #endif /* Core algorithm: For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits per byte. We take 4 bits of every comparison byte with shift right and narrow by 4 instruction. Since the bits in the nibble mask reflect the order in which things occur in the original string, counting leading zeros identifies exactly which byte matched. */ ENTRY (STRCPY) - PTR_ARG (0) - PTR_ARG (1) bic src, srcin, 15 ld1 {vdata.16b}, [src] cmeq vhas_nul.16b, vdata.16b, 0 lsl shift, srcin, 2 shrn vend.8b, vhas_nul.8h, 4 fmov synd, dend lsr synd, synd, shift cbnz synd, L(tail) ldr dataq, [src, 16]! 
cmeq vhas_nul.16b, vdata.16b, 0 shrn vend.8b, vhas_nul.8h, 4 fmov synd, dend cbz synd, L(start_loop) #ifndef __AARCH64EB__ rbit synd, synd #endif sub tmp, src, srcin clz len, synd add len, tmp, len, lsr 2 tbz len, 4, L(less16) sub tmp, len, 15 ldr dataq, [srcin] ldr dataq2, [srcin, tmp] str dataq, [dstin] str dataq2, [dstin, tmp] IFSTPCPY (add result, dstin, len) ret L(tail): rbit synd, synd clz len, synd lsr len, len, 2 L(less16): tbz len, 3, L(less8) sub tmp, len, 7 ldr data1, [srcin] ldr data2, [srcin, tmp] str data1, [dstin] str data2, [dstin, tmp] IFSTPCPY (add result, dstin, len) ret .p2align 4 L(less8): subs tmp, len, 3 b.lo L(less4) ldr dataw1, [srcin] ldr dataw2, [srcin, tmp] str dataw1, [dstin] str dataw2, [dstin, tmp] IFSTPCPY (add result, dstin, len) ret L(less4): cbz len, L(zerobyte) ldrh dataw1, [srcin] strh dataw1, [dstin] L(zerobyte): strb wzr, [dstin, len] IFSTPCPY (add result, dstin, len) ret .p2align 4 L(start_loop): sub tmp, srcin, dstin ldr dataq2, [srcin] sub dst, src, tmp str dataq2, [dstin] L(loop): str dataq, [dst], 32 ldr dataq, [src, 16] cmeq vhas_nul.16b, vdata.16b, 0 umaxp vend.16b, vhas_nul.16b, vhas_nul.16b fmov synd, dend cbnz synd, L(loopend) str dataq, [dst, -16] ldr dataq, [src, 32]! cmeq vhas_nul.16b, vdata.16b, 0 umaxp vend.16b, vhas_nul.16b, vhas_nul.16b fmov synd, dend cbz synd, L(loop) add dst, dst, 16 L(loopend): shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ fmov synd, dend sub dst, dst, 31 #ifndef __AARCH64EB__ rbit synd, synd #endif clz len, synd lsr len, len, 2 add dst, dst, len ldr dataq, [dst, tmp] str dataq, [dst] IFSTPCPY (add result, dst, 15) ret END (STRCPY) diff --git a/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S b/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S index 77235797f7c5..afa72eed9a43 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S +++ b/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S @@ -1,77 +1,89 @@ /* * strlen - calculate the length of a string. * * Copyright (c) 2020-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. * MTE compatible. */ #include "asmdefs.h" #define srcin x0 #define result x0 #define src x1 #define synd x2 #define tmp x3 #define shift x4 #define data q0 #define vdata v0 #define vhas_nul v1 #define vend v2 #define dend d2 /* Core algorithm: Process the string in 16-byte aligned chunks. Compute a 64-bit mask with four bits per byte using the shrn instruction. A count trailing zeros then identifies the first zero byte. */ ENTRY (__strlen_aarch64_mte) - PTR_ARG (0) bic src, srcin, 15 ld1 {vdata.16b}, [src] cmeq vhas_nul.16b, vdata.16b, 0 lsl shift, srcin, 2 shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ fmov synd, dend lsr synd, synd, shift - cbz synd, L(loop) + cbz synd, L(next16) rbit synd, synd clz result, synd lsr result, result, 2 ret +L(next16): + ldr data, [src, 16] + cmeq vhas_nul.16b, vdata.16b, 0 + shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ + fmov synd, dend + cbz synd, L(loop) + add src, src, 16 +#ifndef __AARCH64EB__ + rbit synd, synd +#endif + sub result, src, srcin + clz tmp, synd + add result, result, tmp, lsr 2 + ret + .p2align 5 L(loop): - ldr data, [src, 16] + ldr data, [src, 32]! cmeq vhas_nul.16b, vdata.16b, 0 - umaxp vend.16b, vhas_nul.16b, vhas_nul.16b + addhn vend.8b, vhas_nul.8h, vhas_nul.8h fmov synd, dend cbnz synd, L(loop_end) - ldr data, [src, 32]! 
+ ldr data, [src, 16] cmeq vhas_nul.16b, vdata.16b, 0 - umaxp vend.16b, vhas_nul.16b, vhas_nul.16b + addhn vend.8b, vhas_nul.8h, vhas_nul.8h fmov synd, dend cbz synd, L(loop) - sub src, src, 16 + add src, src, 16 L(loop_end): - shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ - sub result, src, srcin - fmov synd, dend + sub result, shift, src, lsl 2 /* (srcin - src) << 2. */ #ifndef __AARCH64EB__ rbit synd, synd + sub result, result, 3 #endif - add result, result, 16 clz tmp, synd - add result, result, tmp, lsr 2 + sub result, tmp, result + lsr result, result, 2 ret END (__strlen_aarch64_mte) - diff --git a/contrib/arm-optimized-routines/string/aarch64/strlen.S b/contrib/arm-optimized-routines/string/aarch64/strlen.S index 6f6f08f636b2..0ebb26be844c 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strlen.S +++ b/contrib/arm-optimized-routines/string/aarch64/strlen.S @@ -1,193 +1,192 @@ /* * strlen - calculate the length of a string. * * Copyright (c) 2020-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. * Not MTE compatible. */ #include "asmdefs.h" #define srcin x0 #define len x0 #define src x1 #define data1 x2 #define data2 x3 #define has_nul1 x4 #define has_nul2 x5 #define tmp1 x4 #define tmp2 x5 #define tmp3 x6 #define tmp4 x7 #define zeroones x8 #define maskv v0 #define maskd d0 #define dataq1 q1 #define dataq2 q2 #define datav1 v1 #define datav2 v2 #define tmp x2 #define tmpw w2 #define synd x3 #define syndw w3 #define shift x4 /* For the first 32 bytes, NUL detection works on the principle that (X - 1) & (~X) & 0x80 (=> (X - 1) & ~(X | 0x7f)) is non-zero if a byte is zero, and can be done in parallel across the entire word. */ #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f /* To test the page crossing code path more thoroughly, compile with -DTEST_PAGE_CROSS - this will force all calls through the slower entry path. This option is not intended for production use. */ #ifdef TEST_PAGE_CROSS # define MIN_PAGE_SIZE 32 #else # define MIN_PAGE_SIZE 4096 #endif /* Core algorithm: Since strings are short on average, we check the first 32 bytes of the string for a NUL character without aligning the string. In order to use unaligned loads safely we must do a page cross check first. If there is a NUL byte we calculate the length from the 2 8-byte words using conditional select to reduce branch mispredictions (it is unlikely strlen will be repeatedly called on strings with the same length). If the string is longer than 32 bytes, align src so we don't need further page cross checks, and process 32 bytes per iteration using a fast SIMD loop. If the page cross check fails, we read 32 bytes from an aligned address, and ignore any characters before the string. If it contains a NUL character, return the length, if not, continue in the main loop. */ ENTRY (__strlen_aarch64) - PTR_ARG (0) and tmp1, srcin, MIN_PAGE_SIZE - 1 cmp tmp1, MIN_PAGE_SIZE - 32 b.hi L(page_cross) /* Look for a NUL byte in the first 16 bytes. */ ldp data1, data2, [srcin] mov zeroones, REP8_01 #ifdef __AARCH64EB__ /* For big-endian, carry propagation (if the final byte in the string is 0x01) means we cannot use has_nul1/2 directly. Since we expect strings to be small and early-exit, byte-swap the data now so has_null1/2 will be correct. 
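Earlier in this comment block the entry path is gated on a page-cross check so that the 32-byte unaligned loads cannot fault past the string; in C it amounts to the following sketch, assuming the file's MIN_PAGE_SIZE of 4096:

    #include <stdint.h>

    #define MIN_PAGE_SIZE 4096

    /* True when an unaligned 32-byte load from s could cross into the
       next page; mirrors the "and tmp1, srcin, MIN_PAGE_SIZE - 1 /
       cmp / b.hi" entry sequence.  Only the last 31 offsets within a
       page can cross.  */
    static inline int page_cross (const char *s)
    {
      return ((uintptr_t) s & (MIN_PAGE_SIZE - 1)) > MIN_PAGE_SIZE - 32;
    }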
*/ rev data1, data1 rev data2, data2 #endif sub tmp1, data1, zeroones orr tmp2, data1, REP8_7f sub tmp3, data2, zeroones orr tmp4, data2, REP8_7f bics has_nul1, tmp1, tmp2 bic has_nul2, tmp3, tmp4 ccmp has_nul2, 0, 0, eq b.eq L(bytes16_31) /* Find the exact offset of the first NUL byte in the first 16 bytes from the string start. Enter with C = has_nul1 == 0. */ csel has_nul1, has_nul1, has_nul2, cc mov len, 8 rev has_nul1, has_nul1 csel len, xzr, len, cc clz tmp1, has_nul1 add len, len, tmp1, lsr 3 ret /* Look for a NUL byte at offset 16..31 in the string. */ L(bytes16_31): ldp data1, data2, [srcin, 16] #ifdef __AARCH64EB__ rev data1, data1 rev data2, data2 #endif sub tmp1, data1, zeroones orr tmp2, data1, REP8_7f sub tmp3, data2, zeroones orr tmp4, data2, REP8_7f bics has_nul1, tmp1, tmp2 bic has_nul2, tmp3, tmp4 ccmp has_nul2, 0, 0, eq b.eq L(loop_entry) /* Find the exact offset of the first NUL byte at offset 16..31 from the string start. Enter with C = has_nul1 == 0. */ csel has_nul1, has_nul1, has_nul2, cc mov len, 24 rev has_nul1, has_nul1 mov tmp3, 16 clz tmp1, has_nul1 csel len, tmp3, len, cc add len, len, tmp1, lsr 3 ret nop L(loop_entry): bic src, srcin, 31 .p2align 5 L(loop): ldp dataq1, dataq2, [src, 32]! uminp maskv.16b, datav1.16b, datav2.16b uminp maskv.16b, maskv.16b, maskv.16b cmeq maskv.8b, maskv.8b, 0 fmov synd, maskd cbz synd, L(loop) /* Low 32 bits of synd are non-zero if a NUL was found in datav1. */ cmeq maskv.16b, datav1.16b, 0 sub len, src, srcin cbnz syndw, 1f cmeq maskv.16b, datav2.16b, 0 add len, len, 16 1: /* Generate a bitmask and compute correct byte offset. */ shrn maskv.8b, maskv.8h, 4 fmov synd, maskd #ifndef __AARCH64EB__ rbit synd, synd #endif clz tmp, synd add len, len, tmp, lsr 2 ret L(page_cross): bic src, srcin, 31 mov tmpw, 0x0c03 movk tmpw, 0xc030, lsl 16 ld1 {datav1.16b, datav2.16b}, [src] dup maskv.4s, tmpw cmeq datav1.16b, datav1.16b, 0 cmeq datav2.16b, datav2.16b, 0 and datav1.16b, datav1.16b, maskv.16b and datav2.16b, datav2.16b, maskv.16b addp maskv.16b, datav1.16b, datav2.16b addp maskv.16b, maskv.16b, maskv.16b fmov synd, maskd lsl shift, srcin, 1 lsr synd, synd, shift cbz synd, L(loop) rbit synd, synd clz len, synd lsr len, len, 1 ret END (__strlen_aarch64) diff --git a/contrib/arm-optimized-routines/string/aarch64/strncmp.S b/contrib/arm-optimized-routines/string/aarch64/strncmp.S index 128a10c52bb1..493a0f06ed1d 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strncmp.S +++ b/contrib/arm-optimized-routines/string/aarch64/strncmp.S @@ -1,308 +1,305 @@ /* * strncmp - compare two strings * * Copyright (c) 2013-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64. * MTE compatible. */ #include "asmdefs.h" #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f /* Parameters and result. */ #define src1 x0 #define src2 x1 #define limit x2 #define result x0 /* Internal variables. */ #define data1 x3 #define data1w w3 #define data2 x4 #define data2w w4 #define has_nul x5 #define diff x6 #define syndrome x7 #define tmp1 x8 #define tmp2 x9 #define tmp3 x10 #define zeroones x11 #define pos x12 #define mask x13 #define endloop x14 #define count mask #define offset pos #define neg_offset x15 /* Define endian dependent shift operations. On big-endian early bytes are at MSB and on little-endian LSB. LS_FW means shifting towards early bytes. LS_BK means shifting towards later bytes. 
*/ #ifdef __AARCH64EB__ #define LS_FW lsl #define LS_BK lsr #else #define LS_FW lsr #define LS_BK lsl #endif ENTRY (__strncmp_aarch64) - PTR_ARG (0) - PTR_ARG (1) - SIZE_ARG (2) cbz limit, L(ret0) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 and count, src1, #7 b.ne L(misaligned8) cbnz count, L(mutual_align) /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and can be done in parallel across the entire word. */ .p2align 4 L(loop_aligned): ldr data1, [src1], #8 ldr data2, [src2], #8 L(start_realigned): subs limit, limit, #8 sub tmp1, data1, zeroones orr tmp2, data1, #REP8_7f eor diff, data1, data2 /* Non-zero if differences found. */ csinv endloop, diff, xzr, hi /* Last Dword or differences. */ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ ccmp endloop, #0, #0, eq b.eq L(loop_aligned) /* End of main loop */ L(full_check): #ifndef __AARCH64EB__ orr syndrome, diff, has_nul add limit, limit, 8 /* Rewind limit to before last subs. */ L(syndrome_check): /* Limit was reached. Check if the NUL byte or the difference is before the limit. */ rev syndrome, syndrome rev data1, data1 clz pos, syndrome rev data2, data2 lsl data1, data1, pos cmp limit, pos, lsr #3 lsl data2, data2, pos /* But we need to zero-extend (char is unsigned) the value and then perform a signed 32-bit subtraction. */ lsr data1, data1, #56 sub result, data1, data2, lsr #56 csel result, result, xzr, hi ret #else /* Not reached the limit, must have found the end or a diff. */ tbz limit, #63, L(not_limit) add tmp1, limit, 8 cbz limit, L(not_limit) lsl limit, tmp1, #3 /* Bits -> bytes. */ mov mask, #~0 lsr mask, mask, limit bic data1, data1, mask bic data2, data2, mask /* Make sure that the NUL byte is marked in the syndrome. */ orr has_nul, has_nul, mask L(not_limit): /* For big-endian we cannot use the trick with the syndrome value as carry-propagation can corrupt the upper bits if the trailing bytes in the string contain 0x01. */ /* However, if there is no NUL byte in the dword, we can generate the result directly. We can't just subtract the bytes as the MSB might be significant. */ cbnz has_nul, 1f cmp data1, data2 cset result, ne cneg result, result, lo ret 1: /* Re-compute the NUL-byte detection, using a byte-reversed value. */ rev tmp3, data1 sub tmp1, tmp3, zeroones orr tmp2, tmp3, #REP8_7f bic has_nul, tmp1, tmp2 rev has_nul, has_nul orr syndrome, diff, has_nul clz pos, syndrome /* The most-significant-non-zero bit of the syndrome marks either the first bit that is different, or the top bit of the first zero byte. Shifting left now will bring the critical information into the top bits. */ L(end_quick): lsl data1, data1, pos lsl data2, data2, pos /* But we need to zero-extend (char is unsigned) the value and then perform a signed 32-bit subtraction. */ lsr data1, data1, #56 sub result, data1, data2, lsr #56 ret #endif L(mutual_align): /* Sources are mutually aligned, but are not currently at an alignment boundary. Round down the addresses and then mask off the bytes that precede the start point. We also need to adjust the limit calculations, but without overflowing if the limit is near ULONG_MAX. */ bic src1, src1, #7 bic src2, src2, #7 ldr data1, [src1], #8 neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */ ldr data2, [src2], #8 mov tmp2, #~0 LS_FW tmp2, tmp2, tmp3 /* Shift (count & 63). */ /* Adjust the limit and ensure it doesn't overflow. 
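The limit adjustment this comment refers to is a saturating add: the "adds limit, limit, count / csinv limit, limit, xzr, lo" pair that follows clamps to all-ones on unsigned overflow rather than wrapping. Equivalent C, with an illustrative name:

    #include <stdint.h>

    /* limit + count, saturating to UINT64_MAX instead of wrapping when
       limit is near ULONG_MAX; the sum wraps exactly when it ends up
       below count.  */
    static inline uint64_t saturating_add (uint64_t limit, uint64_t count)
    {
      uint64_t sum = limit + count;
      return sum < count ? UINT64_MAX : sum;
    }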
*/ adds limit, limit, count csinv limit, limit, xzr, lo orr data1, data1, tmp2 orr data2, data2, tmp2 b L(start_realigned) .p2align 4 /* Don't bother with dwords for up to 16 bytes. */ L(misaligned8): cmp limit, #16 b.hs L(try_misaligned_words) L(byte_loop): /* Perhaps we can do better than this. */ ldrb data1w, [src1], #1 ldrb data2w, [src2], #1 subs limit, limit, #1 ccmp data1w, #1, #0, hi /* NZCV = 0b0000. */ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ b.eq L(byte_loop) L(done): sub result, data1, data2 ret /* Align the SRC1 to a dword by doing a bytewise compare and then do the dword loop. */ L(try_misaligned_words): cbz count, L(src1_aligned) neg count, count and count, count, #7 sub limit, limit, count L(page_end_loop): ldrb data1w, [src1], #1 ldrb data2w, [src2], #1 cmp data1w, #1 ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ b.ne L(done) subs count, count, #1 b.hi L(page_end_loop) /* The following diagram explains the comparison of misaligned strings. The bytes are shown in natural order. For little-endian, it is reversed in the registers. The "x" bytes are before the string. The "|" separates data that is loaded at one time. src1 | a a a a a a a a | b b b c c c c c | . . . src2 | x x x x x a a a a a a a a b b b | c c c c c . . . After shifting in each step, the data looks like this: STEP_A STEP_B STEP_C data1 a a a a a a a a b b b c c c c c b b b c c c c c data2 a a a a a a a a b b b 0 0 0 0 0 0 0 0 c c c c c The bytes with "0" are eliminated from the syndrome via mask. Align SRC2 down to 16 bytes. This way we can read 16 bytes at a time from SRC2. The comparison happens in 3 steps. After each step the loop can exit, or read from SRC1 or SRC2. */ L(src1_aligned): /* Calculate offset from 8 byte alignment to string start in bits. No need to mask offset since shifts are ignoring upper bits. */ lsl offset, src2, #3 bic src2, src2, #0xf mov mask, -1 neg neg_offset, offset ldr data1, [src1], #8 ldp tmp1, tmp2, [src2], #16 LS_BK mask, mask, neg_offset and neg_offset, neg_offset, #63 /* Need actual value for cmp later. */ /* Skip the first compare if data in tmp1 is irrelevant. */ tbnz offset, 6, L(misaligned_mid_loop) L(loop_misaligned): /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/ LS_FW data2, tmp1, offset LS_BK tmp1, tmp2, neg_offset subs limit, limit, #8 orr data2, data2, tmp1 /* 8 bytes from SRC2 combined from two regs.*/ sub has_nul, data1, zeroones eor diff, data1, data2 /* Non-zero if differences found. */ orr tmp3, data1, #REP8_7f csinv endloop, diff, xzr, hi /* If limit, set to all ones. */ bic has_nul, has_nul, tmp3 /* Non-zero if NUL byte found in SRC1. */ orr tmp3, endloop, has_nul cbnz tmp3, L(full_check) ldr data1, [src1], #8 L(misaligned_mid_loop): /* STEP_B: Compare first part of data1 to second part of tmp2. */ LS_FW data2, tmp2, offset #ifdef __AARCH64EB__ /* For big-endian we do a byte reverse to avoid carry-propagation problem described above. This way we can reuse the has_nul in the next step and also use syndrome value trick at the end. */ rev tmp3, data1 #define data1_fixed tmp3 #else #define data1_fixed data1 #endif sub has_nul, data1_fixed, zeroones orr tmp3, data1_fixed, #REP8_7f eor diff, data2, data1 /* Non-zero if differences found. */ bic has_nul, has_nul, tmp3 /* Non-zero if NUL terminator. */ #ifdef __AARCH64EB__ rev has_nul, has_nul #endif cmp limit, neg_offset, lsr #3 orr syndrome, diff, has_nul bic syndrome, syndrome, mask /* Ignore later bytes. */ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. 
*/ cbnz tmp3, L(syndrome_check) /* STEP_C: Compare second part of data1 to first part of tmp1. */ ldp tmp1, tmp2, [src2], #16 cmp limit, #8 LS_BK data2, tmp1, neg_offset eor diff, data2, data1 /* Non-zero if differences found. */ orr syndrome, diff, has_nul and syndrome, syndrome, mask /* Ignore earlier bytes. */ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */ cbnz tmp3, L(syndrome_check) ldr data1, [src1], #8 sub limit, limit, #8 b L(loop_misaligned) #ifdef __AARCH64EB__ L(syndrome_check): clz pos, syndrome cmp pos, limit, lsl #3 b.lo L(end_quick) #endif L(ret0): mov result, #0 ret END(__strncmp_aarch64) diff --git a/contrib/arm-optimized-routines/string/aarch64/strnlen.S b/contrib/arm-optimized-routines/string/aarch64/strnlen.S index f2090a7485a5..6a96ec268f1a 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strnlen.S +++ b/contrib/arm-optimized-routines/string/aarch64/strnlen.S @@ -1,102 +1,100 @@ /* * strnlen - calculate the length of a string with limit. * * Copyright (c) 2020-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. * MTE compatible. */ #include "asmdefs.h" #define srcin x0 #define cntin x1 #define result x0 #define src x2 #define synd x3 #define shift x4 #define tmp x4 #define cntrem x5 #define qdata q0 #define vdata v0 #define vhas_chr v1 #define vend v2 #define dend d2 /* Core algorithm: Process the string in 16-byte aligned chunks. Compute a 64-bit mask with four bits per byte using the shrn instruction. A count trailing zeros then identifies the first zero byte. */ ENTRY (__strnlen_aarch64) - PTR_ARG (0) - SIZE_ARG (1) bic src, srcin, 15 cbz cntin, L(nomatch) ld1 {vdata.16b}, [src] cmeq vhas_chr.16b, vdata.16b, 0 lsl shift, srcin, 2 shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */ fmov synd, dend lsr synd, synd, shift cbz synd, L(start_loop) L(finish): rbit synd, synd clz synd, synd lsr result, synd, 2 cmp cntin, result csel result, cntin, result, ls ret L(nomatch): mov result, cntin ret L(start_loop): sub tmp, src, srcin add tmp, tmp, 17 subs cntrem, cntin, tmp b.lo L(nomatch) /* Make sure that it won't overread by a 16-byte chunk */ tbz cntrem, 4, L(loop32_2) sub src, src, 16 .p2align 5 L(loop32): ldr qdata, [src, 32]! cmeq vhas_chr.16b, vdata.16b, 0 umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */ fmov synd, dend cbnz synd, L(end) L(loop32_2): ldr qdata, [src, 16] subs cntrem, cntrem, 32 cmeq vhas_chr.16b, vdata.16b, 0 b.lo L(end_2) umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */ fmov synd, dend cbz synd, L(loop32) L(end_2): add src, src, 16 L(end): shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */ sub result, src, srcin fmov synd, dend #ifndef __AARCH64EB__ rbit synd, synd #endif clz synd, synd add result, result, synd, lsr 2 cmp cntin, result csel result, cntin, result, ls ret END (__strnlen_aarch64) diff --git a/contrib/arm-optimized-routines/string/aarch64/strrchr-mte.S b/contrib/arm-optimized-routines/string/aarch64/strrchr-mte.S index bb61ab9ad4e7..8668ce6d2916 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strrchr-mte.S +++ b/contrib/arm-optimized-routines/string/aarch64/strrchr-mte.S @@ -1,137 +1,136 @@ /* * strrchr - find last position of a character in a string. * * Copyright (c) 2020-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. * MTE compatible. 
*/ #include "asmdefs.h" #define srcin x0 #define chrin w1 #define result x0 #define src x2 #define tmp x3 #define synd x3 #define shift x4 #define src_match x4 #define nul_match x5 #define chr_match x6 #define vrepchr v0 #define vdata v1 #define vhas_nul v2 #define vhas_chr v3 #define vrepmask v4 #define vend v5 #define dend d5 /* Core algorithm. For each 16-byte chunk we calculate a 64-bit syndrome value, with four bits per byte (LSB is always in bits 0 and 1, for both big and little-endian systems). For each tuple, bits 0-1 are set if the relevant byte matched the requested character; bits 2-3 are set if the relevant byte matched the NUL end of string. */ ENTRY (__strrchr_aarch64_mte) - PTR_ARG (0) bic src, srcin, 15 dup vrepchr.16b, chrin movi vrepmask.16b, 0x33 ld1 {vdata.16b}, [src] cmeq vhas_nul.16b, vdata.16b, 0 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b shrn vend.8b, vhas_nul.8h, 4 lsl shift, srcin, 2 fmov synd, dend lsr synd, synd, shift lsl synd, synd, shift ands nul_match, synd, 0xcccccccccccccccc bne L(tail) cbnz synd, L(loop2_start) .p2align 4 L(loop1): ldr q1, [src, 16] cmeq vhas_chr.16b, vdata.16b, vrepchr.16b cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b umaxp vend.16b, vhas_nul.16b, vhas_nul.16b fmov synd, dend cbnz synd, L(loop1_end) ldr q1, [src, 32]! cmeq vhas_chr.16b, vdata.16b, vrepchr.16b cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b umaxp vend.16b, vhas_nul.16b, vhas_nul.16b fmov synd, dend cbz synd, L(loop1) sub src, src, 16 L(loop1_end): add src, src, 16 cmeq vhas_nul.16b, vdata.16b, 0 #ifdef __AARCH64EB__ bif vhas_nul.16b, vhas_chr.16b, vrepmask.16b shrn vend.8b, vhas_nul.8h, 4 fmov synd, dend rbit synd, synd #else bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b shrn vend.8b, vhas_nul.8h, 4 fmov synd, dend #endif ands nul_match, synd, 0xcccccccccccccccc beq L(loop2_start) L(tail): sub nul_match, nul_match, 1 and chr_match, synd, 0x3333333333333333 ands chr_match, chr_match, nul_match add result, src, 15 clz tmp, chr_match sub result, result, tmp, lsr 2 csel result, result, xzr, ne ret .p2align 4 nop nop L(loop2_start): add src, src, 16 bic vrepmask.8h, 0xf0 L(loop2): cmp synd, 0 csel src_match, src, src_match, ne csel chr_match, synd, chr_match, ne ld1 {vdata.16b}, [src], 16 cmeq vhas_nul.16b, vdata.16b, 0 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b umaxp vend.16b, vhas_nul.16b, vhas_nul.16b fmov synd, dend tst synd, 0xcccccccccccccccc beq L(loop2) bic vhas_nul.8h, 0x0f, lsl 8 addp vend.16b, vhas_nul.16b, vhas_nul.16b fmov synd, dend and nul_match, synd, 0xcccccccccccccccc sub nul_match, nul_match, 1 and tmp, synd, 0x3333333333333333 ands tmp, tmp, nul_match csel chr_match, tmp, chr_match, ne csel src_match, src, src_match, ne sub src_match, src_match, 1 clz tmp, chr_match sub result, src_match, tmp, lsr 2 ret END (__strrchr_aarch64_mte) diff --git a/contrib/arm-optimized-routines/string/aarch64/strrchr.S b/contrib/arm-optimized-routines/string/aarch64/strrchr.S index bf9cb297b6cb..f5713f4260fb 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strrchr.S +++ b/contrib/arm-optimized-routines/string/aarch64/strrchr.S @@ -1,149 +1,148 @@ /* * strrchr - find last position of a character in a string. * * Copyright (c) 2014-2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: * * ARMv8-a, AArch64 * Neon Available. */ #include "asmdefs.h" /* Arguments and results. 
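The __strrchr_aarch64_mte tail above separates the two kinds of matches with the 0x3333.../0xcccc... masks and then uses "nul_match - 1" to discard character matches past the terminator. A rough C rendering for a single 16-byte chunk (hypothetical helper, little-endian):

    #include <stdint.h>

    /* bits 0-1 of each nibble flag a character match, bits 2-3 a NUL.
       nul - 1 sets every bit below the first NUL bit, so the AND keeps
       only matches at or before the terminator; the highest surviving
       bit then gives the last match.  Returns -1 if none.  */
    static inline int last_match_before_nul (uint64_t synd)
    {
      uint64_t nul = synd & 0xccccccccccccccccULL;
      uint64_t chr = synd & 0x3333333333333333ULL;
      chr &= nul - 1;       /* nul == 0 keeps everything, as in the loop. */
      if (chr == 0)
        return -1;
      return (63 - __builtin_clzll (chr)) >> 2;  /* Byte index.  */
    }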
*/ #define srcin x0 #define chrin w1 #define result x0 #define src x2 #define tmp1 x3 #define wtmp2 w4 #define tmp3 x5 #define src_match x6 #define src_offset x7 #define const_m1 x8 #define tmp4 x9 #define nul_match x10 #define chr_match x11 #define vrepchr v0 #define vdata1 v1 #define vdata2 v2 #define vhas_nul1 v3 #define vhas_nul2 v4 #define vhas_chr1 v5 #define vhas_chr2 v6 #define vrepmask_0 v7 #define vrepmask_c v16 #define vend1 v17 #define vend2 v18 /* Core algorithm. For each 32-byte hunk we calculate a 64-bit syndrome value, with two bits per byte (LSB is always in bits 0 and 1, for both big and little-endian systems). For each tuple, bit 0 is set iff the relevant byte matched the requested character; bit 1 is set iff the relevant byte matched the NUL end of string (we trigger off bit0 for the special case of looking for NUL). Since the bits in the syndrome reflect exactly the order in which things occur in the original string a count_trailing_zeros() operation will identify exactly which byte is causing the termination, and why. */ ENTRY (__strrchr_aarch64) - PTR_ARG (0) /* Magic constant 0x40100401 to allow us to identify which lane matches the requested byte. Magic constant 0x80200802 used similarly for NUL termination. */ mov wtmp2, #0x0401 movk wtmp2, #0x4010, lsl #16 dup vrepchr.16b, chrin bic src, srcin, #31 /* Work with aligned 32-byte hunks. */ dup vrepmask_c.4s, wtmp2 mov src_offset, #0 ands tmp1, srcin, #31 add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */ b.eq L(aligned) /* Input string is not 32-byte aligned. Rather than forcing the padding bytes to a safe value, we calculate the syndrome for all the bytes, but then mask off those bits of the syndrome that are related to the padding. */ ld1 {vdata1.16b, vdata2.16b}, [src], #32 neg tmp1, tmp1 cmeq vhas_nul1.16b, vdata1.16b, #0 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b cmeq vhas_nul2.16b, vdata2.16b, #0 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128 addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 addp vend1.16b, vhas_nul1.16b, vhas_chr1.16b // 128->64 mov nul_match, vend1.d[0] lsl tmp1, tmp1, #1 mov const_m1, #~0 lsr tmp3, const_m1, tmp1 mov chr_match, vend1.d[1] bic nul_match, nul_match, tmp3 // Mask padding bits. bic chr_match, chr_match, tmp3 // Mask padding bits. cbnz nul_match, L(tail) .p2align 4 L(loop): cmp chr_match, #0 csel src_match, src, src_match, ne csel src_offset, chr_match, src_offset, ne L(aligned): ld1 {vdata1.16b, vdata2.16b}, [src], #32 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b uminp vend1.16b, vdata1.16b, vdata2.16b and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b cmeq vend1.16b, vend1.16b, 0 addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 addp vend1.16b, vend1.16b, vhas_chr1.16b // 128->64 mov nul_match, vend1.d[0] mov chr_match, vend1.d[1] cbz nul_match, L(loop) cmeq vhas_nul1.16b, vdata1.16b, #0 cmeq vhas_nul2.16b, vdata2.16b, #0 and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b mov nul_match, vhas_nul1.d[0] L(tail): /* Work out exactly where the string ends. 
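The sub/eor pair that follows builds a mask of every syndrome bit at or below the first NUL bit, so ANDing the character matches with it keeps only matches at or before the end of the string. As a C one-liner (illustrative):

    #include <stdint.h>

    /* Mask of all bits up to and including the lowest set bit of x;
       for x == 0 it degenerates to all ones, i.e. keep everything.  */
    static inline uint64_t up_to_first_set (uint64_t x)
    {
      return (x - 1) ^ x;
    }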
*/ sub tmp4, nul_match, #1 eor tmp4, tmp4, nul_match ands chr_match, chr_match, tmp4 /* And pick the values corresponding to the last match. */ csel src_match, src, src_match, ne csel src_offset, chr_match, src_offset, ne /* Count down from the top of the syndrome to find the last match. */ clz tmp3, src_offset /* Src_match points beyond the word containing the match, so we can simply subtract half the bit-offset into the syndrome. Because we are counting down, we need to go back one more character. */ add tmp3, tmp3, #2 sub result, src_match, tmp3, lsr #1 /* But if the syndrome shows no match was found, then return NULL. */ cmp src_offset, #0 csel result, result, xzr, ne ret END (__strrchr_aarch64) diff --git a/contrib/arm-optimized-routines/string/bench/memcpy.c b/contrib/arm-optimized-routines/string/bench/memcpy.c index b628f9b60d96..583fa505db75 100644 --- a/contrib/arm-optimized-routines/string/bench/memcpy.c +++ b/contrib/arm-optimized-routines/string/bench/memcpy.c @@ -1,342 +1,267 @@ /* * memcpy benchmark. * * Copyright (c) 2020-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define _GNU_SOURCE #include #include #include #include #include "stringlib.h" #include "benchlib.h" #define ITERS 5000 #define ITERS2 20000000 #define ITERS3 200000 #define NUM_TESTS 16384 #define MIN_SIZE 32768 #define MAX_SIZE (1024 * 1024) -static uint8_t a[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(64))); -static uint8_t b[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(64))); - -#define F(x) {#x, x}, - -static const struct fun -{ - const char *name; - void *(*fun)(void *, const void *, size_t); -} funtab[] = -{ -#if __aarch64__ - F(__memcpy_aarch64) -# if __ARM_NEON - F(__memcpy_aarch64_simd) -# endif -# if __ARM_FEATURE_SVE - F(__memcpy_aarch64_sve) -# endif -# if WANT_MOPS - F(__memcpy_aarch64_mops) -# endif -#elif __arm__ - F(__memcpy_arm) -#endif - F(memcpy) -#undef F - {0, 0} -}; +static uint8_t a[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(4096))); +static uint8_t b[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(4096))); + +#define DOTEST(STR,TESTFN) \ + printf (STR); \ + RUN (TESTFN, memcpy); \ + RUNA64 (TESTFN, __memcpy_aarch64); \ + RUNA64 (TESTFN, __memcpy_aarch64_simd); \ + RUNSVE (TESTFN, __memcpy_aarch64_sve); \ + RUNMOPS (TESTFN, __memcpy_aarch64_mops); \ + RUNA32 (TESTFN, __memcpy_arm); \ + printf ("\n"); typedef struct { uint16_t size; uint16_t freq; } freq_data_t; typedef struct { uint8_t align; uint16_t freq; } align_data_t; #define SIZE_NUM 65536 #define SIZE_MASK (SIZE_NUM-1) static uint8_t size_arr[SIZE_NUM]; /* Frequency data for memcpy of less than 4096 bytes based on SPEC2017. 
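 */

/* For reference, a hypothetical expansion of the DOTEST macro above (the
   RUN* helpers are defined in benchlib.h later in this patch): on an
   AArch64 build with SVE enabled and MOPS disabled,

     DOTEST ("Random memcpy (bytes/ns):\n", memcpy_random);

   expands to

     printf ("Random memcpy (bytes/ns):\n");
     memcpy_random ("memcpy", memcpy);
     memcpy_random ("__memcpy_aarch64", __memcpy_aarch64);
     memcpy_random ("__memcpy_aarch64_simd", __memcpy_aarch64_simd);
     memcpy_random ("__memcpy_aarch64_sve", __memcpy_aarch64_sve);
     printf ("\n");

   RUNMOPS and RUNA32 expand to nothing in this configuration, so
   implementations that are not built are never referenced.  */

/* The size table: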
*/ static freq_data_t size_freq[] = { {32,22320}, { 16,9554}, { 8,8915}, {152,5327}, { 4,2159}, {292,2035}, { 12,1608}, { 24,1343}, {1152,895}, {144, 813}, {884, 733}, {284, 721}, {120, 661}, { 2, 649}, {882, 550}, { 5, 475}, { 7, 461}, {108, 460}, { 10, 361}, { 9, 361}, { 6, 334}, { 3, 326}, {464, 308}, {2048,303}, { 1, 298}, { 64, 250}, { 11, 197}, {296, 194}, { 68, 187}, { 15, 185}, {192, 184}, {1764,183}, { 13, 173}, {560, 126}, {160, 115}, {288, 96}, {104, 96}, {1144, 83}, { 18, 80}, { 23, 78}, { 40, 77}, { 19, 68}, { 48, 63}, { 17, 57}, { 72, 54}, {1280, 51}, { 20, 49}, { 28, 47}, { 22, 46}, {640, 45}, { 25, 41}, { 14, 40}, { 56, 37}, { 27, 35}, { 35, 33}, {384, 33}, { 29, 32}, { 80, 30}, {4095, 22}, {232, 22}, { 36, 19}, {184, 17}, { 21, 17}, {256, 16}, { 44, 15}, { 26, 15}, { 31, 14}, { 88, 14}, {176, 13}, { 33, 12}, {1024, 12}, {208, 11}, { 62, 11}, {128, 10}, {704, 10}, {324, 10}, { 96, 10}, { 60, 9}, {136, 9}, {124, 9}, { 34, 8}, { 30, 8}, {480, 8}, {1344, 8}, {273, 7}, {520, 7}, {112, 6}, { 52, 6}, {344, 6}, {336, 6}, {504, 5}, {168, 5}, {424, 5}, { 0, 4}, { 76, 3}, {200, 3}, {512, 3}, {312, 3}, {240, 3}, {960, 3}, {264, 2}, {672, 2}, { 38, 2}, {328, 2}, { 84, 2}, { 39, 2}, {216, 2}, { 42, 2}, { 37, 2}, {1608, 2}, { 70, 2}, { 46, 2}, {536, 2}, {280, 1}, {248, 1}, { 47, 1}, {1088, 1}, {1288, 1}, {224, 1}, { 41, 1}, { 50, 1}, { 49, 1}, {808, 1}, {360, 1}, {440, 1}, { 43, 1}, { 45, 1}, { 78, 1}, {968, 1}, {392, 1}, { 54, 1}, { 53, 1}, { 59, 1}, {376, 1}, {664, 1}, { 58, 1}, {272, 1}, { 66, 1}, {2688, 1}, {472, 1}, {568, 1}, {720, 1}, { 51, 1}, { 63, 1}, { 86, 1}, {496, 1}, {776, 1}, { 57, 1}, {680, 1}, {792, 1}, {122, 1}, {760, 1}, {824, 1}, {552, 1}, { 67, 1}, {456, 1}, {984, 1}, { 74, 1}, {408, 1}, { 75, 1}, { 92, 1}, {576, 1}, {116, 1}, { 65, 1}, {117, 1}, { 82, 1}, {352, 1}, { 55, 1}, {100, 1}, { 90, 1}, {696, 1}, {111, 1}, {880, 1}, { 79, 1}, {488, 1}, { 61, 1}, {114, 1}, { 94, 1}, {1032, 1}, { 98, 1}, { 87, 1}, {584, 1}, { 85, 1}, {648, 1}, {0, 0} }; #define ALIGN_NUM 1024 #define ALIGN_MASK (ALIGN_NUM-1) static uint8_t src_align_arr[ALIGN_NUM]; static uint8_t dst_align_arr[ALIGN_NUM]; /* Source alignment frequency for memcpy based on SPEC2017. */ static align_data_t src_align_freq[] = { {8, 300}, {16, 292}, {32, 168}, {64, 153}, {4, 79}, {2, 14}, {1, 18}, {0, 0} }; static align_data_t dst_align_freq[] = { {8, 265}, {16, 263}, {64, 209}, {32, 174}, {4, 90}, {2, 10}, {1, 13}, {0, 0} }; typedef struct { uint64_t src : 24; uint64_t dst : 24; uint64_t len : 16; } copy_t; static copy_t test_arr[NUM_TESTS]; typedef char *(*proto_t) (char *, const char *, size_t); static void init_copy_distribution (void) { int i, j, freq, size, n; for (n = i = 0; (freq = size_freq[i].freq) != 0; i++) for (j = 0, size = size_freq[i].size; j < freq; j++) size_arr[n++] = size; assert (n == SIZE_NUM); for (n = i = 0; (freq = src_align_freq[i].freq) != 0; i++) for (j = 0, size = src_align_freq[i].align; j < freq; j++) src_align_arr[n++] = size - 1; assert (n == ALIGN_NUM); for (n = i = 0; (freq = dst_align_freq[i].freq) != 0; i++) for (j = 0, size = dst_align_freq[i].align; j < freq; j++) dst_align_arr[n++] = size - 1; assert (n == ALIGN_NUM); } static size_t init_copies (size_t max_size) { size_t total = 0; /* Create a random set of copies with the given size and alignment distributions. 
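 */

/* How the distribution tables are used (hypothetical sketch, not part of
   the patch): init_copy_distribution above flattens each {value, freq}
   table into an array with one slot per occurrence, so a single masked
   random index draws values with the recorded SPEC2017 probabilities:

     static uint16_t flat[SIZE_NUM];          // freqs sum to SIZE_NUM

     static void
     expand (const freq_data_t *tab)
     {
       int n = 0;
       for (int i = 0; tab[i].freq != 0; i++)
         for (int j = 0; j < tab[i].freq; j++)
           flat[n++] = tab[i].size;           // one entry per occurrence
       assert (n == SIZE_NUM);
     }

     // draw: len = flat[rand32 (0) & SIZE_MASK];

   The alignment arrays store align - 1, so ANDing an offset with the
   complement rounds it down to the drawn alignment.  */

/* Build the random copy set: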
*/ for (int i = 0; i < NUM_TESTS; i++) { test_arr[i].dst = (rand32 (0) & (max_size - 1)); test_arr[i].dst &= ~dst_align_arr[rand32 (0) & ALIGN_MASK]; test_arr[i].src = (rand32 (0) & (max_size - 1)); test_arr[i].src &= ~src_align_arr[rand32 (0) & ALIGN_MASK]; test_arr[i].len = size_arr[rand32 (0) & SIZE_MASK]; total += test_arr[i].len; } return total; } -int main (void) +static void inline __attribute ((always_inline)) +memcpy_random (const char *name, void *(*fn)(void *, const void *, size_t)) { - init_copy_distribution (); - - memset (a, 1, sizeof (a)); - memset (b, 2, sizeof (b)); - - printf("Random memcpy (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - size_t total = 0; - uint64_t tsum = 0; - printf ("%22s ", funtab[f].name); - rand32 (0x12345678); - - for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2) - { - size_t copy_size = init_copies (size) * ITERS; - - for (int c = 0; c < NUM_TESTS; c++) - funtab[f].fun (b + test_arr[c].dst, a + test_arr[c].src, - test_arr[c].len); - - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS; i++) - for (int c = 0; c < NUM_TESTS; c++) - funtab[f].fun (b + test_arr[c].dst, a + test_arr[c].src, - test_arr[c].len); - t = clock_get_ns () - t; - total += copy_size; - tsum += t; - printf ("%dK: %.2f ", size / 1024, (double)copy_size / t); - } - printf( "avg %.2f\n", (double)total / tsum); - } - - size_t total = 0; - uint64_t tsum = 0; - printf ("%22s ", "memcpy_call"); - rand32 (0x12345678); - + printf ("%22s ", name); + uint64_t total = 0, tsum = 0; for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2) { - size_t copy_size = init_copies (size) * ITERS; + uint64_t copy_size = init_copies (size) * ITERS; for (int c = 0; c < NUM_TESTS; c++) - memcpy (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len); + fn (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len); uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS; i++) for (int c = 0; c < NUM_TESTS; c++) - memcpy (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len); + fn (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len); t = clock_get_ns () - t; total += copy_size; tsum += t; - printf ("%dK: %.2f ", size / 1024, (double)copy_size / t); + printf ("%dK: %5.2f ", size / 1024, (double)copy_size / t); } - printf( "avg %.2f\n", (double)total / tsum); - + printf( "avg %5.2f\n", (double)total / tsum); +} - printf ("\nAligned medium memcpy (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - printf ("%22s ", funtab[f].name); - - for (int size = 8; size <= 512; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS2; i++) - funtab[f].fun (b, a, size); - t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); - } - printf ("\n"); - } +static void inline __attribute ((always_inline)) +memcpy_medium_aligned (const char *name, void *(*fn)(void *, const void *, size_t)) +{ + printf ("%22s ", name); - printf ("%22s ", "memcpy_call"); for (int size = 8; size <= 512; size *= 2) { uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS2; i++) - memcpy (b, a, size); + fn (b, a, size); t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); + printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t); } printf ("\n"); +} +static void inline __attribute ((always_inline)) +memcpy_medium_unaligned (const char *name, void *(*fn)(void *, const void *, size_t)) +{ + printf ("%22s ", name); - printf ("\nUnaligned medium memcpy (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - 
printf ("%22s ", funtab[f].name); - - for (int size = 8; size <= 512; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS2; i++) - funtab[f].fun (b + 3, a + 1, size); - t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); - } - printf ("\n"); - } - - printf ("%22s ", "memcpy_call"); for (int size = 8; size <= 512; size *= 2) { uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS2; i++) - memcpy (b + 3, a + 1, size); + fn (b + 3, a + 1, size); t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); + printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t); } printf ("\n"); +} +static void inline __attribute ((always_inline)) +memcpy_large (const char *name, void *(*fn)(void *, const void *, size_t)) +{ + printf ("%22s ", name); - printf ("\nLarge memcpy (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - printf ("%22s ", funtab[f].name); - - for (int size = 1024; size <= 65536; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS3; i++) - funtab[f].fun (b, a, size); - t = clock_get_ns () - t; - printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t); - } - printf ("\n"); - } - - printf ("%22s ", "memcpy_call"); for (int size = 1024; size <= 65536; size *= 2) { uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS3; i++) - memcpy (b, a, size); + fn (b, a, size); t = clock_get_ns () - t; - printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t); + printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t); } printf ("\n"); +} +static void inline __attribute ((always_inline)) +memmove_forward_unaligned (const char *name, void *(*fn)(void *, const void *, size_t)) +{ + printf ("%22s ", name); - printf ("\nUnaligned forwards memmove (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) + for (int size = 1024; size <= 65536; size *= 2) { - printf ("%22s ", funtab[f].name); - - for (int size = 1024; size <= 65536; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS3; i++) - funtab[f].fun (a, a + 256 + (i & 31), size); - t = clock_get_ns () - t; - printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t); - } - printf ("\n"); + uint64_t t = clock_get_ns (); + for (int i = 0; i < ITERS3; i++) + fn (a, a + 256 + (i & 31), size); + t = clock_get_ns () - t; + printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t); } + printf ("\n"); +} + +static void inline __attribute ((always_inline)) +memmove_backward_unaligned (const char *name, void *(*fn)(void *, const void *, size_t)) +{ + printf ("%22s ", name); - printf ("\nUnaligned backwards memmove (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) + for (int size = 1024; size <= 65536; size *= 2) { - printf ("%22s ", funtab[f].name); - - for (int size = 1024; size <= 65536; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS3; i++) - funtab[f].fun (a + 256 + (i & 31), a, size); - t = clock_get_ns () - t; - printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t); - } - printf ("\n"); + uint64_t t = clock_get_ns (); + for (int i = 0; i < ITERS3; i++) + fn (a + 256 + (i & 31), a, size); + t = clock_get_ns () - t; + printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t); } + printf ("\n"); +} + +int main (void) +{ + init_copy_distribution (); + + memset (a, 1, sizeof (a)); + memset (b, 2, sizeof (b)); + + DOTEST ("Random memcpy (bytes/ns):\n", memcpy_random); + DOTEST ("Medium memcpy aligned (bytes/ns):\n", memcpy_medium_aligned); + 
DOTEST ("Medium memcpy unaligned (bytes/ns):\n", memcpy_medium_unaligned); + DOTEST ("Large memcpy (bytes/ns):\n", memcpy_large); + DOTEST ("Forwards memmove unaligned (bytes/ns):\n", memmove_forward_unaligned); + DOTEST ("Backwards memmove unaligned (bytes/ns):\n", memmove_backward_unaligned); return 0; } diff --git a/contrib/arm-optimized-routines/string/bench/memset.c b/contrib/arm-optimized-routines/string/bench/memset.c index 990e23ba9a36..07474e469146 100644 --- a/contrib/arm-optimized-routines/string/bench/memset.c +++ b/contrib/arm-optimized-routines/string/bench/memset.c @@ -1,243 +1,190 @@ /* * memset benchmark. * * Copyright (c) 2021, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define _GNU_SOURCE #include #include #include #include #include "stringlib.h" #include "benchlib.h" #define ITERS 5000 #define ITERS2 20000000 #define ITERS3 1000000 #define NUM_TESTS 16384 #define MIN_SIZE 32768 #define MAX_SIZE (1024 * 1024) -static uint8_t a[MAX_SIZE + 4096] __attribute__((__aligned__(64))); +static uint8_t a[MAX_SIZE + 4096] __attribute__((__aligned__(4096))); -#define F(x) {#x, x}, - -static const struct fun -{ - const char *name; - void *(*fun)(void *, int, size_t); -} funtab[] = -{ -#if __aarch64__ - F(__memset_aarch64) -#elif __arm__ - F(__memset_arm) -#endif - F(memset) -#undef F - {0, 0} -}; +#define DOTEST(STR,TESTFN) \ + printf (STR); \ + RUN (TESTFN, memset); \ + RUNA64 (TESTFN, __memset_aarch64); \ + RUNSVE (TESTFN, __memset_aarch64_sve); \ + RUNMOPS (TESTFN, __memset_mops); \ + RUNA32 (TESTFN, __memset_arm); \ + printf ("\n"); typedef struct { uint32_t offset : 20, len : 12; } memset_test_t; static memset_test_t test_arr[NUM_TESTS]; typedef struct { uint16_t size; uint16_t freq; } freq_data_t; typedef struct { uint8_t align; uint16_t freq; } align_data_t; #define SIZE_NUM 65536 #define SIZE_MASK (SIZE_NUM-1) static uint8_t len_arr[SIZE_NUM]; /* Frequency data for memset sizes up to 4096 based on SPEC2017. 
*/ static freq_data_t memset_len_freq[] = { {40,28817}, {32,15336}, { 16,3823}, {296,3545}, { 24,3454}, { 8,1412}, {292,1202}, { 48, 927}, { 12, 613}, { 11, 539}, {284, 493}, {108, 414}, { 88, 380}, { 20, 295}, {312, 271}, { 72, 233}, { 2, 200}, { 4, 192}, { 15, 180}, { 14, 174}, { 13, 160}, { 56, 151}, { 36, 144}, { 64, 140}, {4095,133}, { 10, 130}, { 9, 124}, { 3, 124}, { 28, 120}, { 0, 118}, {288, 110}, {1152, 96}, {104, 90}, { 1, 86}, {832, 76}, {248, 74}, {1024, 69}, {120, 64}, {512, 63}, {384, 60}, { 6, 59}, { 80, 54}, { 17, 50}, { 7, 49}, {520, 47}, {2048, 39}, {256, 37}, {864, 33}, {1440, 28}, { 22, 27}, {2056, 24}, {260, 23}, { 68, 23}, { 5, 22}, { 18, 21}, {200, 18}, {2120, 18}, { 60, 17}, { 52, 16}, {336, 15}, { 44, 13}, {192, 13}, {160, 12}, {2064, 12}, {128, 12}, { 76, 11}, {164, 11}, {152, 10}, {136, 9}, {488, 7}, { 96, 6}, {560, 6}, {1016, 6}, {112, 5}, {232, 5}, {168, 5}, {952, 5}, {184, 5}, {144, 4}, {252, 4}, { 84, 3}, {960, 3}, {3808, 3}, {244, 3}, {280, 3}, {224, 3}, {156, 3}, {1088, 3}, {440, 3}, {216, 2}, {304, 2}, { 23, 2}, { 25, 2}, { 26, 2}, {264, 2}, {328, 2}, {1096, 2}, {240, 2}, {1104, 2}, {704, 2}, {1664, 2}, {360, 2}, {808, 1}, {544, 1}, {236, 1}, {720, 1}, {368, 1}, {424, 1}, {640, 1}, {1112, 1}, {552, 1}, {272, 1}, {776, 1}, {376, 1}, { 92, 1}, {536, 1}, {824, 1}, {496, 1}, {760, 1}, {792, 1}, {504, 1}, {344, 1}, {1816, 1}, {880, 1}, {176, 1}, {320, 1}, {352, 1}, {2008, 1}, {208, 1}, {408, 1}, {228, 1}, {2072, 1}, {568, 1}, {220, 1}, {616, 1}, {600, 1}, {392, 1}, {696, 1}, {2144, 1}, {1280, 1}, {2136, 1}, {632, 1}, {584, 1}, {456, 1}, {472, 1}, {3440, 1}, {2088, 1}, {680, 1}, {2928, 1}, {212, 1}, {648, 1}, {1752, 1}, {664, 1}, {3512, 1}, {1032, 1}, {528, 1}, {4072, 1}, {204, 1}, {2880, 1}, {3392, 1}, {712, 1}, { 59, 1}, {736, 1}, {592, 1}, {2520, 1}, {744, 1}, {196, 1}, {172, 1}, {728, 1}, {2040, 1}, {1192, 1}, {3600, 1}, {0, 0} }; #define ALIGN_NUM 1024 #define ALIGN_MASK (ALIGN_NUM-1) static uint8_t align_arr[ALIGN_NUM]; /* Alignment data for memset based on SPEC2017. */ static align_data_t memset_align_freq[] = { {16, 338}, {8, 307}, {32, 148}, {64, 131}, {4, 72}, {1, 23}, {2, 5}, {0, 0} }; static void init_memset_distribution (void) { int i, j, freq, size, n; for (n = i = 0; (freq = memset_len_freq[i].freq) != 0; i++) for (j = 0, size = memset_len_freq[i].size; j < freq; j++) len_arr[n++] = size; assert (n == SIZE_NUM); for (n = i = 0; (freq = memset_align_freq[i].freq) != 0; i++) for (j = 0, size = memset_align_freq[i].align; j < freq; j++) align_arr[n++] = size - 1; assert (n == ALIGN_NUM); } static size_t init_memset (size_t max_size) { size_t total = 0; /* Create a random set of memsets with the given size and alignment distributions. 
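 */

/* (Same sampling scheme as the memcpy benchmark above: the flattened
   frequency and alignment arrays are indexed with a masked rand32 value,
   and the stored align - 1 masks round each offset down to the drawn
   alignment.)  */

/* Build the random memset set: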
*/ for (int i = 0; i < NUM_TESTS; i++) { test_arr[i].offset = (rand32 (0) & (max_size - 1)); test_arr[i].offset &= ~align_arr[rand32 (0) & ALIGN_MASK]; test_arr[i].len = len_arr[rand32 (0) & SIZE_MASK]; total += test_arr[i].len; } return total; } - -int main (void) +static void inline __attribute ((always_inline)) +memset_random (const char *name, void *(*set)(void *, int, size_t)) { - init_memset_distribution (); - - memset (a, 1, sizeof (a)); - - printf("Random memset (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - size_t total_size = 0; - uint64_t tsum = 0; - printf ("%22s ", funtab[f].name); - rand32 (0x12345678); - - for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2) - { - size_t memset_size = init_memset (size) * ITERS; - - for (int c = 0; c < NUM_TESTS; c++) - funtab[f].fun (a + test_arr[c].offset, 0, test_arr[c].len); - - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS; i++) - for (int c = 0; c < NUM_TESTS; c++) - funtab[f].fun (a + test_arr[c].offset, 0, test_arr[c].len); - t = clock_get_ns () - t; - total_size += memset_size; - tsum += t; - printf ("%dK: %.2f ", size / 1024, (double)memset_size / t); - } - printf( "avg %.2f\n", (double)total_size / tsum); - } - - size_t total_size = 0; + uint64_t total_size = 0; uint64_t tsum = 0; - printf ("%22s ", "memset_call"); + printf ("%22s ", name); rand32 (0x12345678); for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2) { - size_t memset_size = init_memset (size) * ITERS; + uint64_t memset_size = init_memset (size) * ITERS; for (int c = 0; c < NUM_TESTS; c++) - memset (a + test_arr[c].offset, 0, test_arr[c].len); + set (a + test_arr[c].offset, 0, test_arr[c].len); uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS; i++) for (int c = 0; c < NUM_TESTS; c++) - memset (a + test_arr[c].offset, 0, test_arr[c].len); + set (a + test_arr[c].offset, 0, test_arr[c].len); t = clock_get_ns () - t; total_size += memset_size; tsum += t; - printf ("%dK: %.2f ", size / 1024, (double)memset_size / t); + printf ("%dK: %5.2f ", size / 1024, (double)memset_size / t); } - printf( "avg %.2f\n", (double)total_size / tsum); - + printf( "avg %5.2f\n", (double)total_size / tsum); +} - printf ("\nMedium memset (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - printf ("%22s ", funtab[f].name); - - for (int size = 8; size <= 512; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS2; i++) - funtab[f].fun (a, 0, size); - t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); - } - printf ("\n"); - } +static void inline __attribute ((always_inline)) +memset_medium (const char *name, void *(*set)(void *, int, size_t)) +{ + printf ("%22s ", name); - printf ("%22s ", "memset_call"); for (int size = 8; size <= 512; size *= 2) { uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS2; i++) - memset (a, 0, size); + set (a, 0, size); t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); + printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t); } + printf ("\n"); +} +static void inline __attribute ((always_inline)) +memset_large (const char *name, void *(*set)(void *, int, size_t)) +{ + printf ("%22s ", name); - printf ("\nLarge memset (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - printf ("%22s ", funtab[f].name); - - for (int size = 1024; size <= 65536; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS3; i++) - funtab[f].fun (a, 0, size); - t = clock_get_ns () - t; - printf ("%dK: %.2f ", size 
/ 1024, (double)size * ITERS3 / t); - } - printf ("\n"); - } - - printf ("%22s ", "memset_call"); for (int size = 1024; size <= 65536; size *= 2) { uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS3; i++) - memset (a, 0, size); + set (a, 0, size); t = clock_get_ns () - t; - printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t); + printf ("%dKB: %6.2f ", size / 1024, (double)size * ITERS3 / t); } - printf ("\n\n"); + printf ("\n"); +} + +int main (void) +{ + init_memset_distribution (); + + memset (a, 1, sizeof (a)); + DOTEST ("Random memset (bytes/ns):\n", memset_random); + DOTEST ("Medium memset (bytes/ns):\n", memset_medium); + DOTEST ("Large memset (bytes/ns):\n", memset_large); return 0; } diff --git a/contrib/arm-optimized-routines/string/bench/strlen.c b/contrib/arm-optimized-routines/string/bench/strlen.c index f05d0d5b89e6..a8dd55cf5fc4 100644 --- a/contrib/arm-optimized-routines/string/bench/strlen.c +++ b/contrib/arm-optimized-routines/string/bench/strlen.c @@ -1,221 +1,225 @@ /* * strlen benchmark. * * Copyright (c) 2020-2021, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define _GNU_SOURCE #include #include #include #include #include "stringlib.h" #include "benchlib.h" #define ITERS 5000 -#define ITERS2 20000000 -#define ITERS3 2000000 -#define NUM_TESTS 16384 +#define ITERS2 40000000 +#define ITERS3 4000000 +#define NUM_TESTS 65536 #define MAX_ALIGN 32 -#define MAX_STRLEN 256 +#define MAX_STRLEN 128 static char a[(MAX_STRLEN + 1) * MAX_ALIGN] __attribute__((__aligned__(4096))); -#define F(x, mte) {#x, x, mte}, - -static const struct fun -{ - const char *name; - size_t (*fun) (const char *s); - int test_mte; -} funtab[] = { - // clang-format off - F(strlen, 0) -#if __aarch64__ - F(__strlen_aarch64, 0) - F(__strlen_aarch64_mte, 1) -# if __ARM_FEATURE_SVE - F(__strlen_aarch64_sve, 1) -# endif -#elif __arm__ -# if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2 - F(__strlen_armv6t2, 0) -# endif -#endif - {0, 0, 0} - // clang-format on -}; -#undef F +#define DOTEST(STR,TESTFN) \ + printf (STR); \ + RUN (TESTFN, strlen); \ + RUNA64 (TESTFN, __strlen_aarch64); \ + RUNA64 (TESTFN, __strlen_aarch64_mte); \ + RUNSVE (TESTFN, __strlen_aarch64_sve); \ + RUNT32 (TESTFN, __strlen_armv6t2); \ + printf ("\n"); static uint16_t strlen_tests[NUM_TESTS]; typedef struct { uint16_t size; uint16_t freq; } freq_data_t; typedef struct { uint8_t align; uint16_t freq; } align_data_t; #define SIZE_NUM 65536 #define SIZE_MASK (SIZE_NUM - 1) static uint8_t strlen_len_arr[SIZE_NUM]; /* Frequency data for strlen sizes up to 128 based on SPEC2017. */ static freq_data_t strlen_len_freq[] = { { 12,22671}, { 18,12834}, { 13, 9555}, { 6, 6348}, { 17, 6095}, { 11, 2115}, { 10, 1335}, { 7, 814}, { 2, 646}, { 9, 483}, { 8, 471}, { 16, 418}, { 4, 390}, { 1, 388}, { 5, 233}, { 3, 204}, { 0, 79}, { 14, 79}, { 15, 69}, { 26, 36}, { 22, 35}, { 31, 24}, { 32, 24}, { 19, 21}, { 25, 17}, { 28, 15}, { 21, 14}, { 33, 14}, { 20, 13}, { 24, 9}, { 29, 9}, { 30, 9}, { 23, 7}, { 34, 7}, { 27, 6}, { 44, 5}, { 42, 4}, { 45, 3}, { 47, 3}, { 40, 2}, { 41, 2}, { 43, 2}, { 58, 2}, { 78, 2}, { 36, 2}, { 48, 1}, { 52, 1}, { 60, 1}, { 64, 1}, { 56, 1}, { 76, 1}, { 68, 1}, { 80, 1}, { 84, 1}, { 72, 1}, { 86, 1}, { 35, 1}, { 39, 1}, { 50, 1}, { 38, 1}, { 37, 1}, { 46, 1}, { 98, 1}, {102, 1}, {128, 1}, { 51, 1}, {107, 1}, { 0, 0} }; #define ALIGN_NUM 1024 #define ALIGN_MASK (ALIGN_NUM - 1) static uint8_t strlen_align_arr[ALIGN_NUM]; /* Alignment data for strlen based on SPEC2017. 
*/ static align_data_t string_align_freq[] = { {8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0} }; static void init_strlen_distribution (void) { int i, j, freq, size, n; for (n = i = 0; (freq = strlen_len_freq[i].freq) != 0; i++) for (j = 0, size = strlen_len_freq[i].size; j < freq; j++) strlen_len_arr[n++] = size; assert (n == SIZE_NUM); for (n = i = 0; (freq = string_align_freq[i].freq) != 0; i++) for (j = 0, size = string_align_freq[i].align; j < freq; j++) strlen_align_arr[n++] = size; assert (n == ALIGN_NUM); } static void init_strlen_tests (void) { uint16_t index[MAX_ALIGN]; memset (a, 'x', sizeof (a)); /* Create indices for strings at all alignments. */ for (int i = 0; i < MAX_ALIGN; i++) { index[i] = i * (MAX_STRLEN + 1); a[index[i] + MAX_STRLEN] = 0; } /* Create a random set of strlen input strings using the string length and alignment distributions. */ for (int n = 0; n < NUM_TESTS; n++) { int align = strlen_align_arr[rand32 (0) & ALIGN_MASK]; int exp_len = strlen_len_arr[rand32 (0) & SIZE_MASK]; strlen_tests[n] = index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len; + assert ((strlen_tests[n] & (align - 1)) == 0); + assert (strlen (a + strlen_tests[n]) == exp_len); } } static volatile size_t maskv = 0; -int main (void) +static void inline __attribute ((always_inline)) +strlen_random (const char *name, size_t (*fn)(const char *)) { - rand32 (0x12345678); - init_strlen_distribution (); - init_strlen_tests (); + size_t res = 0, mask = maskv; + uint64_t strlen_size = 0; + printf ("%22s ", name); + + for (int c = 0; c < NUM_TESTS; c++) + strlen_size += fn (a + strlen_tests[c]) + 1; + strlen_size *= ITERS; + + /* Measure throughput of strlen. */ + uint64_t t = clock_get_ns (); + for (int i = 0; i < ITERS; i++) + for (int c = 0; c < NUM_TESTS; c++) + res += fn (a + strlen_tests[c]); + t = clock_get_ns () - t; + printf ("tp: %.3f ", (double)strlen_size / t); + + /* Measure latency of strlen result with (res & mask). */ + t = clock_get_ns (); + for (int i = 0; i < ITERS; i++) + for (int c = 0; c < NUM_TESTS; c++) + res += fn (a + strlen_tests[c] + (res & mask)); + t = clock_get_ns () - t; + printf ("lat: %.3f\n", (double)strlen_size / t); + maskv = res & mask; +} - printf ("\nRandom strlen (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - size_t res = 0, strlen_size = 0, mask = maskv; - printf ("%22s ", funtab[f].name); +static void inline __attribute ((always_inline)) +strlen_small_aligned (const char *name, size_t (*fn)(const char *)) +{ + printf ("%22s ", name); - for (int c = 0; c < NUM_TESTS; c++) - strlen_size += funtab[f].fun (a + strlen_tests[c]); - strlen_size *= ITERS; + size_t res = 0, mask = maskv; + for (int size = 1; size <= 64; size *= 2) + { + memset (a, 'x', size); + a[size - 1] = 0; - /* Measure latency of strlen result with (res & mask). */ uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS; i++) - for (int c = 0; c < NUM_TESTS; c++) - res = funtab[f].fun (a + strlen_tests[c] + (res & mask)); + for (int i = 0; i < ITERS2; i++) + res += fn (a + (i & mask)); t = clock_get_ns () - t; - printf ("%.2f\n", (double)strlen_size / t); + printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024, + size < 1024 ? 
'B' : 'K', (double)size * ITERS2 / t); } + maskv &= res; + printf ("\n"); +} - printf ("\nSmall aligned strlen (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - printf ("%22s ", funtab[f].name); - - for (int size = 1; size <= 64; size *= 2) - { - memset (a, 'x', size); - a[size - 1] = 0; - - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS2; i++) - funtab[f].fun (a); - t = clock_get_ns () - t; - printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024, - size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t); - } - printf ("\n"); - } +static void inline __attribute ((always_inline)) +strlen_small_unaligned (const char *name, size_t (*fn)(const char *)) +{ + printf ("%22s ", name); - printf ("\nSmall unaligned strlen (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) + size_t res = 0, mask = maskv; + int align = 9; + for (int size = 1; size <= 64; size *= 2) { - printf ("%22s ", funtab[f].name); - - int align = 9; - for (int size = 1; size <= 64; size *= 2) - { - memset (a + align, 'x', size); - a[align + size - 1] = 0; - - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS2; i++) - funtab[f].fun (a + align); - t = clock_get_ns () - t; - printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024, - size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t); - } - printf ("\n"); + memset (a + align, 'x', size); + a[align + size - 1] = 0; + + uint64_t t = clock_get_ns (); + for (int i = 0; i < ITERS2; i++) + res += fn (a + align + (i & mask)); + t = clock_get_ns () - t; + printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024, + size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t); } + maskv &= res; + printf ("\n"); +} - printf ("\nMedium strlen (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) +static void inline __attribute ((always_inline)) +strlen_medium (const char *name, size_t (*fn)(const char *)) +{ + printf ("%22s ", name); + + size_t res = 0, mask = maskv; + for (int size = 128; size <= 4096; size *= 2) { - printf ("%22s ", funtab[f].name); - - for (int size = 128; size <= 4096; size *= 2) - { - memset (a, 'x', size); - a[size - 1] = 0; - - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS3; i++) - funtab[f].fun (a); - t = clock_get_ns () - t; - printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024, - size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t); - } - printf ("\n"); - } + memset (a, 'x', size); + a[size - 1] = 0; + uint64_t t = clock_get_ns (); + for (int i = 0; i < ITERS3; i++) + res += fn (a + (i & mask)); + t = clock_get_ns () - t; + printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024, + size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t); + } + maskv &= res; printf ("\n"); +} + +int main (void) +{ + rand32 (0x12345678); + init_strlen_distribution (); + init_strlen_tests (); + + DOTEST ("Random strlen (bytes/ns):\n", strlen_random); + DOTEST ("Small aligned strlen (bytes/ns):\n", strlen_small_aligned); + DOTEST ("Small unaligned strlen (bytes/ns):\n", strlen_small_unaligned); + DOTEST ("Medium strlen (bytes/ns):\n", strlen_medium); return 0; } diff --git a/contrib/arm-optimized-routines/string/include/benchlib.h b/contrib/arm-optimized-routines/string/include/benchlib.h index f1bbea388cd2..486504e99ddf 100644 --- a/contrib/arm-optimized-routines/string/include/benchlib.h +++ b/contrib/arm-optimized-routines/string/include/benchlib.h @@ -1,33 +1,64 @@ /* * Benchmark support functions. * * Copyright (c) 2020, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include /* Fast and accurate timer returning nanoseconds. */ static inline uint64_t clock_get_ns (void) { struct timespec ts; clock_gettime (CLOCK_MONOTONIC, &ts); return ts.tv_sec * (uint64_t) 1000000000 + ts.tv_nsec; } /* Fast 32-bit random number generator. Passing a non-zero seed value resets the internal state. */ static inline uint32_t rand32 (uint32_t seed) { static uint64_t state = 0xb707be451df0bb19ULL; if (seed != 0) state = seed; uint32_t res = state >> 32; state = state * 6364136223846793005ULL + 1; return res; } +/* Macros to run a benchmark BENCH using string function FN. */ +#define RUN(BENCH, FN) BENCH(#FN, FN) +#if __aarch64__ +# define RUNA64(BENCH, FN) BENCH(#FN, FN) +#else +# define RUNA64(BENCH, FN) +#endif + +#if __ARM_FEATURE_SVE +# define RUNSVE(BENCH, FN) BENCH(#FN, FN) +#else +# define RUNSVE(BENCH, FN) +#endif + +#if WANT_MOPS +# define RUNMOPS(BENCH, FN) BENCH(#FN, FN) +#else +# define RUNMOPS(BENCH, FN) +#endif + +#if __arm__ +# define RUNA32(BENCH, FN) BENCH(#FN, FN) +#else +# define RUNA32(BENCH, FN) +#endif + +#if __arm__ && __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2 +# define RUNT32(BENCH, FN) BENCH(#FN, FN) +#else +# define RUNT32(BENCH, FN) +#endif diff --git a/contrib/arm-optimized-routines/string/include/stringlib.h b/contrib/arm-optimized-routines/string/include/stringlib.h index 01da7ebfc18d..bb9db930f132 100644 --- a/contrib/arm-optimized-routines/string/include/stringlib.h +++ b/contrib/arm-optimized-routines/string/include/stringlib.h @@ -1,72 +1,71 @@ /* * Public API. * * Copyright (c) 2019-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include /* restrict is not needed, but kept for documenting the interface contract. 
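 */

/* Side note on the benchmark loops above (a reduced sketch under stated
   assumptions, not code from the patch): the (res & mask) term in the
   strlen latency measurements reads the volatile maskv, which is always
   zero at run time, so the added offset is zero -- but the compiler must
   assume each call depends on the previous result.  In isolation:

     static uint64_t
     measure_latency (size_t (*fn) (const char *), const char *s, int n)
     {
       size_t res = 0, mask = maskv; // mask == 0, but opaque to compiler
       uint64_t t = clock_get_ns ();
       for (int i = 0; i < n; i++)
         res += fn (s + (res & mask)); // serializes successive calls
       t = clock_get_ns () - t;
       maskv = res & mask;             // keep res live after the loop
       return t;
     }

   Dropping the (res & mask) term gives the throughput variant, where
   successive calls can overlap in the pipeline.  */

/* Fallback definition: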
*/ #ifndef __restrict # define __restrict #endif #if __aarch64__ void *__memcpy_aarch64 (void *__restrict, const void *__restrict, size_t); void *__memmove_aarch64 (void *, const void *, size_t); void *__memset_aarch64 (void *, int, size_t); void *__memchr_aarch64 (const void *, int, size_t); void *__memrchr_aarch64 (const void *, int, size_t); int __memcmp_aarch64 (const void *, const void *, size_t); char *__strcpy_aarch64 (char *__restrict, const char *__restrict); char *__stpcpy_aarch64 (char *__restrict, const char *__restrict); int __strcmp_aarch64 (const char *, const char *); char *__strchr_aarch64 (const char *, int); char *__strrchr_aarch64 (const char *, int); char *__strchrnul_aarch64 (const char *, int ); size_t __strlen_aarch64 (const char *); size_t __strnlen_aarch64 (const char *, size_t); int __strncmp_aarch64 (const char *, const char *, size_t); void * __memchr_aarch64_mte (const void *, int, size_t); char *__strchr_aarch64_mte (const char *, int); char * __strchrnul_aarch64_mte (const char *, int ); size_t __strlen_aarch64_mte (const char *); char *__strrchr_aarch64_mte (const char *, int); -#if __ARM_NEON void *__memcpy_aarch64_simd (void *__restrict, const void *__restrict, size_t); void *__memmove_aarch64_simd (void *, const void *, size_t); -#endif # if __ARM_FEATURE_SVE void *__memcpy_aarch64_sve (void *__restrict, const void *__restrict, size_t); void *__memmove_aarch64_sve (void *__restrict, const void *__restrict, size_t); +void *__memset_aarch64_sve (void *, int, size_t); void *__memchr_aarch64_sve (const void *, int, size_t); int __memcmp_aarch64_sve (const void *, const void *, size_t); char *__strchr_aarch64_sve (const char *, int); char *__strrchr_aarch64_sve (const char *, int); char *__strchrnul_aarch64_sve (const char *, int ); int __strcmp_aarch64_sve (const char *, const char *); char *__strcpy_aarch64_sve (char *__restrict, const char *__restrict); char *__stpcpy_aarch64_sve (char *__restrict, const char *__restrict); size_t __strlen_aarch64_sve (const char *); size_t __strnlen_aarch64_sve (const char *, size_t); int __strncmp_aarch64_sve (const char *, const char *, size_t); # endif # if WANT_MOPS void *__memcpy_aarch64_mops (void *__restrict, const void *__restrict, size_t); void *__memmove_aarch64_mops (void *__restrict, const void *__restrict, size_t); void *__memset_aarch64_mops (void *, int, size_t); # endif # if __ARM_FEATURE_MEMORY_TAGGING void *__mtag_tag_region (void *, size_t); void *__mtag_tag_zero_region (void *, size_t); # endif #elif __arm__ void *__memcpy_arm (void *__restrict, const void *__restrict, size_t); void *__memset_arm (void *, int, size_t); void *__memchr_arm (const void *, int, size_t); char *__strcpy_arm (char *__restrict, const char *__restrict); int __strcmp_arm (const char *, const char *); int __strcmp_armv6m (const char *, const char *); size_t __strlen_armv6t2 (const char *); #endif diff --git a/contrib/arm-optimized-routines/string/test/memcpy.c b/contrib/arm-optimized-routines/string/test/memcpy.c index dc95844bd45a..98255e06f31c 100644 --- a/contrib/arm-optimized-routines/string/test/memcpy.c +++ b/contrib/arm-optimized-routines/string/test/memcpy.c @@ -1,126 +1,124 @@ /* * memcpy test. * * Copyright (c) 2019-2023, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include #include #include "mte.h" #include "stringlib.h" #include "stringtest.h" #define F(x, mte) {#x, x, mte}, static const struct fun { const char *name; void *(*fun) (void *, const void *, size_t); int test_mte; } funtab[] = { // clang-format off F(memcpy, 0) #if __aarch64__ F(__memcpy_aarch64, 1) -# if __ARM_NEON F(__memcpy_aarch64_simd, 1) -# endif # if __ARM_FEATURE_SVE F(__memcpy_aarch64_sve, 1) # endif # if WANT_MOPS F(__memcpy_aarch64_mops, 1) # endif #elif __arm__ F(__memcpy_arm, 0) #endif {0, 0, 0} // clang-format on }; #undef F #define A 32 #define LEN 250000 static unsigned char *dbuf; static unsigned char *sbuf; static unsigned char wbuf[LEN + 2 * A]; static void * alignup (void *p) { return (void *) (((uintptr_t) p + A - 1) & -A); } static void test (const struct fun *fun, int dalign, int salign, int len) { unsigned char *src = alignup (sbuf); unsigned char *dst = alignup (dbuf); unsigned char *want = wbuf; unsigned char *s = src + salign; unsigned char *d = dst + dalign; unsigned char *w = want + dalign; void *p; int i; if (err_count >= ERR_LIMIT) return; if (len > LEN || dalign >= A || salign >= A) abort (); for (i = 0; i < len + A; i++) { src[i] = '?'; want[i] = dst[i] = '*'; } for (i = 0; i < len; i++) s[i] = w[i] = 'a' + i % 23; s = tag_buffer (s, len, fun->test_mte); d = tag_buffer (d, len, fun->test_mte); p = fun->fun (d, s, len); untag_buffer (s, len, fun->test_mte); untag_buffer (d, len, fun->test_mte); if (p != d) ERR ("%s(%p,..) returned %p\n", fun->name, d, p); for (i = 0; i < len + A; i++) { if (dst[i] != want[i]) { ERR ("%s(align %d, align %d, %d) failed\n", fun->name, dalign, salign, len); quoteat ("got", dst, len + A, i); quoteat ("want", want, len + A, i); break; } } } int main () { dbuf = mte_mmap (LEN + 2 * A); sbuf = mte_mmap (LEN + 2 * A); int r = 0; for (int i = 0; funtab[i].name; i++) { err_count = 0; for (int d = 0; d < A; d++) for (int s = 0; s < A; s++) { int n; for (n = 0; n < 100; n++) test (funtab + i, d, s, n); for (; n < LEN; n *= 2) test (funtab + i, d, s, n); } char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS"; printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name); if (err_count) r = -1; } return r; } diff --git a/contrib/arm-optimized-routines/string/test/memmove.c b/contrib/arm-optimized-routines/string/test/memmove.c index b85dd1e864ef..ff3f7652f763 100644 --- a/contrib/arm-optimized-routines/string/test/memmove.c +++ b/contrib/arm-optimized-routines/string/test/memmove.c @@ -1,170 +1,168 @@ /* * memmove test. * * Copyright (c) 2019-2023, Arm Limited. 
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include #include #include "mte.h" #include "stringlib.h" #include "stringtest.h" #define F(x, mte) {#x, x, mte}, static const struct fun { const char *name; void *(*fun) (void *, const void *, size_t); int test_mte; } funtab[] = { // clang-format off F(memmove, 0) #if __aarch64__ F(__memmove_aarch64, 1) -# if __ARM_NEON F(__memmove_aarch64_simd, 1) -# endif # if __ARM_FEATURE_SVE F(__memmove_aarch64_sve, 1) # endif # if WANT_MOPS F(__memmove_aarch64_mops, 1) # endif #endif {0, 0, 0} // clang-format on }; #undef F #define A 32 #define LEN 250000 static unsigned char *dbuf; static unsigned char *sbuf; static unsigned char wbuf[LEN + 2 * A]; static void * alignup (void *p) { return (void *) (((uintptr_t) p + A - 1) & -A); } static void test (const struct fun *fun, int dalign, int salign, int len) { unsigned char *src = alignup (sbuf); unsigned char *dst = alignup (dbuf); unsigned char *want = wbuf; unsigned char *s = src + salign; unsigned char *d = dst + dalign; unsigned char *w = want + dalign; void *p; int i; if (err_count >= ERR_LIMIT) return; if (len > LEN || dalign >= A || salign >= A) abort (); for (i = 0; i < len + A; i++) { src[i] = '?'; want[i] = dst[i] = '*'; } for (i = 0; i < len; i++) s[i] = w[i] = 'a' + i % 23; p = fun->fun (d, s, len); if (p != d) ERR ("%s(%p,..) returned %p\n", fun->name, d, p); for (i = 0; i < len + A; i++) { if (dst[i] != want[i]) { ERR ("%s(align %d, align %d, %d) failed\n", fun->name, dalign, salign, len); quoteat ("got", dst, len + A, i); quoteat ("want", want, len + A, i); break; } } } static void test_overlap (const struct fun *fun, int dalign, int salign, int len) { unsigned char *src = alignup (sbuf); unsigned char *dst = src; unsigned char *want = wbuf; unsigned char *s = src + salign; unsigned char *d = dst + dalign; unsigned char *w = wbuf + dalign; void *p; if (err_count >= ERR_LIMIT) return; if (len > LEN || dalign >= A || salign >= A) abort (); for (int i = 0; i < len + A; i++) src[i] = want[i] = '?'; for (int i = 0; i < len; i++) s[i] = want[salign + i] = 'a' + i % 23; for (int i = 0; i < len; i++) w[i] = s[i]; s = tag_buffer (s, len, fun->test_mte); d = tag_buffer (d, len, fun->test_mte); p = fun->fun (d, s, len); untag_buffer (s, len, fun->test_mte); untag_buffer (d, len, fun->test_mte); if (p != d) ERR ("%s(%p,..) returned %p\n", fun->name, d, p); for (int i = 0; i < len + A; i++) { if (dst[i] != want[i]) { ERR ("%s(align %d, align %d, %d) failed\n", fun->name, dalign, salign, len); quoteat ("got", dst, len + A, i); quoteat ("want", want, len + A, i); break; } } } int main () { dbuf = mte_mmap (LEN + 2 * A); sbuf = mte_mmap (LEN + 2 * A); int r = 0; for (int i = 0; funtab[i].name; i++) { err_count = 0; for (int d = 0; d < A; d++) for (int s = 0; s < A; s++) { int n; for (n = 0; n < 100; n++) { test (funtab + i, d, s, n); test_overlap (funtab + i, d, s, n); } for (; n < LEN; n *= 2) { test (funtab + i, d, s, n); test_overlap (funtab + i, d, s, n); } } char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS"; printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name); if (err_count) r = -1; } return r; } diff --git a/contrib/arm-optimized-routines/string/test/memset.c b/contrib/arm-optimized-routines/string/test/memset.c index 7d09c267ffec..a9639f9b28b0 100644 --- a/contrib/arm-optimized-routines/string/test/memset.c +++ b/contrib/arm-optimized-routines/string/test/memset.c @@ -1,132 +1,135 @@ /* * memset test. 
* * Copyright (c) 2019-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include #include #include #include #include "mte.h" #include "stringlib.h" #include "stringtest.h" #define F(x, mte) {#x, x, mte}, static const struct fun { const char *name; void *(*fun) (void *s, int c, size_t n); int test_mte; } funtab[] = { // clang-format off F(memset, 0) #if __aarch64__ F(__memset_aarch64, 1) +# if __ARM_FEATURE_SVE + F(__memset_aarch64_sve, 1) +# endif # if WANT_MOPS F(__memset_aarch64_mops, 1) # endif #elif __arm__ F(__memset_arm, 0) #endif {0, 0, 0} // clang-format on }; #undef F #define A 32 #define LEN 250000 static unsigned char *sbuf; static void * alignup (void *p) { return (void *) (((uintptr_t) p + A - 1) & -A); } static void test (const struct fun *fun, int salign, int c, int len) { unsigned char *src = alignup (sbuf); unsigned char *s = src + salign; void *p; int i; if (err_count >= ERR_LIMIT) return; if (len > LEN || salign >= A) abort (); for (i = 0; i < len + A; i++) src[i] = '?'; for (i = 0; i < len; i++) s[i] = 'a' + i % 23; s = tag_buffer (s, len, fun->test_mte); p = fun->fun (s, c, len); untag_buffer (s, len, fun->test_mte); if (p != s) ERR ("%s(%p,..) returned %p\n", fun->name, s, p); for (i = 0; i < salign; i++) { if (src[i] != '?') { ERR ("%s(align %d, %d, %d) failed\n", fun->name, salign, c, len); quoteat ("got", src, len + A, i); return; } } for (; i < salign + len; i++) { if (src[i] != (unsigned char) c) { ERR ("%s(align %d, %d, %d) failed\n", fun->name, salign, c, len); quoteat ("got", src, len + A, i); return; } } for (; i < len + A; i++) { if (src[i] != '?') { ERR ("%s(align %d, %d, %d) failed\n", fun->name, salign, c, len); quoteat ("got", src, len + A, i); return; } } } int main () { sbuf = mte_mmap (LEN + 2 * A); int r = 0; for (int i = 0; funtab[i].name; i++) { err_count = 0; for (int s = 0; s < A; s++) { int n; for (n = 0; n < 100; n++) { test (funtab + i, s, 0, n); test (funtab + i, s, 0x25, n); test (funtab + i, s, 0xaa25, n); } for (; n < LEN; n *= 2) { test (funtab + i, s, 0, n); test (funtab + i, s, 0x25, n); test (funtab + i, s, 0xaa25, n); } } char *pass = funtab[i].test_mte && mte_enabled () ? "MTE PASS" : "PASS"; printf ("%s %s\n", err_count ? "FAIL" : pass, funtab[i].name); if (err_count) r = -1; } return r; }
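/* Closing note on the MTE hooks shared by the string tests above
   (tag_buffer, untag_buffer, mte_enabled and mte_mmap come from mte.h in
   this tree).  A reduced sketch of the pattern, with fn standing in for
   the routine under test (hypothetical name):

     s = tag_buffer (s, len, test_mte);   // retag: stray accesses trap
     d = tag_buffer (d, len, test_mte);
     p = fn (d, s, len);
     untag_buffer (s, len, test_mte);
     untag_buffer (d, len, test_mte);

   When test_mte is zero or MTE is unavailable, the buffers are used
   untagged and the test still runs, which is why each funtab entry
   carries a test_mte flag and reports "MTE PASS" only when tagging was
   actually in effect.  */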