Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F159726719
D46139.id141397.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
5 KB
Referenced Files
None
Subscribers
None
D46139.id141397.diff
View Options
diff --git a/lib/libc/riscv/string/Makefile.inc b/lib/libc/riscv/string/Makefile.inc
new file mode 100644
--- /dev/null
+++ b/lib/libc/riscv/string/Makefile.inc
@@ -0,0 +1,2 @@
+MDSRCS+= \
+ memcpy.S
diff --git a/lib/libc/riscv/string/memcpy.S b/lib/libc/riscv/string/memcpy.S
new file mode 100644
--- /dev/null
+++ b/lib/libc/riscv/string/memcpy.S
@@ -0,0 +1,269 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
+ */
+
+#include <machine/asm.h>
+
+/*
+ * a0 - void* dst
+ * a1 - const void* src
+ * a2 - size_t len
+ */
+ENTRY(memcpy)
+ beqz a2, .Lreturn
+
+ /* diff = (dstv - srcv) & 0b111 */
+ sub t0, a0, a1
+ andi t0, t0, 0b111
+
+ beqz t0, .Lmemcpy8
+
+ /* we never change a0, because memcpy returns the original dst */
+ mv a3, a0
+
+ /* t0 = MIN((-dst) & 0b111, len); */
+ neg t0, a0
+ andi t0, t0, 0b111
+ sub t0, t0, a2
+ srai t1, t0, 63
+ and t0, t0, t1
+ add t0, a2, t0
+
+ sub a2, a2, t0
+
+ la t1, .Lduff_start
+ slli t2, t0, 3
+ sub t1, t1, t2
+ jr t1
+ lb t2, 6(a1)
+ sb t2, 6(a3)
+ lb t2, 5(a1)
+ sb t2, 5(a3)
+ lb t2, 4(a1)
+ sb t2, 4(a3)
+ lb t2, 3(a1)
+ sb t2, 3(a3)
+ lb t2, 2(a1)
+ sb t2, 2(a3)
+ lb t2, 1(a1)
+ sb t2, 1(a3)
+ lb t2, 0(a1)
+ sb t2, 0(a3)
+.Lduff_start:
+
+ add a1, a1, t0
+ add a3, a3, t0
+
+ beqz a2, .Lreturn
+
+ /*
+ * a4 - size_t right_shift
+ * a5 - size_t left_shift
+ * a6 - size_t whole (number of dword stores)
+ */
+
+ /* right_shift = (src % 0b111) * 8; */
+ andi a4, a1, 0b111
+ slli a4, a4, 3
+
+ /* left_shift = 64 - right_shift */
+ neg a5, a4
+
+ /* whole = len / 8 */
+ srli a6, a2, 3
+
+ /* len = len % 8 */
+ andi a2, a2, 0b111
+
+ /* t0 - uint64_t* ptr */
+
+ /* ptr = src & ~0b111 */
+ andi t0, a1, ~0b111
+
+ /* src += whole * 8 */
+ slli t1, a6, 3
+ add a1, a1, t1
+
+ /*
+ * t1 - uint64_t low
+ * t2 - uint64_t high
+ */
+
+ /* low = *ptr++ */
+ ld t1, (t0)
+ addi t0, t0, 8
+
+ /* low >>= right_shift */
+ srl t1, t1, a4
+
+ beqz a6, .Llmain_skip
+.Llmain:
+ /* high = *ptr++ */
+ ld t2, (t0)
+ addi t0, t0, 8
+
+ /* whole-- */
+ addi a6, a6, -1
+
+ /* temp = (high << left_shift) | low */
+ sll t3, t2, a5
+ or t3, t3, t1
+
+ /* low = high >> right_shift */
+ srl t1, t2, a4
+
+ /* *dst++ = temp */
+ sd t3, (a3)
+ addi a3, a3, 8
+
+ bnez a6, .Llmain
+
+.Llmain_skip:
+
+ la t1, .Lduff_end
+ slli t2, a2, 3
+ sub t1, t1, t2
+ jr t1
+ lb t2, 6(a1)
+ sb t2, 6(a3)
+ lb t2, 5(a1)
+ sb t2, 5(a3)
+ lb t2, 4(a1)
+ sb t2, 4(a3)
+ lb t2, 3(a1)
+ sb t2, 3(a3)
+ lb t2, 2(a1)
+ sb t2, 2(a3)
+ lb t2, 1(a1)
+ sb t2, 1(a3)
+ lb t2, 0(a1)
+ sb t2, 0(a3)
+.Lduff_end:
+
+.Llend_skip:
+
+.Lreturn:
+ ret
+
+/* exectued when dst - src is multiple of 8
+ * a0 - void* dst
+ * a1 - const void* src
+ * a2 - size_t len
+ */
+.Lmemcpy8:
+ mv a3, a0
+
+ /* t0 = MIN((-dst) & 0b111, len); */
+ neg t0, a0
+ andi t0, t0, 0b111
+ sub t0, t0, a2
+ srai t1, t0, 63
+ and t0, t0, t1
+ add t0, a2, t0
+
+ sub a2, a2, t0
+
+ la t1, .Lduff8_start
+ slli t2, t0, 3
+ sub t1, t1, t2
+ jr t1
+ lb t2, 6(a1)
+ sb t2, 6(a3)
+ lb t2, 5(a1)
+ sb t2, 5(a3)
+ lb t2, 4(a1)
+ sb t2, 4(a3)
+ lb t2, 3(a1)
+ sb t2, 3(a3)
+ lb t2, 2(a1)
+ sb t2, 2(a3)
+ lb t2, 1(a1)
+ sb t2, 1(a3)
+ lb t2, 0(a1)
+ sb t2, 0(a3)
+.Lduff8_start:
+
+ add a1, a1, t0
+ add a3, a3, t0
+
+ beqz a2, .Lreturn8
+
+ slti t0, a2, 128
+ bnez t0, .Llmain8_64_skip
+
+ /* a4 - uint64_t* end_unroll */
+
+ /* end_unroll = dst + len / 64 * 64 */
+ andi t0, a2, ~0b111111
+ add a4, a3, t0
+
+ /* len = len % 64 */
+ andi a2, a2, 0b111111
+
+.Llmain8_64:
+ ld t0, 0(a1)
+ ld t1, 8(a1)
+ ld t2, 16(a1)
+ ld t3, 24(a1)
+ sd t0, 0(a3)
+ sd t1, 8(a3)
+ sd t2, 16(a3)
+ sd t3, 24(a3)
+ ld t0, 32(a1)
+ ld t1, 40(a1)
+ ld t2, 48(a1)
+ ld t3, 56(a1)
+ sd t0, 32(a3)
+ sd t1, 40(a3)
+ sd t2, 48(a3)
+ sd t3, 56(a3)
+ addi a3, a3, 64
+ addi a1, a1, 64
+ bne a3, a4, .Llmain8_64
+.Llmain8_64_skip:
+
+ beqz a2, .Lreturn8
+
+ /* a4 - uint64_t* end_align */
+
+ /* end_align = (dst + len) & ~0b111 */
+ add a4, a3, a2
+ andi a4, a4, ~0b111
+
+ /* len = len % 8 */
+ andi a2, a2, 0b111
+
+ beq a3, a4, .Llmain8_skip
+.Llmain8:
+ ld t0, (a1)
+ sd t0, (a3)
+ addi a3, a3, 8
+ addi a1, a1, 8
+ bne a3, a4, .Llmain8
+.Llmain8_skip:
+
+ la t1, .Lduff8_end
+ slli t2, a2, 3
+ sub t1, t1, t2
+ jr t1
+ lb t2, 6(a1)
+ sb t2, 6(a3)
+ lb t2, 5(a1)
+ sb t2, 5(a3)
+ lb t2, 4(a1)
+ sb t2, 4(a3)
+ lb t2, 3(a1)
+ sb t2, 3(a3)
+ lb t2, 2(a1)
+ sb t2, 2(a3)
+ lb t2, 1(a1)
+ sb t2, 1(a3)
+ lb t2, 0(a1)
+ sb t2, 0(a3)
+.Lduff8_end:
+
+.Lreturn8:
+ ret
+END(memcpy)
diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7
--- a/share/man/man7/simd.7
+++ b/share/man/man7/simd.7
@@ -50,7 +50,7 @@
.Pp
Enhanced functions are present for the following architectures:
.Bl -column FUNCTION_________ aarch64_ arm_ amd64_ i386_ ppc64_ -offset indent
-.It Em FUNCTION Ta Em AARCH64 Ta Em ARM Ta Em AMD64 Ta Em I386 Ta Em PPC64
+.It Em FUNCTION Ta Em AARCH64 Ta Em ARM Ta Em AMD64 Ta Em I386 Ta Em PPC64 Ta Em RISC-V
.It bcmp Ta Ta Ta S1 Ta S
.It bcopy Ta Ta S Ta S Ta S Ta SV
.It bzero Ta Ta S Ta S Ta S
@@ -61,7 +61,7 @@
.It memchr Ta A Ta Ta S1
.It memcmp Ta A Ta S Ta S1 Ta S
.It memccpy Ta Ta Ta S1
-.It memcpy Ta S Ta S Ta S Ta S Ta SV
+.It memcpy Ta S Ta S Ta S Ta S Ta SV Ta S
.It memmove Ta S Ta S Ta S Ta S Ta SV
.It memrchr Ta A Ta Ta S1
.It memset Ta A Ta S Ta S Ta S
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jun 18, 1:47 PM (5 h, 40 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
34051833
Default Alt Text
D46139.id141397.diff (5 KB)
Attached To
Mode
D46139: libc: scalar memcpy() in RISC-V assembly
Attached
Detach File
Event Timeline
Log In to Comment