Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F142738734
D45621.id139953.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
3 KB
Referenced Files
None
Subscribers
None
D45621.id139953.diff
View Options
diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -5,7 +5,6 @@
AARCH64_STRING_FUNCS= \
memchr \
- memcmp \
memcpy \
memmove \
memrchr \
@@ -20,6 +19,10 @@
strnlen \
strrchr
+
+MDSRCS+= \
+ memcmp.S
+
#
# Add the above functions. Generate an asm file that includes the needed
# Arm Optimized Routines file defining the function name to the libc name.
diff --git a/lib/libc/aarch64/string/memcmp.S b/lib/libc/aarch64/string/memcmp.S
new file mode 100644
--- /dev/null
+++ b/lib/libc/aarch64/string/memcmp.S
@@ -0,0 +1,144 @@
+/*-
+ * Copyright (c) 2024 Getz Mikalsen
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+*/
+
+#include <machine/asm.h>
+#include <machine/param.h>
+
+ .text
+
+ENTRY(memcmp)
+ /*
+ * int memcmp(const void *b1, const void *b2, size_t len)
+ * In: x0 = b1, x1 = b2, x2 = len. Out: w0 = 0 if equal, otherwise the
+ * difference between the first pair of mismatching bytes.
+ */
+ mov x8,x0 // store base address for later
+ mov x9,x1
+ cbz x2,.Lnone // 0 length
+
+ /*
+ * The fast path below always loads 32 bytes from each buffer. For a
+ * shorter buffer the excess bytes are ignored, but the load must not
+ * reach into the following page, which may be unmapped. If either
+ * 32-byte window would do so, compare byte by byte instead.
+ */
+ cmp x2,#32
+ b.hs .Lbegin // len >= 32: the whole load is in bounds
+ add x3,x8,#32
+ add x4,x9,#32
+ eor x3,x3,x8 // PAGE_SIZE bit set iff the add carried into it
+ eor x4,x4,x9
+ orr x3,x3,x4 // merge the result for both buffers
+ tst x3,#PAGE_SIZE
+ b.ne .Lbytes
+
+ /* Compare the first 32 bytes using two pairs of vector registers. */
+.Lbegin:
+ ldp q0,q1,[x8] // load 32 bytes into vector registers
+ ldp q2,q3,[x9]
+
+ /* quick check for any mismatch in the first 32 bytes */
+ eor v4.16b,v0.16b,v2.16b // v4 = b1(0-15) XOR b2(0-15)
+ eor v5.16b,v1.16b,v3.16b // v5 = b1(16-31) XOR b2(16-31)
+ umaxp v4.16b,v4.16b,v5.16b
+ umaxp v4.16b,v4.16b,v4.16b // d4 != 0 iff any of the 32 bytes differ
+ fmov x6,d4
+ cbz x6,.Lloop // if d4 is 0 then all 32 bytes matched
+
+ cmeq v0.16b,v0.16b,v2.16b // do compare between 0-15 b1 vs b2
+ shrn v0.8b,v0.8h,#4 // narrow to 4 mask bits per byte to fit in x1
+ cmeq v1.16b,v1.16b,v3.16b // do compare between 16-31 b1 vs b2
+ shrn v1.8b,v1.8h,#4
+ fmov x1,d0
+ fmov x3,d1
+ mvn x0,x1 // invert so that set bits mark mismatches
+ cbz x0,0f // bytes 0..15 all equal, look at 16..31
+ rbit x0,x0
+ clz x0,x0 // x0 = 4 * index of first mismatch
+ b 1f
+
+0:
+ rbit x1,x3
+ mvn x1,x1
+ clz x0,x1
+ add x0,x0,#64 // bytes 16..31: +16 after the shift below
+1:
+ lsr x0,x0,#2 // 4 mask bits per byte -> byte index
+ cmp x0,x2
+ b.hs .Lnone // first mismatch lies at or past len
+ ldrb w4,[x8,x0]
+ ldrb w5,[x9,x0]
+ subs w0,w4,w5 // get the byte difference
+ ret
+
+ .p2align 4
+.Lloop:
+ subs x2,x2,#32 // x2 = bytes left after the chunk just compared
+ b.ls .Lnone // unsigned: nothing left, buffers are equal
+ cmp x2,#32
+ b.ls .Llast32
+ ldp q0,q1,[x8,32]! // load 32 bytes to vector registers
+ ldp q2,q3,[x9,32]!
+
+ eor v4.16b,v0.16b,v2.16b
+ eor v5.16b,v1.16b,v3.16b
+ umaxp v4.16b,v4.16b,v5.16b
+ umaxp v4.16b,v4.16b,v4.16b
+ fmov x6,d4
+ cbz x6,.Lloop // chunk is equal, keep going
+ b .Lmatch
+
+ /* 1..32 bytes left: step back so that the final 32-byte load ends
+ * exactly at the end of the buffers and never reads past it */
+.Llast32:
+ mov x3,#32
+ sub x3,x3,x2 // x3 = 32 - x2
+ add x2,x2,x3 // add the amount we shifted to limit
+ sub x8,x8,x3
+ sub x9,x9,x3
+ ldp q0,q1,[x8,#32]! // load 32 bytes to vector registers
+ ldp q2,q3,[x9,#32]!
+
+.Lmatch:
+ cmeq v0.16b,v0.16b,v2.16b // compare between 0-15 b1 vs b2
+ cmeq v1.16b,v1.16b,v3.16b // compare between 16-31 b1 vs b2
+ shrn v0.8b,v0.8h,#4
+ fmov x1,d0
+ mvn x0,x1 // invert so that set bits mark mismatches
+ cbz x0,0f // bytes 0..15 all equal, look at 16..31
+ rbit x0,x0
+ clz x0,x0 // x0 = 4 * index of first mismatch
+ b 1f
+
+0:
+ shrn v1.8b,v1.8h,#4
+ fmov x3,d1
+ rbit x1,x3
+ mvn x1,x1
+ clz x0,x1 // 64 if bytes 16..31 are equal as well
+ add x0,x0,#64 // due to shift on next line
+1:
+ lsr x0,x0,#2
+ cmp x0,x2 // offending byte is past limit
+ b.hs .Lnone
+ ldrb w4,[x8,x0]
+ ldrb w5,[x9,x0]
+ subs w0,w4,w5
+ ret
+
+ /* Short buffer near the end of a page: never read past len bytes. */
+.Lbytes:
+ ldrb w4,[x8],#1
+ ldrb w5,[x9],#1
+ subs w0,w4,w5 // w0 = difference of the current bytes
+ b.ne .Lret
+ subs x2,x2,#1
+ b.ne .Lbytes // fall through to .Lnone once all bytes matched
+.Lnone:
+ mov x0,#0
+.Lret:
+ ret
+END(memcmp)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Jan 23, 10:45 PM (16 h, 27 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27890431
Default Alt Text
D45621.id139953.diff (3 KB)
Attached To
Mode
D45621: lib/libc/aarch64/string: add memcmp SIMD implementation
Attached
Detach File
Event Timeline
Log In to Comment