Page MenuHomeFreeBSD

D45621.id139953.diff
No OneTemporary

D45621.id139953.diff

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -5,7 +5,6 @@
AARCH64_STRING_FUNCS= \
memchr \
- memcmp \
memcpy \
memmove \
memrchr \
@@ -20,6 +19,10 @@
strnlen \
strrchr
+
+MDSRCS+= \
+ memcmp.S
+
#
# Add the above functions. Generate an asm file that includes the needed
# Arm Optimized Routines file defining the function name to the libc name.
diff --git a/lib/libc/aarch64/string/memcmp.S b/lib/libc/aarch64/string/memcmp.S
new file mode 100644
--- /dev/null
+++ b/lib/libc/aarch64/string/memcmp.S
@@ -0,0 +1,147 @@
+/*-
+ * Copyright (c) 2024 Getz Mikalsen
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+*/
+
+#include <machine/asm.h>
+#include <machine/param.h>
+
+	.text
+
+/*
+ * int memcmp(const void *b1, const void *b2, size_t len)
+ *
+ * In:   x0 = b1, x1 = b2, x2 = len
+ * Out:  w0 = 0 if the first len bytes are equal, otherwise b1[i] - b2[i]
+ *       (bytes taken as unsigned) at the first differing index i
+ * Uses: x0-x6, x8, x9, v0-v5 and the flags; no stack
+ *
+ * 32 bytes are compared per step.  The final partial step re-reads the
+ * last 32 bytes of both buffers, so once len >= 32 no load goes past the
+ * end of either buffer.
+ */
+ENTRY(memcmp)
+	mov	x8,x0			// x8 = cursor into b1
+	mov	x9,x1			// x9 = cursor into b2
+	cbz	x2,.Lnone		// len == 0 compares equal
+
+	/*
+	 * The head below always loads 32 bytes from each buffer.  For
+	 * len < 32 that is only allowed if neither load crosses into the
+	 * following page, which could be unmapped.  Otherwise compare
+	 * byte by byte.
+	 */
+	cmp	x2,#32
+	b.hs	.Lbegin
+	add	x3,x8,#31		// last byte a 32-byte load would touch
+	add	x4,x9,#31
+	eor	x3,x3,x8		// PAGE_SIZE bit set iff the page changes
+	eor	x4,x4,x9
+	orr	w3,w3,w4
+	tst	w3,#PAGE_SIZE
+	b.ne	.Lbytewise
+
+	/*
+	 * Compare the first 32 bytes.  For len < 32 the excess bytes are
+	 * discarded by the limit check at the end of .Lmatch.
+	 */
+.Lbegin:
+	ldp	q0,q1,[x8]		// load 32 bytes into vector registers
+	ldp	q2,q3,[x9]
+
+	/* quick check: does any of these 32 bytes differ? */
+	eor	v4.16b,v0.16b,v2.16b	// v4 = b1(0-15) XOR b2(0-15)
+	eor	v5.16b,v1.16b,v3.16b	// v5 = b1(16-31) XOR b2(16-31)
+	umaxp	v4.16b,v4.16b,v5.16b
+	umaxp	v4.16b,v4.16b,v4.16b	// fold all 32 bytes into d4
+	fmov	x6,d4
+	cbz	x6,.Lloop		// d4 == 0: all 32 bytes matched
+
+	/*
+	 * Locate the first differing byte among the 32 bytes held in v0-v3.
+	 * x8/x9 point at those bytes and x2 is the number of valid bytes
+	 * starting there (it may exceed 32).  CMEQ + SHRN #4 leaves four
+	 * mask bits per byte, set where the bytes are equal.
+	 */
+.Lmatch:
+	cmeq	v0.16b,v0.16b,v2.16b	// compare between 0-15 b1 vs b2
+	cmeq	v1.16b,v1.16b,v3.16b	// compare between 16-31 b1 vs b2
+
+	shrn	v0.8b,v0.8h,#4
+	fmov	x1,d0
+	mvn	x0,x1			// set bits now mark mismatches
+	cbz	x0,0f			// none in bytes 0-15, look at 16-31
+	rbit	x0,x0
+	clz	x0,x0			// 4 * index of the first mismatch
+	b	1f
+
+0:
+	shrn	v1.8b,v1.8h,#4
+	fmov	x3,d1
+	rbit	x1,x3
+	mvn	x1,x1
+	clz	x0,x1			// 64 if bytes 16-31 all match
+	add	x0,x0,#64		// bias by 16 bytes (before the shift)
+1:
+	lsr	x0,x0,#2		// x0 = byte index, 32 if no mismatch
+	cmp	x0,x2
+	b.hs	.Lnone			// mismatch is at or past the limit
+	ldrb	w4,[x8,x0]
+	ldrb	w5,[x9,x0]
+	sub	w0,w4,w5		// difference of the two bytes
+	ret
+
+	/*
+	 * Main loop.  On entry the 32 bytes at x8/x9 compared equal and x2
+	 * counts the bytes from x8/x9 to the end of the buffers.
+	 */
+	.p2align 4
+.Lloop:
+	subs	x2,x2,#32		// x2 = bytes still to compare
+	b.ls	.Lnone			// none left: buffers are equal
+	cmp	x2,#32
+	b.ls	.Llast32
+	ldp	q0,q1,[x8,#32]!		// advance and load the next 32 bytes
+	ldp	q2,q3,[x9,#32]!
+
+	eor	v4.16b,v0.16b,v2.16b
+	eor	v5.16b,v1.16b,v3.16b
+	umaxp	v4.16b,v4.16b,v5.16b
+	umaxp	v4.16b,v4.16b,v4.16b
+	fmov	x6,d4
+	cbz	x6,.Lloop
+	b	.Lmatch
+
+	/*
+	 * 1--32 bytes left.  Load the final 32 bytes of both buffers; the
+	 * window overlaps bytes already known to be equal, so nothing past
+	 * the end is read.
+	 */
+.Llast32:
+	add	x8,x8,x2		// x8 = end of b1 - 32
+	add	x9,x9,x2		// x9 = end of b2 - 32
+	mov	x2,#32			// the whole window is within the limit
+	ldp	q0,q1,[x8]
+	ldp	q2,q3,[x9]
+	b	.Lmatch
+
+.Lnone:
+	mov	x0,#0
+	ret
+
+	/*
+	 * len < 32 and a 32-byte load would cross a page boundary:
+	 * compare one byte at a time.  x2 >= 1 on entry.
+	 */
+.Lbytewise:
+	ldrb	w4,[x8],#1
+	ldrb	w5,[x9],#1
+	subs	w0,w4,w5		// w0 stays 0 while the bytes match
+	b.ne	2f
+	subs	x2,x2,#1
+	b.ne	.Lbytewise
+2:
+	ret
+
+END(memcmp)

File Metadata

Mime Type
text/plain
Expires
Fri, Jan 23, 10:45 PM (16 h, 27 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27890431
Default Alt Text
D45621.id139953.diff (3 KB)

Event Timeline