Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F141983007
D40693.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
2 KB
Referenced Files
None
Subscribers
None
D40693.id.diff
View Options
diff --git a/lib/libc/amd64/string/strlen.S b/lib/libc/amd64/string/strlen.S
--- a/lib/libc/amd64/string/strlen.S
+++ b/lib/libc/amd64/string/strlen.S
@@ -1,11 +1,18 @@
-/*
+/*-
* Written by Mateusz Guzik <mjg@freebsd.org>
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
* Public domain.
*/
#include <machine/asm.h>
__FBSDID("$FreeBSD$");
+#include "amd64_archlevel.h"
+
/*
* Note: this routine was written with kernel use in mind (read: no simd),
* it is only present in userspace as a temporary measure until something
@@ -14,6 +21,11 @@
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
+ARCHFUNCS(strlen) /* list of strlen variants; presumably consumed by the archlevel dispatch code in amd64_archlevel.h (not shown here) */
+ ARCHFUNC(strlen, scalar) /* variant defined by ARCHENTRY(strlen, scalar) */
+ ARCHFUNC(strlen, baseline) /* variant defined by ARCHENTRY(strlen, baseline) */
+ENDARCHFUNCS(strlen)
+
/*
* strlen(string)
* %rdi
@@ -30,7 +42,7 @@
*
* The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
*/
-ENTRY(strlen)
+ARCHENTRY(strlen, scalar)
movabsq $0xfefefefefefefeff,%r8
movabsq $0x8080808080808080,%r9
@@ -76,6 +88,46 @@
leaq (%rcx,%rdi),%rax
subq %r10,%rax
ret
-END(strlen)
+ARCHEND(strlen, scalar)
+
+ARCHENTRY(strlen, baseline) /* strlen using 16-byte SSE compares: in %rdi = string, out %rax = length; uses %rcx, %rsi, %xmm1, flags */
+ mov %rdi, %rcx # keep the unaligned string pointer; %rdi gets aligned below
+ pxor %xmm1, %xmm1 # xmm1 = 16 zero bytes to compare against
+ and $~0xf, %rdi # align string
+ pcmpeqb (%rdi), %xmm1 # compare head (with junk before string)
+ mov %rcx, %rsi # string pointer copy for later
+ and $0xf, %ecx # number of bytes the string start lies past 16 byte alignment
+ pmovmskb %xmm1, %eax # one mask bit per byte, set where the byte was NUL
+ add $32, %rdi # advance to next iteration (loop reads -16(%rdi), then (%rdi))
+ shr %cl, %eax # clear out matches in junk bytes
+ test %eax, %eax # any match? (can't use ZF from SHR as CL=0 is possible)
+ jnz 2f # NUL inside the head chunk
+
+ ALIGN_TEXT
+1: pxor %xmm1, %xmm1 # re-zero xmm1, the previous pcmpeqb overwrote it
+ pcmpeqb -16(%rdi), %xmm1 # find NUL bytes
+ pmovmskb %xmm1, %eax # one mask bit per byte of the chunk
+ test %eax, %eax # were any NUL bytes present?
+ jnz 3f # yes: %rdi is 16 past the matching chunk
+
+ /* the same unrolled once more */
+ pxor %xmm1, %xmm1 # re-zero xmm1 again
+ pcmpeqb (%rdi), %xmm1 # find NUL bytes in the following chunk
+ pmovmskb %xmm1, %eax # one mask bit per byte of the chunk
+ add $32, %rdi # advance to next iteration
+ test %eax, %eax # were any NUL bytes present?
+ jz 1b # no NUL in either chunk: keep scanning
+
+ /* match found in loop body */
+ sub $16, %rdi # undo half the advancement
+3: tzcnt %eax, %eax # find the first NUL byte
+ sub %rsi, %rdi # string length until beginning of (%rdi)
+ lea -16(%rdi, %rax, 1), %rax # that plus loc. of NUL byte: full string length
+ ret # %rax = length
+
+ /* match found in head */
+2: tzcnt %eax, %eax # compute string length
+ ret # %rax = index of first NUL counted from string start
+ARCHEND(strlen, baseline)
.section .note.GNU-stack,"",%progbits
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 15, 12:12 PM (10 h, 16 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27649696
Default Alt Text
D40693.id.diff (2 KB)
Attached To
Mode
D40693: lib/libc/amd64: add archlevel-based simd dispatch framework
Attached
Detach File
Event Timeline
Log In to Comment