Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F133120900
D41349.id126287.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
6 KB
Referenced Files
None
Subscribers
None
D41349.id126287.diff
View Options
diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc
--- a/lib/libc/amd64/string/Makefile.inc
+++ b/lib/libc/amd64/string/Makefile.inc
@@ -7,8 +7,9 @@
memcpy.S \
memmove.S \
memset.S \
+ stpcpy.S \
strcat.S \
strchrnul.S \
strcmp.S \
strlen.S \
- stpcpy.S
+ strcpy.c
diff --git a/lib/libc/amd64/string/stpcpy.S b/lib/libc/amd64/string/stpcpy.S
--- a/lib/libc/amd64/string/stpcpy.S
+++ b/lib/libc/amd64/string/stpcpy.S
@@ -1,12 +1,31 @@
-/*
- * Adapted by Guillaume Morin <guillaume@morinfr.org> from strcpy.S
- * written by J.T. Conklin <jtc@acorntoolworks.com>
- * Public domain.
+/*-
+ * Copyright (c) 2023, The FreeBSD Foundation
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
+ * Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcpy.S,
+ * written by J.T. Conklin <jtc@acorntoolworks.com> and adapted by
+ * Guillaume Morin <guillaume@morinfr.org> to implement stpcpy.
+ * That code was originally dedicated to the public domain.
*/
#include <machine/asm.h>
__FBSDID("$FreeBSD$");
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT .p2align 4, 0x90
+
+ .weak stpcpy
+ .set stpcpy, __stpcpy
+ARCHFUNCS(__stpcpy)
+ ARCHFUNC(__stpcpy, scalar)
+ ARCHFUNC(__stpcpy, baseline)
+ENDARCHFUNCS(__stpcpy)
+
/*
* This stpcpy implementation copies a byte at a time until the
* source pointer is aligned to a word boundary, it then copies by
@@ -20,9 +39,7 @@
* requirements.
*/
- .globl stpcpy,__stpcpy
-ENTRY(stpcpy)
-__stpcpy:
+ARCHENTRY(__stpcpy, scalar)
movabsq $0x0101010101010101,%r8
movabsq $0x8080808080808080,%r9
@@ -43,7 +60,7 @@
dec %rax
ret
- .p2align 4
+ ALIGN_TEXT
.Lloop:
movq %rdx,(%rdi)
addq $8,%rdi
@@ -111,6 +128,111 @@
.Ldone:
movq %rdi,%rax
ret
-END(stpcpy)
-
+ARCHEND(__stpcpy, scalar)
+
+ARCHENTRY(__stpcpy, baseline)
+ mov %esi, %ecx # copy of source address, reduced to misalignment below
+ mov %rdi, %rdx # keep original destination pointer
+ sub %rsi, %rdi # express destination as distance to source
+ and $~0xf, %rsi # align source down to 16 bytes
+ movdqa (%rsi), %xmm0 # head of string with junk before
+ pxor %xmm1, %xmm1 # all-zero comparand
+ and $0xf, %ecx # misalignment in bytes
+ pcmpeqb %xmm1, %xmm0 # NUL byte present?
+ pmovmskb %xmm0, %eax # one mask bit per byte
+ shr %cl, %eax # clear out matches in junk bytes
+ bsf %eax, %eax # find match if any
+ jnz .Lrunt # mask was nonzero: NUL in first chunk
+
+ /* first normal iteration: write head back if it succeeds */
+ movdqa 16(%rsi), %xmm0 # 16 bytes of current iteration
+ movdqu (%rsi, %rcx, 1), %xmm2 # first 16 bytes of the string
+ pcmpeqb %xmm0, %xmm1 # NUL byte present?
+ pmovmskb %xmm1, %eax
+ test %eax, %eax # any NUL in this chunk?
+ jnz .Lshorty
+
+ movdqu %xmm2, (%rdx) # store beginning of string
+
+ /* main loop, unrolled twice */
+ ALIGN_TEXT
+0: movdqa 32(%rsi), %xmm2 # load current iteration
+ movdqu %xmm0, 16(%rsi, %rdi, 1) # write back previous iteration
+ pxor %xmm1, %xmm1 # reset comparand overwritten by pcmpeqb
+ add $32, %rsi # rsi now points at the chunk held in xmm2
+ pcmpeqb %xmm2, %xmm1 # NUL byte present?
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz 1f # NUL in first unrolled half
+
+ movdqa 16(%rsi), %xmm0 # load current iteration
+ movdqu %xmm2, (%rsi, %rdi, 1) # write back previous iteration
+ pxor %xmm1, %xmm1 # reset comparand overwritten by pcmpeqb
+ pcmpeqb %xmm0, %xmm1 # NUL byte present?
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jz 0b # no NUL yet: keep looping
+
+ /* end of string after main loop has iterated */
+ add $16, %rsi # advance rsi to second unrolled half
+1: tzcnt %eax, %eax # find location of match
+ # (behaves as bsf on pre-x86-64-v3 CPUs)
+ add %rsi, %rax # point to NUL byte
+ movdqu -15(%rax), %xmm0 # last 16 bytes of string
+ movdqu %xmm0, -15(%rax, %rdi, 1) # copied to destination
+ add %rdi, %rax # point to destination's NUL byte
+ ret
+
+ /* NUL encountered in second iteration */
+.Lshorty:
+ tzcnt %eax, %eax # index of NUL within second chunk
+ add $16, %eax # account for length of first iteration
+ sub %ecx, %eax # but not the parts before the string
+
+ /* NUL encountered in first iteration */
+.Lrunt: lea 1(%rax), %edi # string length including NUL byte
+ add %rcx, %rsi # point to beginning of string
+ add %rdx, %rax # point to NUL byte in destination
+
+ /* transfer 16--32 bytes */
+.L1632: cmp $16, %edi
+ jb .L0815 # fewer than 16 bytes: try smaller sizes
+
+ movdqu -16(%rsi, %rdi, 1), %xmm0 # load last 16 bytes
+ movdqu %xmm2, (%rdx) # store first 16 bytes (stale xmm2 on the .Lrunt path is overwritten by the next store)
+ movdqu %xmm0, -15(%rax) # store last 16 bytes
+ ret
+
+ /* transfer 8--15 bytes */
+.L0815: cmp $8, %edi
+ jb .L0407
+
+ mov (%rsi), %rcx # load first 8 bytes
+ mov -8(%rsi, %rdi, 1), %rdi # load last 8 bytes
+ mov %rcx, (%rdx) # store to dst
+ mov %rdi, -7(%rax) # store last 8 bytes, ending on the NUL
+ ret
+
+ /* transfer 4--7 bytes */
+.L0407: cmp $4, %edi
+ jb .L0203
+
+ mov (%rsi), %ecx # load first 4 bytes
+ mov -4(%rsi, %rdi, 1), %edi # load last 4 bytes
+ mov %ecx, (%rdx) # store first 4 bytes
+ mov %edi, -3(%rax) # store last 4 bytes, ending on the NUL
+ ret
+
+ /* transfer 2--3 bytes */
+.L0203: cmp $2, %edi
+ jb .L0101
+
+ movzwl (%rsi), %ecx # load first two bytes
+ mov %cx, (%rdx) # store first two bytes
+
+ /* transfer 0 bytes (last byte is always NUL) */
+.L0101: movb $0, (%rax) # store terminating NUL byte
+ ret
+ARCHEND(__stpcpy, baseline)
+
.section .note.GNU-stack,"",%progbits
diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7
--- a/share/man/man7/simd.7
+++ b/share/man/man7/simd.7
@@ -24,7 +24,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE
.
-.Dd August 5, 2023
+.Dd August 7, 2023
.Dt SIMD 7
.Os
.Sh NAME
@@ -63,12 +63,12 @@
.It memmove Ta S Ta S Ta S Ta S Ta SV
.It memset Ta Ta S Ta S Ta S
.It rindex Ta S
-.It stpcpy Ta Ta Ta S
+.It stpcpy Ta Ta Ta S1
.It strcat Ta Ta Ta S Ta S
.It strchr Ta S Ta Ta S1 Ta S
.It strchrnul Ta Ta Ta S1
.It strcmp Ta Ta S Ta S Ta S
-.It strcpy Ta Ta Ta S Ta S Ta S2
+.It strcpy Ta Ta Ta S1 Ta S Ta S2
.It strlen Ta Ta S Ta S1
.It strncmp Ta Ta S Ta Ta S
.It strncpy Ta Ta Ta Ta Ta S2
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Oct 24, 3:47 AM (4 h, 26 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
24117100
Default Alt Text
D41349.id126287.diff (6 KB)
Attached To
Mode
D41349: lib/libc/amd64/string: add baseline implementation of stpcpy.S
Attached
Detach File
Event Timeline
Log In to Comment