diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc
--- a/lib/libc/amd64/string/Makefile.inc
+++ b/lib/libc/amd64/string/Makefile.inc
@@ -7,8 +7,9 @@
 	memcpy.S \
 	memmove.S \
 	memset.S \
+	stpcpy.S \
 	strcat.S \
 	strchrnul.S \
 	strcmp.S \
 	strlen.S \
-	stpcpy.S
+	strcpy.c
diff --git a/lib/libc/amd64/string/stpcpy.S b/lib/libc/amd64/string/stpcpy.S
--- a/lib/libc/amd64/string/stpcpy.S
+++ b/lib/libc/amd64/string/stpcpy.S
@@ -1,12 +1,38 @@
-/*
- * Adapted by Guillaume Morin <guillaume@morinfr.org> from strcpy.S
- * written by J.T. Conklin <jtc@acorntoolworks.com>
- * Public domain.
+/*-
+ * Copyright (c) 2023, The FreeBSD Foundation
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
+ * Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcpy.S,
+ * written by J.T. Conklin <jtc@acorntoolworks.com> and adapted by
+ * Guillaume Morin <guillaume@morinfr.org> to implement stpcpy; the
+ * original was dedicated to the public domain.
  */
 
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT	.p2align 4, 0x90
+
+	.weak stpcpy
+	.set stpcpy, __stpcpy
+ARCHFUNCS(__stpcpy)
+	ARCHFUNC(__stpcpy, scalar)
+	ARCHFUNC(__stpcpy, baseline)
+ENDARCHFUNCS(__stpcpy)
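+
+/*
+ * The ARCHFUNCS machinery from amd64_archlevel.h picks one of the
+ * implementations below at load time: plain scalar code, or a
+ * baseline version built on SSE2, which the amd64 baseline ISA
+ * guarantees to be present.
+ */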
+
 /*
  * This stpcpy implementation copies a byte at a time until the
  * source pointer is aligned to a word boundary, it then copies by
@@ -20,9 +39,7 @@
  * requirements.
  */
 
-	.globl	stpcpy,__stpcpy
-ENTRY(stpcpy)
-__stpcpy:
+ARCHENTRY(__stpcpy, scalar)
 	movabsq $0x0101010101010101,%r8
 	movabsq $0x8080808080808080,%r9
 
@@ -43,7 +60,7 @@
 	dec	%rax
 	ret
 
-	.p2align 4
+	ALIGN_TEXT
 .Lloop:
 	movq	%rdx,(%rdi)
 	addq	$8,%rdi
@@ -111,6 +128,118 @@
 .Ldone:
 	movq	%rdi,%rax
 	ret
-END(stpcpy)
-	
+ARCHEND(__stpcpy, scalar)
+
+ARCHENTRY(__stpcpy, baseline)
+	mov	%esi, %ecx
+	mov	%rdi, %rdx
+	sub	%rsi, %rdi		# express destination as distance to source
+	and	$~0xf, %rsi		# align source to 16 bytes
+	movdqa	(%rsi), %xmm0		# head of string with junk before
+	pxor	%xmm1, %xmm1
+	and	$0xf, %ecx		# misalignment in bytes
+	pcmpeqb	%xmm1, %xmm0		# NUL byte present?
+	pmovmskb %xmm0, %eax
+	shr	%cl, %eax		# clear out matches in junk bytes
+	bsf	%eax, %eax		# find match if any
+	jnz	.Lrunt
+
+	/* first normal iteration: write head back if it succeeds */
+	movdqa	16(%rsi), %xmm0		# 16 bytes of current iteration
+	movdqu	(%rsi, %rcx, 1), %xmm2	# first 16 bytes of the string
+	pcmpeqb	%xmm0, %xmm1		# NUL byte present?
+	pmovmskb %xmm1, %eax
+	test	%eax, %eax		# find match if any
+	jnz	.Lshorty
+
+	movdqu	%xmm2, (%rdx)		# store beginning of string
+
+	/* main loop, unrolled twice */
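+	/* %rdi holds dst - src, so (%rsi, %rdi, 1) addresses the destination */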
+	ALIGN_TEXT
+0:	movdqa	32(%rsi), %xmm2		# load current iteration
+	movdqu	%xmm0, 16(%rsi, %rdi, 1) # write back previous iteration
+	pxor	%xmm1, %xmm1
+	add	$32, %rsi
+	pcmpeqb	%xmm2, %xmm1		# NUL byte present?
+	pmovmskb %xmm1, %eax
+	test	%eax, %eax
+	jnz	1f
+
+	movdqa	16(%rsi), %xmm0		# load current iteration
+	movdqu	%xmm2, (%rsi, %rdi, 1)	# write back previous iteration
+	pxor	%xmm1, %xmm1
+	pcmpeqb	%xmm0, %xmm1		# NUL byte present?
+	pmovmskb %xmm1, %eax
+	test	%eax, %eax
+	jz	0b
+
+	/* end of string after main loop has iterated */
+	add	$16, %rsi		# advance rsi to second unrolled half
+1:	tzcnt	%eax, %eax		# find location of match
+					# (behaves as bsf on pre-x86-64-v3 CPUs)
+	add	%rsi, %rax		# point to NUL byte
+	movdqu	-15(%rax), %xmm0	# last 16 bytes of string
+	movdqu	%xmm0, -15(%rax, %rdi, 1) # copied to destination
+	add	%rdi, %rax		# point to destination's NUL byte
+	ret
+
+	/* NUL encountered in second iteration */
+.Lshorty:
+	tzcnt	%eax, %eax
+	add	$16, %eax		# account for length of first iteration
+	sub	%ecx, %eax		# but not the parts before the string
+
+	/* NUL encountered in first iteration */
+.Lrunt:	lea	1(%rax), %edi		# string length including NUL byte
+	add	%rcx, %rsi		# point to beginning of string
+	add	%rdx, %rax		# point to NUL byte
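+
+	/*
+	 * The cases below copy the string with two possibly overlapping
+	 * transfers: one chunk from the start of the string and one
+	 * ending at its NUL byte, whose address is already in %rax.
+	 */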
+
+	/* transfer 16--32 bytes */
+.L1632:	cmp	$16, %edi
+	jb	.L0815
+
+	movdqu	-16(%rsi, %rdi, 1), %xmm0 # load last 16 bytes
+	movdqu	%xmm2, (%rdx)		# store first 16 bytes
+	movdqu	%xmm0, -15(%rax)	# store last 16 bytes
+	ret
+
+	/* transfer 8--15 bytes */
+.L0815:	cmp	$8, %edi
+	jb	.L0407
+
+	mov	(%rsi), %rcx		# load first 8 bytes
+	mov	-8(%rsi, %rdi, 1), %rdi	# load last 8 bytes
+	mov	%rcx, (%rdx)		# store to dst
+	mov	%rdi, -7(%rax)		# ditto
+	ret
+
+	/* transfer 4--7 bytes */
+.L0407:	cmp	$4, %edi
+	jb	.L0203
+
+	mov	(%rsi), %ecx
+	mov	-4(%rsi, %rdi, 1), %edi
+	mov	%ecx, (%rdx)
+	mov	%edi, -3(%rax)
+	ret
+
+	/* transfer 2--3 bytes */
+.L0203:	cmp	$2, %edi
+	jb	.L0101
+
+	movzwl	(%rsi), %ecx
+	mov	%cx, (%rdx)		# store first two bytes
+
+	/* transfer 0 bytes (last byte is always NUL) */
+.L0101:	movb	$0, (%rax)		# store terminating NUL byte
+	ret
+ARCHEND(__stpcpy, baseline)
+
 	.section .note.GNU-stack,"",%progbits
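
For reference, the scalar variant's magic constants 0x0101010101010101
and 0x8080808080808080 implement the classic word-at-a-time NUL test.
A minimal C model of the test, assuming nothing beyond <stdint.h> (the
helper name has_nul is ours, not part of the patch):

	#include <stdint.h>

	/* nonzero iff some byte of v is 0x00 */
	static int
	has_nul(uint64_t v)
	{
		const uint64_t ones = 0x0101010101010101ULL;
		const uint64_t high = 0x8080808080808080ULL;

		/*
		 * v - ones borrows into the high bit of every 0x00 byte;
		 * masking with ~v discards bytes whose high bit was set
		 * to begin with, so the test has no false positives.
		 */
		return ((v - ones) & ~v & high) != 0;
	}

The baseline variant does the same search 16 bytes at a time: pcmpeqb
compares every byte against zero and pmovmskb condenses the results
into a bit mask, roughly as in this SSE2 intrinsics sketch (find_nul
is again an illustrative name, not part of the patch):

	#include <emmintrin.h>

	/* index of the first NUL among the 16 bytes at p, or 16 if none */
	static int
	find_nul(const char *p)
	{
		__m128i chunk = _mm_loadu_si128((const __m128i *)p);
		int mask = _mm_movemask_epi8(
		    _mm_cmpeq_epi8(chunk, _mm_setzero_si128()));

		return mask != 0 ? __builtin_ctz(mask) : 16;
	}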
diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7
--- a/share/man/man7/simd.7
+++ b/share/man/man7/simd.7
@@ -24,7 +24,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE
 .
-.Dd August 5, 2023
+.Dd August 7, 2023
 .Dt SIMD 7
 .Os
 .Sh NAME
@@ -63,12 +63,12 @@
 .It    memmove   Ta    S       Ta    S   Ta    S      Ta    S    Ta    SV
 .It    memset    Ta            Ta    S   Ta    S      Ta    S
 .It    rindex    Ta    S
-.It    stpcpy    Ta            Ta        Ta    S
+.It    stpcpy    Ta            Ta        Ta    S1
 .It    strcat    Ta            Ta        Ta    S      Ta    S
 .It    strchr    Ta    S       Ta        Ta    S1     Ta    S
 .It    strchrnul Ta            Ta        Ta    S1
 .It    strcmp    Ta            Ta    S   Ta    S      Ta    S
-.It    strcpy    Ta            Ta        Ta    S      Ta    S    Ta    S2
+.It    strcpy    Ta            Ta        Ta    S1     Ta    S    Ta    S2
 .It    strlen    Ta            Ta    S   Ta    S1
 .It    strncmp   Ta            Ta    S   Ta           Ta    S
 .It    strncpy   Ta            Ta        Ta           Ta         Ta    S2