Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F157288466
D42925.id131067.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
7 KB
Referenced Files
None
Subscribers
None
D42925.id131067.diff
View Options
diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc
--- a/lib/libc/amd64/string/Makefile.inc
+++ b/lib/libc/amd64/string/Makefile.inc
@@ -6,6 +6,7 @@
memccpy.S \
memcpy.S \
memmove.S \
+ memrchr.S \
memset.S \
stpcpy.S \
stpncpy.S \
diff --git a/lib/libc/amd64/string/memrchr.S b/lib/libc/amd64/string/memrchr.S
new file mode 100644
--- /dev/null
+++ b/lib/libc/amd64/string/memrchr.S
@@ -0,0 +1,166 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Robert Clausecker
+ */
+
+#include <machine/asm.h>
+
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT .p2align 4, 0x90
+
+ARCHFUNCS(memrchr)
+ ARCHFUNC(memrchr, scalar)
+ ARCHFUNC(memrchr, baseline)
+ENDARCHFUNCS(memrchr)
+
+ARCHENTRY(memrchr, scalar)
+ xor %eax, %eax # prospective return value
+ sub $4, %rdx # 4 bytes left to process?
+ jb 1f
+
+ ALIGN_TEXT
+0: xor %r8, %r8
+ lea 2(%rdi), %r10
+ cmp %sil, 2(%rdi)
+ cmovne %r8, %r10 # point to null if no match
+
+ cmp %sil, (%rdi)
+ cmove %rdi, %r8 # point to first char if match
+
+ lea 1(%rdi), %r9
+ cmp %sil, 1(%rdi)
+ cmovne %r8, %r9 # point to first result if no match in second
+
+ lea 3(%rdi), %r11
+ cmp %sil, 3(%rdi)
+ cmovne %r10, %r11
+
+ test %r11, %r11
+ cmovz %r9, %r11 # take first pair match if none in second
+
+ test %r11, %r11
+ cmovnz %r11, %rax # take match in current set if any
+
+ add $4, %rdi
+ sub $4, %rdx
+ jae 0b
+
+1: cmp $-3, %edx # a least one character left to process?
+ jb 2f
+
+ cmp %sil, (%rdi)
+ cmove %rdi, %rax
+
+ lea 1(%rdi), %rcx
+ cmp $-2, %edx # at least two characters left to process?
+ jb 2f
+
+ cmp %sil, 1(%rdi)
+ cmove %rcx, %rax
+
+ lea 2(%rdi), %rcx
+ cmp $-1, %edx # at least three character left to process?
+ jb 2f
+
+ cmp %sil, 2(%rdi)
+ cmove %rcx, %rax
+
+2: ret
+ARCHEND(memrchr, scalar)
+
+ARCHENTRY(memrchr, baseline)
+ movd %esi, %xmm4
+ test %rdx, %rdx # empty buffer?
+ jz .L0 # if yes, return immediately
+
+ punpcklbw %xmm4, %xmm4 # c -> cc
+ mov %edi, %ecx
+ punpcklwd %xmm4, %xmm4 # cc -> cccc
+ and $~0xf, %rdi # align source pointer
+ pshufd $0, %xmm4, %xmm4 # cccc -> cccccccccccccccc
+ and $0xf, %ecx
+ movdqa %xmm4, %xmm0
+ mov $-1, %r8d
+ pcmpeqb (%rdi), %xmm0 # compare aligned head
+ shl %cl, %r8d # mask of bytes in the head of the buffer
+ pmovmskb %xmm0, %eax
+
+ sub $16, %rcx
+ and %r8d, %eax # match mask
+ add %rcx, %rdx # advance past head
+ cmc
+ jbe .Lrunt # did the string end in the buffer?
+
+ mov %rdi, %rsi # pointer to matching chunk
+ add $16, %rdi
+ sub $16, %rdx # enough left for another round?
+ jbe 1f
+
+ /* main loop unrolled twice */
+ ALIGN_TEXT
+0: movdqa %xmm4, %xmm0
+ pcmpeqb (%rdi), %xmm0
+ pmovmskb %xmm0, %r8d
+
+ cmp $16, %rdx # enough left for second chunk?
+ jbe 2f
+
+ movdqa %xmm4, %xmm0
+ pcmpeqb 16(%rdi), %xmm0
+ pmovmskb %xmm0, %ecx
+
+ lea 16(%rdi), %r9
+ test %ecx, %ecx # match found in second chunk?
+ cmovz %r8d, %ecx # if not, use match data from first chunk
+ cmovz %rdi, %r9
+
+ test %ecx, %ecx # any match found?
+ cmovnz %ecx, %eax # if yes, overwrite previously found match
+ cmovnz %r9, %rsi
+
+ add $32, %rdi # advance to next iteration
+ sub $32, %rdx # advance to next chunks
+ ja 0b
+
+ /* process remaining 1--16 bytes */
+1: pcmpeqb (%rdi), %xmm4
+ mov $0xffff, %r8d
+ xor %ecx, %ecx
+ sub %edx, %ecx # number of bytes to be masked out
+ pmovmskb %xmm4, %r9d
+ shr %cl, %r8d # mask of bytes to be kept in the buffer
+ and %r9d, %r8d
+ cmovnz %r8d, %eax
+ cmovnz %rdi, %rsi
+ bsr %eax, %eax
+ lea (%rsi, %rax, 1), %rsi # pointer to match (or junk)
+ cmovnz %rsi, %rax # if any match was found, return it
+ ret
+
+ /* end of chunk reached within first half iteration */
+2: test %r8d, %r8d # match in previous chunk?
+ cmovnz %r8d, %eax # if yes, overwrite previous chunks
+ cmovnz %rdi, %rsi
+ add $16, %rdi # point to tail
+ sub $16, %edx
+ jmp 1b # handle tail the same otherwise
+
+ /* runt: string ends within head, edx has negated amount of invalid head bytes */
+.Lrunt: mov $0xffff, %r8d
+ xor %ecx, %ecx
+ sub %edx, %ecx
+ shr %cl, %r8d
+ and %r8d, %eax
+ bsr %eax, %eax
+ lea (%rdi, %rax, 1), %rdi
+ cmovnz %rdi, %rax
+ ret
+
+ /* empty buffer: return a null pointer */
+.L0: xor %eax, %eax
+ ret
+ARCHEND(memrchr, baseline)
+
+ .section .note.GNU-stack, "", %progbits
diff --git a/lib/libc/tests/string/Makefile b/lib/libc/tests/string/Makefile
--- a/lib/libc/tests/string/Makefile
+++ b/lib/libc/tests/string/Makefile
@@ -11,6 +11,7 @@
ATF_TESTS_C+= flsll_test
ATF_TESTS_C+= memccpy_test
ATF_TESTS_C+= memcmp_test
+ATF_TESTS_C+= memrchr_test
ATF_TESTS_C+= memset_s_test
ATF_TESTS_C+= strncmp_test
ATF_TESTS_C+= stpncpy_test
diff --git a/lib/libc/tests/string/memrchr_test.c b/lib/libc/tests/string/memrchr_test.c
new file mode 100644
--- /dev/null
+++ b/lib/libc/tests/string/memrchr_test.c
@@ -0,0 +1,116 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Robert Clausecker
+ */
+
+#include <sys/cdefs.h>
+
+#include <dlfcn.h>
+#include <limits.h>
+#include <string.h>
+
+#include <atf-c.h>
+
+static void *(*memrchr_fn)(const void *, int, size_t);
+
+ATF_TC_WITHOUT_HEAD(null);
+ATF_TC_BODY(null, tc)
+{
+ ATF_CHECK_EQ(memrchr_fn(NULL, 42, 0), NULL);
+}
+
+ATF_TC_WITHOUT_HEAD(not_found);
+ATF_TC_BODY(not_found, tc)
+{
+ size_t i, j;
+ char buf[1+15+64+1]; /* offset [0..15] + 64 buffer bytes + sentinels */
+
+ buf[0] = 'X';
+ memset(buf + 1, '-', sizeof(buf) - 1);
+
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 64; j++) {
+ buf[i + j + 1] = 'X';
+ ATF_CHECK_EQ(memrchr_fn(buf + i + 1, 'X', j), NULL);
+ buf[i + j + 1] = '-';
+ }
+}
+
+static void
+do_found_test(char buf[], size_t len, size_t first, size_t second)
+{
+ /* invariant: first <= second */
+
+ buf[first] = 'X';
+ buf[second] = 'X';
+ ATF_CHECK_EQ(memrchr_fn(buf, 'X', len), buf + second);
+ buf[first] = '-';
+ buf[second] = '-';
+}
+
+ATF_TC_WITHOUT_HEAD(found);
+ATF_TC_BODY(found, tc)
+{
+ size_t i, j, k, l;
+ char buf[1+15+64+1];
+
+ buf[0] = 'X';
+ memset(buf + 1, '-', sizeof(buf) - 1);
+
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 64; j++)
+ for (k = 0; k < j; k++)
+ for (l = 0; l <= k; l++) {
+ buf[i + j + 1] = 'X';
+ do_found_test(buf + i + 1, j, l, k);
+ buf[i + j + 1] = '-';
+ }
+}
+
+/* check that the right character is found */
+static void
+do_values_test(unsigned char buf[], size_t len, size_t i, int c)
+{
+ /* sentinels */
+ buf[-1] = c;
+ buf[len] = c;
+ memset(buf, c + 1, len);
+
+ if (i < len) {
+ buf[i] = c;
+ ATF_CHECK_EQ(memrchr_fn(buf, c, len), buf + i);
+ } else
+ ATF_CHECK_EQ(memrchr_fn(buf, c, len), NULL);
+}
+
+ATF_TC_WITHOUT_HEAD(values);
+ATF_TC_BODY(values, tc)
+{
+ size_t i, j, k;
+ int c;
+ unsigned char buf[1+15+64+1];
+
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 64; j++)
+ for (k = 0; k <= j; k++)
+ for (c = 0; c <= UCHAR_MAX; c++)
+ do_values_test(buf + i + 1, j, k, c);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ void *dl_handle;
+
+ dl_handle = dlopen(NULL, RTLD_LAZY);
+ memrchr_fn = dlsym(dl_handle, "test_memrchr");
+ if (memrchr_fn == NULL)
+ memrchr_fn = memrchr;
+
+ ATF_TP_ADD_TC(tp, null);
+ ATF_TP_ADD_TC(tp, not_found);
+ ATF_TP_ADD_TC(tp, found);
+ ATF_TP_ADD_TC(tp, values);
+
+ return (atf_no_error());
+}
diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7
--- a/share/man/man7/simd.7
+++ b/share/man/man7/simd.7
@@ -24,7 +24,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE
.
-.Dd December 4, 2023
+.Dd December 6, 2023
.Dt SIMD 7
.Os
.Sh NAME
@@ -63,6 +63,7 @@
.It memccpy Ta Ta Ta S1
.It memcpy Ta S Ta S Ta S1 Ta S Ta SV
.It memmove Ta S Ta S Ta S1 Ta S Ta SV
+.It memrchr Ta Ta Ta S1
.It memset Ta S Ta S Ta S Ta S
.It rindex Ta S Ta Ta S1 Ta S
.It stpcpy Ta S Ta Ta S1
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, May 21, 1:19 AM (6 h, 18 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33367177
Default Alt Text
D42925.id131067.diff (7 KB)
Attached To
Mode
D42925: lib/libc/amd64/string: add memrchr scalar, baseline implementation
Attached
Detach File
Event Timeline
Log In to Comment