diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc --- a/lib/libc/amd64/string/Makefile.inc +++ b/lib/libc/amd64/string/Makefile.inc @@ -10,5 +10,6 @@ strcat.S \ strchrnul.S \ strcmp.S \ + strcspn.S \ strlen.S \ strcpy.c diff --git a/lib/libc/amd64/string/strcspn.S b/lib/libc/amd64/string/strcspn.S new file mode 100644 --- /dev/null +++ b/lib/libc/amd64/string/strcspn.S @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include +#include + +#include "amd64_archlevel.h" + +#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ + +ARCHFUNCS(strcspn) + ARCHFUNC(strcspn, scalar) + NOARCHFUNC + ARCHFUNC(strcspn, x86_64_v2) +ENDARCHFUNCS(strcspn) + +ARCHENTRY(strcspn, scalar) + push %rbp # align stack to enable function call + mov %rsp, %rbp + sub $256, %rsp # allocate space for lookup table + + /* check for special cases */ + movzbl (%rsi), %eax # first character in the set + test %eax, %eax + jz .Lstrlen + + movzbl 1(%rsi), %edx # second character in the set + test %edx, %edx + jz .Lstrchr + + /* no special case matches -- prepare lookup table */ + xor %r8d, %r8d + mov $28, %ecx +0: mov %r8, (%rsp, %rcx, 8) + mov %r8, 8(%rsp, %rcx, 8) + mov %r8, 16(%rsp, %rcx, 8) + mov %r8, 24(%rsp, %rcx, 8) + sub $4, %ecx + jnc 0b + + add $2, %rsi + movb $1, (%rsp, %rax, 1) # register first chars in set + movb $1, (%rsp, %rdx, 1) + mov %rdi, %rax # a copy of the source to iterate over + + /* process remaining chars in set */ + ALIGN_TEXT +0: movzbl (%rsi), %ecx + movb $1, (%rsp, %rcx, 1) + test %ecx, %ecx + jz 1f + + movzbl 1(%rsi), %ecx + movb $1, (%rsp, %rcx, 1) + test %ecx, %ecx + jz 1f + + add $2, %rsi + jmp 0b + + /* find match */ + ALIGN_TEXT +1: movzbl (%rax), %ecx + cmpb $0, (%rsp, %rcx, 1) + jne 2f + + movzbl 1(%rax), %ecx + cmpb $0, (%rsp, %rcx, 1) + jne 3f + + movzbl 2(%rax), %ecx + cmpb $0, (%rsp, %rcx, 1) + jne 4f + + movzbl 3(%rax), %ecx + add $4, %rax + cmpb $0, (%rsp, %rcx, 1) + je 1b + + sub $3, %rax +4: dec %rdi +3: inc %rax +2: sub %rdi, %rax # number of characters preceding match + leave + ret + + /* set is empty, degrades to strlen */ +.Lstrlen: + leave + jmp CNAME(strlen) + + /* just one character in set, degrades to strchr */ +.Lstrchr: + mov %rdi, (%rsp) # stash a copy of the string + mov %eax, %esi # find the character in the set + call CNAME(strchrnul) + sub (%rsp), %rax # length of prefix before match + leave + ret +ARCHEND(strcspn, scalar) + + /* + * This kernel uses pcmpistri to do the heavy lifting. + * We provide five code paths, depending on set size: + * + * 0: call strlen() + * 1: call strchr() + * 2--16: one pcmpistri per 16 bytes of input + * 17--32: two pcmpistri per 16 bytes of input + * >=33: fall back to look up table + */ +ARCHENTRY(strcspn, x86_64_v2) + push %rbp + mov %rsp, %rbp + sub $256, %rsp + + /* check for special cases */ + movzbl (%rsi), %eax + test %eax, %eax # empty string? + jz .Lstrlenv2 + + cmpb $0, 1(%rsi) # single character string? + jz .Lstrchrv2 + + /* find set size and copy up to 32 bytes to (%rsp) */ + mov %esi, %ecx + and $~0xf, %rsi # align set pointer + movdqa (%rsi), %xmm0 + pxor %xmm1, %xmm1 + and $0xf, %ecx # amount of bytes rsi is past alignment + xor %edx, %edx + pcmpeqb %xmm0, %xmm1 # end of string reached? + movdqa %xmm0, 32(%rsp) # transfer head of set to stack + pmovmskb %xmm1, %eax + shr %cl, %eax # clear out junk before string + test %eax, %eax # end of set reached? + jnz 0f + + movdqa 16(%rsi), %xmm0 # second chunk of the set + mov $16, %edx + sub %ecx, %edx # length of set preceding xmm0 + pxor %xmm1, %xmm1 + pcmpeqb %xmm0, %xmm1 + movdqa %xmm0, 48(%rsp) + movdqu 32(%rsp, %rcx, 1), %xmm2 # head of set + pmovmskb %xmm1, %eax + test %eax, %eax + jnz 1f + + movdqa 32(%rsi), %xmm0 # third chunk + add $16, %edx + pxor %xmm1, %xmm1 + pcmpeqb %xmm0, %xmm1 + movdqa %xmm0, 64(%rsp) + pmovmskb %xmm1, %eax + test %eax, %eax # still not done? + jz .Lgt32v2 + +0: movdqu 32(%rsp, %rcx, 1), %xmm2 # head of set +1: tzcnt %eax, %eax + add %eax, %edx # length of set (excluding NUL byte) + cmp $32, %edx # above 32 bytes? + ja .Lgt32v2 + + /* + * At this point we know that we want to use pcmpistri. + * one last problem obtains: the head of the string is not + * aligned and may cross a cacheline. If this is the case, + * we take the part before the page boundary and repeat the + * last byte to fill up the xmm register. + */ + mov %rdi, %rax # save original string pointer + lea 15(%rdi), %esi # last byte of the head + xor %edi, %esi + test $PAGE_SIZE, %esi # does the head cross a page? + jz 0f + + /* head crosses page: copy to stack to fix up */ + and $~0xf, %rax # align head pointer temporarily + movzbl 15(%rax), %esi # last head byte on the page + movdqa (%rax), %xmm0 + movabs $0x0101010101010101, %r8 + imul %r8, %rsi # repeated 8 times + movdqa %xmm0, (%rsp) # head word on stack + mov %rsi, 16(%rsp) # followed by filler (last byte x8) + mov %rsi, 24(%rsp) + mov %edi, %eax + and $0xf, %eax # offset of head from alignment + add %rsp, %rax # pointer to fake head + +0: movdqu (%rax), %xmm0 # load head (fake or real) + lea 16(%rdi), %rax + and $~0xf, %rax # second 16 bytes of string (aligned) +1: cmp $16, %edx # 16--32 bytes? + ja .Lgt16v2 + + + /* set is 2--16 bytes in size */ + + /* _SIDD_UBYTE_OPS|_SIDD_CMP_EQUAL_ANY|_SIDD_LEAST_SIGNIFICANT */ + pcmpistri $0, %xmm0, %xmm2 # match in head? + jbe .Lheadmatchv2 + + ALIGN_TEXT +0: pcmpistri $0, (%rax), %xmm2 + jbe 1f # match or end of string? + pcmpistri $0, 16(%rax), %xmm2 + lea 32(%rax), %rax + ja 0b # match or end of string? + +3: lea -16(%rax), %rax # go back to second half +1: jc 2f # jump if match found + movdqa (%rax), %xmm0 # reload string piece + pxor %xmm1, %xmm1 + pcmpeqb %xmm1, %xmm0 # where is the NUL byte? + pmovmskb %xmm0, %ecx + tzcnt %ecx, %ecx # location of NUL byte in (%rax) +2: sub %rdi, %rax # offset of %xmm0 from beginning of string + add %rcx, %rax # prefix length before match/NUL + leave + ret + +.Lheadmatchv2: + jc 2f # jump if match found + pxor %xmm1, %xmm1 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm0, %ecx + tzcnt %ecx, %ecx # location of NUL byte +2: mov %ecx, %eax # prefix length before match/NUL + leave + ret + +.Lgt16v2: + movdqu 48(%rsp, %rcx, 1), %xmm3 # second part of set + + /* set is 17--32 bytes in size */ + pcmpistri $0, %xmm0, %xmm2 # match in head? + jbe .Lheadmatchv2 + pcmpistri $0, %xmm0, %xmm3 # ZF=1 not possible here + jb .Lheadmatchv2 + + ALIGN_TEXT +0: movdqa (%rax), %xmm0 + pcmpistri $0, %xmm0, %xmm2 + jbe 1b + pcmpistri $0, %xmm0, %xmm3 + jb 1f # ZF=1 not possible here + movdqa 16(%rax), %xmm0 + add $32, %rax + pcmpistri $0, %xmm0, %xmm2 + jbe 3b + pcmpistri $0, %xmm0, %xmm3 + jae 0b # ZF=1 not possible here + + sub $16, %rax # go back to second half +1: add %rcx, %rax + sub %rdi, %rax + leave + ret + + /* set is empty, degrades to strlen */ +.Lstrlenv2: + leave + jmp CNAME(strlen) + + /* just one character in set, degrades to strchr */ +.Lstrchrv2: + mov %rdi, (%rsp) # stash a copy of the string + mov %eax, %esi # find this character + call CNAME(strchrnul) + sub (%rsp), %rax # length of prefix before match + leave + ret + + /* set is >=33 bytes in size */ +.Lgt32v2: + xorps %xmm0, %xmm0 + mov $256-64, %edx + + /* clear out look up table */ +0: movaps %xmm0, (%rsp, %rdx, 1) + movaps %xmm0, 16(%rsp, %rdx, 1) + movaps %xmm0, 32(%rsp, %rdx, 1) + movaps %xmm0, 48(%rsp, %rdx, 1) + sub $64, %edx + jnc 0b + + add %rcx, %rsi # restore string pointer + mov %rdi, %rax # keep a copy of the string + + /* initialise look up table */ + ALIGN_TEXT +0: movzbl (%rsi), %ecx + movb $1, (%rsp, %rcx, 1) + test %ecx, %ecx + jz 1f + + movzbl 1(%rsi), %ecx + movb $1, (%rsp, %rcx, 1) + test %ecx, %ecx + jz 1f + + movzbl 2(%rsi), %ecx + movb $1, (%rsp, %rcx, 1) + test %ecx, %ecx + jz 1f + + movzbl 3(%rsi), %ecx + movb $1, (%rsp, %rcx, 1) + test %ecx, %ecx + jz 1f + + add $4, %rsi + jmp 0b + + /* find match */ + ALIGN_TEXT +1: movzbl (%rax), %ecx + cmpb $0, (%rsp, %rcx, 1) + jne 2f + + movzbl 1(%rax), %ecx + cmpb $0, (%rsp, %rcx, 1) + jne 3f + + movzbl 2(%rax), %ecx + cmpb $0, (%rsp, %rcx, 1) + jne 4f + + movzbl 3(%rax), %ecx + add $4, %rax + cmpb $0, (%rsp, %rcx, 1) + je 1b + + sub $3, %rax +4: dec %rdi +3: inc %rax +2: sub %rdi, %rax # number of characters preceding match + leave + ret +ARCHEND(strcspn, x86_64_v2) + + .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/tests/string/Makefile b/lib/libc/tests/string/Makefile --- a/lib/libc/tests/string/Makefile +++ b/lib/libc/tests/string/Makefile @@ -11,6 +11,7 @@ ATF_TESTS_C+= memcmp_test ATF_TESTS_C+= memset_s_test ATF_TESTS_C+= stpncpy_test +ATF_TESTS_C+= strcspn_test ATF_TESTS_C+= strerror2_test ATF_TESTS_C+= strverscmp_test ATF_TESTS_C+= strxfrm_test @@ -29,7 +30,6 @@ NETBSD_ATF_TESTS_C+= strchrnul_test NETBSD_ATF_TESTS_C+= strcmp_test NETBSD_ATF_TESTS_C+= strcpy_test -NETBSD_ATF_TESTS_C+= strcspn_test NETBSD_ATF_TESTS_C+= strerror_test NETBSD_ATF_TESTS_C+= strlen_test NETBSD_ATF_TESTS_C+= strpbrk_test diff --git a/lib/libc/tests/string/strcspn_test.c b/lib/libc/tests/string/strcspn_test.c new file mode 100644 --- /dev/null +++ b/lib/libc/tests/string/strcspn_test.c @@ -0,0 +1,137 @@ +/*- + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include + +#include +#include +#include +#include +#include +#include + +enum { + MAXALIGN = 16, /* test all offsets from this alignment */ + MAXBUF = 64, /* test up to this buffer length */ +}; + +enum { NOMATCH, MATCH }; + +static void +testcase(char *buf, size_t buflen, char *set, size_t setlen, int want_match) +{ + size_t i, outcome, expected; + + assert(setlen < UCHAR_MAX - 2); + + for (i = 0; i < buflen; i++) + buf[i] = 1 + i % (UCHAR_MAX - setlen - 1); + + buf[i] = '\0'; + + for (i = 0; i < setlen; i++) + set[i] = UCHAR_MAX - i; + + set[i] = '\0'; + + if (want_match == MATCH && buflen > 0 && setlen > 0) { + buf[buflen - 1] = UCHAR_MAX; + expected = buflen - 1; + } else + expected = buflen; + + outcome = strcspn(buf, set); + ATF_CHECK_EQ_MSG(expected, outcome, "strcspn(%p[%zu], %p[%zu]) = %zu != %zu", + buf, buflen, set, setlen, outcome, expected); +} + +/* test set with all alignments and lengths of buf */ +static void +test_buf_alignments(char *set, size_t setlen, int want_match) +{ + char buf[MAXALIGN + MAXBUF + 1]; + size_t i, j; + + for (i = 0; i < MAXALIGN; i++) + for (j = 0; j <= MAXBUF; j++) + testcase(buf + i, j, set, setlen, want_match); +} + +/* test buf with all alignments and lengths of set */ +static void +test_set_alignments(char *buf, size_t buflen, int want_match) +{ + char set[MAXALIGN + MAXBUF + 1]; + size_t i, j; + + for (i = 0; i < MAXALIGN; i++) + for (j = 0; j <= MAXBUF; j++) + testcase(buf, buflen, set + i, j, want_match); +} + +ATF_TC_WITHOUT_HEAD(buf_alignments); +ATF_TC_BODY(buf_alignments, tc) +{ + char set[41]; + + test_buf_alignments(set, 0, MATCH); + test_buf_alignments(set, 1, MATCH); + test_buf_alignments(set, 5, MATCH); + test_buf_alignments(set, 20, MATCH); + test_buf_alignments(set, 40, MATCH); + + test_buf_alignments(set, 0, NOMATCH); + test_buf_alignments(set, 1, NOMATCH); + test_buf_alignments(set, 5, NOMATCH); + test_buf_alignments(set, 20, NOMATCH); + test_buf_alignments(set, 40, NOMATCH); +} + +ATF_TC_WITHOUT_HEAD(set_alignments); +ATF_TC_BODY(set_alignments, tc) +{ + char buf[31]; + + test_set_alignments(buf, 0, MATCH); + test_set_alignments(buf, 10, MATCH); + test_set_alignments(buf, 20, MATCH); + test_set_alignments(buf, 30, MATCH); + + test_set_alignments(buf, 0, NOMATCH); + test_set_alignments(buf, 10, NOMATCH); + test_set_alignments(buf, 20, NOMATCH); + test_set_alignments(buf, 30, NOMATCH); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, buf_alignments); + ATF_TP_ADD_TC(tp, set_alignments); + + return (atf_no_error()); +} diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7 --- a/share/man/man7/simd.7 +++ b/share/man/man7/simd.7 @@ -24,7 +24,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE . -.Dd August 13, 2023 +.Dd August 14, 2023 .Dt SIMD 7 .Os .Sh NAME @@ -69,6 +69,7 @@ .It strchrnul Ta Ta Ta S1 .It strcmp Ta Ta S Ta S Ta S .It strcpy Ta Ta Ta S1 Ta S Ta S2 +.It strcspn Ta Ta Ta S2 .It strlen Ta Ta S Ta S1 .It strncmp Ta Ta S Ta Ta S .It strncpy Ta Ta Ta Ta Ta S2