diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc --- a/lib/libc/amd64/string/Makefile.inc +++ b/lib/libc/amd64/string/Makefile.inc @@ -3,6 +3,7 @@ bcmp.S \ memchr.S \ memcmp.S \ + memccpy.S \ memcpy.S \ memmove.S \ memset.S \ @@ -16,6 +17,7 @@ strlcat.c \ strlcpy.S \ strlen.S \ + strncat.c \ strncmp.S \ strncpy.c \ strnlen.c \ diff --git a/lib/libc/amd64/string/memccpy.S b/lib/libc/amd64/string/memccpy.S new file mode 100644 --- /dev/null +++ b/lib/libc/amd64/string/memccpy.S @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include + +#include "amd64_archlevel.h" + +#define ALIGN_TEXT .p2align 4, 0x90 + + .weak memccpy + .set memccpy, __memccpy +ARCHFUNCS(__memccpy) + ARCHFUNC(__memccpy, scalar) + ARCHFUNC(__memccpy, baseline) +ENDARCHFUNCS(__memccpy) + +ARCHENTRY(__memccpy, scalar) + push %rbp # establish stack frame + mov %rsp, %rbp + push %rax # dummy push for alignment + push %rbx # RBX carries len, then res, across the two calls + push %rdi # save dest across the memchr call + push %rsi # save src across the memchr call + + mov %rsi, %rdi # first argument: src + mov %edx, %esi # second argument: c + mov %rcx, %rdx # third argument: len + mov %rcx, %rbx # keep len for after the call + call CNAME(__memchr) # ptr = memchr(src, c, len) + + pop %rsi # restore src + pop %rdi # restore dest + lea 1(%rax), %rdx + sub %rsi, %rdx # size = ptr - src + 1 + mov %rbx, %rcx # RCX = len + lea (%rdi, %rdx, 1), %rbx # res = dest + size + test %rax, %rax # if (ptr == NULL) + cmovz %rcx, %rdx # size = len + cmovz %rax, %rbx # res = NULL + call CNAME(memcpy) # memcpy(dest, src, size) + + mov %rbx, %rax # return (res) + pop %rbx + leave + ret +ARCHEND(__memccpy, scalar) + +ARCHENTRY(__memccpy, baseline) + sub $1, %rcx # RCX refers to last character in buffer + jb .L0 # go to special code path if len was 0 + + movd %edx, %xmm4 + mov %rcx, %rdx # RDX = len - 1 + punpcklbw %xmm4, %xmm4 # c -> cc + mov %esi, %ecx # low bits of src; masked down to the misalignment below + punpcklwd %xmm4, %xmm4 # cc -> cccc + mov %rsi, %r9 # stash a copy of the source pointer for later + pshufd $0, %xmm4, %xmm4 # cccc -> cccccccccccccccc + and $~0xf, %rsi # round RSI down to a 16 byte boundary + movdqa %xmm4, %xmm1 + pcmpeqb (%rsi), %xmm1 # terminator found in head? 
+ mov $-1, %r8d + and $0xf, %ecx + shl %cl, %r8d # mask of bytes in the string + pmovmskb %xmm1, %eax + and %r8d, %eax + jnz .Lhead_nul + + mov $15, %eax + sub %ecx, %eax # offset of the last head byte from src + cmp %rax, %rdx # does the buffer extend past the head? + ja 5f + + /* no terminator and buffer ends in head: next chunk may be unmapped, so copy len bytes and return NULL */ + xor %eax, %eax + jmp .L0116 + +5: movdqa 16(%rsi), %xmm3 # load second string chunk + movdqu (%r9), %xmm2 # load unaligned string head + mov $32, %r8d + sub %ecx, %r8d # head length + length of second chunk + movdqa %xmm4, %xmm1 + pcmpeqb %xmm3, %xmm1 # terminator found in second chunk? + + sub %r8, %rdx # enough space left for the second chunk? + jb .Lhead_buf_end + + /* process second chunk */ + pmovmskb %xmm1, %eax + test %eax, %eax + jnz .Lsecond_nul + + /* string didn't end in second chunk and neither did buffer -- not a runt! */ + movdqa 32(%rsi), %xmm0 # load next string chunk + movdqa %xmm4, %xmm1 + movdqu %xmm2, (%rdi) # deposit head into buffer + sub %rcx, %rdi # adjust RDI to correspond to RSI + movdqu %xmm3, 16(%rdi) # deposit second chunk + sub %rsi, %rdi # express RDI as distance from RSI + add $32, %rsi # advance RSI past first two chunks + sub $16, %rdx # enough left for another round? + jb 1f + + /* main loop unrolled twice */ + ALIGN_TEXT +0: pcmpeqb %xmm0, %xmm1 # terminator encountered? + pmovmskb %xmm1, %eax + test %eax, %eax + jnz 3f + + movdqu %xmm0, (%rsi, %rdi) + movdqa 16(%rsi), %xmm0 # load next string chunk + movdqa %xmm4, %xmm1 + cmp $16, %rdx # more than a full chunk left? + jb 2f + + add $32, %rsi # advance pointers to next chunk + pcmpeqb %xmm0, %xmm1 # terminator encountered? + pmovmskb %xmm1, %eax + test %eax, %eax + jnz 4f + + movdqu %xmm0, -16(%rsi, %rdi) + movdqa (%rsi), %xmm0 # load next string chunk + movdqa %xmm4, %xmm1 + sub $32, %rdx + jae 0b + +1: sub $16, %rsi # undo second advancement + add $16, %edx + + /* 1--16 bytes left in the buffer but string has not ended yet */ +2: pcmpeqb %xmm1, %xmm0 # terminator encountered? 
+ pmovmskb %xmm0, %r8d + mov %r8d, %ecx # keep the unmodified match mask for the BT below + bts %edx, %r8d # treat end of buffer as end of string + or $0x10000, %eax # no effect on the result: TZCNT reads R8D (BTS set a bit there) and EAX is cleared below before it is read + tzcnt %r8d, %r8d # find tail length + add %rsi, %rdi # restore RDI + movdqu 1(%rsi, %r8, 1), %xmm0 # load string tail + movdqu %xmm0, 1(%rdi, %r8, 1) # store string tail + lea 17(%rdi, %r8, 1), %rsi # return value if terminator encountered + xor %eax, %eax # return value if no terminator encountered + bt %r8d, %ecx # terminator encountered inside buffer? + cmovc %rsi, %rax # if yes, return pointer, else NULL + ret + +4: sub $16, %rsi # undo second advancement + add $16, %rdx # restore number of remaining bytes + + /* string has ended but buffer has not */ +3: tzcnt %eax, %eax # find length of string tail + movdqu -15(%rsi, %rax, 1), %xmm0 # load string tail (incl. terminator) + add %rsi, %rdi # restore destination pointer + movdqu %xmm0, -15(%rdi, %rax, 1) # store string tail (incl. terminator) + lea 1(%rdi, %rax, 1), %rax # compute return value + ret + +.Lhead_buf_end: + pmovmskb %xmm1, %r8d + add $32, %edx # restore edx to (len-1) + ecx + shl $16, %r8d # place 2nd chunk terminator mask into bits 16--31 + mov %r8d, %r10d # keep the unmodified mask for the BT below + bts %rdx, %r8 # treat end of buffer as if terminator present + xor %eax, %eax # return value if terminator not found + tzcnt %r8, %rdx # find string/buffer len from alignment boundary + lea 1(%rdi, %rdx, 1), %r8 # return value if terminator found + rcx + sub %rcx, %r8 # subtract rcx + bt %rdx, %r10 # was the terminator present? + cmovc %r8, %rax # if yes, return pointer, else NULL + sub %ecx, %edx # find actual string/buffer len + jmp .L0132 + +.Lsecond_nul: + add %r8, %rdx # restore buffer length + tzcnt %eax, %r8d # where is the terminator? + lea -16(%rcx), %eax + sub %eax, %r8d # string length + lea 1(%rdi, %r8, 1), %rax # return value if terminator before end of buffer + xor %ecx, %ecx # return value if not + cmp %r8, %rdx # is the string shorter than the buffer? 
+ cmova %r8, %rdx # copy only min(buflen, srclen) bytes + cmovb %rcx, %rax # return NULL if buffer ended before string +.L0132: cmp $16, %rdx # at least 17 bytes to copy (RDX is the offset of the last byte)? + jb .L0116 + + /* copy 17--32 bytes */ + movdqu (%r9), %xmm0 # load first 16 bytes + movdqu -15(%r9, %rdx, 1), %xmm1 # load last 16 bytes + movdqu %xmm0, (%rdi) + movdqu %xmm1, -15(%rdi, %rdx, 1) + ret + +.Lhead_nul: + tzcnt %eax, %r8d # where is the terminator? + sub %ecx, %r8d # ... from the beginning of the string? + lea 1(%rdi, %r8, 1), %rax # return value if terminator before end of buffer + xor %ecx, %ecx # return value if not + cmp %r8, %rdx # is the string shorter than the buffer? + cmova %r8, %rdx # copy only min(buflen, srclen) bytes + cmovb %rcx, %rax # return NULL if buffer ended before string + + /* process strings of 1--16 bytes (rdx: offset of the last byte to copy, rax: return value) */ +.L0116: cmp $8, %rdx # at least 9 bytes to copy? + jae .L0916 + + cmp $4, %rdx # at least 5 bytes to copy? + jae .L0508 + + cmp $2, %rdx # at least 3 bytes to copy? 
+ jae .L0304 + + /* copy one or two bytes */ + movzbl (%r9), %ecx # load first byte from src + movzbl (%r9, %rdx, 1), %esi # load last byte from src + mov %cl, (%rdi) # deposit into destination + mov %sil, (%rdi, %rdx, 1) + ret + +.L0304: movzwl (%r9), %ecx # copy 3--4 bytes as two possibly overlapping words + movzwl -1(%r9, %rdx, 1), %esi + mov %cx, (%rdi) + mov %si, -1(%rdi, %rdx, 1) + ret + +.L0508: mov (%r9), %ecx # copy 5--8 bytes as two possibly overlapping dwords + mov -3(%r9, %rdx, 1), %esi + mov %ecx, (%rdi) + mov %esi, -3(%rdi, %rdx, 1) + ret + +.L0916: mov (%r9), %rcx # copy 9--16 bytes as two possibly overlapping qwords + mov -7(%r9, %rdx, 1), %rsi + mov %rcx, (%rdi) + mov %rsi, -7(%rdi, %rdx, 1) + ret + + /* length zero destination: return null pointer */ +.L0: xor %eax, %eax + ret +ARCHEND(__memccpy, baseline) + + .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strncat.c b/lib/libc/amd64/string/strncat.c new file mode 100644 --- /dev/null +++ b/lib/libc/amd64/string/strncat.c @@ -0,0 +1,29 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023 Robert Clausecker + */ + +#include + +#include + +void *__memccpy(void *restrict, const void *restrict, int, size_t); + +char * +strncat(char *dest, const char *src, size_t n) +{ + size_t len; + char *endptr; + + len = strlen(dest); /* dest + len is where the appended bytes start */ + endptr = __memccpy(dest + len, src, '\0', n); /* copies up to n bytes, stopping after a NUL */ + + /* NULL: no NUL within n bytes, so point one past where the NUL belongs; this lets one store below serve both cases without an extra branch */ + if (endptr == NULL) + endptr = dest + len + n + 1; + + endptr[-1] = '\0'; /* rewrites the copied NUL, or terminates after n copied bytes */ + + return (dest); +} diff --git a/lib/libc/tests/string/Makefile b/lib/libc/tests/string/Makefile --- a/lib/libc/tests/string/Makefile +++ b/lib/libc/tests/string/Makefile @@ -9,6 +9,7 @@ ATF_TESTS_C+= fls_test ATF_TESTS_C+= flsl_test ATF_TESTS_C+= flsll_test +ATF_TESTS_C+= memccpy_test ATF_TESTS_C+= memcmp_test ATF_TESTS_C+= memset_s_test ATF_TESTS_C+= strncmp_test diff --git a/lib/libc/tests/string/memccpy_test.c b/lib/libc/tests/string/memccpy_test.c new file mode 100644 --- /dev/null +++ b/lib/libc/tests/string/memccpy_test.c @@ -0,0 +1,205 @@ +/*- + * Copyright (c) 2009 David Schultz + * Copyright (c) 2023 The FreeBSD Foundation + * 
All rights reserved. + * + * Portions of this software were developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +void *(*memccpy_fn)(void *restrict, const void *restrict, int, size_t); + +static char * +makebuf(size_t len, int guard_at_end) +{ + char *buf; + size_t alloc_size, page_size; + + page_size = getpagesize(); + alloc_size = roundup2(len, page_size) + page_size; /* room for len bytes plus one guard page */ + + buf = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, MAP_ANON, -1, 0); + assert(buf != MAP_FAILED); /* mmap reports failure as MAP_FAILED, not NULL */ + if (guard_at_end) { /* unmap the last page; the buffer ends right at the hole */ + assert(munmap(buf + alloc_size - page_size, page_size) == 0); + return (buf + alloc_size - page_size - len); + } else { /* unmap the first page; the buffer starts right after the hole */ + assert(munmap(buf, page_size) == 0); + return (buf + page_size); + } +} + +static void +test_memccpy(const char *s) +{ + char *src, *dst, *expected; + size_t size, bufsize, x; + int i, j; + + size = strlen(s) + 1; + for (i = 0; i <= 1; i++) { + for (j = 0; j <= 1; j++) { + for (bufsize = 0; bufsize <= size + 10; bufsize++) { + src = makebuf(size, i); + memcpy(src, s, size); + dst = makebuf(bufsize, j); + memset(dst, 'X', bufsize); + expected = bufsize >= size ? dst + size : NULL; + assert(memccpy_fn(dst, src, src[size-1], bufsize) == expected); + assert(bufsize == 0 || strncmp(src, dst, bufsize - 1) == 0); + for (x = size; x < bufsize; x++) + assert(dst[x] == 'X'); + } + } + } +} + +static void +test_sentinel(char *dest, char *src, size_t destlen, size_t srclen) +{ + size_t i, effective_len; + void *res, *wantres; + const char *fail = NULL; + char terminator; + + for (i = 0; i < srclen; i++) + /* src will never include (){} */ + src[i] = '0' + i; + + /* source sentinels: not to be copied */ + src[-1] = '('; + src[srclen] = ')'; + + memset(dest, '\xee', destlen); + + /* destination sentinels: not to be touched */ + dest[-1] = '{'; + dest[destlen] = '}'; + + effective_len = srclen < destlen ? srclen : destlen; + wantres = srclen <= destlen ? 
dest + srclen : NULL; + terminator = src[srclen-1]; /* last source byte doubles as the terminator */ + res = memccpy_fn(dest, src, terminator, destlen); + + if (dest[-1] != '{') + fail = "start sentinel overwritten"; + else if (dest[destlen] != '}') + fail = "end sentinel overwritten"; + else if (res != wantres) + fail = "incorrect return value"; + else if (destlen > 0 && memcmp(src, dest, effective_len) != 0) + fail = "string not copied correctly"; + else for (i = srclen; i < destlen; i++) + if (dest[i] != '\xee') { + fail = "buffer mutilated behind string"; + break; + } + + if (fail) /* neither buffer is NUL-terminated, so bound both strings with an explicit precision */ + atf_tc_fail_nonfatal("%s\n" + "memccpy(%p \"%.*s\", %p \"%.*s\", %u '%c', %zu) = %p (want %p)\n", + fail, dest, (int)destlen, dest, src, (int)srclen, src, (unsigned)terminator, terminator, destlen, res, wantres); +} + +ATF_TC_WITHOUT_HEAD(null); +ATF_TC_BODY(null, tc) +{ + ATF_CHECK_EQ(memccpy_fn(NULL, "foo", 42, 0), NULL); +} + +ATF_TC(zero_extension); +ATF_TC_HEAD(zero_extension, tc) +{ + atf_tc_set_md_var(tc, "descr", + "Ensure the upper bits of the terminator are ignored"); +} +ATF_TC_BODY(zero_extension, tc) +{ + int mask = -1 & ~UCHAR_MAX; /* all bits above the low byte */ + char buf[16]; + + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ(memccpy_fn(buf, "foobar", 'r', sizeof(buf)), buf + sizeof("foobar") - 1); + ATF_CHECK_EQ(memcmp(buf, "foobar", sizeof("foobar") - 1), 0); + + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ(memccpy_fn(buf, "foobar", mask | 'r', sizeof(buf)), buf + sizeof("foobar") - 1); + ATF_CHECK_EQ(memcmp(buf, "foobar", sizeof("foobar") - 1), 0); +} + +ATF_TC_WITHOUT_HEAD(bounds); +ATF_TC_BODY(bounds, tc) +{ + size_t i; + char buf[64] = { 0 }; /* zero-filled so every prefix handed to test_memccpy is NUL-terminated */ + + for (i = 0; i < sizeof(buf) - 1; i++) { + buf[i] = ' ' + i; + test_memccpy(buf); + } +} + +ATF_TC_WITHOUT_HEAD(alignments); +ATF_TC_BODY(alignments, tc) +{ + size_t srcalign, destalign, srclen, destlen; + char src[15+2+64]; /* 15 offsets + 64 max length + sentinels */ + char dest[15+2+64]; /* 15 offsets + 64 max length + sentinels */ + + for (srcalign = 0; srcalign < 16; srcalign++) + for (destalign = 0; destalign < 16; destalign++) + for (srclen = 
1; srclen < 64; srclen++) + for (destlen = 0; destlen < 64; destlen++) + test_sentinel(dest+destalign+1, + src+srcalign+1, destlen, srclen); /* +1 leaves room for the leading sentinel byte */ +} + +ATF_TP_ADD_TCS(tp) +{ + void *dl_handle; + + dl_handle = dlopen(NULL, RTLD_LAZY); + memccpy_fn = dlsym(dl_handle, "test_memccpy"); /* use a "test_memccpy" symbol if dlsym finds one */ + if (memccpy_fn == NULL) + memccpy_fn = memccpy; /* otherwise exercise the memccpy this program links against */ + + ATF_TP_ADD_TC(tp, null); + ATF_TP_ADD_TC(tp, zero_extension); + ATF_TP_ADD_TC(tp, bounds); + ATF_TP_ADD_TC(tp, alignments); + + return (atf_no_error()); +} diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7 --- a/share/man/man7/simd.7 +++ b/share/man/man7/simd.7 @@ -24,7 +24,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE . -.Dd November 28, 2023 +.Dd December 4, 2023 .Dt SIMD 7 .Os .Sh NAME @@ -60,6 +60,7 @@ .It lldiv Ta Ta Ta S .It memchr Ta S Ta Ta S1 .It memcmp Ta S Ta S Ta S1 Ta S +.It memccpy Ta Ta Ta S1 .It memcpy Ta S Ta S Ta S1 Ta S Ta SV .It memmove Ta S Ta S Ta S1 Ta S Ta SV .It memset Ta S Ta S Ta S Ta S @@ -75,6 +76,7 @@ .It strlcat Ta Ta Ta S1 .It strlcpy Ta Ta Ta S1 .It strlen Ta S Ta S Ta S1 +.It strncat Ta Ta Ta S1 .It strncmp Ta S Ta S Ta S1 Ta S .It strncpy Ta Ta Ta S1 Ta Ta S2 .It strnlen Ta S Ta Ta S1