diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 09bfaef963eb..34483532a3dd 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -1,44 +1,45 @@
 #
 # String handling from the Arm Optimized Routines
 # https://github.com/ARM-software/optimized-routines
 #
 
 AARCH64_STRING_FUNCS= \
 	memchr \
 	memcmp \
 	memcpy \
 	memmove \
 	memrchr \
 	memset \
 	stpcpy \
 	strchr \
 	strchrnul \
 	strcpy \
 	strlen \
 	strncmp \
 	strnlen \
 	strrchr
 
 # SIMD-enhanced routines not derived from Arm's code
 MDSRCS+= \
 	strcmp.S \
-	strspn.S
+	strspn.S \
+	strcspn.S
 
 #
 # Add the above functions. Generate an asm file that includes the needed
 # Arm Optimized Routines file defining the function name to the libc name.
 # Some file need multiple macros defined or a weak symbol added we can
 # override the generated file in these cases.
 #
 .for FUNC in ${AARCH64_STRING_FUNCS}
 .if !exists(${FUNC}.S)
 ${FUNC}.S:
 	printf '/* %sgenerated by libc/aarch64/string/Makefile.inc */\n' @ > ${.TARGET}
 	printf '#define __%s_aarch64 %s\n' ${FUNC} ${FUNC} >> ${.TARGET}
 	printf '#include "aarch64/%s.S"\n' ${FUNC} >> ${.TARGET}
 CLEANFILES+=	${FUNC}.S
 .endif
 
 MDSRCS+=	${FUNC}.S
 CFLAGS.${FUNC}.S+=-I${SRCTOP}/contrib/arm-optimized-routines/string
 .endfor
diff --git a/lib/libc/aarch64/string/strcspn.S b/lib/libc/aarch64/string/strcspn.S
new file mode 100644
index 000000000000..8f2d6d20f0f6
--- /dev/null
+++ b/lib/libc/aarch64/string/strcspn.S
@@ -0,0 +1,126 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen
+*/
+
+#include <machine/asm.h>
+
+	.weak	strcspn
+	.set	strcspn, __strcspn
+	.text
+
+/*
+ * size_t strcspn(const char *src, const char *set)
+ *
+ * In:	x0 = src, x1 = set
+ * Out:	x0 = number of leading bytes of src that are not in set
+ *
+ * An empty set is handed to strlen and a one-byte set to strchrnul.
+ * Otherwise a 256-byte table indexed by byte value is built on the
+ * stack.  The NUL terminating the set is registered in it as well, so
+ * the scan over src also stops at the end of the string.
+ *
+ * Only caller-saved scratch registers are used: x3-x5, x8-x10, x15, v0.
+ */
+ENTRY(__strcspn)
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+	mov	x15, #1			// preload register with 1 for stores
+
+	/* check for special cases */
+	ldrb	w4, [x1]		// first character in the set
+	cbz	w4, .Lstrlen
+
+	movi	v0.16b, #0		// zero vector, used to clear the table
+
+	ldrb	w5, [x1, #1]		// second character in the set
+	cbz	w5, .Lstrchr
+
+	sub	sp, sp, #256		// allocate 256 bytes on the stack
+
+	/* no special case matches -- prepare lookup table */
+	mov	w3, #20			// clear bytes 0..191, 32 per pass
+	.p2align 4
+0:	add	x9, sp, x3, lsl #3
+	stp	xzr, xzr, [x9]
+	stp	xzr, xzr, [x9, #16]
+	subs	w3, w3, #4
+	b.cs	0b
+
+	/* utilize SIMD stores to speed up zeroing the table */
+	stp	q0, q0, [sp, #6*32]	// bytes 192..223
+	stp	q0, q0, [sp, #7*32]	// bytes 224..255
+
+	add	x1, x1, #2
+	strb	w15, [sp, x4]		// register first chars in the set
+	strb	w15, [sp, x5]
+
+	mov	x4, x0			// stash a copy of src
+
+	/* process remaining chars in set */
+	.p2align 4
+0:	ldrb	w5, [x1]
+	strb	w15, [sp, x5]		// NUL is registered too
+	cbz	w5, 1f			// end of set?
+
+	ldrb	w5, [x1, #1]
+	strb	w15, [sp, x5]
+	cbz	w5, 1f
+
+	add	x1, x1, #2
+	b	0b
+
+	/* find match */
+	.p2align 4
+1:	ldrb	w8, [x0]
+	ldrb	w9, [sp, x8]
+	cbnz	w9, 2f
+
+	ldrb	w8, [x0, #1]
+	ldrb	w9, [sp, x8]
+	cbnz	w9, 3f
+
+	ldrb	w8, [x0, #2]
+	ldrb	w9, [sp, x8]
+	cbnz	w9, 4f
+
+	ldrb	w8, [x0, #3]
+	ldrb	w9, [sp, x8]
+	add	x0, x0, #4
+	cbz	w9, 1b
+
+	sub	x0, x0, #3		// fix up return value
+4:	sub	x4, x4, #1		// offset 2: (x0 + 1) - (x4 - 1)
+3:	add	x0, x0, #1		// offset 1
+2:	sub	x0, x0, x4		// length = match position - src
+	mov	sp, x29			// release the lookup table
+	ldp	x29, x30, [sp], #16	// restore fp and lr
+	ret
+
+	/* set is empty, degrades to strlen */
+	.p2align 4
+.Lstrlen:
+	mov	sp, x29
+	ldp	x29, x30, [sp], #16	// restore fp and lr
+	b	strlen			// tail call, x0 still holds src
+
+	/* just one character in set, degrades to strchrnul */
+	.p2align 4
+.Lstrchr:
+	stp	x0, x1, [sp, #-16]!	// stash src across the call
+	mov	x1, x4			// the only character in the set
+
+	bl	strchrnul
+
+	/*
+	 * Reload into plain scratch registers: x18 is the AAPCS64 platform
+	 * register and is best left untouched by library code.
+	 */
+	ldp	x9, x10, [sp], #16	// x9 = stashed src, x10 is unused
+	sub	x0, x0, x9		// match pointer -> length
+
+	ldp	x29, x30, [sp], #16	// restore fp and lr
+	ret
+
+END(__strcspn)