diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -15,11 +15,13 @@
 	strchrnul \
 	strcmp \
 	strcpy \
-	strlen \
 	strncmp \
 	strnlen \
 	strrchr
 
+MDSRCS+= \
+	strlen.S
+
 #
 # Add the above functions. Generate an asm file that includes the needed
 # Arm Optimized Routines file defining the function name to the libc name.
diff --git a/lib/libc/aarch64/string/strlen.S b/lib/libc/aarch64/string/strlen.S
new file mode 100644
--- /dev/null
+++ b/lib/libc/aarch64/string/strlen.S
@@ -0,0 +1,63 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen
+*/
+
+#include <machine/asm.h>
+
+	.weak	strlen
+	.set	strlen, __strlen
+	.text
+
+/*
+ * size_t strlen(const char *s)
+ *
+ * In:      x0 = s
+ * Out:     x0 = number of bytes before the first NUL
+ * Scratch: x1-x3, x9, x10, v0
+ *
+ * The string is scanned in 16-byte chunks loaded from 16-byte aligned
+ * addresses.  cmeq turns each NUL byte into 0xff and shrn #4 narrows
+ * that to a 64-bit mask with one nibble per byte, so the index of the
+ * first NUL is ctz(mask) / 4.
+ */
+ENTRY(__strlen)
+	bic	x10, x0, #0xf		// x10 = s rounded down to 16 bytes
+	and	x9, x0, #0xf		// x9 = misalignment of s
+	ldr	q0, [x10]
+	cmeq	v0.16b, v0.16b, #0
+	shrn	v0.8b, v0.8h, #4	// reduce to fit mask in GPR
+	fmov	x1, d0
+	cbz	x9, .Laligned
+	lsl	x2, x0, #2		// 4 mask bits per byte; lsr uses x2 mod 64
+	lsr	x1, x1, x2		// drop nibbles for bytes before s
+	cbz	x1, .Lloop
+	rbit	x1, x1			// rbit + clz = ctz
+	clz	x0, x1
+	lsr	x0, x0, #2		// nibble index = length
+	ret
+
+.Laligned:
+	cbnz	x1, .Ldone
+
+.Lloop:
+	ldr	q0, [x10, #16]!		// advance to the next chunk
+	cmeq	v0.16b, v0.16b, #0
+	shrn	v0.8b, v0.8h, #4	// reduce to fit mask in GPR
+	/*
+	 * Test the mask as an integer.  An fcmp against #0.0 would read it
+	 * as a double: subnormal bit patterns compare equal to zero when
+	 * FPCR.FZ is set (missing the NUL) and signalling-NaN patterns set
+	 * FPSR.IOC.
+	 */
+	fmov	x1, d0
+	cbz	x1, .Lloop
+.Ldone:
+	sub	x0, x10, x0		// bytes before the current chunk
+	rbit	x1, x1			// rbit + clz = ctz
+	clz	x3, x1
+	lsr	x3, x3, #2		// index of the NUL within the chunk
+	add	x0, x0, x3
+	ret
+END(__strlen)