diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -21,5 +21,6 @@
 MDSRCS+= \
-	memcmp.S
+	memcmp.S \
+	bcmp.S
 
 #
 # Add the above functions. Generate an asm file that includes the needed
diff --git a/lib/libc/aarch64/string/bcmp.S b/lib/libc/aarch64/string/bcmp.S
new file mode 100644
--- /dev/null
+++ b/lib/libc/aarch64/string/bcmp.S
@@ -0,0 +1,8 @@
+/*-
+ * Written by Mateusz Guzik
+ * Public domain.
+ */
+
+#define BCMP
+#include "memcmp.S"
+
diff --git a/lib/libc/aarch64/string/memcmp.S b/lib/libc/aarch64/string/memcmp.S
--- a/lib/libc/aarch64/string/memcmp.S
+++ b/lib/libc/aarch64/string/memcmp.S
@@ -7,7 +7,14 @@
 #include <machine/asm.h>
 #include <machine/param.h>
 
+#ifdef BCMP
+#define	memcmp bcmp
+#define	__memcmp __bcmp
+#endif
+
+	.weak memcmp
+	.set memcmp, __memcmp
 	.text
-ENTRY(memcmp)
+ENTRY(__memcmp)
 	mov	x8, x0		// store base addresses
 	mov	x9, x1
@@ -77,9 +84,10 @@
 	 * vector registers and then doing a quick compare with XOR, UMAXP
 	 * do determine if the first 32 bytes all match.
 	 */
+	.p2align 4
 .Lbegin:
 	ldp	q0, q1, [x8]
 0:
-	ldp     q2, q3, [x9]
+	ldp	q2, q3, [x9]
 1:
 	/* quick check if no matches in first 32 bytes */
@@ -107,7 +115,7 @@
 	clz	x3, x3
 	add	x3, x3, #64
 	cmn	x0, #0		// any match in LSB?
-	csel	x0, x3, x1, eq	// take x3 if none, else x1 matched
+	csel	x0, x3, x1, eq	// take x3 if none, else x1
 	lsr	x0, x0, #2
 
 	cmp	x0, x2
@@ -139,8 +147,9 @@
 	/* If 32 bytes left to compare only load 32 bytes from x8,x9 - limit to
 	 * avoid overread
 	 */
+	.p2align 4
 .Llast32:
-	cmp	x2,#0
+	cmp	x2, #0
 	b.le	.Lnone
 	add	x8, x8, x2
 	add	x9, x9, x2
@@ -178,7 +187,7 @@
 	mov	x0, #0
 	ret
 
-END(memcmp)
+END(__memcmp)
 
 	.section .rodata
 	.p2align 4