Index: sys/amd64/amd64/support.S =================================================================== --- sys/amd64/amd64/support.S +++ sys/amd64/amd64/support.S @@ -98,11 +98,58 @@ ENTRY(bcmp) PUSH_FRAME_POINTER + /* + * If the length is less than 8, skip right to the 1-byte + * comparisons. + */ + cmpq $8,%rdx + jb 5f + + /* + * If the length is greater than 64, we'll use repe cmpsq + * rather than our own loop. + */ + cmpq $64,%rdx + ja 4f + + /* + * Do 8-byte comparisons in a loop. + * + * In this section, we use these register assignments: + * %r9: Count of 8-byte comparisons. + * %rcx: Current 8-byte comparison index. + * %rdx: Number of remaining 1-byte comparisons. + */ + movq %rdx,%r9 + shrq $3,%r9 + andq $7,%rdx + xor %ecx,%ecx +10: + movq (%rdi,%rcx,8),%r8 + cmpq (%rsi,%rcx,8),%r8 + jne 3f + addq $0x1,%rcx + cmpq %rcx,%r9 + jne 10b + + /* + * If any bytes remain, advance %rdi and %rsi past the 8-byte words + * already compared, in preparation for the 1-byte comparisons. + */ test %rdx,%rdx je 1f - cmpq $64,%rdx - jg 4f + shlq $3,%rcx + addq %rcx,%rsi + addq %rcx,%rdi + jmp 6f +5: + /* Nothing left to compare means the memory matches. */ + test %rdx,%rdx + je 1f + +6: + /* Mop up remaining bytes with 1-byte comparisons. */ xor %ecx,%ecx 2: movzbl (%rdi,%rcx,1),%eax @@ -113,29 +160,27 @@ cmp %rcx,%rdx jne 2b 1: + /* The memory matches: return 0. */ xor %eax,%eax POP_FRAME_POINTER retq 3: + /* The memory is different: return 1. */ mov $1,%eax POP_FRAME_POINTER retq 4: + /* + * Use repe cmpsq to compare long buffers 8 bytes at a time. Then, + * mop up any remaining bytes with the 1-byte comparison loop. + */ movq %rdx,%rcx shrq $3,%rcx repe cmpsq - jne 5f - - movq %rdx,%rcx - andq $7,%rcx - repe - cmpsb -5: - setne %al - movsbl %al,%eax - POP_FRAME_POINTER - ret + jne 3b + andq $7,%rdx + jmp 5b END(bcmp) /*