/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */

/*
 * MEMSET erms
 *
 * Body of memset for amd64 (AT&T syntax, System V argument registers).
 *
 * In:      %rdi = destination, %sil = fill byte, %rdx = length in bytes
 * Out:     %rax = original destination pointer
 * Scratch: %rcx, %rdx, %rdi, %r8, %r9, %r10, flags
 *
 * Three size classes are handled:
 *   len <  32        tail ladder only (16/8/4/2/1-byte stores)
 *   32 <= len <= 256 unrolled 32-byte store loop, then the tail ladder
 *   len >  256       destination is brought to 16-byte alignment, then
 *                    "rep stosb" (erms == 1) or "rep stosq" (erms == 0)
 *
 * No "cld" is issued; the string instructions rely on the direction flag
 * being clear on entry.
 */
.macro MEMSET erms
	movq	%rdi,%rax		/* return value: original destination */
	movq	%rdx,%rcx		/* %rcx = bytes still to be written */
	movzbq	%sil,%r8
	movabs	$0x0101010101010101,%r10
	imulq	%r8,%r10		/* %r10 = fill byte replicated 8 times */

	cmpq	$32,%rcx
	jb	1016f			/* fewer than 32 bytes: tail ladder */

	cmpq	$256,%rcx
	ja	1256f			/* more than 256 bytes: rep stos path */

	/* 32..256 bytes: four qword stores per iteration. */
1032:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r10,24(%rdi)
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	1032b
	cmpb	$0,%cl
	je	1000f			/* length was a multiple of 32 */

	/*
	 * Tail ladder.  Here %rcx < 32, so only %cl is examined and the
	 * signed byte compares below behave the same as unsigned ones.
	 * Each rung writes one power-of-two sized chunk if it still fits.
	 */
1016:
	cmpb	$16,%cl
	jl	1008f
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	subb	$16,%cl
	jz	1000f
	leaq	16(%rdi),%rdi
1008:
	cmpb	$8,%cl
	jl	1004f
	movq	%r10,(%rdi)
	subb	$8,%cl
	jz	1000f
	leaq	8(%rdi),%rdi
1004:
	cmpb	$4,%cl
	jl	1002f
	movl	%r10d,(%rdi)
	subb	$4,%cl
	jz	1000f
	leaq	4(%rdi),%rdi
1002:
	cmpb	$2,%cl
	jl	1001f
	movw	%r10w,(%rdi)
	subb	$2,%cl
	jz	1000f
	leaq	2(%rdi),%rdi
1001:
	cmpb	$1,%cl
	jl	1000f
	movb	%r10b,(%rdi)
1000:
	ret

	/*
	 * More than 256 bytes.  stos takes its data from %rax, so the
	 * return value is parked in %r9 while %rax carries the pattern.
	 */
	ALIGN_TEXT
1256:
	movq	%rdi,%r9		/* save original destination */
	movq	%r10,%rax		/* stos source = fill pattern */
	testl	$15,%edi
	jnz	3f			/* destination not 16-byte aligned */
1:
.if \erms == 1
	rep
	stosb
	movq	%r9,%rax		/* restore return value */
.else
	movq	%rcx,%rdx		/* keep byte count for the tail */
	shrq	$3,%rcx			/* number of whole qwords */
	rep
	stosq
	movq	%r9,%rax		/* restore return value */
	andl	$7,%edx			/* 0..7 bytes left over */
	jnz	2f
	ret
2:
	/*
	 * %rdi points just past the last qword written.  One more qword
	 * store ending exactly at the end of the buffer covers the 1..7
	 * leftover bytes; it overlaps bytes that already hold the pattern.
	 */
	movq	%r10,-8(%rdi,%rdx)
.endif
	ret

	/*
	 * Unaligned destination: fill the first 16 bytes with unaligned
	 * stores, then advance %rdi to the next 16-byte boundary and
	 * shrink %rcx by the number of bytes skipped (16 - misalignment).
	 */
	ALIGN_TEXT
3:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%rdi,%r8
	andq	$15,%r8			/* %r8 = misalignment, 1..15 */
	leaq	-16(%rcx,%r8),%rcx	/* %rcx -= 16 - misalignment */
	neg	%r8
	leaq	16(%rdi,%r8),%rdi	/* %rdi += 16 - misalignment */
	jmp	1b
.endm

/*
 * memset entry point: instantiates the macro with the "rep stosq"
 * (erms=0) large-buffer variant.
 */
ENTRY(memset)
	MEMSET erms=0
END(memset)

	.section .note.GNU-stack,"",%progbits